10
|
1 #!/usr/bin/env python3
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 # I m p o r t s
|
|
5
|
|
6 import array
|
|
7 import codecs
|
|
8 import collections
|
|
9 import struct
|
|
10 import sys
|
|
11
|
|
12 # C l a s s e s
|
|
13
|
|
class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The class
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array of 16-bit unsigned integers, typecode 'H') used
    to back this object; modifications to that array will be reflected
    in this object.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Build the backing buffer from based_on, which may be:
          * None: start out empty;
          * a str: encoded strictly with the class codec;
          * an array.array of typecode 'H': used directly, NOT copied,
            so the caller's array and this object share storage;
          * any other array.array, a Runes, or any other initializer
            array.array('H', ...) accepts: copied into a new array.
        """
        if isinstance(based_on, array.array):
            if based_on.typecode == 'H':
                self.buffer = based_on
            else:
                self.buffer = array.array('H', based_on)
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
        elif based_on is None:
            self.buffer = array.array('H', bytes())
        elif isinstance(based_on, Runes):
            self.buffer = array.array('H', based_on.buffer)
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (not bytes, not characters)."""
        return len(self.buffer)

    # Rich comparisons compare the backing arrays. For operands that are
    # not Runes we return NotImplemented so that Python can try the
    # reflected operation or fall back to its defaults, instead of
    # failing with AttributeError on the missing buffer attribute.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __hash__(self):
        """
        Hash the current contents, consistently with __eq__. The value
        changes whenever the runes change, so do not mutate an object
        while it is in use as a dictionary key or set member.
        """
        # array.array itself is unhashable; hash an immutable snapshot.
        return hash(self.buffer.tobytes())

    def __bool__(self):
        """True when at least one rune is present."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the rune (an int) at an integer index, or a new Runes
        wrapping the sliced array when key is a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        else:
            return ret

    def __setitem__(self, key, value):
        """
        Store an int at an integer index, or a Runes into a slice.
        Raises TypeError for any other combination.
        """
        if isinstance(key, int):
            if isinstance(value, int):
                self.buffer[key] = value
            else:
                raise TypeError("integer required")
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        """Delete the rune or slice of runes selected by key."""
        del self.buffer[key]

    def clear(self):
        """Remove every rune, in place."""
        del self[:]

    def __iter__(self):
        """Iterate over the runes as ints."""
        return iter(self.buffer)

    def __reversed__(self):
        """Iterate over the runes as ints, last to first."""
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (an int), or every rune of another Runes.
        Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        """Report whether value occurs in the backing array."""
        return value in self.buffer

    def index(self, value):
        """
        Return the position of the first occurrence of value; raises
        ValueError when it is absent.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index(), but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1
|
|
130
|
|
class Workspace(Runes):
    """
    A Runes variant with a more string-like interface: strings are
    accepted by __setitem__, append, index and find, and __getitem__
    always hands back a string. Reading an out-of-range index yields an
    empty string rather than raising IndexError, which keeps the
    quote-curling code simpler.
    """

    def __setitem__(self, key, value):
        """
        Like Runes.__setitem__, except that a string value is converted
        first: to a single rune for an integer key, to Runes otherwise.
        """
        if not isinstance(value, str):
            converted = value
        elif isinstance(key, int):
            converted = self._ord(value)
        else:
            converted = Runes(value)
        Runes.__setitem__(self, key, converted)

    def __getitem__(self, key):
        """
        Return the selected rune(s) as a string; an IndexError from the
        underlying lookup becomes the empty string.
        """
        try:
            found = Runes.__getitem__(self, key)
        except IndexError:
            return ""
        if isinstance(found, Runes):
            return str(found)
        if isinstance(found, int):
            return chr(found)
        raise AssertionError("this shouldn't happen")

    def append(self, value):
        """Append an int, a Runes, or (after conversion to Runes) a string."""
        Runes.append(self, Runes(value) if isinstance(value, str) else value)

    def index(self, value):
        """
        Return the position of the first occurrence of value, which may
        be an int rune or a one-character string. Raises ValueError when
        it is absent or the string is not a single BMP character.
        """
        needle = self._ord(value) if isinstance(value, str) else value
        return Runes.index(self, needle)

    def find(self, value):
        """Like index(), but return -1 instead of raising ValueError."""
        try:
            position = self.index(value)
        except ValueError:
            position = -1
        return position

    def _ord(self, string):
        """
        Return the single rune encoding the one-character string given.
        Raises ValueError if the string is not exactly one character, or
        if that character needs more than one rune.
        """
        n_chars = len(string)
        if n_chars != 1:
            raise ValueError("expected a character, but string of length {0} found".format(n_chars))
        units = Runes(string)
        if len(units) == 1:
            return units[0]
        raise ValueError("character not in BMP")
|