10
|
1 #!/usr/bin/env python3
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 # I m p o r t s
|
|
5
|
|
6 import array
|
|
7 import codecs
|
|
8 import collections
|
|
9 import struct
|
|
10 import sys
|
|
11
|
|
12 # C l a s s e s
|
|
13
|
|
class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array of 16-bit unsigned integers) used to back this
    object; modifications to that array will be reflected in this
    object.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Build the backing buffer from based_on, which may be:
          * an array.array with typecode 'H' (adopted as-is, not copied),
          * any other array.array (converted to a new 'H' array),
          * a str (encoded to UTF-16 in native byte order),
          * None (an empty buffer),
          * anything else array.array('H', ...) accepts.
        """
        if isinstance(based_on, array.array):
            if based_on.typecode == 'H':
                self.buffer = based_on
            else:
                self.buffer = array.array('H', based_on)
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes, so lone
            # surrogates are passed through rather than rejected (the
            # 'strict' handler would raise UnicodeEncodeError on them).
            encoded = self.codec.encode(based_on, 'surrogatepass')[0]
            self.buffer = array.array('H', encoded)
        elif based_on is None:
            self.buffer = array.array('H')
        else:
            self.buffer = array.array('H', based_on)

    @staticmethod
    def _peer_buffer(other):
        """
        Return the backing array of other, or None when other does not
        expose an array.array through a buffer attribute.
        """
        peer = getattr(other, "buffer", None)
        return peer if isinstance(peer, array.array) else None

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (not characters, not bytes)."""
        return len(self.buffer)

    # Comparisons delegate to the backing arrays. When the other operand
    # is not rune-like we return NotImplemented so Python can try the
    # reflected operation (or fall back to its defaults) instead of
    # failing with AttributeError.

    def __lt__(self, other):
        peer = self._peer_buffer(other)
        return NotImplemented if peer is None else self.buffer < peer

    def __le__(self, other):
        peer = self._peer_buffer(other)
        return NotImplemented if peer is None else self.buffer <= peer

    def __gt__(self, other):
        peer = self._peer_buffer(other)
        return NotImplemented if peer is None else self.buffer > peer

    def __ge__(self, other):
        peer = self._peer_buffer(other)
        return NotImplemented if peer is None else self.buffer >= peer

    def __eq__(self, other):
        peer = self._peer_buffer(other)
        return NotImplemented if peer is None else self.buffer == peer

    def __ne__(self, other):
        peer = self._peer_buffer(other)
        return NotImplemented if peer is None else self.buffer != peer

    def __hash__(self):
        """
        Hash the current contents. array.array itself is unhashable, so
        the hash is taken over the buffer's bytes. The value changes
        whenever the runes are modified; do not mutate an object while
        it is being used as a dictionary key or set member.
        """
        return hash(self.buffer.tobytes())

    def __bool__(self):
        """Return True when at least one rune is present."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return a single rune (an int) for an integer key, or a new Runes
        object for a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        Store an int at an integer key, or splice a Runes object into a
        non-integer (slice) key. Raises TypeError for any other pairing.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        """Delete the rune or slice of runes selected by key."""
        del self.buffer[key]

    def clear(self):
        """Remove every rune, leaving the same backing array empty."""
        del self[:]

    def __iter__(self):
        """Iterate over the runes as integers."""
        return iter(self.buffer)

    def __reversed__(self):
        """Iterate over the runes as integers, last to first."""
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (int) or all runes of another Runes object.
        Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        """Return True when value occurs in the backing buffer."""
        return value in self.buffer

    def index(self, value):
        """
        Return the position of the first occurrence of value; raises
        ValueError when it is absent.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index(), but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1
|
|
128
|
|
class Workspace(Runes):
    """
    A string-flavoured Runes: __setitem__, append and index additionally
    accept strings, and __getitem__ always hands back a string. Reads
    that would raise IndexError yield an empty string instead, because
    that makes life easier for us when curling quotes.
    """
    def _ord(self, string):
        """
        Return the single rune that encodes the one-character string.
        Raises ValueError when string is not exactly one character long
        or when that character needs more than one rune.
        """
        size = len(string)
        if size != 1:
            raise ValueError("expected a character, but string of length {0} found".format(size))
        units = Runes(string)
        if len(units) != 1:
            raise ValueError("character not in BMP")
        return units[0]

    def __setitem__(self, key, value):
        """
        Assign value at key. A string becomes a single rune for an
        integer key and a Runes object otherwise; non-string values are
        forwarded unchanged.
        """
        if not isinstance(value, str):
            payload = value
        elif isinstance(key, int):
            payload = self._ord(value)
        else:
            payload = Runes(value)
        Runes.__setitem__(self, key, payload)

    def __getitem__(self, key):
        """
        Return the rune(s) at key as a string, or "" when the lookup
        raises IndexError.
        """
        try:
            found = Runes.__getitem__(self, key)
        except IndexError:
            return ""
        if isinstance(found, Runes):
            return str(found)
        if isinstance(found, int):
            return chr(found)
        raise AssertionError("this shouldn't happen")

    def append(self, value):
        """Append value, first converting a string to Runes."""
        Runes.append(self, Runes(value) if isinstance(value, str) else value)

    def index(self, value):
        """
        Return the position of value, which may be a rune or a
        one-character string; raises ValueError when it is not found.
        """
        needle = self._ord(value) if isinstance(value, str) else value
        return Runes.index(self, needle)

    def find(self, value):
        """Like index(), but return -1 instead of raising ValueError."""
        try:
            position = self.index(value)
        except ValueError:
            position = -1
        return position
|