Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate runes.py @ 20:be0fd5c8121d
Can't use memoryview to base Runes on; different methods inside.
author | David Barts <n5jrn@me.com> |
---|---|
date | Sat, 28 Dec 2019 01:22:03 -0800 |
parents | 0c8d787bc7e1 |
children | 35f29952b51e |
rev | line source |
---|---|
10 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # I m p o r t s | |
5 | |
6 import array | |
7 import codecs | |
8 import collections | |
9 import struct | |
10 import sys | |
11 | |
12 # C l a s s e s | |
13 | |
class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array.array of typecode 'H') used to back this
    object; modifications to that array will be reflected in this
    object.

    The length is not fixed: append(), clear() and deletion of items or
    slices all resize the backing array.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    # Runes are mutable and compare by value, so they are deliberately
    # unhashable; hash() raises TypeError.
    __hash__ = None

    def __init__(self, based_on=None):
        """
        Create a new object from based_on, which may be:
          * an array.array of typecode 'H': used directly (shared, not
            copied) as the backing buffer;
          * any other array.array: its items are copied into a new
            'H' array;
          * a str: strictly encoded to UTF-16 in the native byte order;
          * None: yields an empty object;
          * another Runes: its buffer is copied;
          * anything else array.array('H', ...) accepts as initializer.
        """
        if isinstance(based_on, array.array):
            if based_on.typecode == 'H':
                self.buffer = based_on
            else:
                self.buffer = array.array('H', based_on)
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
        elif based_on is None:
            self.buffer = array.array('H')
        elif isinstance(based_on, Runes):
            self.buffer = array.array('H', based_on.buffer)
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (code units, not characters)."""
        return len(self.buffer)

    # Comparisons are defined only against other Runes (including
    # subclasses). For anything else we return NotImplemented so Python
    # can try the reflected operation or fall back to its defaults
    # (False for ==, True for !=, TypeError for ordering) instead of
    # failing with AttributeError on a missing .buffer attribute.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __bool__(self):
        """Return True if this object contains at least one rune."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the integer rune at an index, or a new Runes object for
        a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        else:
            return ret

    def __setitem__(self, key, value):
        """
        Assign an integer to an integer index, or a Runes object to any
        other kind of key (normally a slice). Raises TypeError if value
        has the wrong type for the key.
        """
        if isinstance(key, int):
            if isinstance(value, int):
                self.buffer[key] = value
            else:
                raise TypeError("integer required")
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        """Delete the rune or slice of runes selected by key."""
        del self.buffer[key]

    def __del__(self):
        # Paranoid: __init__ only ever stores an array.array, but the
        # buffer attribute is public and may have been replaced.
        if hasattr(self, 'buffer') and isinstance(self.buffer, memoryview):
            self.buffer.release()

    def clear(self):
        """Remove all runes from this object."""
        del self[:]

    def __iter__(self):
        """Iterate over the runes as integers."""
        return iter(self.buffer)

    def __reversed__(self):
        """Iterate over the runes as integers, last to first."""
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single integer rune, or all the runes of another Runes
        object. Raises TypeError for any other type.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        """Return True if value occurs in the backing buffer."""
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value. Raises
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index(), but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1
135 | |
class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.
    """
    def __setitem__(self, key, value):
        """
        Like Runes.__setitem__, but a str value is converted first: to
        a single rune for an integer key, or to Runes for any other key.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        super().__setitem__(key, value)

    def __getitem__(self, key):
        """
        Return the item or slice selected by key as a string. An
        out-of-range index yields "" rather than raising IndexError.
        """
        try:
            # Slice through a memoryview so that a contiguous slice can
            # be decoded without first copying it. The with blocks
            # release both views on every exit path, so the backing
            # array can be resized again afterwards.
            with memoryview(self.buffer) as view:
                result = view[key]
                if isinstance(result, int):
                    # A single code unit is converted with chr() and is
                    # not passed through the codec.
                    return chr(result)
                if not isinstance(result, memoryview):
                    raise AssertionError("this shouldn't happen")
                with result:
                    if result.contiguous:
                        return self.codec.decode(result, 'replace')[0]
                    # A stepped slice is not contiguous and the codec
                    # refuses such buffers, so pack it into bytes first.
                    return self.codec.decode(result.tobytes(), 'replace')[0]
        except IndexError:
            return ""

    def append(self, value):
        """Like Runes.append, but also accepts a str of any length."""
        if isinstance(value, str):
            value = Runes(value)
        super().append(value)

    def index(self, value):
        """
        Like Runes.index, but also accepts a single-character str.
        Raises ValueError if value is absent or is not a single rune.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return super().index(value)

    def find(self, value):
        """Like index(), but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1

    def _ord(self, string):
        """
        Return the single rune that encodes the one-character str
        string. Raises ValueError if string is not exactly one character
        long or if that character does not encode to exactly one rune.
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]