Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate runes.py @ 21:35f29952b51e
Remove deadwood.
author | David Barts <n5jrn@me.com> |
---|---|
date | Sat, 28 Dec 2019 06:32:53 -0800 |
parents | be0fd5c8121d |
children | a771878f6cf4 |
rev | line source |
---|---|
10 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # I m p o r t s | |
5 | |
6 import array | |
7 import codecs | |
8 import collections | |
9 import struct | |
10 import sys | |
11 | |
12 # C l a s s e s | |
13 | |
class Runes(object):
    """
    A mutable sequence of UTF-16 code units ("runes"). The attributes
    encoding and codec contain the name of the encoding and the codec
    used to generate the UTF-16. The attribute buffer contains the
    buffer (an array of 16-bit unsigned integers) used to back this
    object; modifications to that array will be reflected in this
    object.
    """
    # Native byte order, so the encoded bytes can be loaded straight
    # into an array of unsigned shorts without any swapping.
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    # Runes are mutable and compare by value, so they must not be hashed.
    __hash__ = None

    def __init__(self, based_on=None):
        """
        Build the backing buffer from based_on, which may be:
          * None: an empty buffer is created;
          * a str: it is encoded to UTF-16 code units;
          * another Runes: its buffer is copied;
          * an array.array with typecode 'H': it is used directly
            (shared, not copied);
          * anything else array.array('H', ...) accepts as initializer.
        """
        if based_on is None:
            self.buffer = array.array('H')
        elif isinstance(based_on, str):
            # 'surrogatepass' lets lone surrogates through as the single
            # code unit they are, so every str can be turned into runes.
            encoded = self.codec.encode(based_on, 'surrogatepass')[0]
            self.buffer = array.array('H', encoded)
        elif isinstance(based_on, Runes):
            self.buffer = array.array('H', based_on.buffer)
        elif isinstance(based_on, array.array) and based_on.typecode == 'H':
            # Already in the right form; share it rather than copy it.
            self.buffer = based_on
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string, so
        # undecodable units are replaced rather than raising.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """
        Return the raw bytes of the backing buffer.
        """
        return bytes(self.buffer)

    def __len__(self):
        """
        Return the number of runes (code units), not characters.
        """
        return len(self.buffer)

    # Comparisons delegate to the backing arrays. Anything that is not
    # a Runes gets NotImplemented, so == and != fall back to Python's
    # defaults and ordering comparisons raise TypeError.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __bool__(self):
        """
        True if there is at least one rune.
        """
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        An integer key returns the rune as an int; a slice returns a
        new Runes object.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        An integer key requires an integer value; any other key (i.e.
        a slice) requires a Runes value. Raises TypeError otherwise.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """
        Remove all runes.
        """
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (an int) or all the runes of another
        Runes object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the position of the first occurrence of value; raises
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """
        Like index, but returns -1 instead of raising ValueError.
        """
        try:
            return self.index(value)
        except ValueError:
            return -1
130 | |
class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.
    """
    def __setitem__(self, key, value):
        """
        As Runes.__setitem__, but a string value is also accepted: a
        single character for an integer key, any string otherwise.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        Runes.__setitem__(self, key, value)

    def __getitem__(self, key):
        """
        Return the rune(s) selected by key as a string. An out-of-range
        integer key returns "" instead of raising IndexError.
        """
        # The memoryview lets us decode a slice without first copying it
        # into a new array. Views must be released before returning, or
        # the backing array could not be resized afterwards; the with
        # blocks guarantee that even if decoding raises.
        with memoryview(self.buffer) as view:
            try:
                result = view[key]
            except IndexError:
                return ""
            if isinstance(result, int):
                return chr(result)
            if not isinstance(result, memoryview):
                raise AssertionError("this shouldn't happen")
            with result:
                # The codec needs contiguous memory; an extended slice
                # (step other than 1) is not, so copy it out in that case.
                raw = result if result.c_contiguous else result.tobytes()
                return self.codec.decode(raw, 'replace')[0]

    def append(self, value):
        """
        As Runes.append, but a string is also accepted.
        """
        if isinstance(value, str):
            value = Runes(value)
        Runes.append(self, value)

    def index(self, value):
        """
        As Runes.index, but a single-character string is also accepted.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return Runes.index(self, value)

    def _ord(self, string):
        """
        Return the rune for a one-character string. Raises ValueError
        if the string is not exactly one character long, or if that
        character lies outside the BMP and so needs two runes.
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        # For a BMP character the code point is the UTF-16 code unit.
        # That includes lone surrogates, which __getitem__ can hand out
        # and which therefore must be accepted back here.
        code = ord(string)
        if code > 0xFFFF:
            raise ValueError("character not in BMP")
        return code