Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate runes.py @ 15:0be0586104b7
Plug the leak properly (I hope).
author | David Barts <n5jrn@me.com> |
---|---|
date | Fri, 27 Dec 2019 13:35:51 -0800 |
parents | 152f6aa87d62 |
children | 61772bf1f77c |
rev | line source |
---|---|
10 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # I m p o r t s | |
5 | |
6 import array | |
7 import codecs | |
8 import collections | |
9 import struct | |
10 import sys | |
11 | |
12 # C l a s s e s | |
13 | |
class Runes(object):
    """
    A mutable sequence of UTF-16 code units ("runes").

    The class attributes encoding and codec contain the name of the
    encoding and the codec used to generate the UTF-16. The instance
    attribute buffer contains the buffer (an array, or a memoryview, of
    16-bit unsigned integers) used to back this object; modifications to
    that buffer will be reflected in this object.

    Operations that change the length (append, clear, deleting items)
    are delegated to the buffer, so they only work when the buffer
    itself supports them.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    # Runes are mutable and compare by value, so they are unhashable.
    # Setting __hash__ to None (rather than defining a method that
    # raises) still makes hash() raise TypeError, and also lets
    # hashability checks report the truth.
    __hash__ = None

    def __init__(self, based_on=None):
        """
        Build a Runes object from based_on:

        * an array or memoryview of type 'H' is used as-is (shared, not
          copied);
        * an array or memoryview of another type is copied into a new
          'H' array;
        * a str is encoded to UTF-16 in native byte order;
        * None yields an empty sequence;
        * another Runes has its buffer copied;
        * anything else is handed to array.array('H', ...).
        """
        if isinstance(based_on, (array.array, memoryview)):
            if isinstance(based_on, array.array):
                fmt = based_on.typecode
            else:
                fmt = based_on.format
            if fmt == 'H':
                # Already the right element type: share it.
                self.buffer = based_on
            else:
                self.buffer = array.array('H', based_on)
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
        elif based_on is None:
            self.buffer = array.array('H', bytes())
        elif isinstance(based_on, Runes):
            self.buffer = array.array('H', based_on.buffer)
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (not characters, not bytes)."""
        return len(self.buffer)

    # Comparisons are done buffer-to-buffer. Anything exposing a buffer
    # attribute is accepted (as before); for anything else we return
    # NotImplemented so Python can try the reflected operation or fall
    # back to its defaults, instead of leaking an AttributeError.

    def __lt__(self, other):
        theirs = getattr(other, 'buffer', None)
        if theirs is None:
            return NotImplemented
        return self.buffer < theirs

    def __le__(self, other):
        theirs = getattr(other, 'buffer', None)
        if theirs is None:
            return NotImplemented
        return self.buffer <= theirs

    def __gt__(self, other):
        theirs = getattr(other, 'buffer', None)
        if theirs is None:
            return NotImplemented
        return self.buffer > theirs

    def __ge__(self, other):
        theirs = getattr(other, 'buffer', None)
        if theirs is None:
            return NotImplemented
        return self.buffer >= theirs

    def __eq__(self, other):
        theirs = getattr(other, 'buffer', None)
        if theirs is None:
            return NotImplemented
        return self.buffer == theirs

    def __ne__(self, other):
        theirs = getattr(other, 'buffer', None)
        if theirs is None:
            return NotImplemented
        return self.buffer != theirs

    def __bool__(self):
        """True if there is at least one rune."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the integer rune at an index, or a Runes wrapping the
        array produced by a slice.
        """
        ret = self.buffer[key]
        # NOTE(review): when the buffer is a memoryview, a slice is
        # itself a memoryview and is returned unwrapped here. Left
        # unchanged for compatibility; confirm whether callers rely on it.
        if isinstance(ret, array.array):
            return Runes(ret)
        else:
            return ret

    def __setitem__(self, key, value):
        """
        Store an integer at an integer index, or the contents of
        another Runes at any other kind of key (e.g. a slice).

        Raises TypeError if the value does not match the kind of key.
        """
        if isinstance(key, int):
            if isinstance(value, int):
                self.buffer[key] = value
            else:
                raise TypeError("integer required")
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        """Delete the rune(s) selected by key from the buffer."""
        del self.buffer[key]

    def __del__(self):
        # Paranoid: release a memoryview buffer as soon as we go away.
        # __init__ may have raised before buffer was assigned, in which
        # case there is nothing to release.
        buf = getattr(self, 'buffer', None)
        if isinstance(buf, memoryview):
            buf.release()

    def clear(self):
        """Remove all runes."""
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single integer rune, or all the runes of another Runes
        object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value, as reported
        by the buffer's own index method (ValueError if absent).
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1
136 | |
class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.

    find is inherited unchanged; it calls index, so it accepts
    single-character strings here as well.
    """
    def __setitem__(self, key, value):
        """
        Like Runes.__setitem__, but a str value is accepted too: a
        single character for an integer key, any string otherwise.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                super().__setitem__(key, self._ord(value))
            else:
                super().__setitem__(key, Runes(value))
        else:
            super().__setitem__(key, value)

    def __getitem__(self, key):
        """
        Return the rune(s) selected by key as a string. An out-of-range
        integer index yields "" rather than raising IndexError.
        """
        # Both views are managed by with-statements so they are released
        # on every exit path, including when decoding raises; a view
        # left alive would keep the underlying array locked.
        with memoryview(self.buffer) as view:
            try:
                result = view[key]
            except IndexError:
                return ""
            if isinstance(result, int):
                return chr(result)
            if isinstance(result, memoryview):
                with result:
                    # The decoder needs contiguous memory; an extended
                    # slice (step other than 1) is not, so copy it out.
                    if result.c_contiguous:
                        data = result
                    else:
                        data = result.tobytes()
                    return self.codec.decode(data, 'replace')[0]
            raise AssertionError("this shouldn't happen")

    def append(self, value):
        """Like Runes.append, but a str is accepted as well."""
        if isinstance(value, str):
            super().append(Runes(value))
        else:
            super().append(value)

    def index(self, value):
        """
        Like Runes.index, but a single-character str is accepted as
        well. Raises ValueError if the value is absent, or if a str is
        not exactly one rune long.
        """
        if isinstance(value, str):
            return super().index(self._ord(value))
        else:
            return super().index(value)

    def _ord(self, string):
        """
        Return the rune for a one-character string. Raises ValueError
        if the string is not exactly one character, or if the character
        needs more than one rune (i.e. it lies outside the BMP).
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]