Mercurial > cgi-bin > hgweb.cgi > curlyq
comparison runes.py @ 10:397c178c5b98
Make it array-based.
author | David Barts <n5jrn@me.com> |
---|---|
date | Fri, 27 Dec 2019 11:26:00 -0800 |
parents | |
children | ab7d6e908034 |
comparison
equal
deleted
inserted
replaced
9:84adbbb69a9d | 10:397c178c5b98 |
---|---|
1 #!/usr/bin/env python3 | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # I m p o r t s | |
5 | |
6 import array | |
7 import codecs | |
8 import collections | |
9 import struct | |
10 import sys | |
11 | |
12 # C l a s s e s | |
13 | |
class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The attributes
    encoding and codec contain the name of the encoding and the codec
    used to generate the UTF-16. The attribute buffer contains the
    buffer (an array of unsigned shorts, typecode 'H') used to back this
    object; modifications to that array will be reflected in this
    object.

    Ordering and equality compare the backing buffers element by element
    and are only defined between Runes objects; comparing against any
    other type yields NotImplemented, so == is False and < raises
    TypeError, as for other built-in sequences.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Create a Runes object.

        based_on may be:
          * an array.array with typecode 'H', which is used directly as
            the backing buffer (shared, not copied);
          * a str, which is encoded to UTF-16 in native byte order;
          * None, which gives an empty sequence;
          * anything else array.array('H', ...) accepts as an
            initializer (another array, bytes, an iterable of ints).
        """
        if isinstance(based_on, array.array) and based_on.typecode == 'H':
            self.buffer = based_on
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            encoded = self.codec.encode(based_on, 'strict')[0]
            self.buffer = array.array('H', encoded)
        elif based_on is None:
            self.buffer = array.array('H')
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (not bytes, not characters)."""
        return len(self.buffer)

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __hash__(self):
        """
        Hash the current contents. array.array itself is unhashable, so
        the hash is taken over the buffer's bytes; equal Runes therefore
        hash equally. The value changes if the object is mutated, so do
        not modify a Runes object while it is a dict key or set member.
        """
        return hash(bytes(self.buffer))

    def __bool__(self):
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the rune (an int) at an integer index, or a new Runes
        object for a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        Assign an int to an integer index, or a Runes object to a slice.
        Raises TypeError for any other combination.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """Remove all runes, leaving an empty sequence."""
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (an int) or all the runes of another Runes
        object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value; raises
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1
128 | |
class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__,
    append, index, find and the in operator also accept a string as an
    argument and __getitem__ always returns a string. We also return
    empty strings instead of throwing IndexError when attempting to read
    out-of-range values, because that makes life easier for us when
    curling quotes.

    find is inherited from Runes; it calls self.index, so it picks up
    the string-aware index defined here.
    """
    def __setitem__(self, key, value):
        """
        Like Runes.__setitem__, but a one-character string may be
        assigned to an integer index and any string to a slice.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        super().__setitem__(key, value)

    def __getitem__(self, key):
        """
        Return the rune at an integer index as a one-character string,
        or a slice decoded to a string. An out-of-range index yields ""
        rather than raising IndexError.
        """
        # Only the lookup itself can run off the end of the buffer.
        try:
            ret = super().__getitem__(key)
        except IndexError:
            return ""
        if isinstance(ret, int):
            return chr(ret)
        if isinstance(ret, Runes):
            return str(ret)
        raise AssertionError("this shouldn't happen")

    def __contains__(self, value):
        """
        Membership test that, like index, also accepts a string. A
        string that _ord cannot turn into exactly one rune (wrong
        length, outside the BMP, or not encodable) is never contained.
        """
        if isinstance(value, str):
            try:
                value = self._ord(value)
            except ValueError:
                return False
        return super().__contains__(value)

    def append(self, value):
        """Like Runes.append, but a string is appended rune by rune."""
        if isinstance(value, str):
            value = Runes(value)
        super().append(value)

    def index(self, value):
        """
        Like Runes.index, but value may be a one-character string.
        Raises ValueError if the value is absent or the string is not a
        single BMP character.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return super().index(value)

    def _ord(self, string):
        """
        Return the single rune that encodes the one-character string
        given. Raises ValueError if the string is not exactly one
        character long or if the character needs more than one rune.
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        # A character outside the BMP encodes as a surrogate pair.
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]