comparison runes.py @ 10:397c178c5b98

Make it array-based.
author David Barts <n5jrn@me.com>
date Fri, 27 Dec 2019 11:26:00 -0800
parents
children ab7d6e908034
comparison
equal deleted inserted replaced
9:84adbbb69a9d 10:397c178c5b98
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 # I m p o r t s
5
6 import array
7 import codecs
8 import collections
9 import struct
10 import sys
11
12 # C l a s s e s
13
class Runes(object):
    """
    A mutable sequence of fixed-width (16-bit) UTF-16 runes. The
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array of 16-bit unsigned integers) used to back this
    object; modifications to that array will be reflected in this
    object.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Create a new object. based_on may be an array of typecode 'H'
        (used directly as the backing buffer, not copied), a string
        (encoded to UTF-16), None (an empty buffer), or anything else
        array.array('H', ...) accepts as an initializer.
        """
        if isinstance(based_on, array.array) and based_on.typecode == 'H':
            # Share, don't copy: the caller's array backs this object.
            self.buffer = based_on
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes, so let
            # unpaired surrogates through as their own code units rather
            # than failing on them.
            encoded = self.codec.encode(based_on, 'surrogatepass')[0]
            self.buffer = array.array('H', encoded)
        elif based_on is None:
            self.buffer = array.array('H')
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string, so
        # undecodable units are replaced instead of raising.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        return bytes(self.buffer)

    def __len__(self):
        return len(self.buffer)

    # Comparisons are defined only against other Runes objects; for
    # anything else we return NotImplemented so Python can try the
    # reflected operation (== and != then fall back to identity, the
    # ordering operators raise TypeError).

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __hash__(self):
        """
        Hash the current contents. Arrays themselves are unhashable, so
        we hash the raw bytes of the buffer. The value changes whenever
        the contents change, so do not mutate an object while it is
        being used as a dictionary key or set member.
        """
        return hash(self.buffer.tobytes())

    def __bool__(self):
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return an integer rune for an integer key, or a new Runes object
        for a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        Integer keys require an integer value; any other key (i.e. a
        slice) requires a Runes value. Raises TypeError otherwise.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """
        Remove all runes from this object.
        """
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single integer rune, or all the runes of another Runes
        object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value; raises
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """
        Like index, but returns -1 instead of raising ValueError.
        """
        try:
            return self.index(value)
        except ValueError:
            return -1
128
class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.
    """
    def __setitem__(self, key, value):
        """
        As for Runes, except a string value is also accepted: a single
        character for an integer key, any string for other keys.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        Runes.__setitem__(self, key, value)

    def __getitem__(self, key):
        """
        Return the selected rune(s) as a string, or an empty string if
        the lookup raises IndexError.
        """
        try:
            ret = Runes.__getitem__(self, key)
        except IndexError:
            return ""
        if isinstance(ret, int):
            return chr(ret)
        if isinstance(ret, Runes):
            return str(ret)
        raise AssertionError("this shouldn't happen")

    def append(self, value):
        """
        As for Runes, except a string is also accepted; all of its runes
        are appended.
        """
        if isinstance(value, str):
            value = Runes(value)
        Runes.append(self, value)

    def index(self, value):
        """
        As for Runes, except a single-character string is also accepted.
        Raises ValueError if the value is not present, or if a string
        argument is not exactly one character in the BMP.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return Runes.index(self, value)

    def find(self, value):
        """
        Like index, but returns -1 instead of raising ValueError.
        """
        try:
            return self.index(value)
        except ValueError:
            return -1

    def _ord(self, string):
        """
        Return the single rune corresponding to a one-character string.
        Raises ValueError if the string is not exactly one character
        long or if that character needs more than one rune.
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        # For a character that fits in one rune, the rune is simply the
        # code point. Using ord() rather than the codec means the
        # surrogate halves that __getitem__ hands back as one-character
        # strings can be passed back in (to __setitem__, index or find).
        code = ord(string)
        if code > 0xFFFF:
            raise ValueError("character not in BMP")
        return code