annotate runes.py @ 10:397c178c5b98

Make it array-based.
author David Barts <n5jrn@me.com>
date Fri, 27 Dec 2019 11:26:00 -0800
parents
children ab7d6e908034
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
1 #!/usr/bin/env python3
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
3
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
4 # I m p o r t s
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
5
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
6 import array
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
7 import codecs
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
8 import collections
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
9 import struct
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
10 import sys
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
11
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
12 # C l a s s e s
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
13
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The class
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array of 16-bit unsigned integers) used to back this
    object; modifications to that array will be reflected in this
    object.

    Individual items are integers (code units); slices are new Runes
    objects. Characters outside the Basic Multilingual Plane occupy two
    runes (a surrogate pair).
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Create a new object. based_on may be:
          * an array of typecode 'H', which is used as-is (shared, not
            copied) as the backing buffer;
          * a string, which is encoded to UTF-16 in native byte order;
          * None, which yields an empty object;
          * anything else array.array('H', ...) accepts as an
            initializer (another array, bytes, an iterable of ints).
        """
        if isinstance(based_on, array.array) and based_on.typecode == 'H':
            # Deliberately not copied: the caller's array backs this object.
            self.buffer = based_on
        elif isinstance(based_on, str):
            # Strict encoding: a string containing a lone surrogate will
            # raise UnicodeEncodeError rather than be silently altered.
            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
        elif based_on is None:
            self.buffer = array.array('H')
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string (e.g. an
        # unpaired surrogate), hence 'replace' instead of 'strict'.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (code units), not characters."""
        return len(self.buffer)

    # Comparisons are only defined against other Runes objects. For
    # anything else we return NotImplemented so Python can try the
    # reflected operation and then fall back to its default behaviour,
    # instead of failing with AttributeError on other.buffer.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __hash__(self):
        """
        Hash the current contents, consistently with __eq__. Note that
        this object is mutable: do not modify it while it is being used
        as a dictionary key or set member.
        """
        # array.array itself is unhashable, so hash an immutable snapshot.
        return hash(self.buffer.tobytes())

    def __bool__(self):
        """True if and only if this object contains at least one rune."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """Return an integer for an index, or a new Runes for a slice."""
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        else:
            return ret

    def __setitem__(self, key, value):
        """
        Assign an integer to an index, or a Runes object to a slice.
        Raises TypeError for any other combination.
        """
        if isinstance(key, int):
            if isinstance(value, int):
                self.buffer[key] = value
            else:
                raise TypeError("integer required")
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """Remove all runes; the backing array is emptied in place."""
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (an integer) or all the runes of another
        Runes object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value, raising
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
128
397c178c5b98 Make it array-based.
David Barts <n5jrn@me.com>
parents:
diff changeset
class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.

    append, index, find and the "in" operator likewise accept a string
    in addition to what Runes accepts. The find method is inherited: it
    calls self.index, so it picks up the string handling defined here.
    """
    def __setitem__(self, key, value):
        """
        As for Runes, but a one-character string may be assigned to an
        index and any string may be assigned to a slice.
        """
        if isinstance(value, str):
            # An index holds exactly one code unit; a slice takes a run.
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        super().__setitem__(key, value)

    def __getitem__(self, key):
        """
        Return the item or slice as a string. An out-of-range index
        yields the empty string instead of raising IndexError.
        """
        # Only the lookup itself can raise IndexError, so guard just that.
        try:
            ret = super().__getitem__(key)
        except IndexError:
            return ""
        if isinstance(ret, int):
            return chr(ret)
        if isinstance(ret, Runes):
            return str(ret)
        raise AssertionError("this shouldn't happen")

    def append(self, value):
        """As for Runes, but a string (of any length) is also accepted."""
        if isinstance(value, str):
            value = Runes(value)
        super().append(value)

    def __contains__(self, value):
        """
        As for Runes, but a one-character string is tested as the code
        unit it encodes to. A string that is not a single BMP character
        cannot be a single item, so it is reported as not contained.
        """
        if isinstance(value, str):
            try:
                value = self._ord(value)
            except ValueError:
                return False
        return super().__contains__(value)

    def index(self, value):
        """
        As for Runes, but a one-character string is also accepted.
        Raises ValueError if the value is absent, or if a string is
        passed that is not a single BMP character.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return super().index(value)

    def _ord(self, string):
        """
        Return the single UTF-16 code unit that string encodes to.
        Raises ValueError unless string is exactly one character that
        lies in the Basic Multilingual Plane.
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        # A character outside the BMP encodes to a surrogate pair (2 units).
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]