comparison workspace.py @ 1:173e86601dbc

Add views.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 12:00:03 -0800
parents 984876b6a095
children 8884b0bf779d
comparison
equal deleted inserted replaced
0:984876b6a095 1:173e86601dbc
37 """ 37 """
38 self._length = 0 38 self._length = 0
39 if initial_data is not None: 39 if initial_data is not None:
40 data = initial_data.encode(self.encoding, self.errors) 40 data = initial_data.encode(self.encoding, self.errors)
41 self._fp = io.BytesIO(data) 41 self._fp = io.BytesIO(data)
42 self._dirty = True
43 else: 42 else:
44 self._fp = io.BytesIO() 43 self._fp = io.BytesIO()
45 self._dirty = False
46 44
47 def close(self): 45 def close(self):
48 """ 46 """
49 Causes our buffer to be discarded and this workspace to become 47 Causes our buffer to be discarded and this workspace to become
50 unusable. 48 unusable.
84 """ 82 """
85 self._fp.write(string.encode(self.encoding, self.errors)) 83 self._fp.write(string.encode(self.encoding, self.errors))
86 84
87 def __len__(self): 85 def __len__(self):
88 """ 86 """
89 Length as a string. 87 Length in characters.
90 """ 88 """
91 if self._dirty: 89 return len(self._fp.getbuffer()) // 2
92 back = self.tell() 90
93 self._length = self.seek(0, io.SEEK_END) // 2 91 def _mapped(self, index):
94 self.seek(back) 92 if index < 0 or index >= len(self):
95 self._dirty = False 93 raise IndexError("index {0} out of range".format(index))
96 return self._length 94 i2 = index * 2
95 return slice(i2, i2 + 2)
97 96
98 def __getitem__(self, key): 97 def __getitem__(self, key):
99 """ 98 """
100 Direct access to a single character or range of characters. We do 99 Direct access to a single character or range of characters. We do
101 not support negative indices. Return value is based on what's most 100 not support negative indices. Return value is based on what's most
102 useful for curling quotes. 101 useful for curling quotes.
103 XXX - might return replacement chars from surrogate fragments. 102 XXX - might return replacement chars from surrogate fragments.
104 """ 103 """
105 if isinstance(key, int): 104 if isinstance(key, int):
106 if key < 0 or key >= len(self): 105 try:
106 key = self._mapped(key)
107 except IndexError:
107 return "" 108 return ""
108 k2 = 2 * key
109 key = slice(k2, k2 + 2)
110 elif isinstance(key, slice): 109 elif isinstance(key, slice):
111 if key.step is not None: 110 if key.step is not None:
112 raise ValueError("__getitem__ does not support steps in slices") 111 raise ValueError("__getitem__ does not support steps in slices")
113 length = len(self) 112 length = len(self)
114 start = 0 if key.start is None else key.start 113 start = 0 if key.start is None else key.start
117 stop = max(0, min(length, stop)) 116 stop = max(0, min(length, stop))
118 if stop <= start: 117 if stop <= start:
119 return "" 118 return ""
120 key = slice(start * 2, stop * 2) 119 key = slice(start * 2, stop * 2)
121 else: 120 else:
122 raise TypeError("__setitem__ only supports integers and slices") 121 raise TypeError("__getitem__ only supports integers and slices")
123 return self.codec.decode(self._fp.getbuffer()[key], "replace")[0] 122 return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]
124 123
125 def __setitem__(self, key, value): 124 def __setitem__(self, key, value):
126 """ 125 """
127 Direct access to a single character. We do not support negative 126 Direct access to a single character. We do not support negative
128 indices or replacing more than a single character at a time. 127 indices or replacing more than a single character at a time.
129 XXX - only works on characters in the BMP. 128 XXX - only works on characters in the BMP.
130 """ 129 """
131 if not isinstance(key, int): 130 if not isinstance(key, int):
132 raise TypeError("__setitem__ only supports integers") 131 raise TypeError("__setitem__ only supports integers")
133 if key < 0 or key >= len(self):
134 raise IndexError("index {0} out of range".format(key))
135 if not value: 132 if not value:
136 return 133 return
137 start = key * 2
138 end = start + 2
139 encoded = value[0].encode(self.encoding, self.errors) 134 encoded = value[0].encode(self.encoding, self.errors)
140 if len(encoded) != 2: 135 if len(encoded) != 2:
141 raise ValueError("{0!r} not in BMP".format(value[0])) 136 raise ValueError("{0!r} not in BMP".format(value[0]))
142 self._fp.getbuffer()[start:end] = encoded 137 self._fp.getbuffer()[self._mapped(key)] = encoded
143 138
144 def __del__(self): 139 def __del__(self):
145 """ 140 """
146 Equivalent to .close(). 141 Equivalent to .close().
147 """ 142 """
163 """ 158 """
164 Context manager: close on exit. 159 Context manager: close on exit.
165 """ 160 """
166 self.close() 161 self.close()
167 return False 162 return False
163
class Bounds(object):
    """
    A half-open range [start, stop) of non-negative integer positions.

    All ordering and equality comparisons look at the start position
    only, so a collection of Bounds sorts by where each range begins.
    """
    def __init__(self, start, stop):
        """
        Create a range covering start <= position < stop. Both ends are
        stored as ints. Raises ValueError if either end is negative or
        if start is greater than stop.
        """
        if start > stop or start < 0 or stop < 0:
            raise ValueError("invalid bounds")
        self.start = int(start)
        self.stop = int(stop)

    @classmethod
    def from_object(cls, obj):
        """
        Alternate constructor: build a Bounds from a slice (its start
        and stop are used; any step is ignored) or from any object
        whose items 0 and 1 are the start and stop.
        """
        # Must go through cls (not a hard-coded class name) so that
        # subclasses get instances of themselves back.
        if isinstance(obj, slice):
            return cls(obj.start, obj.stop)
        return cls(obj[0], obj[1])

    # Comparisons deliberately ignore .stop; see the class docstring.
    def __lt__(self, other):
        return self.start < other.start

    def __le__(self, other):
        return self.start <= other.start

    def __eq__(self, other):
        return self.start == other.start

    def __ne__(self, other):
        return self.start != other.start

    def __gt__(self, other):
        return self.start > other.start

    def __ge__(self, other):
        return self.start >= other.start

    def __contains__(self, scalar):
        """
        True if scalar lies inside the half-open range.
        """
        return self.start <= scalar < self.stop

    def __repr__(self):
        return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
200
class Mapping(object):
    """
    Pairs a Bounds object with an integer offset.
    """
    def __init__(self, bounds, offset):
        """
        Store bounds and offset after type-checking both. Raises
        TypeError if bounds is not a Bounds or offset is not an int.
        """
        if not isinstance(bounds, Bounds):
            raise TypeError("bounds must be a Bounds object")
        if not isinstance(offset, int):
            raise TypeError("offset must be an int")
        self.bounds, self.offset = bounds, offset

    def __repr__(self):
        template = "{0}({1!r}, {2!r})"
        return template.format(type(self).__name__, self.bounds, self.offset)
212
class SegmentedView(object):
    """
    Implements a view on a subscriptable object. The view is composed of
    zero or more segments of the source object, laid end to end in order
    of their start positions. Has the same idiosyncratic behavior for
    out-of-bounds indices that Workspace has (and for the same reason):
    reading one yields an empty string instead of raising.
    """
    def __init__(self, indexable, bounds):
        """
        indexable is the source object; bounds is an iterable of Bounds
        objects, expressed in source positions, naming the segments to
        expose. Segments that overlap or touch are merged so that no
        source position appears in the view more than once.
        """
        self.indexable = indexable
        # Each Mapping holds a Bounds in *view* positions plus the
        # *source* position that the start of that Bounds maps to.
        self._mmap = []
        pos = 0          # next unused position in the view
        src_stop = None  # source position just past the last segment
        for r in sorted(bounds):
            if src_stop is not None and r.start <= src_stop:
                # Overlaps or abuts the previous segment: grow that
                # segment by whatever part of r lies beyond it.
                extra = r.stop - src_stop
                if extra > 0:
                    self._mmap[-1].bounds.stop += extra
                    pos += extra
                    src_stop = r.stop
                continue
            if r.stop == r.start:
                # An empty range contributes nothing to the view.
                continue
            opos = pos
            pos += r.stop - r.start
            self._mmap.append(Mapping(Bounds(opos, pos), r.start))
            src_stop = r.stop
        self._length = pos

    def _mapped(self, index):
        """
        Translate a view index into a source index. Raises IndexError
        if index does not fall inside any segment.
        """
        mi = self._binsch(index)
        if mi is None:
            raise IndexError("index {0} out of range".format(index))
        m = self._mmap[mi]
        return index - m.bounds.start + m.offset

    def _binsch(self, index):
        """
        Binary search of the segment table. Returns the position in
        self._mmap of the segment containing index, or None if there is
        no such segment.
        """
        a = 0
        z = len(self._mmap) - 1
        while a <= z:
            m = (a + z) // 2
            if index in self._mmap[m].bounds:
                return m
            if index < self._mmap[m].bounds.start:
                z = m - 1
            else:
                # Not inside and not before, so index is past this one.
                a = m + 1
        return None

    def __setitem__(self, key, value):
        """
        Store value at a single view position. Raises TypeError for
        non-integer keys and IndexError for positions outside the view.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        self.indexable[self._mapped(key)] = value

    # XXX - this is sorta brute-forced and could be more efficient
    def __getitem__(self, key):
        """
        Fetch a single position or a slice of the view. Positions that
        are outside the view contribute an empty string. Omitted slice
        ends default to the ends of the view.
        """
        if isinstance(key, int):
            return self._get1(key)
        if not isinstance(key, slice):
            raise TypeError("expecting int or slice")
        step = key.step or 1
        # Fill in omitted ends ourselves; range() does not accept None.
        if step > 0:
            start = 0 if key.start is None else key.start
            stop = len(self) if key.stop is None else key.stop
        else:
            start = len(self) - 1 if key.start is None else key.start
            stop = -1 if key.stop is None else key.stop
        return "".join(self._get1(i) for i in range(start, stop, step))

    def __len__(self):
        """
        Total number of positions in the view.
        """
        return self._length

    def _get1(self, index):
        """
        Fetch one position, mapping any IndexError to an empty string.
        """
        try:
            return self.indexable[self._mapped(index)]
        except IndexError:
            return ""

    def getvalue(self):
        """
        The entire contents of the view.
        """
        return self[0:len(self)]