Mercurial > cgi-bin > hgweb.cgi > curlyq
comparison workspace.py @ 3:091c03f1b2e8
Getting it working...
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 19:54:45 -0800 |
parents | 8884b0bf779d |
children | 7a83e82e65a6 |
comparison
equal
deleted
inserted
replaced
2:8884b0bf779d | 3:091c03f1b2e8 |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 # A class that implements a workspace for curly-quoting a text. This is enough | 4 # Classes that implement a workspace for curly-quoting a text, and views |
5 # like a string that it can be accessed via subscripts and ranges, and enough | 5 # into the same. |
6 # like a TextIOBase object that it can be written to much like a stream. | 6 |
7 # (However, a Workspace is neither a string nor a TextIOBase object.) | 7 # I m p o r t s |
8 | |
9 import os, sys | |
10 import io | |
11 import codecs | |
12 | |
13 # V a r i a b l e s | |
14 | |
15 # C l a s s e s | |
16 | |
17 # Our workspace class. This is enough like a string that it can be | |
18 # accessed via subscripts and ranges, and enough like a TextIOBase object | |
19 # that it can be written to much like a stream. (However, a Workspace is | |
20 # neither a string nor a TextIOBase object.) | |
8 # | 21 # |
9 # The advantage of using UTF-16 (as we do here) is that all quotation marks | 22 # The advantage of using UTF-16 (as we do here) is that all quotation |
10 # of interest are represented in a single 16-bit value, so changing straight | 23 # marks of interest are represented in a single 16-bit value, so changing |
11 # quotes to curly ones can be accomplished most easily. | 24 # straight quotes to curly ones can be accomplished most easily. |
12 # | 25 # |
13 # It was a deliberate design decision to return empty strings when reading | 26 # It was a deliberate design decision to return empty strings when reading |
14 # out-of-range indices but to throw exceptions when attempting to write | 27 # out-of-range indices but to throw exceptions when attempting to write |
15 # them, because both decisions made coding easier in other modules. | 28 # them, because both decisions made coding easier in other modules. |
16 | |
17 # I m p o r t s | |
18 | |
19 import os, sys | |
20 import io | |
21 import codecs | |
22 | |
23 # V a r i a b l e s | |
24 | |
25 # C l a s s e s | |
26 | |
27 class Workspace(object): | 29 class Workspace(object): |
30 """ | |
31 A workspace for text-processing; a mutable hybrid of a string and an | |
32 in-memory file. | |
33 """ | |
28 # The most efficient 16-bit one on this platform | 34 # The most efficient 16-bit one on this platform |
29 encoding = "UTF-16" + sys.byteorder[0].upper() + "E" | 35 encoding = "UTF-16" + sys.byteorder[0].upper() + "E" |
30 codec = codecs.lookup(encoding) | 36 codec = codecs.lookup(encoding) |
31 # Errors should never happen; UTF-16 can represent all Unicode characters | 37 # Errors should never happen; UTF-16 can represent all Unicode characters |
32 errors = 'strict' | 38 errors = 'strict' |
33 | 39 |
34 def __init__(self, initial_data=None): | 40 def __init__(self, initial_data=None): |
35 """ | 41 """ |
36 Constructor. | 42 Constructor. |
37 """ | 43 """ |
38 self._length = 0 | |
39 if initial_data is not None: | 44 if initial_data is not None: |
40 data = initial_data.encode(self.encoding, self.errors) | 45 data = initial_data.encode(self.encoding, self.errors) |
41 self._fp = io.BytesIO(data) | 46 self._fp = io.BytesIO(data) |
42 else: | 47 else: |
43 self._fp = io.BytesIO() | 48 self._fp = io.BytesIO() |
79 def write(self, string): | 84 def write(self, string): |
80 """ | 85 """ |
81 Write characters. | 86 Write characters. |
82 """ | 87 """ |
83 self._fp.write(string.encode(self.encoding, self.errors)) | 88 self._fp.write(string.encode(self.encoding, self.errors)) |
89 | |
90 def truncate(self, size=None): | |
91 """ | |
92 Truncate. | |
93 XXX - can create a runt surrogate pair | |
94 """ | |
95 if size is None: | |
96 self._fp.truncate(None) | |
97 else: | |
98 self._fp.truncate(2 * size) | |
99 | |
100 def clear(self): | |
101 """ | |
102 Clear this object's contents. | |
103 """ | |
104 self.truncate(0) | |
105 self.seek(0, os.SEEK_SET) | |
84 | 106 |
85 def __len__(self): | 107 def __len__(self): |
86 """ | 108 """ |
87 Length in characters. | 109 Length in characters. |
88 """ | 110 """ |
160 """ | 182 """ |
161 self.close() | 183 self.close() |
162 return False | 184 return False |
163 | 185 |
164 class Bounds(object): | 186 class Bounds(object): |
187 """ | |
188 A set of index bounds. | |
189 """ | |
165 def __init__(self, start, stop): | 190 def __init__(self, start, stop): |
166 if start > stop or start < 0 or stop < 0: | 191 if start > stop or start < 0 or stop < 0: |
167 raise ValueError("invalid bounds") | 192 raise ValueError("invalid bounds") |
168 self.start = int(start) | 193 self.start = int(start) |
169 self.stop = int(stop) | 194 self.stop = int(stop) |
197 | 222 |
198 def __repr__(self): | 223 def __repr__(self): |
199 return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop) | 224 return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop) |
200 | 225 |
201 class Mapping(object): | 226 class Mapping(object): |
227 """ | |
228 Represents a mapping of a single view segment into an indexable | |
229 object. | |
230 """ | |
202 def __init__(self, bounds, offset): | 231 def __init__(self, bounds, offset): |
203 if not isinstance(bounds, Bounds): | 232 if not isinstance(bounds, Bounds): |
204 raise TypeError("bounds must be a Bounds object") | 233 raise TypeError("bounds must be a Bounds object") |
205 if not isinstance(offset, int): | 234 if not isinstance(offset, int): |
206 raise TypeError("offset must be an int") | 235 raise TypeError("offset must be an int") |
214 class SegmentedView(object): | 243 class SegmentedView(object): |
215 """ | 244 """ |
216 Implements a view on a subscriptable object. The view is composed of | 245 Implements a view on a subscriptable object. The view is composed of |
217 zero or more segments of the source object. Has the same idiosyncratic | 246 zero or more segments of the source object. Has the same idiosyncratic |
218 behavior for out-of-bounds indices that Workspace has (and for the | 247 behavior for out-of-bounds indices that Workspace has (and for the |
219 same reason). | 248 same reason). Mutating this object causes the parent object to also |
249 be mutated. | |
220 """ | 250 """ |
221 def __init__(self, indexable, bounds): | 251 def __init__(self, indexable, bounds): |
222 self.indexable = indexable | 252 self.indexable = indexable |
223 self._mmap = [ Mapping(Bounds(0, 0), 0) ] | 253 self._mmap = [ Mapping(Bounds(0, 0), 0) ] |
224 pos = 0 | 254 pos = 0 |
252 assert index >= self._mmap[m].bounds.stop | 282 assert index >= self._mmap[m].bounds.stop |
253 a = m + 1 | 283 a = m + 1 |
254 return None | 284 return None |
255 | 285 |
256 def __setitem__(self, key, value): | 286 def __setitem__(self, key, value): |
287 """ | |
288 Direct access to replace a single character. | |
289 """ | |
257 if not isinstance(key, int): | 290 if not isinstance(key, int): |
258 raise TypeError("__setitem__ only supports integers") | 291 raise TypeError("__setitem__ only supports integers") |
259 self.indexable[self._mapped(key)] = value | 292 self.indexable[self._mapped(key)] = value |
260 | 293 |
261 # XXX - this is sorta brute-forced and could be more efficient | |
262 def __getitem__(self, key): | 294 def __getitem__(self, key): |
295 """ | |
296 Direct access to a single character or range of characters. | |
297 """ | |
263 # Trivial cases | 298 # Trivial cases |
264 if isinstance(key, int): | 299 if isinstance(key, int): |
265 return self._get1(key) | 300 return self._get1(key) |
266 if not isinstance(key, slice): | 301 if not isinstance(key, slice): |
267 raise TypeError("expecting int or slice") | 302 raise TypeError("expecting int or slice") |