comparison workspace.py @ 3:091c03f1b2e8

Getting it working...
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 19:54:45 -0800
parents 8884b0bf779d
children 7a83e82e65a6
comparison
equal deleted inserted replaced
2:8884b0bf779d 3:091c03f1b2e8
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # A class that implements a workspace for curly-quoting a text. This is enough 4 # Classes that implement a workspace for curly-quoting a text, and views
5 # like a string that it can be accessed via subscripts and ranges, and enough 5 # into the same.
6 # like a TextIOBase object that it can be written to much like a stream. 6
7 # (However, a Workspace is neither a string nor a TextIOBase object.) 7 # I m p o r t s
8
9 import os, sys
10 import io
11 import codecs
12
13 # V a r i a b l e s
14
15 # C l a s s e s
16
17 # Our workspace class. This is enough like a string that it can be
18 # accessed via subscripts and ranges, and enough like a TextIOBase object
19 # that it can be written to much like a stream. (However, a Workspace is
20 # neither a string nor a TextIOBase object.)
8 # 21 #
9 # The advantage of using UTF-16 (as we do here) is that all quotation marks 22 # The advantage of using UTF-16 (as we do here) is that all quotation
10 # of interest are represented in a single 16-bit value, so changing straight 23 # marks of interest are represented in a single 16-bit value, so changing
11 # quotes to curly ones can be accomplished most easily. 24 # straight quotes to curly ones can be accomplished most easily.
12 # 25 #
13 # It was a deliberate design decision to return empty strings when reading 26 # It was a deliberate design decision to return empty strings when reading
14 # out-of-range indices but to throw exceptions when attempting to write 27 # out-of-range indices but to throw exceptions when attempting to write
15 # them, because both decisions made coding easier in other modules. 28 # them, because both decisions made coding easier in other modules.
16
17 # I m p o r t s
18
19 import os, sys
20 import io
21 import codecs
22
23 # V a r i a b l e s
24
25 # C l a s s e s
26
27 class Workspace(object): 29 class Workspace(object):
30 """
31 A workspace for text-processing; a mutable hybrid of a string and an
32 in-memory file.
33 """
28 # The most efficient 16-bit encoding on this platform 34 # The most efficient 16-bit encoding on this platform
29 encoding = "UTF-16" + sys.byteorder[0].upper() + "E" 35 encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
30 codec = codecs.lookup(encoding) 36 codec = codecs.lookup(encoding)
31 # Errors should never happen; UTF-16 can represent all Unicode characters 37 # Errors should never happen; UTF-16 can represent all Unicode characters
32 errors = 'strict' 38 errors = 'strict'
33 39
34 def __init__(self, initial_data=None): 40 def __init__(self, initial_data=None):
35 """ 41 """
36 Constructor. 42 Constructor.
37 """ 43 """
38 self._length = 0
39 if initial_data is not None: 44 if initial_data is not None:
40 data = initial_data.encode(self.encoding, self.errors) 45 data = initial_data.encode(self.encoding, self.errors)
41 self._fp = io.BytesIO(data) 46 self._fp = io.BytesIO(data)
42 else: 47 else:
43 self._fp = io.BytesIO() 48 self._fp = io.BytesIO()
79 def write(self, string): 84 def write(self, string):
80 """ 85 """
81 Write characters. 86 Write characters.
82 """ 87 """
83 self._fp.write(string.encode(self.encoding, self.errors)) 88 self._fp.write(string.encode(self.encoding, self.errors))
89
90 def truncate(self, size=None):
91 """
92 Truncate.
93 XXX - can split a surrogate pair, leaving a lone (runt) surrogate
94 """
95 if size is None:
96 self._fp.truncate(None)
97 else:
98 self._fp.truncate(2 * size)
99
100 def clear(self):
101 """
102 Clear this object's contents.
103 """
104 self.truncate(0)
105 self.seek(0, os.SEEK_SET)
84 106
85 def __len__(self): 107 def __len__(self):
86 """ 108 """
87 Length in characters. 109 Length in characters.
88 """ 110 """
160 """ 182 """
161 self.close() 183 self.close()
162 return False 184 return False
163 185
164 class Bounds(object): 186 class Bounds(object):
187 """
188 A set of index bounds.
189 """
165 def __init__(self, start, stop): 190 def __init__(self, start, stop):
166 if start > stop or start < 0 or stop < 0: 191 if start > stop or start < 0 or stop < 0:
167 raise ValueError("invalid bounds") 192 raise ValueError("invalid bounds")
168 self.start = int(start) 193 self.start = int(start)
169 self.stop = int(stop) 194 self.stop = int(stop)
197 222
198 def __repr__(self): 223 def __repr__(self):
199 return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop) 224 return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
200 225
201 class Mapping(object): 226 class Mapping(object):
227 """
228 Represents a mapping of a single view segment into an indexable
229 object.
230 """
202 def __init__(self, bounds, offset): 231 def __init__(self, bounds, offset):
203 if not isinstance(bounds, Bounds): 232 if not isinstance(bounds, Bounds):
204 raise TypeError("bounds must be a Bounds object") 233 raise TypeError("bounds must be a Bounds object")
205 if not isinstance(offset, int): 234 if not isinstance(offset, int):
206 raise TypeError("offset must be an int") 235 raise TypeError("offset must be an int")
214 class SegmentedView(object): 243 class SegmentedView(object):
215 """ 244 """
216 Implements a view on a subscriptable object. The view is composed of 245 Implements a view on a subscriptable object. The view is composed of
217 zero or more segments of the source object. Has the same idiosyncratic 246 zero or more segments of the source object. Has the same idiosyncratic
218 behavior for out-of-bounds indices that Workspace has (and for the 247 behavior for out-of-bounds indices that Workspace has (and for the
219 same reason). 248 same reason). Mutating this object causes the parent object to also
249 be mutated.
220 """ 250 """
221 def __init__(self, indexable, bounds): 251 def __init__(self, indexable, bounds):
222 self.indexable = indexable 252 self.indexable = indexable
223 self._mmap = [ Mapping(Bounds(0, 0), 0) ] 253 self._mmap = [ Mapping(Bounds(0, 0), 0) ]
224 pos = 0 254 pos = 0
252 assert index >= self._mmap[m].bounds.stop 282 assert index >= self._mmap[m].bounds.stop
253 a = m + 1 283 a = m + 1
254 return None 284 return None
255 285
256 def __setitem__(self, key, value): 286 def __setitem__(self, key, value):
287 """
288 Direct access to replace a single character.
289 """
257 if not isinstance(key, int): 290 if not isinstance(key, int):
258 raise TypeError("__setitem__ only supports integers") 291 raise TypeError("__setitem__ only supports integers")
259 self.indexable[self._mapped(key)] = value 292 self.indexable[self._mapped(key)] = value
260 293
261 # XXX - this is sorta brute-forced and could be more efficient
262 def __getitem__(self, key): 294 def __getitem__(self, key):
295 """
296 Direct access to a single character or range of characters.
297 """
263 # Trivial cases 298 # Trivial cases
264 if isinstance(key, int): 299 if isinstance(key, int):
265 return self._get1(key) 300 return self._get1(key)
266 if not isinstance(key, slice): 301 if not isinstance(key, slice):
267 raise TypeError("expecting int or slice") 302 raise TypeError("expecting int or slice")