comparison workspace.py @ 0:984876b6a095

Initial commit of first two classes.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 08:09:11 -0800
parents
children 173e86601dbc
comparison
equal deleted inserted replaced
-1:000000000000 0:984876b6a095
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 # A class that implements a workspace for curly-quoting a text. This is enough
5 # like a string that it can be accessed via subscripts and ranges, and enough
6 # like a TextIOBase object that it can be written to much like a stream.
7 # (However, a Workspace is neither a string nor a TextIOBase object.)
8 #
9 # The advantage of using UTF-16 (as we do here) is that all quotation marks
10 # of interest are represented in a single 16-bit value, so changing straight
11 # quotes to curly ones can be accomplished most easily.
12 #
13 # It was a deliberate design decision to return empty strings when reading
14 # out-of-range indices but to throw exceptions when attempting to write
15 # them, because both decisions made coding easier in other modules.
16
17 # I m p o r t s
18
19 import os, sys
20 import io
21 import codecs
22
23 # V a r i a b l e s
24
25 # C l a s s e s
26
class Workspace(object):
    """
    A mutable text buffer held in memory as native-endian UTF-16.

    The buffer can be indexed and sliced somewhat like a string and
    read/written somewhat like a stream, but it is neither a str nor a
    TextIOBase. Indexing and read() count in 16-bit code units, so a
    character outside the BMP occupies two positions.

    Reading out-of-range indices yields an empty string; writing to them
    via subscript raises IndexError.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor.

        initial_data, if given, is a string used to pre-fill the buffer.
        The stream position starts at 0 either way.
        """
        # _length caches the size in 16-bit units; _dirty marks it stale.
        self._length = 0
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
            self._dirty = True
        else:
            self._fp = io.BytesIO()
            self._dirty = False

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Seeks within the underlying buffer and returns the new position.

        NOTE: offset and the return value are BYTE offsets into the
        UTF-16 buffer (two bytes per 16-bit unit), because this delegates
        directly to the underlying BytesIO. Use even offsets to stay
        aligned on unit boundaries.
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns current position, as a byte offset (see seek()).
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read up to nchars 16-bit units from the current position and
        return them as a string. None or a negative count reads to the end.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            # Each 16-bit unit occupies two bytes in the buffer.
            nchars *= 2
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position.
        """
        self._fp.write(string.encode(self.encoding, self.errors))
        # The buffer may have grown, so the cached length is now stale.
        self._dirty = True

    def __len__(self):
        """
        Length as a string (number of 16-bit units in the buffer).
        """
        if self._dirty:
            # Measure by seeking to the end, then restore the position.
            back = self.tell()
            self._length = self.seek(0, io.SEEK_END) // 2
            self.seek(back)
            self._dirty = False
        return self._length

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range reads as "".

        Raises ValueError for slices with a step and TypeError for keys
        that are neither integers nor slices.
        XXX - might return replacement chars from surrogate fragments.
        """
        if isinstance(key, int):
            if key < 0 or key >= len(self):
                return ""
            k2 = 2 * key
            key = slice(k2, k2 + 2)
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both ends into [0, length]. Clamping start to length
            # (not length - 1) makes a slice wholly past the end empty.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            key = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        # Release the views promptly: while a view of the buffer exists,
        # the BytesIO can be neither resized nor closed.
        with self._fp.getbuffer() as view, view[key] as window:
            return self.codec.decode(window, "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time; only
        the first character of value is used, and an empty value is a no-op.

        Raises TypeError for non-integer keys, IndexError for out-of-range
        keys, and ValueError if the character is not in the BMP.
        XXX - only works on characters in the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if key < 0 or key >= len(self):
            raise IndexError("index {0} out of range".format(key))
        if not value:
            return
        start = key * 2
        end = start + 2
        encoded = value[0].encode(self.encoding, self.errors)
        # A BMP character encodes to exactly one 16-bit unit (two bytes).
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        with self._fp.getbuffer() as view:
            view[start:end] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # __init__ may have failed before _fp was assigned; nothing to
        # close in that case.
        if getattr(self, "_fp", None) is not None:
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        with self._fp.getbuffer() as view:
            return self.codec.decode(view, self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit.
        """
        self.close()
        return False