annotate workspace.py @ 0:984876b6a095

Initial commit of first two classes.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 08:09:11 -0800
parents
children 173e86601dbc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
1 #!/usr/bin/env python3
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
3
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
4 # A class that implements a workspace for curly-quoting a text. This is enough
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
5 # like a string that it can be accessed via subscripts and ranges, and enough
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
6 # like a TextIOBase object that it can be written to much like a stream.
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
7 # (However, a Workspace is neither a string nor a TextIOBase object.)
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
8 #
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
9 # The advantage of using UTF-16 (as we do here) is that all quotation marks
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
10 # of interest are represented in a single 16-bit value, so changing straight
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
11 # quotes to curly ones can be accomplished most easily.
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
12 #
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
13 # It was a deliberate design decision to return empty strings when reading
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
14 # out-of-range indices but to throw exceptions when attempting to write
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
15 # them, because both decisions made coding easier in other modules.
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
16
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
17 # I m p o r t s
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
18
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
19 import os, sys
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
20 import io
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
21 import codecs
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
22
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
23 # V a r i a b l e s
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
24
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
25 # C l a s s e s
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
26
984876b6a095 Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff changeset
class Workspace(object):
    """
    A mutable text buffer stored as UTF-16 code units in native byte order.

    The buffer can be indexed and sliced somewhat like a string and written
    to somewhat like a stream, but it is neither a str nor a TextIOBase.
    Reading an out-of-range index yields an empty string; writing to one
    raises IndexError.

    Indices, slices, len() and the read() count are all expressed in 16-bit
    code units, so a character outside the Basic Multilingual Plane occupies
    two positions. seek() and tell() are passed straight through to the
    underlying BytesIO and therefore work in *byte* offsets (two per unit).
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor.

        initial_data, if given, is a string used to pre-populate the buffer.
        The stream position starts at offset 0 either way.
        """
        self._length = 0
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
            # Cached length is not valid yet; __len__ will recompute it.
            self._dirty = True
        else:
            self._fp = io.BytesIO()
            self._dirty = False

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Moves the stream position and returns the new position. Both the
        offset and the result are byte offsets into the encoded buffer;
        whence has the same meaning as for io.BytesIO.seek().
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns the current stream position as a byte offset.
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Reads up to nchars 16-bit code units from the current position and
        returns them decoded as a string. With nchars omitted (or negative),
        reads to the end of the buffer.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            # Two bytes per UTF-16 code unit.
            nchars *= 2
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Encodes string and writes it at the current stream position.
        """
        self._fp.write(string.encode(self.encoding, self.errors))
        # The buffer may have grown, so the cached length must be recomputed
        # on the next len() call.
        self._dirty = True

    def __len__(self):
        """
        Length of the buffer in 16-bit code units. The value is cached and
        only recomputed after the buffer has been marked dirty.
        """
        if self._dirty:
            # Measure by seeking to the end, then restore the position.
            back = self.tell()
            self._length = self.seek(0, io.SEEK_END) // 2
            self.seek(back)
            self._dirty = False
        return self._length

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: an out-of-range integer index or an empty
        range yields "" instead of raising, and slice bounds are clamped to
        the buffer.

        Raises ValueError for a slice with a step and TypeError for a key
        that is neither an integer nor a slice.
        XXX - might return replacement chars from surrogate fragments.
        """
        if isinstance(key, int):
            if key < 0 or key >= len(self):
                return ""
            k2 = 2 * key
            key = slice(k2, k2 + 2)
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            start = max(0, min(length - 1, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            # Convert code-unit positions to byte positions.
            key = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time: only
        the first character of value is stored, and an empty value is a
        no-op.

        Raises TypeError for a non-integer key, IndexError for an index
        outside the buffer, and ValueError if the character does not encode
        to exactly one 16-bit code unit.
        XXX - only works on characters in the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if key < 0 or key >= len(self):
            raise IndexError("index {0} out of range".format(key))
        if not value:
            return
        start = key * 2
        end = start + 2
        encoded = value[0].encode(self.encoding, self.errors)
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        # Overwrite in place; the buffer length does not change.
        self._fp.getbuffer()[start:end] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # __init__ may have failed before _fp was assigned; in that case
        # there is nothing to close.
        if getattr(self, "_fp", None) is not None:
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace (the entire buffer,
        regardless of the current stream position).
        """
        return self.codec.decode(self._fp.getbuffer(), self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit. Exceptions are not suppressed.
        """
        self.close()
        return False