Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate workspace.py @ 8:05363e803272 v1_workspace
Improve help text.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 21:54:10 -0800 |
parents | 7a83e82e65a6 |
children |
rev | line source |
---|---|
0
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
3 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
4 # I m p o r t s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
5 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
6 import os, sys |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
7 import io |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
8 import codecs |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
9 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
10 # V a r i a b l e s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
11 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
12 # C l a s s e s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
13 |
3 | 14 # Our workspace class. This is enough like a string that it can be |
15 # accessed via subscripts and ranges, and enough like a TextIOBase object | |
16 # that it can be written to much like a stream. (However, a Workspace is | |
17 # neither a string nor a TextIOBase object.) | |
18 # | |
19 # The advantage of using UTF-16 (as we do here) is that all quotation | |
20 # marks of interest are represented in a single 16-bit value, so changing | |
21 # straight quotes to curly ones can be accomplished most easily. | |
22 # | |
23 # It was a deliberate design decision to return empty strings when reading | |
24 # out-of-range indices but to throw exceptions when attempting to write | |
25 # them, because both decisions made coding easier in other modules. | |
0
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
class Workspace(object):
    """
    A workspace for text-processing; a mutable hybrid of a string and an
    in-memory file.

    The text is held as UTF-16 (native byte order) in an io.BytesIO, so
    every "character" seen by indexing, len(), read() and truncate() is one
    16-bit code unit.  Characters outside the BMP therefore occupy two
    positions (a surrogate pair).

    Reading out-of-range indices yields an empty string; writing to them
    raises IndexError.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor.

        initial_data, if given, is a string used as the initial contents.
        The stream position starts at the beginning either way.
        """
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
        else:
            self._fp = io.BytesIO()

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Moves the stream position and returns the new position.

        The arguments are handed unchanged to the underlying byte buffer,
        so offsets are in bytes (two per 16-bit character), not in
        characters as for read() and truncate().  Values returned by
        tell() are always valid offsets.
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns current position, as a byte offset into the underlying
        buffer (see seek()).
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read characters.

        Reads at most nchars 16-bit characters from the current position;
        if nchars is None or negative, reads through to the end.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            # The underlying buffer counts bytes; each character is two.
            nchars *= 2
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position.

        Returns the number of 16-bit characters written.
        """
        encoded = string.encode(self.encoding, self.errors)
        self._fp.write(encoded)
        return len(encoded) // 2

    def truncate(self, size=None):
        """
        Truncate.

        size is in 16-bit characters; None is passed through to the
        underlying buffer, which then truncates at the current position.
        XXX - can create a runt surrogate pair
        """
        if size is None:
            self._fp.truncate(None)
        else:
            self._fp.truncate(2 * size)

    def clear(self):
        """
        Clear this object's contents.
        """
        self.truncate(0)
        self.seek(0, io.SEEK_SET)

    def __len__(self):
        """
        Length in characters.
        """
        # Release the view promptly: while it is exported the BytesIO
        # cannot be resized or closed.
        with self._fp.getbuffer() as view:
            return len(view) // 2

    def _mapped(self, index):
        """
        Maps a character index to the slice of the byte buffer holding it.
        Raises IndexError if the index is negative or past the end.
        """
        if index < 0 or index >= len(self):
            raise IndexError("index {0} out of range".format(index))
        i2 = index * 2
        return slice(i2, i2 + 2)

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range reads as an empty
        string instead of raising.
        XXX - might return replacement chars from surrogate fragments.
        """
        if isinstance(key, int):
            try:
                key = self._mapped(key)
            except IndexError:
                return ""
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both ends to [0, length].  Clamping start to
            # length - 1 would make a slice that begins past the end
            # return the last character instead of nothing.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            key = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        # Both views are released on exit so the buffer stays resizable.
        with self._fp.getbuffer() as view, view[key] as chunk:
            return self.codec.decode(chunk, "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time.

        Only the first character of value is used; an empty value is
        silently ignored.  Raises TypeError for a non-integer key,
        ValueError for a character outside the BMP and IndexError for an
        out-of-range key.
        XXX - only works on characters in the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if not value:
            return
        encoded = value[0].encode(self.encoding, self.errors)
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        target = self._mapped(key)
        with self._fp.getbuffer() as view:
            view[target] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # If __init__ raised before creating the buffer there is nothing
        # to close, and finalization must not raise AttributeError.
        if getattr(self, "_fp", None) is not None:
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        with self._fp.getbuffer() as view:
            return self.codec.decode(view, self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit.
        """
        self.close()
        return False