Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate workspace.py @ 3:091c03f1b2e8
Getting it working...
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 19:54:45 -0800 |
parents | 8884b0bf779d |
children | 7a83e82e65a6 |
rev | line source |
---|---|
0
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
3 |
3 | 4 # Classes that implement a workspace for curly-quoting a text, and views |
5 # into the same. | |
0
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
6 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
7 # I m p o r t s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
8 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
9 import os, sys |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
10 import io |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
11 import codecs |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
12 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
13 # V a r i a b l e s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
14 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
15 # C l a s s e s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
16 |
3 | 17 # Our workspace class. This is enough like a string that it can be |
18 # accessed via subscripts and ranges, and enough like a TextIOBase object | |
19 # that it can be written to much like a stream. (However, a Workspace is | |
20 # neither a string nor a TextIOBase object.) | |
21 # | |
22 # The advantage of using UTF-16 (as we do here) is that all quotation | |
23 # marks of interest are represented in a single 16-bit value, so changing | |
24 # straight quotes to curly ones can be accomplished most easily. | |
25 # | |
26 # It was a deliberate design decision to return empty strings when reading | |
27 # out-of-range indices but to throw exceptions when attempting to write | |
28 # them, because both decisions made coding easier in other modules. | |
0
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
class Workspace(object):
    """
    A workspace for text-processing; a mutable hybrid of a string and an
    in-memory file.

    The text is kept UTF-16 encoded (native byte order) in an io.BytesIO.
    A "character" in this class is therefore one 16-bit code unit:
    character index i occupies bytes [2*i, 2*i + 2) of the buffer, and a
    character outside the BMP occupies two consecutive indices.

    Reading an out-of-range index or slice yields "" (or the in-range
    part of the slice); writing an out-of-range index raises IndexError.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor. If initial_data (a string) is given, the workspace
        starts out holding it; otherwise it starts out empty. Either way
        the stream position starts at 0.
        """
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
        else:
            self._fp = io.BytesIO()

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Seeks the underlying buffer and returns the new position.
        NOTE: offset is handed to the byte buffer unchanged, so it is
        measured in bytes (two per character), unlike the character
        counts taken by read() and truncate().
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns current position of the underlying buffer, in bytes
        (see the note on seek()).
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read up to nchars characters from the current position and
        return them as a string. None or a negative count is passed
        through to the byte buffer unchanged (i.e. read to the end).
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            nchars *= 2  # characters -> bytes
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position.
        """
        self._fp.write(string.encode(self.encoding, self.errors))

    def truncate(self, size=None):
        """
        Truncate to size characters, or at the current position if size
        is None.
        XXX - can create a runt surrogate pair
        """
        if size is None:
            self._fp.truncate(None)
        else:
            self._fp.truncate(2 * size)

    def clear(self):
        """
        Clear this object's contents and rewind to the start.
        """
        self.truncate(0)
        self.seek(0, io.SEEK_SET)

    def __len__(self):
        """
        Length in characters.
        """
        return len(self._fp.getbuffer()) // 2

    def _mapped(self, index):
        """
        Map a character index to the slice of bytes holding it. Raises
        IndexError if index is negative or past the end.
        """
        if index < 0 or index >= len(self):
            raise IndexError("index {0} out of range".format(index))
        i2 = index * 2
        return slice(i2, i2 + 2)

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range reads as "", and
        a slice is clipped to the part that exists.
        XXX - might return replacement chars from surrogate fragments.
        """
        if isinstance(key, int):
            try:
                key = self._mapped(key)
            except IndexError:
                return ""
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both ends to [0, length]. The start must be allowed to
            # reach length itself; clamping it to length - 1 would make a
            # slice lying wholly past the end return the last character.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            key = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time: only
        the first character of value is used, and an empty value is a
        silent no-op. Raises TypeError for a non-integer key, ValueError
        if the character is not in the BMP, IndexError if key is out of
        range.
        XXX - only works on characters in the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if not value:
            return
        encoded = value[0].encode(self.encoding, self.errors)
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        self._fp.getbuffer()[self._mapped(key)] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # __init__ may have raised before _fp existed; don't compound
        # that failure with an AttributeError at finalization time.
        if hasattr(self, "_fp"):
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        return self.codec.decode(self._fp.getbuffer(), self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit. Exceptions are not suppressed.
        """
        self.close()
        return False
1 | 185 |
class Bounds(object):
    """
    A set of index bounds: the half-open range [start, stop).

    Ordering and equality look at start only, so a sequence of Bounds
    sorts by where each one begins. Because __eq__ is defined without
    __hash__, instances are unhashable.
    """
    def __init__(self, start, stop):
        """
        Raises ValueError unless 0 <= start <= stop. Both values are
        stored as ints.
        """
        if start > stop or start < 0 or stop < 0:
            raise ValueError("invalid bounds")
        self.start = int(start)
        self.stop = int(stop)

    @classmethod
    def from_object(cls, obj):
        """
        Alternate constructor: build bounds from a slice (its start and
        stop) or from any object whose first two items are the start and
        stop.
        """
        # Read the values from obj itself (not from the slice type), and
        # construct through cls; there is no self in a classmethod.
        if isinstance(obj, slice):
            return cls(obj.start, obj.stop)
        return cls(obj[0], obj[1])

    def __lt__(self, other):
        return self.start < other.start

    def __le__(self, other):
        return self.start <= other.start

    def __eq__(self, other):
        return self.start == other.start

    def __ne__(self, other):
        return self.start != other.start

    def __gt__(self, other):
        return self.start > other.start

    def __ge__(self, other):
        return self.start >= other.start

    def __contains__(self, scalar):
        """
        True if scalar lies in [start, stop).
        """
        return self.start <= scalar < self.stop

    def __repr__(self):
        return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
225 | |
class Mapping(object):
    """
    One segment of a view, tied to its place in an indexable object.

    bounds is the segment's extent (a Bounds), offset is the index the
    segment's start is tied to, and delta is offset minus bounds.start,
    i.e. what to add to an index taken from bounds to reach the
    corresponding index at offset.
    """
    def __init__(self, bounds, offset):
        """
        Raises TypeError unless bounds is a Bounds and offset is an int.
        """
        requirements = (
            (bounds, Bounds, "bounds must be a Bounds object"),
            (offset, int, "offset must be an int"),
        )
        for candidate, wanted, complaint in requirements:
            if not isinstance(candidate, wanted):
                raise TypeError(complaint)
        self.bounds, self.offset = bounds, offset
        # Computed once, from the start only.
        self.delta = offset - bounds.start

    def __repr__(self):
        label = self.__class__.__name__
        return "{0}({1!r}, {2!r})".format(label, self.bounds, self.offset)
242 | |
class SegmentedView(object):
    """
    Implements a view on a subscriptable object. The view is composed of
    zero or more segments of the source object, laid end to end in order
    of their starting position. Reading an out-of-range index yields ""
    while writing one raises IndexError. Mutating this object causes the
    parent object to also be mutated.
    """
    def __init__(self, indexable, bounds):
        """
        indexable is the source object; bounds is an iterable of sortable
        objects with .start and .stop attributes (e.g. Bounds), each
        naming a range of the source to include. Ranges that overlap or
        touch are merged so no source index appears in the view twice.
        """
        self.indexable = indexable
        self._mmap = []
        pos = 0            # length of the view built so far (view coordinates)
        src_stop = None    # where the last mapped segment ends (source coordinates)
        for r in sorted(bounds):
            if src_stop is not None and r.start <= src_stop:
                # Overlaps or abuts the previous segment: grow that
                # segment by only the part not already covered. The test
                # must be made in source coordinates; the view position
                # is a different coordinate space.
                extra = r.stop - src_stop
                if extra > 0:
                    pos += extra
                    self._mmap[-1].bounds.stop = pos
                    src_stop = r.stop
                continue
            if r.stop <= r.start:
                # An empty range contributes nothing to the view.
                continue
            opos = pos
            pos += r.stop - r.start
            self._mmap.append(Mapping(Bounds(opos, pos), r.start))
            src_stop = r.stop
        self._length = pos

    def _mapped(self, index):
        """
        Translate a view index to a source index; IndexError if the index
        is not inside the view.
        """
        mmap_index = self._binsch(index)
        if mmap_index is None:
            raise IndexError("index {0} out of range".format(index))
        return index + self._mmap[mmap_index].delta

    def _binsch(self, index):
        """
        Binary search for the segment whose view bounds contain index.
        Returns its position in self._mmap, or None if there is none.
        """
        a = 0
        z = len(self._mmap) - 1
        while a <= z:
            m = (a + z) // 2
            if index in self._mmap[m].bounds:
                return m
            if index < self._mmap[m].bounds.start:
                z = m - 1
            else:
                # Not inside and not before, so index is at or past stop.
                a = m + 1
        return None

    def __setitem__(self, key, value):
        """
        Direct access to replace a single character. Raises TypeError for
        a non-integer key and IndexError for one outside the view.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        self.indexable[self._mapped(key)] = value

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters.
        A slice whose start lies outside the view reads as ""; a stop
        past the end is clipped. Omitted slice ends default to the ends
        of the view. Steps are not supported.
        """
        # Trivial cases
        if isinstance(key, int):
            return self._get1(key)
        if not isinstance(key, slice):
            raise TypeError("expecting int or slice")
        if key.step is not None:
            raise ValueError("__getitem__ does not support steps in slices")

        # Fill in omitted ends so that view[:] and friends work.
        start = 0 if key.start is None else key.start
        stop = len(self) if key.stop is None else key.stop
        if stop <= start:
            return ""

        # Look up the starting segment.
        mi = self._binsch(start)
        if mi is None:
            return ""
        m = self._mmap[mi]

        # Hooray! There's only one segment, so we can optimize.
        if stop <= m.bounds.stop:
            return self.indexable[start + m.delta:stop + m.delta]

        # The most involved (multi-segment) case.
        with io.StringIO() as buf:
            for m in self._mmap[mi:]:
                if m.bounds.start >= stop:
                    break
                lo = max(start, m.bounds.start) + m.delta
                hi = min(stop, m.bounds.stop) + m.delta
                buf.write(self.indexable[lo:hi])
            return buf.getvalue()

    def __len__(self):
        """
        Total length of all segments in the view.
        """
        return self._length

    def _get1(self, index):
        """
        Read one character; "" if the index is out of range.
        """
        try:
            return self.indexable[self._mapped(index)]
        except IndexError:
            return ""

    def getvalue(self):
        """
        The whole view as a single string.
        """
        return self[0:len(self)]