Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate workspace.py @ 2:8884b0bf779d
Improve the efficiency.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 13:18:53 -0800 |
parents | 173e86601dbc |
children | 091c03f1b2e8 |
rev | line source |
---|---|
0
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
3 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
4 # A class that implements a workspace for curly-quoting a text. This is enough |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
5 # like a string that it can be accessed via subscripts and ranges, and enough |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
6 # like a TextIOBase object that it can be written to much like a stream. |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
7 # (However, a Workspace is neither a string nor a TextIOBase object.) |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
8 # |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
9 # The advantage of using UTF-16 (as we do here) is that all quotation marks |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
10 # of interest are represented in a single 16-bit value, so changing straight |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
11 # quotes to curly ones can be accomplished most easily. |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
12 # |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
13 # It was a deliberate design decision to return empty strings when reading |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
14 # out-of-range indices but to throw exceptions when attempting to write |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
15 # them, because both decisions made coding easier in other modules. |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
16 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
17 # I m p o r t s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
18 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
19 import os, sys |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
20 import io |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
21 import codecs |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
22 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
23 # V a r i a b l e s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
24 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
25 # C l a s s e s |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
26 |
984876b6a095
Initial commit of first two classes.
David Barts <n5jrn@me.com>
parents:
diff
changeset
|
class Workspace(object):
    """
    A mutable text buffer held as native-endian UTF-16 in an io.BytesIO.

    Can be subscripted somewhat like a string and written to somewhat like
    a stream. Subscripts and lengths count 16-bit code units (two bytes
    each), so a character outside the BMP occupies two positions. Reading
    an out-of-range index or slice yields an empty string; writing to an
    out-of-range index raises IndexError.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor. If initial_data (a string) is supplied, the buffer
        starts out holding its encoded form; otherwise it starts empty.
        """
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
        else:
            self._fp = io.BytesIO()

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Seeks within the underlying buffer. The arguments are handed
        unchanged to the byte buffer, so offset is measured in bytes (two
        per code unit), not in characters. Returns the new position.
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns the current position of the underlying byte buffer (a byte
        offset, not a character offset).
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read up to nchars characters from the current position; None or a
        negative count reads everything that remains.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            # Two bytes per 16-bit code unit.
            nchars *= 2
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position.
        """
        self._fp.write(string.encode(self.encoding, self.errors))

    def __len__(self):
        """
        Length in characters (16-bit code units).
        """
        return len(self._fp.getbuffer()) // 2

    def _mapped(self, index):
        """
        Translate a character index into the slice of the byte buffer
        that holds it. Raises IndexError if index is negative or past the
        end.
        """
        if index < 0 or index >= len(self):
            raise IndexError("index {0} out of range".format(index))
        i2 = index * 2
        return slice(i2, i2 + 2)

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range reads as an empty
        string instead of raising.
        XXX - might return replacement chars from surrogate fragments.
        """
        if isinstance(key, int):
            try:
                key = self._mapped(key)
            except IndexError:
                return ""
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both ends to [0, length]. Clamping start to length
            # (not length - 1) is what makes a slice that begins at or
            # past the end come back empty rather than as the last
            # character.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            key = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time; only
        the first character of value is stored, and an empty value is
        silently ignored. Raises TypeError for a non-integer key,
        ValueError for a character that does not fit in one code unit,
        and IndexError for an out-of-range key.
        XXX - only works on characters in the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if not value:
            return
        encoded = value[0].encode(self.encoding, self.errors)
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        self._fp.getbuffer()[self._mapped(key)] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # If the constructor raised before creating the buffer there is
        # nothing to close; don't turn that into a second error here.
        if hasattr(self, "_fp"):
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        return self.codec.decode(self._fp.getbuffer(), self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit. Exceptions are not suppressed.
        """
        self.close()
        return False
1 | 163 |
class Bounds(object):
    """
    A half-open, non-negative integer range [start, stop).

    Ordering and equality consider only the start value, which is what
    allows a collection of Bounds to be sorted by starting position.
    """
    def __init__(self, start, stop):
        """
        Raises ValueError if either end is negative or start exceeds stop.
        Both ends are stored as ints.
        """
        if start > stop or start < 0 or stop < 0:
            raise ValueError("invalid bounds")
        self.start = int(start)
        self.stop = int(stop)

    @classmethod
    def from_object(cls, obj):
        """
        Alternate constructor: build a Bounds from a slice (using its
        start and stop) or from any object whose first two items are the
        start and stop.
        """
        if isinstance(obj, slice):
            return cls(obj.start, obj.stop)
        return cls(obj[0], obj[1])

    def __lt__(self, other):
        return self.start < other.start

    def __le__(self, other):
        return self.start <= other.start

    def __eq__(self, other):
        return self.start == other.start

    def __ne__(self, other):
        return self.start != other.start

    def __gt__(self, other):
        return self.start > other.start

    def __ge__(self, other):
        return self.start >= other.start

    def __contains__(self, scalar):
        """
        True if scalar lies in [start, stop).
        """
        return self.start <= scalar < self.stop

    def __repr__(self):
        return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
200 | |
class Mapping(object):
    """
    Pairs a Bounds object with an integer offset. The precomputed delta
    (offset minus bounds.start) is the amount to add to an index inside
    bounds to translate it to the position that offset designates.
    """
    def __init__(self, bounds, offset):
        """
        Raises TypeError unless bounds is a Bounds and offset is an int.
        """
        bounds_ok = isinstance(bounds, Bounds)
        if not bounds_ok:
            raise TypeError("bounds must be a Bounds object")
        offset_ok = isinstance(offset, int)
        if not offset_ok:
            raise TypeError("offset must be an int")
        self.bounds, self.offset = bounds, offset
        # Cached once so index translation is a single addition.
        self.delta = self.offset - self.bounds.start

    def __repr__(self):
        class_name = self.__class__.__name__
        return "{0}({1!r}, {2!r})".format(class_name, self.bounds, self.offset)
213 | |
class SegmentedView(object):
    """
    Implements a view on a subscriptable object. The view is composed of
    zero or more segments of the source object. Has the same idiosyncratic
    behavior for out-of-bounds indices that Workspace has (and for the
    same reason): reads yield an empty string, writes raise IndexError.
    """
    def __init__(self, indexable, bounds):
        """
        indexable is the source object; bounds is an iterable of sortable
        ranges (objects with start and stop attributes) given in source
        coordinates. Ranges that overlap or touch are merged, so no source
        position appears in the view more than once.
        """
        self.indexable = indexable
        self._mmap = []
        pos = 0           # next unused position in view coordinates
        src_stop = None   # where the last segment ends in source coordinates
        for r in sorted(bounds):
            if r.stop <= r.start:
                # An empty range contributes nothing to the view.
                continue
            if src_stop is not None and r.start <= src_stop:
                # Overlaps or abuts the previous segment: extend that
                # segment by only the part of r not already covered.
                if r.stop > src_stop:
                    pos += r.stop - src_stop
                    self._mmap[-1].bounds.stop = pos
                    src_stop = r.stop
                continue
            opos = pos
            pos += r.stop - r.start
            self._mmap.append(Mapping(Bounds(opos, pos), r.start))
            src_stop = r.stop
        self._length = pos

    def _mapped(self, index):
        """
        Translate a view index to a source index. Raises IndexError if
        the index falls in no segment.
        """
        mmap_index = self._binsch(index)
        if mmap_index is None:
            raise IndexError("index {0} out of range".format(index))
        return index + self._mmap[mmap_index].delta

    def _binsch(self, index):
        """
        Binary search for the segment containing a view index. Returns
        that segment's position in the segment list, or None if there is
        no such segment.
        """
        a = 0
        z = len(self._mmap) - 1
        while a <= z:
            m = (a + z) // 2
            if index in self._mmap[m].bounds:
                return m
            if index < self._mmap[m].bounds.start:
                z = m - 1
            else:
                assert index >= self._mmap[m].bounds.stop
                a = m + 1
        return None

    def __setitem__(self, key, value):
        """
        Store value at a single view index. Raises TypeError for a
        non-integer key and IndexError for an out-of-range one.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        self.indexable[self._mapped(key)] = value

    # XXX - this is sorta brute-forced and could be more efficient
    def __getitem__(self, key):
        """
        Read a single index or a step-less slice of the view. Omitted
        slice ends default to the start and end of the view. A slice
        whose start lies outside the view reads as an empty string.
        """
        # Trivial cases
        if isinstance(key, int):
            return self._get1(key)
        if not isinstance(key, slice):
            raise TypeError("expecting int or slice")
        if key.step is not None:
            raise ValueError("__getitem__ does not support steps in slices")

        # Open-ended slices span to the corresponding end of the view.
        start = 0 if key.start is None else key.start
        stop = self._length if key.stop is None else key.stop
        if stop <= start:
            return ""

        # Look up the starting segment.
        mi = self._binsch(start)
        if mi is None:
            return ""
        m = self._mmap[mi]

        # Hooray! There's only one segment, so we can optimize.
        if stop <= m.bounds.stop:
            return self.indexable[start + m.delta:stop + m.delta]

        # The most involved (multi-segment) case.
        pieces = []
        for m in self._mmap[mi:]:
            if m.bounds.start >= stop:
                break
            lo = max(start, m.bounds.start) + m.delta
            hi = min(stop, m.bounds.stop) + m.delta
            pieces.append(self.indexable[lo:hi])
        return "".join(pieces)

    def __len__(self):
        """
        Total length of all segments.
        """
        return self._length

    def _get1(self, index):
        """
        Read one view index, returning "" if it is out of range.
        """
        try:
            return self.indexable[self._mapped(index)]
        except IndexError:
            return ""

    def getvalue(self):
        """
        Returns the entire content of the view.
        """
        return self[0:len(self)]