# HG changeset patch
# User David Barts
# Date 1577390403 28800
# Node ID 173e86601dbc65656c23ff9dfe0b84991bf18c3e
# Parent  984876b6a095ce3135c3d6495b513ba91cb47b4e
Add views.

diff -r 984876b6a095 -r 173e86601dbc workspace.py
--- a/workspace.py	Thu Dec 26 08:09:11 2019 -0800
+++ b/workspace.py	Thu Dec 26 12:00:03 2019 -0800
@@ -39,10 +39,8 @@
         if initial_data is not None:
             data = initial_data.encode(self.encoding, self.errors)
             self._fp = io.BytesIO(data)
-            self._dirty = True
         else:
             self._fp = io.BytesIO()
-            self._dirty = False
 
     def close(self):
         """
@@ -86,14 +84,15 @@
 
     def __len__(self):
         """
-        Length as a string.
+        Length in characters.
         """
-        if self._dirty:
-            back = self.tell()
-            self._length = self.seek(0, io.SEEK_END) // 2
-            self.seek(back)
-            self._dirty = False
-        return self._length
+        return len(self._fp.getbuffer()) // 2
+
+    def _mapped(self, index):
+        if index < 0 or index >= len(self):
+            raise IndexError("index {0} out of range".format(index))
+        i2 = index * 2
+        return slice(i2, i2 + 2)
 
     def __getitem__(self, key):
         """
@@ -103,10 +102,10 @@
         XXX - might return replacement chars from surrogate fragments.
         """
         if isinstance(key, int):
-            if key < 0 or key >= len(self):
+            try:
+                key = self._mapped(key)
+            except IndexError:
                 return ""
-            k2 = 2 * key
-            key = slice(k2, k2 + 2)
         elif isinstance(key, slice):
             if key.step is not None:
                 raise ValueError("__getitem__ does not support steps in slices")
@@ -119,7 +118,7 @@
                 return ""
             key = slice(start * 2, stop * 2)
         else:
-            raise TypeError("__setitem__ only supports integers and slices")
+            raise TypeError("__getitem__ only supports integers and slices")
         return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]
 
     def __setitem__(self, key, value):
@@ -130,16 +129,12 @@
         """
         if not isinstance(key, int):
             raise TypeError("__setitem__ only supports integers")
-        if key < 0 or key >= len(self):
-            raise IndexError("index {0} out of range".format(key))
         if not value:
             return
-        start = key * 2
-        end = start + 2
         encoded = value[0].encode(self.encoding, self.errors)
         if len(encoded) != 2:
             raise ValueError("{0!r} not in BMP".format(value[0]))
-        self._fp.getbuffer()[start:end] = encoded
+        self._fp.getbuffer()[self._mapped(key)] = encoded
 
     def __del__(self):
         """
@@ -165,3 +160,156 @@
         """
         self.close()
         return False
+
+class Bounds(object):
+    """
+    A half-open range [start, stop) of non-negative positions.  All
+    comparisons look only at the start position, so a sequence of
+    Bounds sorts by where each range begins.
+    """
+    def __init__(self, start, stop):
+        if start > stop or start < 0 or stop < 0:
+            raise ValueError("invalid bounds")
+        self.start = int(start)
+        self.stop = int(stop)
+
+    @classmethod
+    def from_object(cls, obj):
+        """
+        Alternate constructor. Accepts a slice (its start and stop are
+        used) or any object whose items 0 and 1 are the start and stop.
+        """
+        if isinstance(obj, slice):
+            return cls(obj.start, obj.stop)
+        return cls(obj[0], obj[1])
+
+    def __lt__(self, other):
+        return self.start < other.start
+
+    def __le__(self, other):
+        return self.start <= other.start
+
+    def __eq__(self, other):
+        return self.start == other.start
+
+    def __ne__(self, other):
+        return self.start != other.start
+
+    def __gt__(self, other):
+        return self.start > other.start
+
+    def __ge__(self, other):
+        return self.start >= other.start
+
+    def __contains__(self, scalar):
+        return self.start <= scalar < self.stop
+
+    def __repr__(self):
+        return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
+
+class Mapping(object):
+    """
+    Pairs a range of view positions (bounds) with the position in the
+    source object (offset) that the start of that range maps to.
+    """
+    def __init__(self, bounds, offset):
+        if not isinstance(bounds, Bounds):
+            raise TypeError("bounds must be a Bounds object")
+        if not isinstance(offset, int):
+            raise TypeError("offset must be an int")
+        self.bounds = bounds
+        self.offset = offset
+
+    def __repr__(self):
+        return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.bounds, self.offset)
+
+class SegmentedView(object):
+    """
+    Implements a view on a subscriptable object. The view is composed of
+    zero or more segments of the source object. Has the same idiosyncratic
+    behavior for out-of-bounds indices that Workspace has (and for the
+    same reason).
+    """
+    def __init__(self, indexable, bounds):
+        """
+        indexable is the source object and bounds an iterable of Bounds
+        giving the source ranges to expose. Ranges are taken in order of
+        their start; ranges that overlap or abut are merged, so no source
+        position appears in the view twice.
+        """
+        self.indexable = indexable
+        # Entry 0 is an empty sentinel; it never matches an index.
+        self._mmap = [ Mapping(Bounds(0, 0), 0) ]
+        pos = 0          # next free position in the view
+        src_end = None   # where the last mapped source range stops
+        for r in sorted(bounds):
+            if src_end is not None and r.start <= src_end:
+                # merge ranges: grow the last mapping by whatever part
+                # of r lies beyond the source range it already covers
+                if r.stop > src_end:
+                    pos += r.stop - src_end
+                    self._mmap[-1].bounds.stop = pos
+                    src_end = r.stop
+                continue
+            opos = pos
+            pos += r.stop - r.start
+            self._mmap.append(Mapping(Bounds(opos, pos), r.start))
+            src_end = r.stop
+        self._length = pos
+
+    def _mapped(self, index):
+        """
+        Translate a view index to a source index; IndexError if the
+        index lies outside every segment.
+        """
+        mi = self._binsch(index)
+        m = None if mi is None else self._mmap[mi]
+        if m is None:
+            raise IndexError("index {0} out of range".format(index))
+        return index - m.bounds.start + m.offset
+
+    def _binsch(self, index):
+        """
+        Binary search for the mapping whose bounds contain index.
+        Returns its position in self._mmap, or None if there is none.
+        """
+        a = 0
+        z = len(self._mmap) - 1
+        while a <= z:
+            m = (a + z) // 2
+            if index in self._mmap[m].bounds:
+                return m
+            if index < self._mmap[m].bounds.start:
+                z = m - 1
+            else:
+                assert index >= self._mmap[m].bounds.stop
+                a = m + 1
+        return None
+
+    def __setitem__(self, key, value):
+        if not isinstance(key, int):
+            raise TypeError("__setitem__ only supports integers")
+        self.indexable[self._mapped(key)] = value
+
+    # XXX - this is sorta brute-forced and could be more efficient
+    def __getitem__(self, key):
+        if isinstance(key, int):
+            return self._get1(key)
+        if not isinstance(key, slice):
+            raise TypeError("expecting int or slice")
+        # An omitted start or stop means "from the beginning" / "to the end".
+        start = 0 if key.start is None else key.start
+        stop = len(self) if key.stop is None else key.stop
+        return "".join(self._get1(i) for i in range(start, stop, key.step or 1))
+
+    def __len__(self):
+        return self._length
+
+    def _get1(self, index):
+        try:
+            return self.indexable[self._mapped(index)]
+        except IndexError:
+            return ""
+
+    def getvalue(self):
+        return self[0:len(self)]