changeset 1:173e86601dbc

Add views.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 12:00:03 -0800
parents 984876b6a095
children 8884b0bf779d
files workspace.py
diffstat 1 files changed, 133 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/workspace.py	Thu Dec 26 08:09:11 2019 -0800
+++ b/workspace.py	Thu Dec 26 12:00:03 2019 -0800
@@ -39,10 +39,8 @@
         if initial_data is not None:
             data = initial_data.encode(self.encoding, self.errors)
             self._fp = io.BytesIO(data)
-            self._dirty = True
         else:
             self._fp = io.BytesIO()
-            self._dirty = False
 
     def close(self):
         """
@@ -86,14 +84,15 @@
 
     def __len__(self):
         """
-        Length as a string.
+        Length in characters (the buffer holds two bytes per character).
         """
-        if self._dirty:
-            back = self.tell()
-            self._length = self.seek(0, io.SEEK_END) // 2
-            self.seek(back)
-            self._dirty = False
-        return self._length
+        return len(self._fp.getbuffer()) // 2  # byte count of the buffer, halved
+
+    def _mapped(self, index):  # character index -> slice covering its two bytes
+        if not 0 <= index < len(self):
+            raise IndexError("index {0} out of range".format(index))
+        first = 2 * index  # every character occupies exactly two bytes
+        return slice(first, first + 2)
 
     def __getitem__(self, key):
         """
@@ -103,10 +102,10 @@
         XXX - might return replacement chars from surrogate fragments.
         """
         if isinstance(key, int):
-            if key < 0 or key >= len(self):
+            try:
+                key = self._mapped(key)
+            except IndexError:
                 return ""
-            k2 = 2 * key
-            key = slice(k2, k2 + 2)
         elif isinstance(key, slice):
             if key.step is not None:
                 raise ValueError("__getitem__ does not support steps in slices")
@@ -119,7 +118,7 @@
                 return ""
             key = slice(start * 2, stop * 2)
         else:
-            raise TypeError("__setitem__ only supports integers and slices")
+            raise TypeError("__getitem__ only supports integers and slices")
         return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]
 
     def __setitem__(self, key, value):
@@ -130,16 +129,12 @@
         """
         if not isinstance(key, int):
             raise TypeError("__setitem__ only supports integers")
-        if key < 0 or key >= len(self):
-            raise IndexError("index {0} out of range".format(key))
         if not value:
             return
-        start = key * 2
-        end = start + 2
         encoded = value[0].encode(self.encoding, self.errors)
         if len(encoded) != 2:
             raise ValueError("{0!r} not in BMP".format(value[0]))
-        self._fp.getbuffer()[start:end] = encoded
+        self._fp.getbuffer()[self._mapped(key)] = encoded
 
     def __del__(self):
         """
@@ -165,3 +160,123 @@
         """
         self.close()
         return False
+
+class Bounds(object):  # half-open range [start, stop) of non-negative positions
+    def __init__(self, start, stop):  # raises ValueError unless 0 <= start <= stop
+        if start > stop or start < 0 or stop < 0:
+            raise ValueError("invalid bounds")
+        self.start = int(start)
+        self.stop = int(stop)
+
+    @classmethod
+    def from_object(cls, obj):  # build from a slice or from a (start, stop) pair
+        if isinstance(obj, slice):
+            return cls(obj.start, obj.stop)
+        return cls(obj[0], obj[1])
+
+    def __lt__(self, other):  # all six comparisons look at start only
+        return self.start < other.start
+
+    def __le__(self, other):
+        return self.start <= other.start
+
+    def __eq__(self, other):  # NB: equality ignores stop
+        return self.start == other.start
+
+    def __ne__(self, other):
+        return self.start != other.start
+
+    def __gt__(self, other):
+        return self.start > other.start
+
+    def __ge__(self, other):
+        return self.start >= other.start
+
+    def __contains__(self, scalar):  # stop itself is excluded
+        return self.start <= scalar < self.stop
+
+    def __repr__(self):
+        return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
+
+class Mapping(object):  # a Bounds paired with an int offset
+    def __init__(self, bounds, offset):  # raises TypeError on wrong argument types
+        if not isinstance(bounds, Bounds):
+            raise TypeError("bounds must be a Bounds object")
+        if not isinstance(offset, int):
+            raise TypeError("offset must be an int")
+        self.bounds, self.offset = bounds, offset
+
+    def __repr__(self):  # class name followed by the repr of both fields
+        args = type(self).__name__, self.bounds, self.offset
+        return "{0}({1!r}, {2!r})".format(*args)
+
+class SegmentedView(object):
+    """
+    Implements a view on a subscriptable object. The view is composed of
+    zero or more segments of the source object; overlapping or adjacent
+    segments are merged. Out-of-bounds reads return "" rather than raising,
+    the same idiosyncratic behavior Workspace has (and for the same reason).
+    """
+    def __init__(self, indexable, bounds):  # bounds: iterable of Bounds giving source ranges
+        self.indexable = indexable
+        self._mmap = []  # Mappings ordered by view position; offset = source start
+        pos = 0  # view length so far
+        src_end = None  # source index just past the newest segment
+        for r in sorted(bounds):
+            if src_end is None or r.start > src_end:
+                # disjoint from what we have: open a new, still empty segment
+                self._mmap.append(Mapping(Bounds(pos, pos), r.start))
+                src_end = r.start
+            grow = max(0, r.stop - src_end)  # part of r not yet covered
+            self._mmap[-1].bounds.stop += grow
+            pos, src_end = pos + grow, src_end + grow
+        self._length = pos
+
+    def _mapped(self, index):  # view index -> source index, or IndexError
+        mi = self._binsch(index)
+        if mi is None:
+            raise IndexError("index {0} out of range".format(index))
+        m = self._mmap[mi]
+        return index - m.bounds.start + m.offset
+
+    def _binsch(self, index):  # binary search: position in _mmap, or None
+        lo, hi = 0, len(self._mmap) - 1
+        while lo <= hi:
+            mid = (lo + hi) // 2
+            seg = self._mmap[mid].bounds
+            if index in seg:
+                return mid
+            if index < seg.start:
+                hi = mid - 1
+            else:
+                assert index >= seg.stop
+                lo = mid + 1
+        return None
+
+    def __setitem__(self, key, value):  # int keys only; IndexError if out of range
+        if not isinstance(key, int):
+            raise TypeError("__setitem__ only supports integers")
+        self.indexable[self._mapped(key)] = value
+
+    # XXX - this is sorta brute-forced and could be more efficient
+    def __getitem__(self, key):
+        if isinstance(key, int):
+            return self._get1(key)
+        if not isinstance(key, slice):
+            raise TypeError("expecting int or slice")
+        step = key.step or 1  # omitted start/stop default as for built-in sequences
+        start = key.start if key.start is not None else (0 if step > 0 else len(self) - 1)
+        stop = key.stop if key.stop is not None else (len(self) if step > 0 else -1)
+        return "".join(self._get1(i) for i in range(start, stop, step))
+
+    def __len__(self):
+        return self._length
+
+    def _get1(self, index):  # one item, or "" when index is outside the view
+        try:
+            return self.indexable[self._mapped(index)]
+        except IndexError:
+            return ""
+
+    def getvalue(self):  # entire view contents as one string
+        return self[0:len(self)]