diff workspace.py @ 3:091c03f1b2e8

Getting it working...
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 19:54:45 -0800
parents 8884b0bf779d
children 7a83e82e65a6
line wrap: on
line diff
--- a/workspace.py	Thu Dec 26 13:18:53 2019 -0800
+++ b/workspace.py	Thu Dec 26 19:54:45 2019 -0800
@@ -1,18 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# A class that implements a workspace for curly-quoting a text. This is enough
-# like a string that it can be accessed via subscripts and ranges, and enough
-# like a TextIOBase object that it can be written to much like a stream.
-# (However, a Workspace is neither a string nor a TextIOBase object.)
-#
-# The advantage of using UTF-16 (as we do here) is that all quotation marks
-# of interest are represented in a single 16-bit value, so changing straight
-# quotes to curly ones can be accomplished most easily.
-#
-# It was a deliberate design decision to return empty strings when reading
-# out-of-range indices but to throw exceptions when attempting to write
-# them, because both decisions made coding easier in other modules.
+# Classes that implement a workspace for curly-quoting a text, and views
+# into the same.
 
 # I m p o r t s
 
@@ -24,7 +14,23 @@
 
 # C l a s s e s
 
+# Our workspace class. This is enough like a string that it can be
+# accessed via subscripts and ranges, and enough like a TextIOBase object
+# that it can be written to much like a stream. (However, a Workspace is
+# neither a string nor a TextIOBase object.)
+#
+# The advantage of using UTF-16 (as we do here) is that all quotation
+# marks of interest are represented in a single 16-bit value, so changing
+# straight quotes to curly ones can be accomplished most easily.
+#
+# It was a deliberate design decision to return empty strings when reading
+# out-of-range indices but to throw exceptions when attempting to write
+# them, because both decisions made coding easier in other modules.
 class Workspace(object):
+    """
+    A workspace for text-processing; a mutable hybrid of a string and an
+    in-memory file.
+    """
     # The most efficient 16-bit one on this platform
     encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
     codec = codecs.lookup(encoding)
@@ -35,7 +41,6 @@
         """
         Constructor.
         """
-        self._length = 0
         if initial_data is not None:
             data = initial_data.encode(self.encoding, self.errors)
             self._fp = io.BytesIO(data)
@@ -82,6 +87,23 @@
         """
         self._fp.write(string.encode(self.encoding, self.errors))
 
+    def truncate(self, size=None):
+        """
+        Truncate to size 16-bit code units; None is passed to _fp unchanged.
+        XXX - can split a surrogate pair, leaving a runt (lone) surrogate
+        """
+        if size is None:
+            self._fp.truncate(None)
+        else:
+            self._fp.truncate(2 * size)
+
+    def clear(self):
+        """
+        Clear this object's contents and seek back to offset 0.
+        """
+        self.truncate(0)
+        self.seek(0, os.SEEK_SET)
+
     def __len__(self):
         """
         Length in characters.
@@ -162,6 +184,9 @@
         return False
 
 class Bounds(object):
+    """
+    A set of index bounds.
+    """
     def __init__(self, start, stop):
         if start > stop or start < 0 or stop < 0:
             raise ValueError("invalid bounds")
@@ -199,6 +224,10 @@
         return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
 
 class Mapping(object):
+    """
+    Represents a mapping of a single view segment into an indexable
+    object.
+    """
     def __init__(self, bounds, offset):
         if not isinstance(bounds, Bounds):
             raise TypeError("bounds must be a Bounds object")
@@ -216,7 +245,8 @@
     Implements a view on a subscriptable object. The view is composed of
     zero or more segments of the source object. Has the same idiosyncratic
     behavior for out-of-bounds indices that Workspace has (and for the
-    same reason).
+    same reason). Mutating this object causes the parent object to also
+    be mutated.
     """
     def __init__(self, indexable, bounds):
         self.indexable = indexable
@@ -254,12 +284,17 @@
         return None
 
     def __setitem__(self, key, value):
+        """
+        Direct access to replace a single character; non-int keys raise TypeError.
+        """
         if not isinstance(key, int):
             raise TypeError("__setitem__ only supports integers")
         self.indexable[self._mapped(key)] = value
 
-    # XXX - this is sorta brute-forced and could be more efficient
     def __getitem__(self, key):
+        """
+        Direct access to a single character or range of characters.
+        """
         # Trivial cases
         if isinstance(key, int):
             return self._get1(key)