Mercurial > cgi-bin > hgweb.cgi > curlyq
diff workspace.py @ 3:091c03f1b2e8
Getting it working...
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 19:54:45 -0800 |
parents | 8884b0bf779d |
children | 7a83e82e65a6 |
line wrap: on
line diff
--- a/workspace.py Thu Dec 26 13:18:53 2019 -0800
+++ b/workspace.py Thu Dec 26 19:54:45 2019 -0800
@@ -1,18 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# A class that implements a workspace for curly-quoting a text. This is enough
-# like a string that it can be accessed via subscripts and ranges, and enough
-# like a TextIOBase object that it can be written to much like a stream.
-# (However, a Workspace is neither a string nor a TextIOBase object.)
-#
-# The advantage of using UTF-16 (as we do here) is that all quotation marks
-# of interest are represented in a single 16-bit value, so changing straight
-# quotes to curly ones can be accomplished most easily.
-#
-# It was a deliberate design decision to return empty strings when reading
-# out-of-range indices but to throw exceptions when attempting to write
-# them, because both decisions made coding easier in other modules.
+# Classes that implement a workspace for curly-quoting a text, and views
+# into the same.
 
 # I m p o r t s
 
@@ -24,7 +14,23 @@
 
 # C l a s s e s
 
+# Our workspace class. This is enough like a string that it can be
+# accessed via subscripts and ranges, and enough like a TextIOBase object
+# that it can be written to much like a stream. (However, a Workspace is
+# neither a string nor a TextIOBase object.)
+#
+# The advantage of using UTF-16 (as we do here) is that all quotation
+# marks of interest are represented in a single 16-bit value, so changing
+# straight quotes to curly ones can be accomplished most easily.
+#
+# It was a deliberate design decision to return empty strings when reading
+# out-of-range indices but to throw exceptions when attempting to write
+# them, because both decisions made coding easier in other modules.
 class Workspace(object):
+    """
+    A workspace for text-processing; a mutable hybrid of a string and an
+    in-memory file.
+    """
     # The most efficient 16-bit one on this platform
     encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
     codec = codecs.lookup(encoding)
@@ -35,7 +41,6 @@
         """
         Constructor.
         """
-        self._length = 0
         if initial_data is not None:
             data = initial_data.encode(self.encoding, self.errors)
             self._fp = io.BytesIO(data)
@@ -82,6 +87,23 @@
         """
         self._fp.write(string.encode(self.encoding, self.errors))
 
+    def truncate(self, size=None):
+        """
+        Truncate.
+        XXX - can create a runt surrogate pair
+        """
+        if size is None:
+            self._fp.truncate(None)
+        else:
+            self._fp.truncate(2 * size)
+
+    def clear(self):
+        """
+        Clear this object's contents.
+        """
+        self.truncate(0)
+        self.seek(0, os.SEEK_SET)
+
     def __len__(self):
         """
         Length in characters.
@@ -162,6 +184,9 @@
         return False
 
 class Bounds(object):
+    """
+    A set of index bounds.
+    """
     def __init__(self, start, stop):
         if start > stop or start < 0 or stop < 0:
             raise ValueError("invalid bounds")
@@ -199,6 +224,10 @@
         return "{0}({1!r}, {2!r})".format(self.__class__.__name__, self.start, self.stop)
 
 class Mapping(object):
+    """
+    Represents a mapping of a single view segment into an indexable
+    object.
+    """
     def __init__(self, bounds, offset):
         if not isinstance(bounds, Bounds):
             raise TypeError("bounds must be a Bounds object")
@@ -216,7 +245,8 @@
     Implements a view on a subscriptable object. The view is composed of zero
     or more segments of the source object. Has the same idiosyncratic behavior
     for out-of-bounds indices that Workspace has (and for the
-    same reason).
+    same reason). Mutating this object causes the parent object to also
+    be mutated.
     """
     def __init__(self, indexable, bounds):
         self.indexable = indexable
@@ -254,12 +284,17 @@
         return None
 
     def __setitem__(self, key, value):
+        """
+        Direct access to replace a single character.
+        """
         if not isinstance(key, int):
             raise TypeError("__setitem__ only supports integers")
         self.indexable[self._mapped(key)] = value
 
-    # XXX - this is sorta brute-forced and could be more efficient
     def __getitem__(self, key):
+        """
+        Direct access to a single character or range of characters.
+        """
         # Trivial cases
         if isinstance(key, int):
             return self._get1(key)