Mercurial > cgi-bin > hgweb.cgi > curlyq
view workspace.py @ 0:984876b6a095
Initial commit of first two classes.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 08:09:11 -0800 |
parents | |
children | 173e86601dbc |
line wrap: on
line source
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# A class that implements a workspace for curly-quoting a text. This is enough
# like a string that it can be accessed via subscripts and ranges, and enough
# like a TextIOBase object that it can be written to much like a stream.
# (However, a Workspace is neither a string nor a TextIOBase object.)
#
# The advantage of using UTF-16 (as we do here) is that all quotation marks
# of interest are represented in a single 16-bit value, so changing straight
# quotes to curly ones can be accomplished most easily.
#
# It was a deliberate design decision to return empty strings when reading
# out-of-range indices but to throw exceptions when attempting to write
# them, because both decisions made coding easier in other modules.

# I m p o r t s

import os
import sys
import io
import codecs

# V a r i a b l e s

# C l a s s e s

class Workspace(object):
    """
    A mutable text buffer stored internally as native-endian UTF-16.

    Characters are addressed as 16-bit code units: index N lives at byte
    offsets 2*N and 2*N+1 of the underlying io.BytesIO. Reading an
    out-of-range index yields an empty string; writing one raises.
    """

    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor.

        initial_data, if given, is a string that becomes the initial
        contents. The stream position starts at 0 either way.
        """
        self._length = 0
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
            # Cached length is stale until __len__ measures the buffer.
            self._dirty = True
        else:
            self._fp = io.BytesIO()
            self._dirty = False

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Seeks within the underlying buffer and returns the new position.

        NOTE: offset and the return value are *byte* offsets into the
        UTF-16 buffer (two bytes per code unit), not character indices;
        the call is passed straight through to io.BytesIO.seek.
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns current position, as a byte offset into the buffer.
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read up to nchars characters (16-bit code units) from the current
        position; None or a negative count reads to the end.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            nchars *= 2  # two bytes per UTF-16 code unit
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position.
        """
        self._fp.write(string.encode(self.encoding, self.errors))
        # The buffer may have grown, so the cached length can no longer
        # be trusted; __len__ will re-measure on next use.
        self._dirty = True

    def __len__(self):
        """
        Length as a string (number of 16-bit code units).
        """
        if self._dirty:
            # Measure by seeking to the end, then restore the position.
            back = self.tell()
            self._length = self.seek(0, io.SEEK_END) // 2
            self.seek(back)
            self._dirty = False
        return self._length

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range reads as "".
        XXX - might return replacement chars from surrogate fragments.

        Raises ValueError for a slice with a step, TypeError for a key
        that is neither an integer nor a slice.
        """
        if isinstance(key, int):
            if key < 0 or key >= len(self):
                return ""
            k2 = 2 * key
            byte_range = slice(k2, k2 + 2)
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both ends into [0, length]. Clamping start to length
            # (not length - 1) makes a slice lying wholly past the end
            # come back empty instead of yielding the last character.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            byte_range = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        # Release the views explicitly: while a view from getbuffer() is
        # alive the BytesIO can be neither resized nor closed.
        with self._fp.getbuffer() as view, view[byte_range] as window:
            return self.codec.decode(window, "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time; only
        the first character of value is used, and an empty value is a
        no-op.
        XXX - only works on characters in the BMP.

        Raises TypeError for a non-integer key, IndexError for an index
        out of range, ValueError for a character outside the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if key < 0 or key >= len(self):
            raise IndexError("index {0} out of range".format(key))
        if not value:
            return
        start = key * 2
        end = start + 2
        encoded = value[0].encode(self.encoding, self.errors)
        # A non-BMP character encodes as a 4-byte surrogate pair, which
        # cannot replace a single 2-byte code unit in place.
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        with self._fp.getbuffer() as view:
            view[start:end] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # __init__ may have raised before _fp was assigned; in that case
        # there is nothing to close.
        if getattr(self, "_fp", None) is not None:
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        with self._fp.getbuffer() as view:
            return self.codec.decode(view, self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit.
        """
        self.close()
        return False