Mercurial > cgi-bin > hgweb.cgi > curlyq
diff workspace.py @ 0:984876b6a095
Initial commit of first two classes.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 08:09:11 -0800 |
parents | |
children | 173e86601dbc |
line wrap: on
line diff
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# A class that implements a workspace for curly-quoting a text. This is enough
# like a string that it can be accessed via subscripts and ranges, and enough
# like a TextIOBase object that it can be written to much like a stream.
# (However, a Workspace is neither a string nor a TextIOBase object.)
#
# The advantage of using UTF-16 (as we do here) is that all quotation marks
# of interest are represented in a single 16-bit value, so changing straight
# quotes to curly ones can be accomplished most easily.
#
# It was a deliberate design decision to return empty strings when reading
# out-of-range indices but to throw exceptions when attempting to write
# them, because both decisions made coding easier in other modules.

# I m p o r t s

import codecs
import io
import os
import sys

# V a r i a b l e s

# C l a s s e s

class Workspace(object):
    """
    A mutable text buffer stored as native-endian UTF-16 in a BytesIO.

    Subscripts, slices and len() count 16-bit code units (two bytes each),
    so a character outside the BMP occupies two positions. In contrast,
    seek() and tell() are passed straight through to the underlying
    BytesIO and therefore work in BYTE offsets (code-unit index * 2).
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor.

        initial_data, if given, is a string used as the initial contents.
        The stream position starts at 0 either way, so a write() issued
        right after construction overwrites from the beginning.
        """
        self._length = 0
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
            # Cached length is stale until __len__ measures the buffer.
            self._dirty = True
        else:
            self._fp = io.BytesIO()
            self._dirty = False

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Seeks within the underlying buffer and returns the new position.
        Note that offset is in bytes (two per 16-bit code unit), not in
        characters.
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns current position, in bytes (two per 16-bit code unit).
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read up to nchars 16-bit code units from the current position and
        return them as a string. If nchars is None or negative, read to
        the end of the buffer.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            nchars *= 2
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position, overwriting and/or
        extending the buffer.
        """
        self._fp.write(string.encode(self.encoding, self.errors))
        # The write may have grown the buffer, so the cached length can no
        # longer be trusted.
        self._dirty = True

    def __len__(self):
        """
        Length as a string, in 16-bit code units. The value is cached and
        only re-measured after the buffer has been modified.
        """
        if self._dirty:
            back = self.tell()
            self._length = self.seek(0, io.SEEK_END) // 2
            self.seek(back)
            self._dirty = False
        return self._length

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range (including
        negative integer indices) yields an empty string rather than an
        exception, and out-of-range slice bounds are clamped.
        Raises ValueError for a slice with a step and TypeError for keys
        that are neither integers nor slices.
        XXX - might return replacement chars from surrogate fragments.
        """
        if isinstance(key, int):
            if key < 0 or key >= len(self):
                return ""
            k2 = 2 * key
            span = slice(k2, k2 + 2)
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both bounds to [0, length]; a slice lying wholly past
            # the end must come out empty, not as the last character.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            span = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        # Release the buffer export promptly; an outstanding export would
        # make a later close() of the BytesIO fail.
        with self._fp.getbuffer() as view:
            return self.codec.decode(view[span], "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time; only
        the first character of value is used, and an empty value is
        silently ignored.
        Raises TypeError for non-integer keys, IndexError for out-of-range
        indices and ValueError if the character is not in the BMP.
        XXX - only works on characters in the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if key < 0 or key >= len(self):
            raise IndexError("index {0} out of range".format(key))
        if not value:
            return
        start = key * 2
        end = start + 2
        encoded = value[0].encode(self.encoding, self.errors)
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        # Same-size in-place replacement, so the length is unchanged.
        with self._fp.getbuffer() as view:
            view[start:end] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # __init__ may have failed before the buffer was created.
        if hasattr(self, "_fp"):
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        with self._fp.getbuffer() as view:
            return self.codec.decode(view, self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit. Exceptions are not suppressed.
        """
        self.close()
        return False