# HG changeset patch # User David Barts # Date 1577474798 28800 # Node ID 1f5e471101b0049d95e7954420c025d92d048f23 # Parent 397c178c5b98ecf3e8a68ef32bcd84ad1088fd9a Deadwood; remove it. diff -r 397c178c5b98 -r 1f5e471101b0 workspace.py --- a/workspace.py Fri Dec 27 11:26:00 2019 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# I m p o r t s - -import os, sys -import io -import codecs - -# V a r i a b l e s - -# C l a s s e s - -# Our workspace class. This is enough like a string that it can be -# accessed via subscripts and ranges, and enough like a TextIOBase object -# that it can be written to much like a stream. (However, a Workspace is -# neither a string nor a TextIOBase object.) -# -# The advantage of using UTF-16 (as we do here) is that all quotation -# marks of interest are represented in a single 16-bit value, so changing -# straight quotes to curly ones can be accomplished most easily. -# -# It was a deliberate design decision to return empty strings when reading -# out-of-range indices but to throw exceptions when attempting to write -# them, because both decisions made coding easier in other modules. -class Workspace(object): - """ - A workspace for text-processing; a mutable hybrid of a string and an - in-memory file. - """ - # The most efficient 16-bit one on this platform - encoding = "UTF-16" + sys.byteorder[0].upper() + "E" - codec = codecs.lookup(encoding) - # Errors should never happen; UTF-16 can represent all Unicode characters - errors = 'strict' - - def __init__(self, initial_data=None): - """ - Constructor. - """ - if initial_data is not None: - data = initial_data.encode(self.encoding, self.errors) - self._fp = io.BytesIO(data) - else: - self._fp = io.BytesIO() - - def close(self): - """ - Causes our buffer to be discarded and this workspace to become - unusable. - """ - self._fp.close() - - def flush(self): - """ - Does nothing, but allowed. - """ - pass - - def seek(self, offset, whence=io.SEEK_SET): - """ - Seeks to an absolute position. - """ - return self._fp.seek(offset, whence) - - def tell(self): - """ - Returns current position. - """ - return self._fp.tell() - - def read(self, nchars=None): - """ - Read characters. - XXX - might return replacement chars from surrogate fragments. - """ - if nchars is not None and nchars >= 0: - nchars *= 2 - return self._fp.read(nchars).decode(self.encoding, "replace") - - def write(self, string): - """ - Write characters. - """ - self._fp.write(string.encode(self.encoding, self.errors)) - - def truncate(self, size=None): - """ - Truncate. - XXX - can create a runt surrogate pair - """ - if size is None: - self._fp.truncate(None) - else: - self._fp.truncate(2 * size) - - def clear(self): - """ - Clear this object's contents. - """ - self.truncate(0) - self.seek(0, os.SEEK_SET) - - def __len__(self): - """ - Length in characters. - """ - return len(self._fp.getbuffer()) // 2 - - def _mapped(self, index): - if index < 0 or index >= len(self): - raise IndexError("index {0} out of range".format(index)) - i2 = index * 2 - return slice(i2, i2 + 2) - - def __getitem__(self, key): - """ - Direct access to a single character or range of characters. We do - not support negative indices. Return value is based on what's most - useful for curling quotes. - XXX - might return replacement chars from surrogate fragments. - """ - if isinstance(key, int): - try: - key = self._mapped(key) - except IndexError: - return "" - elif isinstance(key, slice): - if key.step is not None: - raise ValueError("__getitem__ does not support steps in slices") - length = len(self) - start = 0 if key.start is None else key.start - stop = length if key.stop is None else key.stop - start = max(0, min(length - 1, start)) - stop = max(0, min(length, stop)) - if stop <= start: - return "" - key = slice(start * 2, stop * 2) - else: - raise TypeError("__getitem__ only supports integers and slices") - return self.codec.decode(self._fp.getbuffer()[key], "replace")[0] - - def __setitem__(self, key, value): - """ - Direct access to a single character. We do not support negative - indices or replacing more than a single character at a time. - XXX - only works on characters in the BMP. - """ - if not isinstance(key, int): - raise TypeError("__setitem__ only supports integers") - if not value: - return - encoded = value[0].encode(self.encoding, self.errors) - if len(encoded) != 2: - raise ValueError("{0!r} not in BMP".format(value[0])) - self._fp.getbuffer()[self._mapped(key)] = encoded - - def __del__(self): - """ - Equivalent to .close(). - """ - self.close() - - def getvalue(self): - """ - Gets the string represented by this workspace. - """ - return self.codec.decode(self._fp.getbuffer(), self.errors)[0] - - def __enter__(self): - """ - Context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Context manager: close on exit. - """ - self.close() - return False