curlyq: workspace.py comparison

comparison workspace.py @ 0:984876b6a095

Initial commit of first two classes.

author	David Barts <n5jrn@me.com>
date	Thu, 26 Dec 2019 08:09:11 -0800
parents
children	173e86601dbc

comparison

equal deleted inserted replaced

--1:000000000000
+:984876b6a095
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# A class that implements a workspace for curly-quoting a text. This is enough
+# like a string that it can be accessed via subscripts and ranges, and enough
+# like a TextIOBase object that it can be written to much like a stream.
+# (However, a Workspace is neither a string nor a TextIOBase object.)
+#
+# The advantage of using UTF-16 (as we do here) is that all quotation marks
+# of interest are represented in a single 16-bit value, so changing straight
+# quotes to curly ones can be accomplished most easily.
+#
+# It was a deliberate design decision to return empty strings when reading
+# out-of-range indices but to throw exceptions when attempting to write
+# them, because both decisions made coding easier in other modules.
+# I m p o r t s
+import os, sys
+import io
+import codecs
+# V a r i a b l e s
+# C l a s s e s
class Workspace(object):
    """
    A mutable text buffer stored as 16-bit UTF-16 code units.

    The text lives in an io.BytesIO, two bytes per code unit, in the
    platform's native byte order. It can be indexed and sliced somewhat
    like a string, and read from or written to somewhat like a stream.

    Units differ between methods: seek() and tell() pass byte offsets
    straight through to the underlying buffer, while read(), len() and
    subscripting count 16-bit code units. Characters outside the BMP
    occupy two code units.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)
    # Errors should never happen; UTF-16 can represent all Unicode characters
    errors = 'strict'

    def __init__(self, initial_data=None):
        """
        Constructor.

        initial_data, if given, is a string used as the starting contents.
        The stream position starts at the beginning of the buffer.
        """
        # Cached length in code units; only valid while _dirty is False.
        self._length = 0
        if initial_data is not None:
            data = initial_data.encode(self.encoding, self.errors)
            self._fp = io.BytesIO(data)
            self._dirty = True
        else:
            self._fp = io.BytesIO()
            self._dirty = False

    def close(self):
        """
        Causes our buffer to be discarded and this workspace to become
        unusable.
        """
        self._fp.close()

    def flush(self):
        """
        Does nothing, but allowed.
        """
        pass

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Seeks within the underlying byte buffer and returns the new
        position. The offset is in bytes (two per 16-bit code unit), not
        characters.
        """
        return self._fp.seek(offset, whence)

    def tell(self):
        """
        Returns current position, as a byte offset into the buffer.
        """
        return self._fp.tell()

    def read(self, nchars=None):
        """
        Read up to nchars 16-bit code units from the current position and
        return them as a string. If nchars is None or negative, read to
        the end of the buffer.
        XXX - might return replacement chars from surrogate fragments.
        """
        if nchars is not None and nchars >= 0:
            nchars *= 2  # code units -> bytes
        return self._fp.read(nchars).decode(self.encoding, "replace")

    def write(self, string):
        """
        Write characters at the current position.
        """
        self._fp.write(string.encode(self.encoding, self.errors))
        # The buffer may have grown, so the cached length is now stale.
        self._dirty = True

    def __len__(self):
        """
        Length as a string, in 16-bit code units.
        """
        if self._dirty:
            # Measure by seeking to the end, then restore the position.
            back = self.tell()
            self._length = self.seek(0, io.SEEK_END) // 2
            self.seek(back)
            self._dirty = False
        return self._length

    def __getitem__(self, key):
        """
        Direct access to a single character or range of characters. We do
        not support negative indices. Return value is based on what's most
        useful for curling quotes: anything out of range reads as an empty
        string rather than raising.
        XXX - might return replacement chars from surrogate fragments.

        Raises ValueError for a slice with a step, and TypeError for a key
        that is neither an integer nor a slice.
        """
        if isinstance(key, int):
            if key < 0 or key >= len(self):
                return ""
            k2 = 2 * key
            key = slice(k2, k2 + 2)
        elif isinstance(key, slice):
            if key.step is not None:
                raise ValueError("__getitem__ does not support steps in slices")
            length = len(self)
            start = 0 if key.start is None else key.start
            stop = length if key.stop is None else key.stop
            # Clamp both ends to [0, length]. Clamping start to length
            # (not length - 1) makes a slice that begins at or past the
            # end come back empty instead of yielding the last character.
            start = max(0, min(length, start))
            stop = max(0, min(length, stop))
            if stop <= start:
                return ""
            key = slice(start * 2, stop * 2)
        else:
            raise TypeError("__getitem__ only supports integers and slices")
        return self.codec.decode(self._fp.getbuffer()[key], "replace")[0]

    def __setitem__(self, key, value):
        """
        Direct access to a single character. We do not support negative
        indices or replacing more than a single character at a time; only
        the first character of value is used, and an empty value is
        silently ignored.
        XXX - only works on characters in the BMP.

        Raises TypeError for a non-integer key, IndexError for an index
        out of range, and ValueError for a character outside the BMP.
        """
        if not isinstance(key, int):
            raise TypeError("__setitem__ only supports integers")
        if key < 0 or key >= len(self):
            raise IndexError("index {0} out of range".format(key))
        if not value:
            return
        start = key * 2
        end = start + 2
        encoded = value[0].encode(self.encoding, self.errors)
        # A replacement must be exactly one code unit so the buffer keeps
        # its size.
        if len(encoded) != 2:
            raise ValueError("{0!r} not in BMP".format(value[0]))
        self._fp.getbuffer()[start:end] = encoded

    def __del__(self):
        """
        Equivalent to .close().
        """
        # __init__ may have raised before _fp was assigned; in that case
        # there is nothing to close.
        if hasattr(self, "_fp"):
            self.close()

    def getvalue(self):
        """
        Gets the string represented by this workspace.
        """
        return self.codec.decode(self._fp.getbuffer(), self.errors)[0]

    def __enter__(self):
        """
        Context manager.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager: close on exit. Exceptions are not suppressed.
        """
        self.close()
        return False

Mercurial > cgi-bin > hgweb.cgi > curlyq

comparison workspace.py @ 0:984876b6a095