Mercurial > cgi-bin > hgweb.cgi > curlyq
diff runes.py @ 10:397c178c5b98
Make it array-based.
author | David Barts <n5jrn@me.com> |
---|---|
date | Fri, 27 Dec 2019 11:26:00 -0800 |
parents | |
children | ab7d6e908034 |
line wrap: on
line diff
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# I m p o r t s

import array
import codecs
import collections
import struct
import sys

# C l a s s e s

class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array of 16-bit unsigned integers) used to back this
    object; modifications to that array will be reflected in this
    object.

    Because instances are mutable and compare by value, they are
    deliberately unhashable.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    # array.array is itself unhashable, so hashing the buffer could only
    # ever raise TypeError; say so explicitly instead.
    __hash__ = None

    def __init__(self, based_on=None):
        """
        Create a new object. based_on may be None (empty), a string
        (which gets encoded), an array of typecode 'H' (which is used
        directly as the backing buffer, NOT copied), or anything else
        array.array('H', ...) accepts as an initializer (copied).
        """
        if based_on is None:
            self.buffer = array.array('H')
        elif isinstance(based_on, array.array) and based_on.typecode == 'H':
            # Share the caller's array; see the class docstring.
            self.buffer = based_on
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        return bytes(self.buffer)

    def __len__(self):
        return len(self.buffer)

    # Comparisons are defined only against other Runes objects. For any
    # other operand we return NotImplemented so Python can try the
    # reflected operation and fall back sanely (== gives False, ordering
    # raises TypeError) instead of dying with an AttributeError.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __bool__(self):
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return a single rune (an integer) for an integer key, or a new
        Runes object for a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        Integer keys require an integer value; any other key (i.e. a
        slice) requires a Runes value. Raises TypeError otherwise.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """
        Remove all runes, in place (the backing array is kept).
        """
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (an integer) or all the runes of another
        Runes object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value; raises
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """
        Like index, but returns -1 instead of raising ValueError.
        """
        try:
            return self.index(value)
        except ValueError:
            return -1

class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.
    """
    def __setitem__(self, key, value):
        if isinstance(value, str):
            if isinstance(key, int):
                # A single slot can only hold a single BMP character.
                value = self._ord(value)
            else:
                value = Runes(value)
        super().__setitem__(key, value)

    def __getitem__(self, key):
        # Only the lookup itself can be out of range, so only it is
        # guarded.
        try:
            ret = super().__getitem__(key)
        except IndexError:
            return ""
        if isinstance(ret, int):
            return chr(ret)
        return str(ret)

    def append(self, value):
        if isinstance(value, str):
            value = Runes(value)
        super().append(value)

    def index(self, value):
        if isinstance(value, str):
            value = self._ord(value)
        return super().index(value)

    # find is inherited from Runes; it calls self.index, so it accepts
    # strings here too.

    def _ord(self, string):
        """
        Return the rune for a one-character string. Raises ValueError if
        the string is not exactly one character long, or if the character
        needs more than one rune (i.e. is outside the BMP).
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]