Mercurial > cgi-bin > hgweb.cgi > curlyq
view runes.py @ 10:397c178c5b98
Make it array-based.
author | David Barts <n5jrn@me.com> |
---|---|
date | Fri, 27 Dec 2019 11:26:00 -0800 |
parents | |
children | ab7d6e908034 |
line wrap: on
line source
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# I m p o r t s

import array
import codecs
import collections
import struct
import sys

# C l a s s e s

class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units).

    The attributes encoding and codec contain the name of the encoding
    and the codec used to generate the UTF-16. The attribute buffer
    contains the buffer (an array of 16-bit unsigned integers) used to
    back this object; modifications to that array will be reflected in
    this object.

    Instances compare by buffer contents against other Runes objects.
    Because they are mutable, they are deliberately unhashable.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Build a Runes object.

        based_on may be:
          * an array.array of typecode 'H', which is used directly as
            the backing buffer (shared, not copied);
          * a str, which is encoded to UTF-16 in native byte order;
          * None, which yields an empty object;
          * anything else array.array('H', ...) accepts as an
            initializer (including arrays of other typecodes), which is
            copied into a new buffer.
        """
        if isinstance(based_on, array.array) and based_on.typecode == 'H':
            self.buffer = based_on
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            encoded = self.codec.encode(based_on, 'strict')[0]
            self.buffer = array.array('H', encoded)
        elif based_on is None:
            self.buffer = array.array('H', bytes())
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (not characters)."""
        return len(self.buffer)

    # Comparisons are only defined against other Runes objects. For any
    # other operand we return NotImplemented so that Python can try the
    # reflected operation, then fall back to its defaults (False for ==,
    # TypeError for ordering) instead of dying with AttributeError.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    # The backing array.array is itself unhashable, so hashing it could
    # never succeed; a mutable object that defines __eq__ should not be
    # hashable anyway. Say so explicitly.
    __hash__ = None

    def __bool__(self):
        """True if there is at least one rune."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the rune (an int) at an integer index, or a new Runes
        object for a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        Store an int at an integer index, or a Runes object over any
        other kind of key (i.e. a slice). Raises TypeError when the
        value is of the wrong kind for the key.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """Remove all runes, in place."""
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single rune (an int) or all the runes of another Runes
        object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of the rune value;
        raises ValueError if it is absent.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1


class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that
    __setitem__ also accepts a string as an argument and __getitem__
    always returns a string. We also return empty strings instead of
    throwing IndexError when attempting to read out-of-range values,
    because that makes life easier for us when curling quotes.
    """

    def __setitem__(self, key, value):
        """
        As Runes.__setitem__, but a str value is also accepted: a
        single character for an integer key, any string otherwise.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        Runes.__setitem__(self, key, value)

    def __getitem__(self, key):
        """
        Return the requested rune or slice as a str; an index that
        raises IndexError yields "" instead.
        """
        # Only the lookup itself can raise IndexError, so that is all
        # the try block needs to cover.
        try:
            ret = Runes.__getitem__(self, key)
        except IndexError:
            return ""
        if isinstance(ret, int):
            return chr(ret)
        if isinstance(ret, Runes):
            return str(ret)
        raise AssertionError("this shouldn't happen")

    def append(self, value):
        """As Runes.append, but a str is also accepted."""
        if isinstance(value, str):
            value = Runes(value)
        Runes.append(self, value)

    def index(self, value):
        """
        As Runes.index, but a single-character str is also accepted.
        Raises ValueError if the value is absent, or if a str argument
        is not exactly one character that fits in a single rune.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return Runes.index(self, value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1

    def _ord(self, string):
        """
        Return the single rune that encodes the one-character string
        passed. Raises ValueError if the string is not exactly one
        character long, or if that character needs more than one rune.
        """
        length = len(string)
        if length != 1:
            raise ValueError(
                "expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]