diff runes.py @ 10:397c178c5b98

Make it array-based.
author David Barts <n5jrn@me.com>
date Fri, 27 Dec 2019 11:26:00 -0800
parents
children ab7d6e908034
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/runes.py	Fri Dec 27 11:26:00 2019 -0800
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# I m p o r t s
+
+import array
+import codecs
+import collections
+import struct
+import sys
+
+# C l a s s e s
+
+class Runes(object):
+    """
+    A mutable sequence of UTF-16 code units ("runes"), each stored as a
+    16-bit unsigned integer. The class attributes encoding and codec
+    name the native-byte-order UTF-16 encoding and its codec. The
+    attribute buffer is the array backing this object; an array('H')
+    passed to the constructor is shared, not copied, so modifications
+    to that array will be reflected in this object.
+    """
+    # The most efficient 16-bit one on this platform
+    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
+    codec = codecs.lookup(encoding)
+
+    def __init__(self, based_on=None):
+        if isinstance(based_on, array.array) and based_on.typecode == 'H':
+            # Share the caller's array rather than copying it.
+            self.buffer = based_on
+        elif isinstance(based_on, str):
+            # 'strict': a string that cannot be encoded raises an error.
+            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
+        elif based_on is None:
+            self.buffer = array.array('H')
+        else:
+            # Other arrays, bytes and iterables of integers are copied.
+            self.buffer = array.array('H', based_on)
+
+    def __str__(self):
+        """
+        Convert this object to a string. We deliberately do not have a
+        __repr__ method, to underscore that runes are not strings.
+        """
+        # Runes might not always be able to decode to a string.
+        return self.codec.decode(self.buffer, 'replace')[0]
+
+    def __bytes__(self):
+        return bytes(self.buffer)
+
+    def __len__(self):
+        return len(self.buffer)
+
+    # Non-Runes operands yield NotImplemented instead of an
+    # AttributeError. Python 3 derives != from __eq__ on its own.
+    def __lt__(self, other):
+        return self.buffer < other.buffer if isinstance(other, Runes) else NotImplemented
+
+    def __le__(self, other):
+        return self.buffer <= other.buffer if isinstance(other, Runes) else NotImplemented
+
+    def __gt__(self, other):
+        return self.buffer > other.buffer if isinstance(other, Runes) else NotImplemented
+
+    def __ge__(self, other):
+        return self.buffer >= other.buffer if isinstance(other, Runes) else NotImplemented
+
+    def __eq__(self, other):
+        return self.buffer == other.buffer if isinstance(other, Runes) else NotImplemented
+
+    def __hash__(self):
+        # An array is unhashable, so hash a snapshot of its contents; equal
+        # runes hash equal. Avoid mutating runes that are used as dict keys.
+        return hash(self.buffer.tobytes())
+
+    def __bool__(self):
+        return bool(self.buffer)
+
+    def __getitem__(self, key):
+        """Return an integer for an index, or a new Runes for a slice."""
+        ret = self.buffer[key]
+        return Runes(ret) if isinstance(ret, array.array) else ret
+
+    def __setitem__(self, key, value):
+        """Store an integer at an integer key, else a Runes (slice key)."""
+        if isinstance(key, int):
+            if not isinstance(value, int):
+                raise TypeError("integer required")
+            self.buffer[key] = value
+        elif isinstance(value, Runes):
+            self.buffer[key] = value.buffer
+        else:
+            raise TypeError("runes required")
+
+    def __delitem__(self, key):
+        del self.buffer[key]
+
+    def clear(self):
+        del self[:]
+
+    def __iter__(self):
+        return iter(self.buffer)
+
+    def __reversed__(self):
+        return reversed(self.buffer)
+
+    def append(self, value):
+        """Append one integer, or every rune of another Runes."""
+        if isinstance(value, int):
+            self.buffer.append(value)
+        elif isinstance(value, Runes):
+            self.buffer.extend(value.buffer)
+        else:
+            raise TypeError("integer or runes required")
+
+    def __contains__(self, value):
+        return value in self.buffer
+
+    def index(self, value):
+        return self.buffer.index(value)
+
+    def find(self, value):
+        """Like index, but return -1 instead of raising ValueError."""
+        try:
+            return self.index(value)
+        except ValueError:
+            return -1
+
+class Workspace(Runes):
+    """
+    A Runes object that acts a bit more string-like: __setitem__,
+    append, index, find and the in operator also accept strings, and
+    __getitem__ always returns a string. We also return empty strings
+    instead of throwing IndexError when attempting to read out-of-range
+    values, because that makes life easier for us when curling quotes.
+    """
+    def __setitem__(self, key, value):
+        if isinstance(value, str):
+            # One character for an integer key, any string otherwise.
+            if isinstance(key, int):
+                value = self._ord(value)
+            else:
+                value = Runes(value)
+        Runes.__setitem__(self, key, value)
+
+    def __getitem__(self, key):
+        """Return a str for an index or slice; an empty str if out of range."""
+        try:
+            ret = Runes.__getitem__(self, key)
+        except IndexError:
+            return ""
+        if isinstance(ret, int):
+            return chr(ret)
+        if isinstance(ret, Runes):
+            return str(ret)
+        raise AssertionError("this shouldn't happen")
+
+    def append(self, value):
+        """Append an integer, a Runes, or every rune of a str."""
+        if isinstance(value, str):
+            value = Runes(value)
+        Runes.append(self, value)
+
+    def index(self, value):
+        """Like Runes.index, but also accept a one-character str."""
+        if isinstance(value, str):
+            value = self._ord(value)
+        return Runes.index(self, value)
+
+    def __contains__(self, value):
+        # find (inherited) gives -1 for a str that is not one BMP character.
+        if isinstance(value, str):
+            return self.find(value) != -1
+        return Runes.__contains__(self, value)
+
+    def _ord(self, string):
+        """Return the rune of a one-character BMP str; else raise ValueError."""
+        if len(string) != 1:
+            raise ValueError("expected a character, but string of length {0} found".format(len(string)))
+        raw = Runes(string)
+        if len(raw) != 1:
+            raise ValueError("character not in BMP")
+        return raw[0]