view runes.py @ 10:397c178c5b98

Make it array-based.
author David Barts <n5jrn@me.com>
date Fri, 27 Dec 2019 11:26:00 -0800
parents
children ab7d6e908034
line wrap: on
line source

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# I m p o r t s

import array
import codecs
import collections
import struct
import sys

# C l a s s e s

class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array of 16-bit unsigned integers) used to back this
    object; modifications to that array will be reflected in this
    object.
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Build the runes from based_on, which may be:
          * an array.array: one with typecode 'H' is adopted as the
            backing buffer without copying; any other typecode is
            converted to a new 'H' array;
          * a str: encoded to UTF-16 in the platform byte order;
          * None: an empty sequence;
          * anything else array.array('H', ...) accepts as initializer.
        """
        if isinstance(based_on, array.array):
            if based_on.typecode == 'H':
                self.buffer = based_on
            else:
                self.buffer = array.array('H', based_on)
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes, so let
            # lone surrogates through as their own code unit instead of
            # raising UnicodeEncodeError as 'strict' would.
            encoded = self.codec.encode(based_on, 'surrogatepass')[0]
            self.buffer = array.array('H', encoded)
        elif based_on is None:
            self.buffer = array.array('H')
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (code units), not characters."""
        return len(self.buffer)

    # Comparisons delegate to the backing arrays. Anything that is not a
    # Runes yields NotImplemented so that Python can try the reflected
    # operation or fall back to its defaults, rather than failing with
    # an AttributeError on the missing buffer attribute.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    def __hash__(self):
        """
        Hash the current contents. Arrays themselves are unhashable, so
        hash an immutable copy of the bytes; equal runes hash equally.
        Because this object is mutable, do not modify it while it is in
        use as a dictionary key or set member.
        """
        return hash(self.buffer.tobytes())

    def __bool__(self):
        """True if there is at least one rune."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the integer rune at an index, or a new Runes object for
        a slice.
        """
        ret = self.buffer[key]
        if isinstance(ret, array.array):
            return Runes(ret)
        return ret

    def __setitem__(self, key, value):
        """
        Assign an integer to an integer index, or a Runes object to any
        other kind of key (i.e. a slice). Raises TypeError for any other
        combination.
        """
        if isinstance(key, int):
            if not isinstance(value, int):
                raise TypeError("integer required")
            self.buffer[key] = value
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def clear(self):
        """Remove all runes, keeping the same backing array."""
        del self[:]

    def __iter__(self):
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single integer rune, or all the runes of another Runes
        object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value. Raises
        ValueError if it is not present.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1

class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes. Membership tests, index, find and
    append likewise accept strings as well as what Runes accepts.
    """
    def __setitem__(self, key, value):
        """
        Like Runes.__setitem__, but a string value is also accepted: a
        single character for an integer key, any string for other keys.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                value = self._ord(value)
            else:
                value = Runes(value)
        Runes.__setitem__(self, key, value)

    def __getitem__(self, key):
        """
        Return the rune(s) selected by key as a string; an out-of-range
        index yields the empty string instead of raising IndexError.
        """
        # Only the lookup itself can raise IndexError, so keep the try
        # body down to that one call.
        try:
            ret = Runes.__getitem__(self, key)
        except IndexError:
            return ""
        if isinstance(ret, int):
            return chr(ret)
        if isinstance(ret, Runes):
            return str(ret)
        raise AssertionError("this shouldn't happen")

    def __contains__(self, value):
        """
        Membership test that, like index and find, also accepts a
        one-character string. A string that find cannot turn into a
        single rune (wrong length, or outside the BMP) is reported as
        not contained.
        """
        # The inherited test compares value against integer runes, so a
        # string would never match; route through find, which converts.
        return self.find(value) != -1

    def append(self, value):
        """Like Runes.append, but a string is also accepted."""
        if isinstance(value, str):
            value = Runes(value)
        Runes.append(self, value)

    def index(self, value):
        """
        Like Runes.index, but a one-character string is also accepted.
        Raises ValueError if the value is absent or is a string that
        is not exactly one rune.
        """
        if isinstance(value, str):
            value = self._ord(value)
        return Runes.index(self, value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1

    def _ord(self, string):
        """
        Return the single rune for a one-character string. Raises
        ValueError if the string is not exactly one character long, or
        if that character needs more than one rune.
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        # One character that took more than one rune is a surrogate pair.
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]