view runes.py @ 17:0c8d787bc7e1

... but not paranoid enough.
author David Barts <n5jrn@me.com>
date Fri, 27 Dec 2019 15:53:22 -0800
parents 61772bf1f77c
children be0fd5c8121d
line wrap: on
line source

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# I m p o r t s

import array
import codecs
import collections
import struct
import sys

# C l a s s e s

class Runes(object):
    """
    A mutable sequence of UTF-16 runes (16-bit code units). The
    attributes encoding and codec contain the name of the encoding and
    the codec used to generate the UTF-16. The attribute buffer contains
    the buffer (an array or memoryview of 16-bit unsigned integers) used
    to back this object; modifications to that buffer will be reflected
    in this object.

    Operations that change the length (append, clear, del) are passed
    straight to the backing buffer, so they only work when that buffer
    supports them (an array.array does; a memoryview does not).
    """
    # The most efficient 16-bit one on this platform
    encoding = "UTF-16" + sys.byteorder[0].upper() + "E"
    codec = codecs.lookup(encoding)

    def __init__(self, based_on=None):
        """
        Create a new object. based_on may be:
          * an array.array or memoryview of unsigned 16-bit values
            (typecode/format 'H'), which is used directly as the backing
            buffer (no copy is made);
          * any other array.array or memoryview, whose items are copied
            into a new 'H' array;
          * a str, which is encoded to UTF-16 (strict);
          * None, which yields an empty sequence;
          * another Runes object, whose buffer is copied;
          * anything else array.array('H', ...) accepts as initializer.
        """
        if isinstance(based_on, (array.array, memoryview)):
            if isinstance(based_on, array.array):
                item_code = based_on.typecode
            else:
                item_code = based_on.format
            if item_code == 'H':
                # Already the right shape: share it rather than copy.
                self.buffer = based_on
            else:
                self.buffer = array.array('H', based_on)
        elif isinstance(based_on, str):
            # A string should always be able to encode to runes.
            self.buffer = array.array('H', self.codec.encode(based_on, 'strict')[0])
        elif based_on is None:
            self.buffer = array.array('H', bytes())
        elif isinstance(based_on, Runes):
            self.buffer = array.array('H', based_on.buffer)
        else:
            self.buffer = array.array('H', based_on)

    def __str__(self):
        """
        Convert this object to a string. We deliberately do not have a
        __repr__ method, to underscore that runes are not strings.
        """
        # Runes might not always be able to decode to a string.
        return self.codec.decode(self.buffer, 'replace')[0]

    def __bytes__(self):
        """Return the raw bytes of the backing buffer."""
        return bytes(self.buffer)

    def __len__(self):
        """Return the number of runes (16-bit units), not characters."""
        return len(self.buffer)

    # Rich comparisons compare the backing buffers. For anything that is
    # not a Runes object we return NotImplemented so Python can try the
    # reflected operation (and fall back to identity for == and !=)
    # instead of failing with AttributeError on the missing .buffer.

    def __lt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer < other.buffer

    def __le__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer <= other.buffer

    def __gt__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer > other.buffer

    def __ge__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer >= other.buffer

    def __eq__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer == other.buffer

    def __ne__(self, other):
        if not isinstance(other, Runes):
            return NotImplemented
        return self.buffer != other.buffer

    # Mutable, so unhashable. Setting __hash__ to None (rather than
    # defining a method that raises) makes hash() raise TypeError and
    # also lets isinstance(x, collections.abc.Hashable) report False.
    __hash__ = None

    def __bool__(self):
        """True if there is at least one rune."""
        return bool(self.buffer)

    def __getitem__(self, key):
        """
        Return the integer rune at an integer index, or a Runes object
        for a slice. A slice of an array-backed object is a copy; a
        slice of a memoryview-backed object is a view on the same memory.
        """
        ret = self.buffer[key]
        # Slicing an array yields an array, slicing a memoryview yields
        # a memoryview; either way the caller gets Runes back.
        if isinstance(ret, (array.array, memoryview)):
            return Runes(ret)
        else:
            return ret

    def __setitem__(self, key, value):
        """
        Store an integer at an integer index, or a Runes object into a
        slice. Raises TypeError for any other combination.
        """
        if isinstance(key, int):
            if isinstance(value, int):
                self.buffer[key] = value
            else:
                raise TypeError("integer required")
        elif isinstance(value, Runes):
            self.buffer[key] = value.buffer
        else:
            raise TypeError("runes required")

    def __delitem__(self, key):
        del self.buffer[key]

    def __del__(self):
        # Paranoid. hasattr guards against __init__ having raised before
        # buffer was assigned.
        if hasattr(self, 'buffer') and isinstance(self.buffer, memoryview):
            self.buffer.release()

    def clear(self):
        """Remove all runes."""
        del self[:]

    def __iter__(self):
        """Iterate over the runes as integers."""
        return iter(self.buffer)

    def __reversed__(self):
        return reversed(self.buffer)

    def append(self, value):
        """
        Append a single integer rune, or all the runes of another Runes
        object. Raises TypeError for anything else.
        """
        if isinstance(value, int):
            self.buffer.append(value)
        elif isinstance(value, Runes):
            self.buffer.extend(value.buffer)
        else:
            raise TypeError("integer or runes required")

    def __contains__(self, value):
        return value in self.buffer

    def index(self, value):
        """
        Return the index of the first occurrence of value; raises
        ValueError if it is absent.
        """
        return self.buffer.index(value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1

class Workspace(Runes):
    """
    A Runes object that acts a bit more string-like, in that __setitem__
    also accepts a string as an argument and __getitem__ always returns
    a string. We also return empty strings instead of throwing IndexError
    when attempting to read out-of-range values, because that makes life
    easier for us when curling quotes.
    """
    def __setitem__(self, key, value):
        """
        As Runes.__setitem__, but a str value is also accepted: a single
        BMP character for an integer key, any string for a slice key.
        """
        if isinstance(value, str):
            if isinstance(key, int):
                Runes.__setitem__(self, key, self._ord(value))
            else:
                Runes.__setitem__(self, key, Runes(value))
        else:
            Runes.__setitem__(self, key, value)

    def __getitem__(self, key):
        """
        Return the rune(s) selected by key as a string: one character for
        an integer index, the decoded text for a slice. An out-of-range
        integer index yields "" rather than raising IndexError.
        """
        # Work through a memoryview so slices do not copy the buffer; the
        # with-blocks release each view even if decoding raises.
        with memoryview(self.buffer) as view:
            try:
                result = view[key]
            except IndexError:
                return ""
            if isinstance(result, int):
                return chr(result)
            if isinstance(result, memoryview):
                with result:
                    # An extended slice (step != 1) is not contiguous, and
                    # the codec only accepts contiguous buffers, so copy
                    # the selected runes out in that case.
                    if result.c_contiguous:
                        data = result
                    else:
                        data = result.tobytes()
                    return self.codec.decode(data, 'replace')[0]
            raise AssertionError("this shouldn't happen")

    def append(self, value):
        """As Runes.append, but a str is also accepted."""
        if isinstance(value, str):
            Runes.append(self, Runes(value))
        else:
            Runes.append(self, value)

    def index(self, value):
        """
        As Runes.index, but value may also be a single-character string.
        Raises ValueError if the value is absent or the string is not
        exactly one BMP character.
        """
        if isinstance(value, str):
            return Runes.index(self, self._ord(value))
        else:
            return Runes.index(self, value)

    def find(self, value):
        """Like index, but return -1 instead of raising ValueError."""
        try:
            return self.index(value)
        except ValueError:
            return -1

    def _ord(self, string):
        """
        Return the single rune that encodes a one-character string.
        Raises ValueError if string is not exactly one character long or
        if that character needs more than one rune (is outside the BMP).
        """
        length = len(string)
        if length != 1:
            raise ValueError("expected a character, but string of length {0} found".format(length))
        raw = Runes(string)
        if len(raw) != 1:
            raise ValueError("character not in BMP")
        return raw[0]