Mercurial > cgi-bin > hgweb.cgi > curlyq
view writer.py @ 0:984876b6a095
Initial commit of first two classes.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 08:09:11 -0800 |
parents | |
children | 091c03f1b2e8 |
line wrap: on
line source
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# A simple HTML writer, so we can process HTML in a streamwise fashion
# via callbacks, which compared to a document tree tends to be all of:
# easier to program, uses less memory, and uses less processor time.

# I m p o r t s

import os
import sys
import codecs
import io
import html

# V a r i a b l e s

# We only support ASCII, ISO-8859-1, and anything capable of encoding
# the full Unicode set.  Anything else is too sticky a wicket to want
# to mess with.
_SUPPORTED_ENCODINGS = (
    "US-ASCII",
    "ISO-8859-1",
    "UTF-8",
    "UTF-16",
    "UTF-16LE",
    "UTF-16BE",
    "UTF-32",
    "UTF-32LE",
    "UTF-32BE",
)

# Maps the codec object returned by codecs.lookup() to our canonical
# name for it, so that aliases of a supported encoding are accepted.
_CODECS_TO_NAME = {
    codecs.lookup(name): name for name in _SUPPORTED_ENCODINGS
}

# Highest code point that may be written verbatim for the limited
# encodings.  Encodings not listed here are treated as unrestricted.
_MAXCHAR = {"US-ASCII": 127, "ISO-8859-1": 255}

# There are WAY more HTML entities than this, but we're pessimistic about
# what browsers "in the wild" support, so we stick to what XML supports.
_QUOTE_ENTITIES = {"\"": "quot", "'": "apos"}
_OTHER_ENTITIES = {"&": "amp", "<": "lt", ">": "gt"}

# C l a s s e s


class HtmlStreamWriter:
    """
    A simple HTML writer, intended to be used in a streamwise fashion.
    This class takes REASONABLE precautions against writing garbage, but
    does not check every last thing. It will happily write tags like
    "<garb<<age>>>" etc. if you feed it the right garbage in.
    """

    def __init__(self, stream, encoding):
        """
        Initialize this writer. An encoding is mandatory, even though we
        produce character output, because the encoding governs which
        characters we can send on for I/O without entity-escaping them.
        The supplied stream should be buffered or performance will suffer.

        stream   -- object with a write(str) method; exposed as .stream
        encoding -- name (or alias) of one of the supported encodings

        Raises ValueError if the encoding is unknown or unsupported.
        """
        # Stream we're using is available to the caller as .stream
        self.stream = stream
        try:
            # A codec to use is available to the caller as .codec
            self.codec = codecs.lookup(encoding)
            # Normalized encoding name is available to the caller as
            # .encoding
            self.encoding = _CODECS_TO_NAME[self.codec]
        except LookupError as e:
            # codecs.lookup raises LookupError for unknown names; the
            # table lookup raises KeyError (a LookupError subclass) for
            # codecs that exist but that we do not support.
            raise ValueError(
                "invalid encoding {0!r}".format(encoding)) from e
        self._maxchar = _MAXCHAR.get(self.encoding, 0x7fffffff)

    # html.escape drops the ball, badly. It is too optimistic about what
    # entity names are likely to be understood, and is too stupid to know
    # that ASCII streams need lots of things escaped.
    def _escape(self, string, quote=False):
        """
        Write string to the stream, replacing markup-significant
        characters by named entities and characters above the encoding's
        limit by decimal character references. Quote characters are only
        escaped when quote is true (i.e. inside attribute values).
        """
        pieces = []
        for ch in string:
            if quote and ch in _QUOTE_ENTITIES:
                pieces.append("&" + _QUOTE_ENTITIES[ch] + ";")
            elif ch in _OTHER_ENTITIES:
                pieces.append("&" + _OTHER_ENTITIES[ch] + ";")
            elif ord(ch) > self._maxchar:
                pieces.append("&#" + str(ord(ch)) + ";")
            else:
                pieces.append(ch)
        # One write per call instead of up to three per character.
        self.stream.write("".join(pieces))

    def write_starttag(self, tag, attrs):
        """
        Write a start tag. attrs is an iterable of (name, value) pairs.
        """
        self.stream.write("<")
        self.stream.write(tag)
        self._writeattrs(attrs)
        self.stream.write(">")

    def _writeattrs(self, attrs):
        """
        Write each (name, value) pair as ' name="value"', escaping the
        value. A value of None is written as a bare attribute name,
        which is how html.parser reports value-less attributes.
        """
        for k, v in attrs:
            self.stream.write(" ")
            self.stream.write(k)
            if v is None:
                continue
            self.stream.write("=\"")
            self._escape(v, quote=True)
            self.stream.write("\"")

    def write_endtag(self, tag):
        """
        Write an end tag.
        """
        self.stream.write("</")
        self.stream.write(tag)
        self.stream.write(">")

    def write_startendtag(self, tag, attrs):
        """
        Write a "start-end" (i.e. empty) tag.
        """
        self.stream.write("<")
        self.stream.write(tag)
        self._writeattrs(attrs)
        self.stream.write("/>")

    def write_data(self, data):
        """
        Write text data.
        """
        self._escape(data)

    def write_raw_data(self, data):
        """
        Write raw data (e.g. style sheets, scripts, etc.)
        """
        self.stream.write(data)

    @staticmethod
    def _is_numeric_charref(name):
        """
        Return True if name is a decimal number, or an "x"/"X" followed
        by a hexadecimal number; False if it is anything else.
        """
        try:
            int(name)
            return True
        except ValueError:
            pass
        if name[:1] in ("x", "X"):
            try:
                int(name[1:], 16)
                return True
            except ValueError:
                pass
        return False

    def write_charref(self, name):
        """
        Write character reference (normally not needed). Numeric names
        ("65", "x41", "X41") are written as "&#...;"; anything else is
        written as a named entity reference "&name;".
        """
        self.stream.write("&")
        if self._is_numeric_charref(name):
            self.stream.write("#")
        self.stream.write(name)
        self.stream.write(";")

    def write_comment(self, data):
        """
        Write a comment.
        """
        self.stream.write("<!--")
        self.stream.write(data)
        self.stream.write("-->")

    def write_decl(self, decl):
        """
        Write a declaration.
        """
        self.stream.write("<!")
        self.stream.write(decl)
        self.stream.write(">")

    def write_pi(self, data):
        """
        Write a processing instruction.
        """
        self.stream.write("<?")
        self.stream.write(data)
        self.stream.write(">")