comparison writer.py @ 0:984876b6a095

Initial commit of first two classes.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 08:09:11 -0800
parents
children 091c03f1b2e8
comparison
equal deleted inserted replaced
-1:000000000000 0:984876b6a095
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 # A simple HTML writer, so we can process HTML in a streamwise fashion
5 # via callbacks, which compared to a document tree tends to be all of:
6 # easier to program, uses less memory, and uses less processor time.
7
8 # I m p o r t s
9
10 import os, sys
11 import codecs
12 import io
13 import html
14
15 # V a r i a b l e s
16
17 # We only support ASCII, ISO-8859-1, and anything capable of encoding
18 # the full Unicode set. Anything else is too sticky a wicket to want
19 # to mess with.
# Maps each supported codec (the CodecInfo returned by codecs.lookup) to
# the canonical encoding name we report for it. Built with a comprehension
# so that no loop variable is left behind in the module namespace.
_CODECS_TO_NAME = {
    codecs.lookup(name): name
    for name in (
        "US-ASCII",
        "ISO-8859-1",
        "UTF-8",
        "UTF-16",
        "UTF-16LE",
        "UTF-16BE",
        "UTF-32",
        "UTF-32LE",
        "UTF-32BE",
    )
}
# Highest code point that may be written unescaped for the limited
# encodings; encodings not listed here have no entry.
_MAXCHAR = {"US-ASCII": 127, "ISO-8859-1": 255}
25
26 # There are WAY more HTML entities than this, but we're pessimistic about
27 # what browsers "in the wild" support, so we stick to what XML supports.
# Quote characters and their entity names; consulted only when the caller
# asks for quote escaping (i.e. inside quoted attribute values).
_QUOTE_ENTITIES = {'"': "quot", "'": "apos"}
# Markup-significant characters and their entity names; always escaped.
_OTHER_ENTITIES = {"&": "amp", "<": "lt", ">": "gt"}
37
38 # C l a s s e s
39
class HtmlStreamWriter(object):
    """
    A simple HTML writer, intended to be used in a streamwise fashion.
    This class takes REASONABLE precautions against writing garbage, but
    does not check every last thing. It will happily write tags like
    "<garb<<age>>>" etc. if you feed it the right garbage in.
    """

    # Characters allowed in the numeric part of a character reference.
    _DEC_DIGITS = frozenset("0123456789")
    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")

    def __init__(self, stream, encoding):
        """
        Initialize this writer. An encoding is mandatory, even though we
        produce character output, because the encoding governs which
        characters we can send on for I/O without entity-escaping them.
        The supplied stream should be buffered or performance will suffer.

        stream   -- object with a write(str) method; kept as .stream
        encoding -- name of one of the supported encodings

        Raises ValueError if the encoding is unknown or unsupported.
        """
        # Stream we're using is available to the caller as .stream
        self.stream = stream
        try:
            # A codec to use is available to the caller as .codec
            self.codec = codecs.lookup(encoding)
            # Normalized encoding name is available to the caller as .encoding
            self.encoding = _CODECS_TO_NAME[self.codec]
        except LookupError as e:
            # Covers both an unknown codec name (LookupError) and a known
            # but unsupported codec (KeyError, a LookupError subclass).
            raise ValueError("invalid encoding {0!r}".format(encoding)) from e
        # Anything above this code point must be written as a numeric
        # character reference; unlimited encodings get a huge ceiling.
        self._maxchar = _MAXCHAR.get(self.encoding, 0x7fffffff)

    # html.escape drops the ball, badly. It is too optimistic about what
    # entity names are likely to be understood, and is too stupid to know
    # that ASCII streams need lots of things escaped.
    def _escape(self, string, quote=False):
        """
        Write string to the stream, replacing markup characters with named
        entities and unencodable characters with decimal character
        references. Quote characters are escaped only if quote is true.
        """
        pieces = []
        for ch in string:
            entity = None
            if quote and ch in _QUOTE_ENTITIES:
                entity = _QUOTE_ENTITIES[ch]
            elif ch in _OTHER_ENTITIES:
                entity = _OTHER_ENTITIES[ch]
            if entity:
                pieces.append("&" + entity + ";")
            elif ord(ch) > self._maxchar:
                pieces.append("&#" + str(ord(ch)) + ";")
            else:
                pieces.append(ch)
        # One write for the whole string instead of several per character.
        if pieces:
            self.stream.write("".join(pieces))

    def write_starttag(self, tag, attrs):
        """
        Write a start tag. attrs is an iterable of (name, value) pairs.
        """
        self.stream.write("<")
        self.stream.write(tag)
        self._writeattrs(attrs)
        self.stream.write(">")

    def _writeattrs(self, attrs):
        """
        Write each (name, value) pair as ' name="value"', escaping the
        value. A value of None denotes a valueless attribute and is
        written as the bare name.
        """
        for k, v in attrs:
            self.stream.write(" ")
            self.stream.write(k)
            if v is None:
                # e.g. <input disabled>: nothing to quote or escape.
                continue
            self.stream.write("=\"")
            self._escape(v, quote=True)
            self.stream.write("\"")

    def write_endtag(self, tag):
        """
        Write an end tag.
        """
        self.stream.write("</")
        self.stream.write(tag)
        self.stream.write(">")

    def write_startendtag(self, tag, attrs):
        """
        Write a "start-end" (i.e. empty) tag.
        """
        self.stream.write("<")
        self.stream.write(tag)
        self._writeattrs(attrs)
        self.stream.write("/>")

    def write_data(self, data):
        """
        Write text data, escaping it as needed.
        """
        self._escape(data)

    def write_raw_data(self, data):
        """
        Write raw data (e.g. style sheets, scripts, etc.) with no escaping.
        """
        self.stream.write(data)

    def write_charref(self, name):
        """
        Write character reference (normally not needed).

        If name is all decimal digits, or "x"/"X" followed by hex digits,
        it is written as a numeric reference ("&#...;"); anything else is
        written as a named entity reference ("&...;"). The name itself is
        written unchanged.
        """
        if name[:1] in ("x", "X"):
            digits, allowed = name[1:], self._HEX_DIGITS
        else:
            digits, allowed = name, self._DEC_DIGITS
        # Strict digit test: int() would also accept signs, surrounding
        # whitespace and underscores, none of which belong in a reference.
        is_number = bool(digits) and allowed.issuperset(digits)
        self.stream.write("&#" if is_number else "&")
        self.stream.write(name)
        self.stream.write(";")

    def write_comment(self, data):
        """
        Write a comment. The data is written as-is, without escaping.
        """
        self.stream.write("<!--")
        self.stream.write(data)
        self.stream.write("-->")

    def write_decl(self, decl):
        """
        Write a declaration. The text is written as-is, without escaping.
        """
        self.stream.write("<!")
        self.stream.write(decl)
        self.stream.write(">")

    def write_pi(self, data):
        """
        Write a processing instruction. The data is written as-is between
        "<?" and ">", so any closing "?" must be part of data.
        """
        self.stream.write("<?")
        self.stream.write(data)
        self.stream.write(">")