Mercurial > cgi-bin > hgweb.cgi > curlyq
comparison curlers.py @ 6:da3fb2312c88
Leave bodies of <pre> tags alone.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 20:38:37 -0800 |
parents | 7a83e82e65a6 |
children | 9df9ff8cecde |
comparison
legend: equal, deleted, inserted, replaced
5:d5198c7ec54d | 6:da3fb2312c88 |
---|---|
19 # Words that start with an apostrophe. Cribbed from Wordpress. | 19 # Words that start with an apostrophe. Cribbed from Wordpress. |
20 _ASTART = [ "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", | 20 _ASTART = [ "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", |
21 "'bout", "'nuff", "'round", "'cause" , "'em" ] | 21 "'bout", "'nuff", "'round", "'cause" , "'em" ] |
22 | 22 |
23 # HTML tags that enclose raw data | 23 # HTML tags that enclose raw data |
24 _RAW = set(["script", "style"]) | 24 _RAW = set(["script", "style", "pre"]) |
25 | 25 |
26 # HTML block elements | 26 # HTML block elements |
27 _BLOCK = set([ | 27 _BLOCK = set([ |
28 "address", "blockquote", "div", "dl", "fieldset", "form", "h1", | 28 "address", "blockquote", "div", "dl", "fieldset", "form", "h1", "h2", |
29 "h2", "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "pre", | 29 "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "table", "ul" |
30 "table", "ul" | |
31 ]) | 30 ]) |
32 | 31 |
33 # F u n c t i o n s | 32 # F u n c t i o n s |
34 | 33 |
35 def uncurl(ws): | 34 def uncurl(ws): |
210 ws = self.workspace | 209 ws = self.workspace |
211 end = pos + len(self._endtag) | 210 end = pos + len(self._endtag) |
212 # only a matching end tag gets us out of the raw state | 211 # only a matching end tag gets us out of the raw state |
213 if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()): | 212 if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()): |
214 self._ltpos = pos | 213 self._ltpos = pos |
215 self._state = self._seen_lt | 214 self._state = self._norm if self._endtag == "</pre" else self._ltstate |
216 | 215 |
217 def _seen_ld(self): | 216 def _seen_ld(self): |
218 pos = self._pos | 217 pos = self._pos |
219 ws = self.workspace | 218 ws = self.workspace |
220 char = ws[pos] | 219 char = ws[pos] |