Mercurial > cgi-bin > hgweb.cgi > curlyq
comparison curlers.py @ 7:9df9ff8cecde
Undo that; ignoring <pre> is a sticky wicket.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 26 Dec 2019 20:56:38 -0800 |
parents | da3fb2312c88 |
children | 397c178c5b98 |
comparison
equal
deleted
inserted
replaced
6:da3fb2312c88 | 7:9df9ff8cecde |
---|---|
19 # Words that start with an apostrophe. Cribbed from Wordpress. | 19 # Words that start with an apostrophe. Cribbed from Wordpress. |
20 _ASTART = [ "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", | 20 _ASTART = [ "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", |
21 "'bout", "'nuff", "'round", "'cause" , "'em" ] | 21 "'bout", "'nuff", "'round", "'cause" , "'em" ] |
22 | 22 |
23 # HTML tags that enclose raw data | 23 # HTML tags that enclose raw data |
24 _RAW = set(["script", "style", "pre"]) | 24 _RAW = set(["script", "style"]) |
25 | 25 |
26 # HTML block elements | 26 # HTML block elements |
27 _BLOCK = set([ | 27 _BLOCK = set([ |
28 "address", "blockquote", "div", "dl", "fieldset", "form", "h1", "h2", | 28 "address", "blockquote", "div", "dl", "fieldset", "form", "h1", |
29 "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "table", "ul" | 29 "h2", "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "pre", |
| 30 "table", "ul" |
30 ]) | 31 ]) |
31 | 32 |
32 # F u n c t i o n s | 33 # F u n c t i o n s |
33 | 34 |
34 def uncurl(ws): | 35 def uncurl(ws): |
209 ws = self.workspace | 210 ws = self.workspace |
210 end = pos + len(self._endtag) | 211 end = pos + len(self._endtag) |
211 # only a matching end tag gets us out of the raw state | 212 # only a matching end tag gets us out of the raw state |
212 if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()): | 213 if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()): |
213 self._ltpos = pos | 214 self._ltpos = pos |
214 self._state = self._norm if self._endtag == "</pre" else self._ltstate | 215 self._state = self._seen_lt |
215 | 216 |
216 def _seen_ld(self): | 217 def _seen_ld(self): |
217 pos = self._pos | 218 pos = self._pos |
218 ws = self.workspace | 219 ws = self.workspace |
219 char = ws[pos] | 220 char = ws[pos] |