comparison curlers.py @ 6:da3fb2312c88

Leave bodies of <pre> tags alone.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 20:38:37 -0800
parents 7a83e82e65a6
children 9df9ff8cecde
comparison
equal deleted inserted replaced
5:d5198c7ec54d 6:da3fb2312c88
19 # Words that start with an apostrophe. Cribbed from Wordpress. 19 # Words that start with an apostrophe. Cribbed from Wordpress.
20 _ASTART = [ "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", 20 _ASTART = [ "'tain't", "'twere", "'twas", "'tis", "'twill", "'til",
21 "'bout", "'nuff", "'round", "'cause" , "'em" ] 21 "'bout", "'nuff", "'round", "'cause" , "'em" ]
22 22
23 # HTML tags that enclose raw data 23 # HTML tags that enclose raw data
24 _RAW = set(["script", "style"]) 24 _RAW = set(["script", "style", "pre"])
25 25
26 # HTML block elements 26 # HTML block elements
27 _BLOCK = set([ 27 _BLOCK = set([
28 "address", "blockquote", "div", "dl", "fieldset", "form", "h1", 28 "address", "blockquote", "div", "dl", "fieldset", "form", "h1", "h2",
29 "h2", "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "pre", 29 "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "table", "ul"
30 "table", "ul"
31 ]) 30 ])
32 31
33 # F u n c t i o n s 32 # F u n c t i o n s
34 33
35 def uncurl(ws): 34 def uncurl(ws):
210 ws = self.workspace 209 ws = self.workspace
211 end = pos + len(self._endtag) 210 end = pos + len(self._endtag)
212 # only a matching end tag gets us out of the raw state 211 # only a matching end tag gets us out of the raw state
213 if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()): 212 if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()):
214 self._ltpos = pos 213 self._ltpos = pos
215 self._state = self._seen_lt 214 self._state = self._norm if self._endtag == "</pre" else self._ltstate
216 215
217 def _seen_ld(self): 216 def _seen_ld(self):
218 pos = self._pos 217 pos = self._pos
219 ws = self.workspace 218 ws = self.workspace
220 char = ws[pos] 219 char = ws[pos]