changeset 7:9df9ff8cecde

Undo that; ignoring <pre> is a sticky wicket.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 20:56:38 -0800
parents da3fb2312c88
children 05363e803272
files curlers.py
diffstat 1 files changed, 5 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/curlers.py	Thu Dec 26 20:38:37 2019 -0800
+++ b/curlers.py	Thu Dec 26 20:56:38 2019 -0800
@@ -21,12 +21,13 @@
     "'bout", "'nuff", "'round", "'cause" , "'em" ]
 
 # HTML tags that enclose raw data
-_RAW = set(["script", "style", "pre"])
+_RAW = set(["script", "style"])
 
 # HTML block elements
 _BLOCK = set([
-    "address", "blockquote", "div", "dl", "fieldset", "form", "h1", "h2",
-    "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "table", "ul"
+    "address", "blockquote", "div", "dl", "fieldset", "form", "h1",
+    "h2", "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "pre",
+    "table", "ul"
 ])
 
 # F u n c t i o n s
@@ -211,7 +212,7 @@
         # only a matching end tag gets us out of the raw state
         if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()):
             self._ltpos = pos
-            self._state = self._norm if self._endtag == "</pre" else self._ltstate
+            self._state = self._seen_lt
 
     def _seen_ld(self):
         pos = self._pos