changeset 6:da3fb2312c88

Leave bodies of <pre> tags alone.
author David Barts <n5jrn@me.com>
date Thu, 26 Dec 2019 20:38:37 -0800
parents d5198c7ec54d
children 9df9ff8cecde
files curlers.py
diffstat 1 files changed, 4 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/curlers.py	Thu Dec 26 20:24:32 2019 -0800
+++ b/curlers.py	Thu Dec 26 20:38:37 2019 -0800
@@ -21,13 +21,12 @@
     "'bout", "'nuff", "'round", "'cause" , "'em" ]
 
 # HTML tags that enclose raw data
-_RAW = set(["script", "style"])
+_RAW = set(["script", "style", "pre"])
 
 # HTML block elements
 _BLOCK = set([
-    "address", "blockquote", "div", "dl", "fieldset", "form", "h1",
-    "h2", "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "pre",
-    "table", "ul"
+    "address", "blockquote", "div", "dl", "fieldset", "form", "h1", "h2",
+    "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "table", "ul"
 ])
 
 # F u n c t i o n s
@@ -212,7 +211,7 @@
         # only a matching end tag gets us out of the raw state
         if ws[pos] == '<' and ws[pos:end].lower() == self._endtag and (not ws[end].isalnum()):
             self._ltpos = pos
-            self._state = self._seen_lt
+            self._state = self._norm if self._endtag == "</pre" else self._ltstate
 
     def _seen_ld(self):
         pos = self._pos