Mercurial > cgi-bin > hgweb.cgi > ClipMan
comparison src/name/blackcap/clipman/Coerce.kt @ 31:0c6c18a733b7
Compiles, new menu still a mess.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 30 Jan 2020 16:01:51 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:0e88c6bed11e | 31:0c6c18a733b7 |
---|---|
1 /* | |
2 * Font coercion. Would be easier if we didn't have to rummage through | |
3 * a doc tree and could use SAX-style callbacks instead. Would run faster | |
4 * if we didn't have a doc tree. Would use less memory if we didn't have | |
5 * a doc tree. But all HTML parsers these days force a doc tree on you. | |
6 * Sigh. | |
7 */ | |
8 package name.blackcap.clipman | |
9 | |
10 import org.jsoup.Jsoup | |
11 import org.jsoup.nodes.* | |
12 import java.util.Formatter | |
13 import java.util.logging.Level | |
14 import java.util.logging.Logger | |
15 | |
/**
 * Coerce fonts in HTML document.
 *
 * Parses the document, applies the standard scrubbing, drops existing style
 * sheets, unwraps presentational tags, strips styling attributes, and adds
 * one style sheet of our own that names the requested fonts.
 *
 * @param uncoerced String HTML document in
 * @param pFamily String proportionally-spaced font family
 * @param pSize Float proportionally-spaced font size
 * @param mFamily String monospaced font family
 * @param mSize Float monospaced font size
 * @return HTML document with coerced fonts
 */
fun coerceHTML(uncoerced: String, pFamily: String, pSize: Float, mFamily: String, mSize: Float): String {
    val doc = Jsoup.parse(uncoerced)

    /* apply standard scrubbing */
    val head = _scrub(doc)

    /* remove all stylesheets (and their content) and references to same */
    doc.select("link[rel=stylesheet],style").remove()

    /*
     * Build a style sheet of our own. The formatter is pinned to Locale.ROOT
     * so that %.2f always uses '.' as the decimal separator; with the default
     * locale a comma could be emitted, which is not a valid CSS number.
     */
    val styleSheet = Formatter(java.util.Locale.ROOT).run {
        format("%nbody { font-family: \"%s\"; font-size: %.2f; }%n",
            pFamily, pSize)
        format("code, kbd, pre, samp, tt { font-family: \"%s\"; font-size: %.2f; }%n",
            mFamily, mSize)
        toString()
    }

    /* head is null when scrubbing found no <head>; then no style sheet is added */
    head?.appendElement("style")?.appendChild(DataNode(styleSheet))

    /* remove all styling tags, but keep their content */
    doc.select("basefont,big,div,font,small,span").forEach { discardTag ->
        /* iterate over a snapshot: moving a child mutates the live child list */
        discardTag.getChildrenAsArray().forEach { child ->
            discardTag.before(child)
        }
        discardTag.remove()
    }

    /* remove all styling attributes */
    val hitList = arrayOf("bgcolor", "class", "color", "style")
    val selector = hitList.joinToString(prefix = "[", separator = "],[", postfix = "]")
    doc.select(selector).forEach { styled ->
        styled.removeAll(*hitList)
    }

    /* remove body styling attributes */
    doc.selectFirst(":root>body")?.removeAll("background", "text", "link", "alink", "vlink")

    /* clean up horizontal rules */
    doc.select("hr").forEach { rule ->
        rule.removeAll("shade", "noshade")
    }

    /* that's all! */
    return _output(doc)
}
69 | |
/**
 * "Scrub" an HTML document (make it Java and clipboard-friendly).
 *
 * The input is parsed, the standard scrubbing is applied to the parsed
 * tree, and the tree is serialized back to a string.
 *
 * @param unscrubbed String HTML document in
 * @return scrubbed HTML document
 */
fun scrub(unscrubbed: String): String =
    Jsoup.parse(unscrubbed).let { parsed ->
        /* the <head> returned by _scrub is not needed here */
        _scrub(parsed)
        _output(parsed)
    }
80 | |
/**
 * Apply the standard scrubbing to a parsed document, in place: drop doctype
 * and XML declaration nodes, strip attributes other than lang/dir from
 * <html>, and replace any charset declarations with one of our own.
 *
 * @param doc Document to scrub (modified in place)
 * @return the document's head element, or null (after logging) if none was found
 */
private fun _scrub(doc: Document): Element? {
    /* remove any doctype or XML declarations (snapshot avoids mutating the live list) */
    doc.getChildrenAsArray()
        .filter { it is DocumentType || it is XmlDeclaration }
        .forEach { it.remove() }

    /*
     * remove all non-HTML-4.01 attributes from <html>. The Document node
     * itself sits above <html>, so the attributes must be taken from the
     * <html> element rather than from doc.
     */
    val allowedOnHtml = setOf("lang", "dir")
    doc.selectFirst("html")?.let { html ->
        /* collect the keys first so attributes are not removed while iterating them */
        html.attributes().map { it.key }
            .filterNot { it in allowedOnHtml }
            .forEach { html.removeAttr(it) }
    }

    /* remove any conflicting charset declarations */
    doc.select("meta[http-equiv=content-type],meta[charset]").remove()

    /* add a charset declaration of our own */
    val head = doc.selectFirst(":root>head")
    if (head == null) {
        LOGGER.log(Level.SEVERE, "no head found!")
        return null
    }
    head.prependElement("meta").run {
        attr("http-equiv", "Content-Type")
        attr("content", "text/html; charset=$CHARSET_NAME")
    }
    return head
}
111 | |
/**
 * Serialize a document to a string, after setting its output charset to
 * CHARSET_NAME and its output syntax to XML.
 *
 * @param doc Document to serialize (its output settings are modified)
 * @return the document's outer HTML
 */
private fun _output(doc: Document): String {
    val settings = doc.outputSettings()
    settings.charset(CHARSET_NAME)
    settings.syntax(Document.OutputSettings.Syntax.xml)
    return doc.outerHtml()
}
118 | |
/**
 * Snapshot this node's children into an array, so callers can add or remove
 * nodes while iterating without hitting ConcurrentModificationException.
 * Ah, the joys of the doc tree.
 * @return Array<Node> holding the child nodes in order
 */
fun Node.getChildrenAsArray(): Array<Node> = childNodes().toTypedArray()
127 | |
/**
 * Remove every named attribute that is present on this element; names the
 * element does not carry are skipped.
 * @param hitList names of the attributes to remove
 */
fun Element.removeAll(vararg hitList: String) {
    for (name in hitList) {
        if (!hasAttr(name)) {
            continue
        }
        removeAttr(name)
    }
}