comparison src/name/blackcap/clipman/Coerce.kt @ 31:0c6c18a733b7

Compiles, new menu still a mess.
author David Barts <n5jrn@me.com>
date Thu, 30 Jan 2020 16:01:51 -0800
parents
children
comparison
equal deleted inserted replaced
30:0e88c6bed11e 31:0c6c18a733b7
1 /*
2 * Font coercion. Would be easier if we didn't have to rummage through
3 * a doc tree and could use SAX-style callbacks instead. Would run faster
4 * if we didn't have a doc tree. Would use less memory if we didn't have
5 * a doc tree. But all HTML parsers these days force a doc tree on you.
6 * Sigh.
7 */
8 package name.blackcap.clipman
9
10 import org.jsoup.Jsoup
11 import org.jsoup.nodes.*
12 import java.util.Formatter
13 import java.util.logging.Level
14 import java.util.logging.Logger
15
/**
 * Coerce fonts in an HTML document.
 *
 * The document is parsed, scrubbed, stripped of its existing style sheets,
 * styling tags and styling attributes, and given a single style sheet that
 * sets the proportional and monospaced fonts. If the parsed document has no
 * head element, no style sheet is added, but the rest of the clean-up is
 * still performed.
 *
 * @param uncoerced String HTML document in
 * @param pFamily String proportionally-spaced font family
 * @param pSize Float proportionally-spaced font size
 * @param mFamily String monospaced font family
 * @param mSize Float monospaced font size
 * @return HTML document with coerced fonts
 */
fun coerceHTML(uncoerced: String, pFamily: String, pSize: Float, mFamily: String, mSize: Float): String {
    val doc = Jsoup.parse(uncoerced)

    /* apply standard scrubbing; yields the head element, or null if none */
    val head = _scrub(doc)

    /* remove all stylesheets (and their content) and references to same */
    doc.select("link[rel=stylesheet],style").remove()

    /*
     * Add a style sheet of our own. The formatter is pinned to the root
     * locale: with the default locale, %.2f may be rendered with a decimal
     * comma (e.g. "12,00"), which is not a valid CSS number.
     */
    head?.appendElement("style")?.appendChild(DataNode(Formatter(java.util.Locale.ROOT).run {
        format("%nbody { font-family: \"%s\"; font-size: %.2f; }%n",
            pFamily, pSize)
        format("code, kbd, pre, samp, tt { font-family: \"%s\"; font-size: %.2f; }%n",
            mFamily, mSize)
        toString()
    }))

    /* remove all styling tags, but keep their content (children move up into the parent) */
    doc.select("basefont,big,div,font,small,span").forEach { discardTag ->
        discardTag.unwrap()
    }

    /* remove all styling attributes */
    val hitList = arrayOf("bgcolor", "class", "color", "style")
    val selector = hitList.joinToString(prefix = "[", separator = "],[", postfix = "]")
    doc.select(selector).forEach { styled ->
        styled.removeAll(*hitList)
    }

    /* remove body styling attributes */
    doc.selectFirst(":root>body")?.removeAll("background", "text", "link", "alink", "vlink")

    /* clean up horizontal rules */
    doc.select("hr").forEach { rule ->
        rule.removeAll("shade", "noshade")
    }

    /* that's all! */
    return _output(doc)
}
69
/**
 * "Scrub" an HTML document (make it Java and clipboard-friendly).
 *
 * Parses the input, applies the standard scrubbing pass to the parsed
 * document, and serializes the result.
 *
 * @param unscrubbed String HTML document in
 * @return scrubbed HTML document
 */
fun scrub(unscrubbed: String): String =
    Jsoup.parse(unscrubbed).let { parsed ->
        _scrub(parsed)
        _output(parsed)
    }
80
/**
 * Standard scrubbing pass, applied to the document in place: drops doctype
 * and XML declaration nodes at the top of the document, strips attributes
 * other than "lang" and "dir" from the root element, removes existing
 * charset declarations, and prepends a Content-Type meta element of our own
 * to the head.
 *
 * @param doc parsed document to scrub (modified in place)
 * @return the document's head element, or null (after logging) if it has none
 */
private fun _scrub(doc: Document): Element? {
    /* remove any doctype or XML declarations */
    doc.getChildrenAsArray().forEach {
        if (it is DocumentType || it is XmlDeclaration) {
            it.remove()
        }
    }

    /*
     * Remove all non-HTML-4.01 attributes from <html>. This must target the
     * root element: the attributes of the Document node itself are not the
     * attributes of <html>. Keys are snapshotted first so that removal does
     * not disturb the iteration.
     */
    val keptAttributes = setOf("lang", "dir")
    doc.selectFirst(":root")?.let { html ->
        html.attributes()
            .map { it.key }
            .filter { it !in keptAttributes }
            .forEach { html.removeAttr(it) }
    }

    /* remove any conflicting charset declarations */
    doc.select("meta[http-equiv=content-type],meta[charset]").remove()

    /* add a charset declaration of our own */
    val head = doc.selectFirst(":root>head")
    if (head == null) {
        LOGGER.log(Level.SEVERE, "no head found!")
        return null
    }
    head.prependElement("meta").run {
        attr("http-equiv", "Content-Type")
        attr("content", "text/html; charset=" + CHARSET_NAME)
    }
    return head
}
111
/**
 * Serialize the document, after setting its output charset to CHARSET_NAME
 * and its output syntax to XML.
 *
 * @param doc document to serialize (its output settings are modified)
 * @return the document's outer HTML
 */
private fun _output(doc: Document): String {
    val settings = doc.outputSettings()
    settings.charset(CHARSET_NAME)
    settings.syntax(Document.OutputSettings.Syntax.xml)
    return doc.outerHtml()
}
118
/**
 * Copies this node's child list into a fresh array, so callers can add or
 * remove children while iterating without hitting a
 * ConcurrentModificationException. Ah, the joys of the doc tree.
 * @return Array<Node>
 */
fun Node.getChildrenAsArray(): Array<Node> = childNodes().toTypedArray()
127
/**
 * Remove each of the named attributes from this element, skipping any
 * the element does not have.
 * @param hitList names of the attributes to remove
 */
fun Element.removeAll(vararg hitList: String) {
    for (name in hitList) {
        if (!hasAttr(name)) continue
        removeAttr(name)
    }
}