diff src/main/kotlin/name/blackcap/passman/Shplitter.kt @ 19:7d80cbcb67bb

add shlex-style splitter and tests
author David Barts <n5jrn@me.com>
date Sun, 30 Jun 2024 20:37:36 -0700
parents
children 4391afcf6bd0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/kotlin/name/blackcap/passman/Shplitter.kt	Sun Jun 30 20:37:36 2024 -0700
@@ -0,0 +1,139 @@
+package name.blackcap.passman
+
+// A state is a function fed the current character; true = re-dispatch it.
+typealias State = (Char) -> Boolean
+
+/**
+ * Simplified *nix shell style splitting of input into tokens. Only
+ * tokenizing is done: no variable expansion, no ~home expansion, etc.
+ * Call [feed] one or more times; once [complete] is true, [split]
+ * returns the tokens gathered so far. Quoting rules:
+ * - Backslash quotes the next character.
+ * - Single quotes quote everything literally up to the closing quote.
+ * - Double quotes quote up to the closing double quote but honor
+ *   backslashes (i.e. "\"\nice\"" -> "nice").
+ * - No \t, \n, \r etc. backslash escapes are supported (KISS).
+ * - Quoted and unquoted pieces that touch form one token ('a'b -> ab).
+ */
+class Shplitter {
+    // Quoting states: inside them an escaped newline is kept as a newline.
+    private val QUOTING = setOf<State>(::inSingle, ::inDouble)
+    private val WHITESPACE = setOf(' ', '\t', '\n')
+    // Stack of suspended states; the top is resumed by popState().
+    private val oldStates = mutableListOf<State>()
+    private var state: State = ::space
+    // Completed tokens, and the token currently being assembled.
+    private val accum = mutableListOf<String>()
+    private val current = StringBuilder()
+
+    /** True when no quote or backslash escape is still open. */
+    val complete: Boolean
+        get() = state == ::space || state == ::nonspace
+
+    /** Feeds more [input] into this tokenizer, one character at a time. */
+    fun feed(input: String) {
+        for (ch in input) {
+            while (state(ch)) { /* true: re-dispatch ch to the new state */ }
+        }
+    }
+
+    /**
+     * Returns the tokens gathered so far, finishing any pending unquoted
+     * token. The result is a snapshot; later [feed] calls do not alter it.
+     * @throws IllegalStateException if a quote or escape is still open
+     */
+    fun split(): Iterable<String> {
+        check(complete) { "incomplete quoted expression" }
+        if (state == ::nonspace) {
+            // Add even when empty: "" at end of input is an empty token.
+            finishToken()
+            popState()
+        }
+        return accum.toList()
+    }
+
+    // State transitions
+
+    private fun pushState(newState: State) {
+        oldStates.add(state)
+        state = newState
+    }
+
+    private fun popState() {
+        // removeAt: removeLast() can bind to JDK 21's List.removeLast().
+        state = oldStates.removeAt(oldStates.lastIndex)
+    }
+
+    private fun lastState(): State = oldStates.last()
+
+    // Moves the text assembled in current onto the token list.
+    private fun finishToken() {
+        accum.add(current.toString())
+        current.clear()
+    }
+
+    // Leaves a quoted section. A quote opened between tokens continues
+    // as an unquoted token so that adjoining text ('a'b) stays attached.
+    private fun endQuote() {
+        if (lastState() == ::space) state = ::nonspace else popState()
+    }
+
+    // States. A state is represented by a function that accepts the
+    // character currently being processed, and returns whether it should
+    // immediately transition to the next state without reading a new
+    // character.
+
+    // Between tokens: skips whitespace and opens the next token.
+    private fun space(ch: Char): Boolean {
+        when (ch) {
+            in WHITESPACE -> Unit
+            '\'' -> pushState(::inSingle)
+            '"' -> pushState(::inDouble)
+            '\\' -> pushState(::backslash)
+            else -> { pushState(::nonspace); return true }
+        }
+        return false
+    }
+
+    // Inside an unquoted token; whitespace ends it.
+    private fun nonspace(ch: Char): Boolean {
+        when (ch) {
+            in WHITESPACE -> { finishToken(); popState() }
+            '\'' -> pushState(::inSingle)
+            '"' -> pushState(::inDouble)
+            '\\' -> pushState(::backslash)
+            else -> current.append(ch)
+        }
+        return false
+    }
+
+    // Inside single quotes: everything but the closing quote is literal.
+    private fun inSingle(ch: Char): Boolean {
+        if (ch == '\'') endQuote() else current.append(ch)
+        return false
+    }
+
+    // Inside double quotes: as single quotes, but backslash still escapes.
+    private fun inDouble(ch: Char): Boolean {
+        when (ch) {
+            '\\' -> pushState(::backslash)
+            '"' -> endQuote()
+            else -> current.append(ch)
+        }
+        return false
+    }
+
+    // Right after a backslash: ch is the escaped character.
+    private fun backslash(ch: Char): Boolean {
+        val last = lastState()
+        if (ch == '\n' && last !in QUOTING) {
+            // if not quoting, \\n makes a normal whitespace out of command terminator
+            popState()
+            return true
+        }
+        current.append(ch)
+        // An escape met between tokens starts a new unquoted token.
+        if (last == ::space) state = ::nonspace else popState()
+        return false
+    }
+}