Mercurial > cgi-bin > hgweb.cgi > PassMan
diff src/main/kotlin/name/blackcap/passman/Shplitter.kt @ 19:7d80cbcb67bb
add shlex-style splitter and tests
author | David Barts <n5jrn@me.com> |
---|---|
date | Sun, 30 Jun 2024 20:37:36 -0700 |
parents | |
children | 4391afcf6bd0 |
line wrap: on
line diff
package name.blackcap.passman

/**
 * A tokenizer state: a function that consumes one character and returns
 * `true` when the same character must be re-dispatched to the (new) current
 * state, or `false` when the next input character should be read.
 */
typealias State = (Char) -> Boolean

/**
 * Simplified *nix-shell-style splitting of text into tokens.
 *
 * Only tokenization is performed: no variable expansion, no `~` expansion,
 * and no `\t` / `\n` / `\r` style escapes (KISS).
 *
 * Quoting rules:
 *  - A backslash quotes the next character.
 *  - Single quotes quote everything literally up to the closing single quote.
 *  - Double quotes quote up to the closing double quote but still honor
 *    backslashes, e.g. the input `"\"nice\""` yields the token `"nice"`
 *    (including the two literal quote characters).
 *  - Outside quotes, a backslash followed by a newline acts as ordinary
 *    whitespace.
 *  - Quoted and unquoted segments that touch each other form one token, so
 *    `'foo'bar`, `foo'bar'` and `"foo"'bar'` all yield `foobar`. An empty
 *    pair of quotes standing alone yields an empty token.
 *
 * Typical use: call [feed] one or more times, then [split]. Tokens accumulate
 * over the lifetime of the instance; use a fresh instance to start over.
 */
class Shplitter {
    // States in which the input is inside a quoted string.
    private val quoting = setOf<State>(::inSingle, ::inDouble)

    // Characters that separate tokens when not quoted.
    private val whitespace = setOf(' ', '\t', '\n')

    // Stack of suspended states; the top is the state to return to.
    private val oldStates = mutableListOf<State>()

    // State that will receive the next character.
    private var state: State = ::space

    // Tokens completed so far.
    private val accum = mutableListOf<String>()

    // Text of the token currently being assembled.
    private val current = StringBuilder()

    /**
     * `true` when the input fed so far does not end inside a quoted string
     * or immediately after a backslash, i.e. when [split] may be called.
     */
    val complete: Boolean
        get() = state == ::space || state == ::nonspace

    /**
     * Feeds more input into this tokenizer.
     *
     * @param input text to tokenize; may end in the middle of a token or a
     *   quoted string, in which case a later call continues where this one
     *   left off.
     */
    fun feed(input: String) {
        for (ch in input) {
            // A state returning true has switched states and wants the same
            // character handled again by the new state.
            while (state(ch)) {
                // all the work happens in the state function
            }
        }
    }

    /**
     * Finishes any token in progress and returns every token seen so far.
     *
     * @return a snapshot of the tokens; it is not affected by later calls
     *   to [feed] or [split].
     * @throws IllegalStateException if the input ends inside a quoted string
     *   or right after a backslash (see [complete]).
     */
    fun split(): Iterable<String> {
        check(complete) { "incomplete quoted expression" }
        if (state == ::nonspace) {
            // A word is in progress (possibly empty, e.g. input `''`).
            accum.add(current.toString())
            current.clear()
            popState()
        }
        // Copy so callers never observe (or mutate) the internal list.
        return accum.toList()
    }

    // State transitions

    // Suspends the current state and makes newState current.
    private fun pushState(newState: State) {
        oldStates.add(state)
        state = newState
    }

    // Resumes the most recently suspended state.
    private fun popState() {
        // removeAt(lastIndex) rather than removeLast(): the latter can bind to
        // java.util.List.removeLast() when compiled against JDK 21 and then
        // fail with NoSuchMethodError on older runtimes.
        state = oldStates.removeAt(oldStates.lastIndex)
    }

    // The state that popState() would resume.
    private fun lastState(): State = oldStates.last()

    // States. Each accepts the character currently being processed and
    // returns whether that character must be processed again by the state
    // that is now current.

    // Between tokens.
    private fun space(ch: Char): Boolean =
        when (ch) {
            in whitespace -> false
            '\\' -> {
                pushState(::backslash)
                false
            }
            else -> {
                // Anything else, including an opening quote, starts a word;
                // let nonspace handle the character itself. Routing quotes
                // through nonspace is what keeps `'foo'bar` a single token.
                pushState(::nonspace)
                true
            }
        }

    // Inside a word, outside any quotes.
    private fun nonspace(ch: Char): Boolean {
        when (ch) {
            in whitespace -> {
                // End of word; emitted even when empty so that `''` counts.
                accum.add(current.toString())
                current.clear()
                popState()
            }
            '\'' -> pushState(::inSingle)
            '"' -> pushState(::inDouble)
            '\\' -> pushState(::backslash)
            else -> current.append(ch)
        }
        return false
    }

    // Inside single quotes: everything up to the closing quote is literal.
    private fun inSingle(ch: Char): Boolean {
        if (ch == '\'') {
            popState()
        } else {
            current.append(ch)
        }
        return false
    }

    // Inside double quotes: literal except that backslash still escapes.
    private fun inDouble(ch: Char): Boolean {
        when (ch) {
            '\\' -> pushState(::backslash)
            '"' -> popState()
            else -> current.append(ch)
        }
        return false
    }

    // Immediately after a backslash.
    private fun backslash(ch: Char): Boolean {
        val last = lastState()
        return when {
            ch == '\n' && last !in quoting -> {
                // Not quoting: backslash-newline becomes ordinary whitespace,
                // so hand the newline back to the suspended state.
                popState()
                true
            }
            last == ::space -> {
                // The escaped character starts a new unquoted word. The
                // space state stays on the stack as the word's parent.
                current.append(ch)
                state = ::nonspace
                false
            }
            else -> {
                // The escaped character continues the existing word or
                // quoted string.
                current.append(ch)
                popState()
                false
            }
        }
    }
}