view src/main/kotlin/name/blackcap/passman/Shplitter.kt @ 19:7d80cbcb67bb

add shlex-style splitter and tests
author David Barts <n5jrn@me.com>
date Sun, 30 Jun 2024 20:37:36 -0700 (6 months ago)
parents
children 4391afcf6bd0
line wrap: on
line source
package name.blackcap.passman

// A state is represented by a state-executing function: it consumes the
// character currently being processed and returns true when that same
// character must be dispatched again to the (new) current state.
typealias State = (Char) -> Boolean

// Support for simplified *nix shell style splitting into tokens. We are
// focused on splitting things into tokens only. No variable expansion,
// no ~home expansion, etc. Backslash quotes the next character. Single
// quotes quote everything literally up to the next closing single quote.
// Double quotes quote to the closing double quote but honor backslashes
// (i.e. "\"\nice\"" -> "nice"). No \t, \n, \r etc. backslash escapes
// supported (KISS).
//
// Quoted and unquoted sections that touch each other belong to the same
// token, wherever the quotes appear in the word: 'foo'bar, foo'bar' and
// "foo"'bar' all produce the single token foobar. An empty pair of quotes
// standing alone ('' or "") produces an empty token.
//
// Outside of quotes, a backslash immediately followed by a newline is
// treated as ordinary whitespace (it separates tokens).
//
// Usage: call feed() one or more times, check `complete`, then call split().
class Shplitter {
    // States in which a backslash-newline is taken literally rather than
    // being turned into whitespace.
    private val quoting = setOf<State>(::inSingle, ::inDouble)
    private val whitespace = setOf(' ', '\t', '\n')

    // Stack of suspended states; the current state is kept in `state`.
    private val oldStates = mutableListOf<State>()
    private var state: State = ::space

    // Finished tokens, and the token currently being assembled.
    private val accum = mutableListOf<String>()
    private val current = StringBuilder()

    // True when the input fed so far does not end inside a quoted string
    // or directly after a backslash, i.e. when split() may be called.
    val complete: Boolean
        get() = state == ::space || state == ::nonspace

    // Feeds more input into this tokenizer. May be called repeatedly, e.g.
    // to supply continuation lines while `complete` is false.
    fun feed(input: String) {
        for (ch in input) {
            // A state returning true has switched states without consuming
            // the character, so hand the same character to the new state.
            while (state(ch)) {
                // nothing to do; the state call in the condition is the work
            }
        }
    }

    // Returns the tokens seen so far as a snapshot (later feed() calls do not
    // alter a list already returned). Tokens are not discarded by this call,
    // so a later split() returns them again followed by any new ones.
    //
    // Throws IllegalStateException if the input ends inside a quoted string
    // or directly after a backslash (see `complete`).
    fun split(): Iterable<String> {
        check(complete) { "incomplete quoted expression" }
        if (state == ::nonspace) {
            // End of input terminates the pending word. It is flushed even
            // when empty so that a trailing '' or "" still yields a token.
            accum.add(current.toString())
            current.clear()
            popState()
        }
        return accum.toList()
    }

    // State transitions

    private fun pushState(newState: State) {
        oldStates.add(state)
        state = newState
    }

    private fun popState() {
        // removeAt(lastIndex) rather than removeLast(): the latter can bind
        // to the JDK 21 List.removeLast member and fail on older runtimes.
        state = oldStates.removeAt(oldStates.lastIndex)
    }

    private fun lastState(): State = oldStates.last()

    // States. A state is represented by a function that accepts the
    // character currently being processed, and returns whether it should
    // immediately transition to the next state without reading a new
    // character.

    // Between tokens.
    private fun space(ch: Char): Boolean =
        when (ch) {
            in whitespace -> false
            // Handled here (not in nonspace) so that a backslash-newline
            // between tokens does not start an empty word.
            '\\' -> { pushState(::backslash); false }
            // Anything else, including an opening quote, starts a word; let
            // nonspace process this same character.
            else -> { pushState(::nonspace); true }
        }

    // Inside a word (always entered from space, so popping returns there).
    private fun nonspace(ch: Char): Boolean =
        when (ch) {
            in whitespace -> {
                accum.add(current.toString())
                current.clear()
                popState()
                false
            }
            '\'' -> { pushState(::inSingle); false }
            '"' -> { pushState(::inDouble); false }
            '\\' -> { pushState(::backslash); false }
            else -> { current.append(ch); false }
        }

    // Inside single quotes: everything is literal up to the closing quote.
    private fun inSingle(ch: Char): Boolean =
        when (ch) {
            '\'' -> { popState(); false }
            else -> { current.append(ch); false }
        }

    // Inside double quotes: literal except that backslash quotes the next
    // character.
    private fun inDouble(ch: Char): Boolean =
        when (ch) {
            '\\' -> { pushState(::backslash); false }
            '"' -> { popState(); false }
            else -> { current.append(ch); false }
        }

    // Directly after a backslash; `ch` is the character being quoted.
    private fun backslash(ch: Char): Boolean {
        val last = lastState()
        return when {
            ch == '\n' && last !in quoting -> {
                // if not quoting, \\n makes a normal whitespace out of command
                // terminator: return to the previous state and let it see the
                // newline as whitespace
                popState()
                true
            }
            last == ::space -> {
                // start a new unquoted string no matter what; the stack
                // already holds `space`, so replacing the current state is
                // equivalent to having pushed nonspace from space
                current.append(ch)
                state = ::nonspace
                false
            }
            else -> {
                // continue existing string no matter what
                current.append(ch)
                popState()
                false
            }
        }
    }
}