changeset 19:7d80cbcb67bb

add shlex-style splitter and tests
author David Barts <n5jrn@me.com>
date Sun, 30 Jun 2024 20:37:36 -0700
parents 8f3ddebb4295
children 4391afcf6bd0
files pom.xml src/main/kotlin/name/blackcap/passman/Database.kt src/main/kotlin/name/blackcap/passman/Shplitter.kt src/test/kotlin/name/blackcap/passman/ShplitterTest.kt
diffstat 4 files changed, 277 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/pom.xml	Tue Apr 04 20:38:52 2023 -0700
+++ b/pom.xml	Sun Jun 30 20:37:36 2024 -0700
@@ -111,8 +111,20 @@
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+            <version>5.10.3</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-engine</artifactId>
-            <version>5.8.2</version>
+            <version>5.10.3</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-api</artifactId>
+            <version>5.10.3</version>
             <scope>test</scope>
         </dependency>
         <dependency>
@@ -128,7 +140,7 @@
         <dependency>
             <groupId>org.xerial</groupId>
             <artifactId>sqlite-jdbc</artifactId>
-            <version>3.36.0.3</version>
+            <version>3.41.2.2</version>
         </dependency>
         <dependency>
             <groupId>commons-cli</groupId>
@@ -138,7 +150,7 @@
         <dependency>
             <groupId>com.opencsv</groupId>
             <artifactId>opencsv</artifactId>
-            <version>5.5</version>
+            <version>5.9</version>
         </dependency>
     </dependencies>
 
--- a/src/main/kotlin/name/blackcap/passman/Database.kt	Tue Apr 04 20:38:52 2023 -0700
+++ b/src/main/kotlin/name/blackcap/passman/Database.kt	Sun Jun 30 20:37:36 2024 -0700
@@ -52,10 +52,10 @@
         private fun init(connection: Connection, masterPassword: CharArray): Encryption {
             try {
                 connection.createStatement().use { stmt ->
-                    stmt.executeUpdate("create table integers ( name string not null, value integer )")
-                    stmt.executeUpdate("create table reals ( name string not null, value integer )")
-                    stmt.executeUpdate("create table strings ( name string not null, value real )")
-                    stmt.executeUpdate("create table blobs ( name string not null, value blob )")
+                    stmt.executeUpdate("create table integers ( name text not null, value integer )")
+                    stmt.executeUpdate("create table reals ( name text not null, value real )")
+                    stmt.executeUpdate("create table strings ( name text not null, value text )")
+                    stmt.executeUpdate("create table blobs ( name text not null, value blob )")
                     stmt.executeUpdate(
                         "create table passwords (" +
                                 "id integer not null primary key, " +
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/kotlin/name/blackcap/passman/Shplitter.kt	Sun Jun 30 20:37:36 2024 -0700
@@ -0,0 +1,139 @@
+package name.blackcap.passman
+
+/**
+ * A tokenizer state: a function that is handed the character currently
+ * being processed and returns `true` when that same character must be
+ * dispatched again to the (possibly changed) current state, or `false`
+ * when the character has been consumed.
+ */
+typealias State = (Char) -> Boolean
+
+/**
+ * Simplified *nix-shell-style splitter that turns fed text into tokens.
+ *
+ * Only tokenization is performed: no variable expansion, no ~home
+ * expansion, etc. The rules are:
+ *
+ * - Unquoted space, tab and newline separate tokens.
+ * - A backslash quotes the next character. An unquoted backslash-newline
+ *   pair is the exception: it is treated as ordinary whitespace.
+ * - Single quotes quote everything literally up to the closing single quote.
+ * - Double quotes quote up to the closing double quote but honor
+ *   backslashes (e.g. `"a \"b\" c"` is the single token `a "b" c`).
+ * - Quoted and unquoted runs that touch form one token (`"foo"bar` is
+ *   `foobar`); an empty pair of quotes on its own is an empty token.
+ * - No `\t`, `\n`, `\r` etc. backslash escapes are supported (KISS).
+ *
+ * Typical use: call [feed] one or more times, check [complete], then
+ * call [split].
+ */
+class Shplitter {
+    // States in which a backslash-newline must be kept literally.
+    private val quotingStates = setOf<State>(::inSingle, ::inDouble)
+    private val whitespace = setOf(' ', '\t', '\n')
+
+    // Stack of suspended states; the active one lives in `state`.
+    private val oldStates = mutableListOf<State>()
+    private var state: State = ::space
+
+    // Finished tokens, and the token currently being assembled.
+    private val accum = mutableListOf<String>()
+    private val current = StringBuilder()
+
+    /**
+     * `true` when the input fed so far does not end inside a quoted
+     * string or directly after a backslash.
+     */
+    val complete: Boolean
+        get() = state == ::space || state == ::nonspace
+
+    /** Feeds more input into this tokenizer. May be called repeatedly. */
+    fun feed(input: String) {
+        for (ch in input) {
+            // A state returns true to have the same character re-dispatched
+            // to whatever state it just switched to.
+            while (state(ch)) {
+                // keep dispatching
+            }
+        }
+    }
+
+    /**
+     * Finishes the token in progress (if any) and returns a snapshot of
+     * every token produced so far.
+     *
+     * @throws IllegalStateException if the input is not [complete].
+     */
+    fun split(): Iterable<String> {
+        check(complete) { "incomplete quoted expression" }
+        if (state == ::nonspace) {
+            // A word is pending. It may legitimately be empty (input ended
+            // right after `""` or `''`), so flush it unconditionally.
+            finishToken()
+        }
+        return accum.toList()
+    }
+
+    // State transitions
+
+    private fun pushState(newState: State) {
+        oldStates.add(state)
+        state = newState
+    }
+
+    private fun popState() {
+        // removeAt(lastIndex) rather than removeLast(): the latter can bind
+        // to the JDK 21 List.removeLast() and fail on older runtimes.
+        state = oldStates.removeAt(oldStates.lastIndex)
+    }
+
+    private fun lastState(): State = oldStates.last()
+
+    // Moves the word in progress to the finished tokens and leaves the
+    // nonspace state. Only called while `state` is nonspace.
+    private fun finishToken() {
+        accum.add(current.toString())
+        current.clear()
+        popState()
+    }
+
+    // Leaves a quoted run. If the quote opened the word (it was entered
+    // from the space state) we are now inside that word, so switch to
+    // nonspace instead of emitting the token right away; otherwise text
+    // directly following the closing quote would start a new token.
+    private fun endQuote() {
+        popState()
+        if (state == ::space) {
+            pushState(::nonspace)
+        }
+    }
+
+    // States. A state is represented by a function that accepts the
+    // character currently being processed, and returns whether it should
+    // immediately transition to the next state without reading a new
+    // character.
+
+    // Between tokens.
+    private fun space(ch: Char): Boolean =
+        when (ch) {
+            in whitespace -> false
+            '\'' -> { pushState(::inSingle); false }
+            '"' -> { pushState(::inDouble); false }
+            '\\' -> { pushState(::backslash); false }
+            // First character of an unquoted word: let nonspace consume it.
+            else -> { pushState(::nonspace); true }
+        }
+
+    // Inside a word, outside any quotes.
+    private fun nonspace(ch: Char): Boolean {
+        when (ch) {
+            in whitespace -> finishToken()
+            '\'' -> pushState(::inSingle)
+            '"' -> pushState(::inDouble)
+            '\\' -> pushState(::backslash)
+            else -> current.append(ch)
+        }
+        return false
+    }
+
+    // Inside single quotes: everything up to the closing quote is literal.
+    private fun inSingle(ch: Char): Boolean {
+        if (ch == '\'') endQuote() else current.append(ch)
+        return false
+    }
+
+    // Inside double quotes: literal except for backslash and the closing quote.
+    private fun inDouble(ch: Char): Boolean {
+        when (ch) {
+            '\\' -> pushState(::backslash)
+            '"' -> endQuote()
+            else -> current.append(ch)
+        }
+        return false
+    }
+
+    // Directly after a backslash; `ch` is the character being quoted.
+    private fun backslash(ch: Char): Boolean {
+        val last = lastState()
+        return when {
+            // if not quoting, \\n makes a normal whitespace out of command terminator
+            ch == '\n' && last !in quotingStates -> { popState(); true }
+            // start a new unquoted string no matter what; the space state
+            // stays on the stack so that nonspace can later pop back to it
+            last == ::space -> { current.append(ch); state = ::nonspace; false }
+            // continue existing string no matter what
+            else -> { current.append(ch); popState(); false }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/kotlin/name/blackcap/passman/ShplitterTest.kt	Sun Jun 30 20:37:36 2024 -0700
@@ -0,0 +1,119 @@
+package name.blackcap.passman
+
+import org.junit.jupiter.api.Assertions.assertFalse
+import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Test
+
+/**
+ * Unit tests for [Shplitter]: detection of complete/incomplete input and
+ * the tokens produced by splitting.
+ */
+class ShplitterTest {
+    /** Inputs that end outside any quote or backslash must report complete. */
+    @Test
+    fun complete() {
+        val cases = arrayOf<String>(
+            "unquoted",
+            "\"double quoted\"",
+            "'single quoted'",
+            "back\\ slash",
+            "in\\terior back slash",
+            "\"first\" word double quoted",
+            "last word double \"quoted\"",
+            "'first' word single quoted",
+            "last word single 'quoted'")
+
+        for (case in cases) {
+            val fixture = Shplitter()
+            fixture.feed(case)
+            assertTrue(fixture.complete, "should be complete but is not: ${case}")
+        }
+    }
+
+    /** Inputs that end inside a quote or after a backslash must not report complete. */
+    @Test
+    fun incomplete() {
+        val cases = arrayOf<String>(
+            "ends with backslash\\",
+            "unclosed double \"quote",
+            "ends with double quote\"",
+            "unclosed single 'quote",
+            "ends with single quote'"
+        )
+        for (case in cases) {
+            val fixture = Shplitter()
+            fixture.feed(case)
+            assertFalse(fixture.complete, "should not be complete but is: ${case}")
+        }
+    }
+
+    // Test cases cribbed from Python shlex source. `from` is the input fed
+    // to the splitter, `to` the tokens it is expected to produce. Note that,
+    // unlike shlex, Shplitter lets a backslash inside double quotes quote
+    // any character, so the backslash itself is always dropped.
+    data class TestDatum(val from: String, val to: Array<String>)
+    val TEST_DATA_1 = arrayOf<TestDatum>(
+        TestDatum("foo bar", arrayOf<String>("foo", "bar")),
+        TestDatum(" foo bar", arrayOf<String>("foo", "bar")),
+        TestDatum(" foo bar ", arrayOf<String>("foo", "bar")),
+        TestDatum("foo\tbar\tbla\tfasel", arrayOf<String>("foo", "bar", "bla", "fasel")),
+        TestDatum("x y  z\t\txxxx", arrayOf<String>("x", "y", "z", "xxxx")),
+        TestDatum("\\x bar", arrayOf<String>("x", "bar")),
+        TestDatum("\\ x bar", arrayOf<String>(" x", "bar")),
+        TestDatum("\\ bar", arrayOf<String>(" bar")),
+        TestDatum("foo \\x bar", arrayOf<String>("foo", "x", "bar")),
+        TestDatum("foo \\ x bar", arrayOf<String>("foo", " x", "bar")),
+        TestDatum("foo \\ bar", arrayOf<String>("foo", " bar")),
+        TestDatum("foo \"bar\" bla", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("\"foo\" \"bar\" \"bla\"", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("\"foo\" bar \"bla\"", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("\"foo\" bar bla", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("foo 'bar' bla", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("'foo' 'bar' 'bla'", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("'foo' bar 'bla'", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("'foo' bar bla", arrayOf<String>("foo", "bar", "bla")),
+        TestDatum("blurb foo\"bar\"bar\"fasel\" baz", arrayOf<String>("blurb", "foobarbarfasel", "baz")),
+        TestDatum("blurb foo'bar'bar'fasel' baz", arrayOf<String>("blurb", "foobarbarfasel", "baz")),
+        TestDatum("\"\"", arrayOf<String>("")),
+        TestDatum("''", arrayOf<String>("")),
+        TestDatum("foo \"\" bar", arrayOf<String>("foo", "", "bar")),
+        TestDatum("foo '' bar", arrayOf<String>("foo", "", "bar")),
+        TestDatum("foo \"\" \"\" \"\" bar", arrayOf<String>("foo", "", "", "", "bar")),
+        TestDatum("foo '' '' '' bar", arrayOf<String>("foo", "", "", "", "bar")),
+        TestDatum("\"foo\\ bar\"", arrayOf<String>("foo bar")))
+    val TEST_DATA_2 = arrayOf<TestDatum>(
+        TestDatum("\"foo\\\\ bar\"", arrayOf<String>("foo\\ bar")),
+        TestDatum("\"foo\\\\ bar\\\\\"", arrayOf<String>("foo\\ bar\\")),
+        TestDatum("\"foo\\\\\" bar\\\"", arrayOf<String>("foo\\", "bar\"")),
+        TestDatum("\"foo\\\\ bar\" dfadf", arrayOf<String>("foo\\ bar", "dfadf")),
+        TestDatum("\"foo\\\\\\ bar\\\" dfadf\"", arrayOf<String>("foo\\ bar\" dfadf")),
+        TestDatum("\"foo\\\\\\x bar\" dfadf", arrayOf<String>("foo\\x bar", "dfadf")),
+        TestDatum("\"foo\\x bar\\\" dfadf\"", arrayOf<String>("foox bar\" dfadf")),
+        TestDatum("\"foo\\x\"", arrayOf<String>("foox")),
+        TestDatum("\"foo\\ \"", arrayOf<String>("foo ")),
+        TestDatum("foo\\ xx", arrayOf<String>("foo xx")),
+        TestDatum("foo\\ x\\x", arrayOf<String>("foo xx")),
+        TestDatum("foo\\ x\\x\\\"\"\"", arrayOf<String>("foo xx\"")),
+        TestDatum("\"foo\\ x\\x\"", arrayOf<String>("foo xx")),
+        TestDatum("\"foo\\ x\\x\\\\\"", arrayOf<String>("foo xx\\")),
+        TestDatum("\"foo\\ x\\x\\\\\"\"foobar\"", arrayOf<String>("foo xx\\foobar")),
+        TestDatum("\"foo\\ x\\x\\\\\"\\'\"foobar\"", arrayOf<String>("foo xx\\'foobar")),
+        // Two words: the unquoted space after the closing double quote separates them.
+        TestDatum("\"foo\\ x\\x\\\\\"\\'\"fo'obar\" 'don'\\''t'", arrayOf<String>("foo xx\\'fo'obar", "don't")),
+        TestDatum("'foo\\ bar'", arrayOf<String>("foo\\ bar")),
+        TestDatum("'foo\\\\ bar'", arrayOf<String>("foo\\\\ bar")),
+        TestDatum("foo\\ bar", arrayOf<String>("foo bar")),
+        TestDatum("foo#bar\\nbaz", arrayOf<String>("foo#barnbaz")),
+        TestDatum(":-) ;-)", arrayOf<String>(":-)", ";-)")),
+        TestDatum("áéíóú", arrayOf<String>("áéíóú"))
+    )
+
+    /**
+     * Feeds each datum's input to a fresh [Shplitter] and asserts that the
+     * input is complete and splits into exactly the expected tokens.
+     */
+    fun runArray(testData: Array<TestDatum>) {
+        for (testDatum in testData) {
+            val s = Shplitter()
+            s.feed(testDatum.from)
+            assertTrue(s.complete, "${testDatum.from}: should be complete, is not")
+            val split = s.split().toList()
+            val expecting = testDatum.to.asList()
+            assertTrue(split == expecting, "${testDatum.from}: expected ${expecting}, got $split")
+        }
+    }
+
+    /** Runs both tables of splitting cases. */
+    @Test
+    fun split() {
+        runArray(TEST_DATA_1)
+        runArray(TEST_DATA_2)
+    }
+}