add controlData to gitignore

fix up paths
more replacements
2025-10-09 11:26:23 -07:00 · 2025-10-09 11:25:18 -07:00 · 2025-10-09 09:31:19 -07:00 · 2025-10-07 12:52:56 -07:00 · 2025-09-29 14:41:44 -07:00 · 2025-09-29 08:52:07 -07:00
4 changed files with 396 additions and 142 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 zig-out/
 .zig-cache/
+controlData.json
--- a/build.zig
+++ b/build.zig
@ -16,6 +16,8 @@ pub fn build(b: *std.Build) !void {
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
    // set a preferred release mode, allowing the user to decide how to optimize.
    const optimize = b.standardOptimizeOption(.{});
+
+    const long_tests = b.option(bool, "long-tests", "Run long-running tests") orelse false;
    // It's also possible to define more custom flags to toggle optional features
    // of this build script using `b.option()`. All defined flags (including
    // target and optimize options) will be listed when running `zig build --help`
@ -95,7 +97,6 @@ pub fn build(b: *std.Build) !void {
        .files = &.{
            "analyze-linkage.c",
            "and.c",
-            "api-example.c",
            "api.c",
            "build-disjuncts.c",
            "command-line.c",
@ -107,7 +108,6 @@ pub fn build(b: *std.Build) !void {
            "idiom.c",
            "linkset.c",
            "massage.c",
-            "parse.c",
            "post-process.c",
            "pp_knowledge.c",
            "pp_lexer.c",
@ -122,7 +122,6 @@ pub fn build(b: *std.Build) !void {
            "tokenize.c",
            "utilities.c",
            "word-file.c",
-            "www-parse.c",
        },
        .flags = &.{
            "-O2",
@ -149,6 +148,9 @@ pub fn build(b: *std.Build) !void {
        // which requires us to specify a target.
        .target = target,
    });
+    const options = b.addOptions();
+    options.addOption(bool, "long_tests", long_tests);
+    mod.addImport("build_options", options.createModule());
    mod.linkLibrary(lib);
    mod.addIncludePath(upstream.path("include"));

@ -197,8 +199,8 @@ pub fn build(b: *std.Build) !void {
    // Copy data files to install directory
    const install_data = b.addInstallDirectory(.{
        .source_dir = upstream.path("data"),
-        .install_dir = .bin,
-        .install_subdir = "data",
+        .install_dir = .{ .custom = "share" },
+        .install_subdir = "link",
    });
    install_data.step.dependOn(&download_link_step.step);

--- a/src/main.zig
+++ b/src/main.zig
@ -3,8 +3,18 @@ const std = @import("std");
 const pos = @import("pos");

 const word_replacements = std.StaticStringMap([]const u8).initComptime(.{
+    .{ "late", "light" },
    .{ "lake", "light" },
    .{ "like", "light" },
+    .{ "life", "light" },
+    .{ "another", "on the" },
+    .{ "better", "bedroom" },
+    .{ "my", "light" },
+    .{ "night", "light" },
+    .{ "way", "light" },
+    .{ "me all", "emil" },
+    .{ "a meal", "emil" },
+    .{ "her", "turn" },
 });

 const DeviceAction = enum {
@ -74,49 +84,68 @@ fn sendWemoCommand(allocator: std.mem.Allocator, device_entry: std.hash_map.Stri
 }

 fn loadDeviceConfig(allocator: std.mem.Allocator, bin_dir: []const u8) !std.StringHashMap([]const u8) {
+    // Try current directory first
+    if (loadConfigFromPath(allocator, "controlData.json")) |config| {
+        return config;
+    } else |_| {}
+
+    // Try controlData.json in bin directory
+    const json_path = try std.fs.path.join(allocator, &[_][]const u8{ bin_dir, "controlData.json" });
+    defer allocator.free(json_path);
+    if (loadConfigFromPath(allocator, json_path)) |config| {
+        return config;
+    } else |_| {}
+
+    // Try ../share/pos relative to bin directory
+    const share_path = try std.fs.path.join(allocator, &[_][]const u8{ bin_dir, "../share/pos/controlData.json" });
+    defer allocator.free(share_path);
+    if (loadConfigFromPath(allocator, share_path)) |config| {
+        return config;
+    } else |_| {}
+
+    return loadDevicesFromTxt(allocator, bin_dir);
+}
+
+fn loadConfigFromPath(allocator: std.mem.Allocator, path: []const u8) !std.StringHashMap([]const u8) {
    var devices = std.StringHashMap([]const u8).init(allocator);

+    const file = if (std.fs.path.isAbsolute(path))
+        std.fs.openFileAbsolute(path, .{}) catch return error.FileNotFound
+    else
+        std.fs.cwd().openFile(path, .{}) catch return error.FileNotFound;
+    defer file.close();
+
+    const content = try file.readToEndAlloc(allocator, 1024 * 1024);
+    defer allocator.free(content);
+
    var stderr_writer = std.fs.File.stderr().writer(&.{});
    const stderr = &stderr_writer.interface;
-    // Try controlData.json first
-    const json_path = try std.fs.path.join(allocator, &[_][]const u8{ bin_dir, "controlData.json" });
-    defer allocator.free(json_path);
+    const parsed = std.json.parseFromSlice(std.json.Value, allocator, content, .{}) catch |err| {
+        try stderr.print(
+            "Failed to parse controlData.json: {}. Ignoring controlData.json, looking for devices.txt",
+            .{err},
+        );
+        return error.ParseError;
+    };
+    defer parsed.deinit();

-    if (std.fs.openFileAbsolute(json_path, .{})) |file| {
-        defer file.close();
+    const root = parsed.value.object;
+    const device_array = root.get("devices").?.array;

-        const content = try file.readToEndAlloc(allocator, 1024 * 1024);
-        defer allocator.free(content);
+    for (device_array.items) |device| {
+        const device_obj = device.object;
+        const name = device_obj.get("name").?.string;
+        const url = device_obj.get("url").?.string;

-        const parsed = std.json.parseFromSlice(std.json.Value, allocator, content, .{}) catch |err| {
-            try stderr.print(
-                "Failed to parse controlData.json: {}. Ignoring controlData.json, looking for devices.txt",
-                .{err},
-            );
-            return loadDevicesFromTxt(allocator, bin_dir);
-        };
-        defer parsed.deinit();
-
-        const root = parsed.value.object;
-        const device_array = root.get("devices").?.array;
-
-        for (device_array.items) |device| {
-            const device_obj = device.object;
-            const name = device_obj.get("name").?.string;
-            const url = device_obj.get("url").?.string;
-
-            if (name.len > 0) {
-                const name_copy = try allocator.alloc(u8, name.len);
-                _ = std.ascii.lowerString(name_copy, name);
-                try devices.put(name_copy, try allocator.dupe(u8, url));
-                std.log.debug("Loaded device: '{s}' -> {s}", .{ name, url });
-            }
+        if (name.len > 0) {
+            const name_copy = try allocator.alloc(u8, name.len);
+            _ = std.ascii.lowerString(name_copy, name);
+            try devices.put(name_copy, try allocator.dupe(u8, url));
+            std.log.debug("Loaded device: '{s}' -> {s}", .{ name, url });
        }
-
-        return devices;
-    } else |_| {
-        return loadDevicesFromTxt(allocator, bin_dir);
    }
+
+    return devices;
 }

 fn loadDevicesFromTxt(allocator: std.mem.Allocator, bin_dir: []const u8) !std.StringHashMap([]const u8) {
@ -231,15 +260,16 @@ pub fn main() !u8 {
    defer std.process.argsFree(allocator, args);

    // Check for --sentence-parse-only option
-    var sentence_parse_only = false;
+    var sentence_parse_only: enum { sentence, command, none } = .none;
    var sentence_arg: ?[]const u8 = null;

    for (args[1..]) |arg| {
-        if (std.mem.eql(u8, arg, "--sentence-parse-only")) {
-            sentence_parse_only = true;
-        } else if (sentence_arg == null) {
+        if (std.mem.eql(u8, arg, "--sentence-parse-only"))
+            sentence_parse_only = .sentence
+        else if (std.mem.eql(u8, arg, "--command-parse-only"))
+            sentence_parse_only = .command
+        else if (sentence_arg == null)
            sentence_arg = arg;
-        }
    }

    var stdout_writer = std.fs.File.stdout().writer(&.{});
@ -248,7 +278,7 @@ pub fn main() !u8 {
    const stderr = &stderr_writer.interface;

    if (sentence_arg == null) {
-        try stderr.print("Usage: {s} [--sentence-parse-only] <sentence>\n", .{args[0]});
+        try stderr.print("Usage: {s} [--sentence-parse-only] [--command-parse-only] <sentence>\n", .{args[0]});
        return 1;
    }
    const bin_dir = std.fs.selfExeDirPathAlloc(allocator) catch |err| {
@ -263,14 +293,20 @@ pub fn main() !u8 {
        return 1;
    };
    defer parser.deinit();
-    if (sentence_parse_only) {
+    if (sentence_parse_only != .none) {
        const sentence_z = try allocator.dupeZ(u8, sentence_arg.?);
        defer allocator.free(sentence_z);

-        var tree = parser.adaptiveParse(sentence_z, word_replacements) catch |err| {
-            std.debug.print("Failed to parse sentence: {}\n", .{err});
-            return 1;
-        };
+        var tree = if (sentence_parse_only == .command)
+            parser.adaptiveCommandParse(sentence_z, word_replacements) catch {
+                std.log.err("Failed to parse sentence: {s}", .{sentence_z});
+                return 1;
+            }
+        else
+            parser.parse(sentence_z) catch {
+                std.log.err("Failed to parse sentence: {s}", .{sentence_z});
+                return 1;
+            };
        defer tree.deinit();

        try stdout.print("{f}", .{tree});
@ -304,7 +340,7 @@ pub fn main() !u8 {
 }

 fn processCommand(allocator: std.mem.Allocator, sentence: [:0]const u8, parser: *pos.Parser, devices: *std.StringHashMap([]const u8)) !void {
-    var tree = parser.adaptiveParse(sentence, word_replacements) catch |err| {
+    var tree = parser.adaptiveCommandParse(sentence, word_replacements) catch |err| {
        std.log.err("Failed to parse sentence with all replacements: {}\n", .{err});
        return error.UnrecognizedSentence;
    };
--- a/src/root.zig
+++ b/src/root.zig
@ -1,4 +1,6 @@
 const builtin = @import("builtin");
+const build_options = @import("build_options");
+
 const std = @import("std");
 const c = @cImport({
    @cInclude("link-includes.h");
@ -289,10 +291,10 @@ pub const Parser = struct {

    pub fn init(allocator: std.mem.Allocator) !Parser {
        const dict = c.dictionary_create(
-            @ptrCast(@constCast("data/4.0.dict")),
-            @ptrCast(@constCast("data/4.0.knowledge")),
-            @ptrCast(@constCast("data/4.0.constituent-knowledge")),
-            @ptrCast(@constCast("data/4.0.affix")),
+            @ptrCast(@constCast("../share/link/4.0.dict")),
+            @ptrCast(@constCast("../share/link/4.0.knowledge")),
+            @ptrCast(@constCast("../share/link/4.0.constituent-knowledge")),
+            @ptrCast(@constCast("../share/link/4.0.affix")),
        );
        if (dict == null) return error.DictionaryCreationFailed;

@ -317,13 +319,13 @@ pub const Parser = struct {
    }

    pub fn initWithDataDir(allocator: std.mem.Allocator, data_dir: []const u8) !Parser {
-        const dict_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.dict" });
+        const dict_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.dict" });
        defer allocator.free(dict_path);
-        const knowledge_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.knowledge" });
+        const knowledge_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.knowledge" });
        defer allocator.free(knowledge_path);
-        const constituent_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.constituent-knowledge" });
+        const constituent_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.constituent-knowledge" });
        defer allocator.free(constituent_path);
-        const affix_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.affix" });
+        const affix_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.affix" });
        defer allocator.free(affix_path);

        const dict_cstr = try allocator.dupeZ(u8, dict_path);
@ -360,23 +362,30 @@ pub const Parser = struct {
        _ = c.dictionary_delete(self.dict);
    }

-    /// Parses a sentence with an attempt to "fix" the sentence. If a valid
-    /// sentence is found, it will be returned, with the guarantee that
-    /// sentenceObject and sentenceAction will return non-zero results. If that
-    /// condition cannot be satisfied, error.NoValidParse will be returned
-    pub fn adaptiveParse(self: *Parser, sentence: [:0]const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
-        var altered_buf: [1024]u8 = undefined;
-
+    fn applyReplacements(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8), final_buf: []u8) ![]const u8 {
+        _ = self; // we don't want to remove this completely, as there
+        // could be a time when we need to re-parse after replacement
        const replacement_keys = replacements.keys();
        const replacement_values = replacements.values();

        var altered = sentence;

-        // Step 1: Replacements
-        for (replacement_keys, replacement_values) |key, value| {
+        for (replacement_keys, replacement_values) |k, v| {
+            var k_buf: [256]u8 = undefined;
+            var v_buf: [256]u8 = undefined;
+
+            // add spaces on either side so we match words
+            const key = try std.fmt.bufPrint(&k_buf, " {s} ", .{k});
+            const value = try std.fmt.bufPrint(&v_buf, " {s} ", .{v});
+
+            // and then we need our sentence to have a space on either side
+            // so the replacement works
+            var sent_buf: [1024]u8 = undefined;
+            const sent = try std.fmt.bufPrint(&sent_buf, " {s} ", .{altered});
+            var altered_buf: [1024]u8 = undefined;
            const altered_size = std.mem.replacementSize(
                u8,
-                altered,
+                sent,
                key,
                value,
            );
@ -387,75 +396,176 @@ pub const Parser = struct {
            }
            const replacement_count = std.mem.replace(
                u8,
-                altered,
+                sent,
                key,
                value,
                &altered_buf,
            );

-            altered_buf[altered_size] = 0; // add sentinel
-            altered = altered_buf[0..altered_size :0];
+            if (std.mem.trimRight(u8, altered_buf[0..altered_size], " ").len == 0) {
+                std.log.debug("Sentence empty after replacements", .{});
+                return error.SentenceEmptyAfterReplacements;
+            }
+            const start: usize = if (altered_buf[0] == ' ') 1 else 0;
+            const last_is_space = altered_buf[altered_size - 1] == ' ';
+            const end: usize = if (last_is_space) altered_size - 1 else altered_size;
+            altered_buf[end] = 0; // add sentinel
+            @memcpy(final_buf[start .. end + 1], altered_buf[start .. end + 1]);
+            altered = final_buf[start..end :0];

            if (replacement_count > 0)
                // we have altered the deal. Pray we don't alter it further
                std.log.info("Replaced '{s}' in sentence with replacement '{s}' {d} times. Sentence now:\n\t{s}", .{
-                    key,
-                    value,
+                    k,
+                    v,
                    replacement_count,
                    altered,
                });
        }
-        var tree = self.parse(altered) catch |err| {
-            if (shouldLog())
-                std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered });
-            // continue;
-            return err;
-        };
+        return altered;
+    }

-        std.log.debug("adaptiveParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
-            sentence,
-            altered,
-            tree,
-        });
+    fn removeNullWords(self: *Parser, altered_sentence: []const u8, tree: *ParseTree, final_buf: []u8) !struct { sentence: []const u8, nulls_removed: usize } {
+        var altered = altered_sentence;
+        var this_pass_nulls_removed: usize = 1;
+        var total_nulls_removed: usize = 0;
+        var replacement_errors: usize = 0;

-        // Step 2: replace null words
-        var nulls_removed = true;
-        while (nulls_removed) {
-            nulls_removed = false;
+        while (this_pass_nulls_removed - replacement_errors > 0) {
+            this_pass_nulls_removed = 0;
+            var last_word: ?[]const u8 = null;
            for (tree.words) |word| {
-                if (std.mem.indexOf(u8, word, "[?]")) |i| {
-                    nulls_removed = true;
-                    // We need to alter this further
-                    const trimmed = word[0..i];
-                    const removals = std.mem.replace(
-                        u8,
-                        altered,
-                        trimmed,
-                        "",
-                        &altered_buf,
-                    );
-                    const len = altered.len - (removals * trimmed.len);
-                    altered_buf[len] = 0;
-                    altered = altered_buf[0..len :0];
+                const had_last_word = last_word != null;
+                const is_null = if (last_word == null)
+                    std.mem.indexOf(u8, word, "[?]") != null
+                else
+                    (word[0] == '[' and word[word.len - 1] == ']') or
+                        std.mem.startsWith(u8, word, "'s.");

-                    std.log.info("Removed null word '{s}' in sentence {d} time(s). Sentence now:\n\t{s}", .{
-                        trimmed,
-                        removals,
-                        altered,
-                    });
-                    // Retry parsing with the word removed
-                    tree.deinit();
-                    tree = self.parse(altered) catch |err| {
-                        if (shouldLog())
-                            std.log.err("Failed to parse altered sentence: {}\n\t{s}", .{ err, altered });
-                        // continue;
-                        return err;
-                    };
-                    break; // we will remove these words conservatively...
+                if (!is_null) {
+                    if (last_word) |l| {
+                        // We had no replacements, and this next word is unusable
+                        std.log.warn("No replacements for word '{s}' and cannot combine with next word '{s}'. Continuing", .{ l, word });
+                        last_word = null;
+                        this_pass_nulls_removed += 1; // count as removal
+                        replacement_errors += 1;
+                    }
+                    continue;
                }
+                // we are on a null, but we might have to skip processing if
+                // there was an earlier failure
+                if (this_pass_nulls_removed < replacement_errors) {
+                    this_pass_nulls_removed += 1; // skip and move on
+                    continue;
+                }
+                // We need to alter this further
+                const trimmed = if (std.mem.indexOf(u8, word, "[?]")) |i|
+                    word[0..i]
+                else if (std.mem.startsWith(u8, word, "'s."))
+                    word[0..2]
+                else
+                    word[1 .. word.len - 1];
+                var needle_buf: [256]u8 = undefined;
+                var first_part: []const u8 = "";
+                if (last_word) |w| {
+                    if (std.mem.indexOf(u8, w, "[?]")) |i|
+                        first_part = w[0..i]
+                    else
+                        @panic("first part of null word does not have [?]. programming error");
+                }
+                const needle = try std.fmt.bufPrint(&needle_buf, " {s}{s} ", .{
+                    first_part,
+                    trimmed,
+                });
+                if (last_word) |w|
+                    std.log.debug("last word: {s}, needle: {s}", .{ w, needle[1 .. needle.len - 1] });
+                last_word = null;
+                // and then we need our sentence to have a space on either side
+                // so the replacement works
+                var sent_buf: [1024]u8 = undefined;
+                const sent = try std.fmt.bufPrint(&sent_buf, " {s} ", .{altered});
+                const removals = std.mem.replace(
+                    u8,
+                    sent,
+                    needle,
+                    " ",
+                    &sent_buf,
+                );
+                const len = sent.len - (removals * needle.len) + removals;
+                if (std.mem.trimRight(u8, sent_buf[0..len], " ").len == 0) {
+                    std.log.debug("Removed null word '{s}' in sentence {d} time(s). Sentence now empty", .{ needle[1 .. needle.len - 1], removals });
+                    return error.SentenceEmptyAfterNullRemoval;
+                }
+                const start: usize = if (sent_buf[0] == ' ') 1 else 0;
+                const last_is_space = sent_buf[len - 1] == ' ';
+                const end: usize = if (last_is_space) len - 1 else len;
+
+                if (removals == 0) {
+                    if (had_last_word) {
+                        // giving up
+                        std.log.info("Could not find word to remove after combining with next null. Giving up", .{});
+                        break;
+                    }
+                    //  contractions are sometimes split across words in the array
+                    std.log.debug("Could not find word to remove. Combining next word with this one ({s})", .{trimmed});
+                    // last_word = try std.fmt.bufPrint(&last_word_buf, "{s}", .{word});
+                    last_word = word; // I think this should work
+                    continue;
+                }
+                std.log.info("Removed null word '{s}' in sentence {d} time(s). Sentence before:\n\t{s}\nafter:\n\t{s}", .{
+                    needle[1 .. needle.len - 1],
+                    removals,
+                    altered, // this is our before...we will copy memory around just below
+                    sent_buf[start..end],
+                });
+                sent_buf[end] = 0; // add sentinel
+                @memcpy(final_buf[start .. end + 1], sent_buf[start .. end + 1]);
+                altered = final_buf[start..end :0];
+
+                // Retry parsing with the word removed
+                tree.deinit();
+                tree.* = self.parse(altered) catch |err| {
+                    if (shouldLog())
+                        std.log.err("Failed to parse altered sentence: {}\n\t{s}", .{ err, altered });
+                    // continue;
+                    return err;
+                };
+                this_pass_nulls_removed += 1;
+                total_nulls_removed += 1;
+                break; // we will remove these words conservatively...
            }
        }
+        return .{ .sentence = altered, .nulls_removed = total_nulls_removed };
+    }

+    /// Parses a sentence with an attempt to "fix" the sentence, assuming
+    /// the sentence is a command with an action and an object. If a valid
+    /// sentence is found, it will be returned, with the guarantee that
+    /// sentenceObject and sentenceAction will return non-zero results. If that
+    /// condition cannot be satisfied, error.NoValidParse will be returned
+    pub fn adaptiveCommandParse(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
+        var final_buf: [1024]u8 = undefined;
+
+        var altered = try self.applyReplacements(sentence, replacements, &final_buf);
+        var tree = self.parse(altered) catch |err| {
+            if (shouldLog()) {
+                if (altered.len > 0)
+                    std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered })
+                else
+                    std.log.err("Sentence is empty: not parsing", .{});
+            }
+            return err;
+        };
+        const tree_ptr: ?*ParseTree = &tree;
+        errdefer if (tree_ptr) |p| p.deinit();
+
+        std.log.debug("adaptiveCommandParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
+            sentence, altered, tree,
+        });
+
+        const result = try self.removeNullWords(altered, &tree, &final_buf);
+        altered = result.sentence;
+        std.log.debug("{d} nulls removed", .{result.nulls_removed});
        // Bracketed words are "null"
        // words with [?] are "unknown"
        // If we have unknowns, I think we want to replace (or if no replacement
@ -475,9 +585,9 @@ pub const Parser = struct {

        // Validate that we can extract action and object before returning
        const action_words = tree.sentenceAction() catch |err| {
-            if (shouldLog())
-                std.log.err("Failed to extract action: {}\n", .{err});
-            tree.deinit();
+            // This is the first catch, so we don't want to log here as it
+            // gets super noisy
+            std.log.debug("Failed to extract action: {}", .{err});
            return err;
            // continue;
        };
@ -485,7 +595,6 @@ pub const Parser = struct {

        if (action_words.len == 0) {
            std.log.info("Failed to extract action from sentence", .{});
-            tree.deinit();
            return error.SentenceActionNotFound;
            // continue;
        }
@ -493,7 +602,6 @@ pub const Parser = struct {
        const object_words = tree.sentenceObject() catch |err| {
            if (shouldLog())
                std.log.err("Failed to extract object: {}\n", .{err});
-            tree.deinit();
            // continue;
            return err;
        };
@ -501,7 +609,6 @@ pub const Parser = struct {

        if (object_words.len == 0) {
            std.log.info("Failed to extract object from sentence", .{});
-            tree.deinit();
            // continue;
            return error.SentenceObjectNotFound;
        }
@ -677,7 +784,7 @@ test "real usage - jack" {
    try std.testing.expectEqualStrings("bedroom", sentence_object[1]);
    try std.testing.expectEqualStrings("light", sentence_object[2]);
 }
-test "adaptiveParse successful without replacements" {
+test "adaptiveCommandParse successful without replacements" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

@ -687,10 +794,7 @@ test "adaptiveParse successful without replacements" {
    });

    const sentence = "turn on the kitchen light";
-    const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
-    defer std.testing.allocator.free(sentence_z);
-
-    var tree = try parser.adaptiveParse(sentence_z, replacements);
+    var tree = try parser.adaptiveCommandParse(sentence, replacements);
    defer tree.deinit();

    const action_words = try tree.sentenceAction();
@ -706,7 +810,7 @@ test "adaptiveParse successful without replacements" {
    try std.testing.expectEqualStrings("light", object_words[1]);
 }

-test "adaptiveParse with word replacement" {
+test "adaptiveCommandParse with word replacement" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

@ -716,10 +820,8 @@ test "adaptiveParse with word replacement" {
    });

    const sentence = "turn on the kitchen lake";
-    const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
-    defer std.testing.allocator.free(sentence_z);

-    var tree = try parser.adaptiveParse(sentence_z, replacements);
+    var tree = try parser.adaptiveCommandParse(sentence, replacements);
    defer tree.deinit();

    const action_words = try tree.sentenceAction();
@ -735,7 +837,7 @@ test "adaptiveParse with word replacement" {
    try std.testing.expectEqualStrings("light", object_words[1]);
 }

-test "adaptiveParse no valid parse" {
+test "adaptiveCommandParse no valid parse" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

@ -745,22 +847,20 @@ test "adaptiveParse no valid parse" {
    });

    const sentence = "xyz abc def";
-    const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
-    defer std.testing.allocator.free(sentence_z);

    // const ll = std.testing.log_level;
    // defer std.testing.log_level = ll;
    // std.testing.log_level = .debug;
    try std.testing.expectError(
-        error.SentenceCreationFailed,
-        parser.adaptiveParse(
-            sentence_z,
+        error.SentenceEmptyAfterNullRemoval,
+        parser.adaptiveCommandParse(
+            sentence,
            replacements,
        ),
    );
 }

-test "adaptiveParse with word replacement and null removal" {
+test "adaptiveCommandParse with word replacement and null removal" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

@ -770,10 +870,11 @@ test "adaptiveParse with word replacement and null removal" {
    });

    const sentence = "alexa turn on the kitchen lake";
-    const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
-    defer std.testing.allocator.free(sentence_z);

-    var tree = try parser.adaptiveParse(sentence_z, replacements);
+    // const ll = std.testing.log_level;
+    // defer std.testing.log_level = ll;
+    // std.testing.log_level = .debug;
+    var tree = try parser.adaptiveCommandParse(sentence, replacements);
    defer tree.deinit();

    const action_words = try tree.sentenceAction();
@ -788,3 +889,117 @@ test "adaptiveParse with word replacement and null removal" {
    try std.testing.expectEqualStrings("kitchen", object_words[0]);
    try std.testing.expectEqualStrings("light", object_words[1]);
 }
+test "applyReplacements basic replacement" {
+    // Single-entry map: the expectation below requires "lake" to come back as "light".
+    const word_map = std.StaticStringMap([]const u8).initComptime(.{
+        .{ "lake", "light" },
+    });
+
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    // Caller-supplied scratch buffer handed to applyReplacements.
+    var scratch: [1024]u8 = undefined;
+    const replaced = try parser.applyReplacements(
+        "turn on the kitchen lake",
+        word_map,
+        &scratch,
+    );
+
+    try std.testing.expectEqualStrings("turn on the kitchen light", replaced);
+}
+
+test "applyReplacements multiple replacements" {
+    // Two entries in the map: both "lake" and "kitchen" appear in the input,
+    // and the expected output has each of them swapped.
+    const word_map = std.StaticStringMap([]const u8).initComptime(.{
+        .{ "lake", "light" },
+        .{ "kitchen", "bedroom" },
+    });
+
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    // Caller-supplied scratch buffer handed to applyReplacements.
+    var scratch: [1024]u8 = undefined;
+    const replaced = try parser.applyReplacements(
+        "turn on the kitchen lake",
+        word_map,
+        &scratch,
+    );
+
+    try std.testing.expectEqualStrings("turn on the bedroom light", replaced);
+}
+
+test "applyReplacements empty after replacement" {
+    // The only word of the input is mapped to the empty string, which must be
+    // reported as error.SentenceEmptyAfterReplacements.
+    const word_map = std.StaticStringMap([]const u8).initComptime(.{
+        .{ "test", "" },
+    });
+
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    var scratch: [1024]u8 = undefined;
+    const outcome = parser.applyReplacements("test", word_map, &scratch);
+
+    try std.testing.expectError(error.SentenceEmptyAfterReplacements, outcome);
+}
+
+test "removeNullWords no nulls" {
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    const sentence = "turn on the light";
+    var tree = try parser.parse(sentence);
+    // Register cleanup right after the parse succeeds so the tree is released
+    // even when removeNullWords returns an error.
+    defer tree.deinit();
+
+    var final_buf: [1024]u8 = undefined;
+    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
+
+    // Nothing should be stripped: the sentence comes back unchanged.
+    try std.testing.expectEqualStrings(sentence, result.sentence);
+    // expectEqual reports both values on failure, unlike expect(a == b).
+    try std.testing.expectEqual(0, result.nulls_removed);
+}
+test "removeNullWords - 'i' as null word" {
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    // Sentence from unique samples that originally identified "i" as a null word.
+    // Removing the letter "i" rather than the word corrupted "eighteen", which
+    // forced logic to make sure that only whole words are processed.
+    const sentence = "ah the next level the out wednesday october first i want alaska eighteen from seattle to boston";
+    var tree = try parser.parse(sentence);
+    // Register cleanup right after the parse succeeds so the tree is released
+    // even when removeNullWords returns an error.
+    defer tree.deinit();
+
+    var final_buf: [1024]u8 = undefined;
+    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
+
+    try std.testing.expectEqualStrings("ah the next level the out first want eighteen from to", result.sentence);
+    // expectEqual reports both values on failure, unlike expect(a == b).
+    try std.testing.expectEqual(6, result.nulls_removed);
+}
+
+test "removeNullWords - was originally crashing" {
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    // Regression input: per the test name, this sentence used to crash.
+    const sentence = "for for big waiver yeah i'm be a masses";
+    var tree = try parser.parse(sentence);
+    // Register cleanup right after the parse succeeds so the tree is released
+    // even when removeNullWords returns an error.
+    defer tree.deinit();
+
+    var final_buf: [1024]u8 = undefined;
+    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
+
+    try std.testing.expectEqualStrings("for for big waiver yeah be a masses", result.sentence);
+    // expectEqual reports both values on failure, unlike expect(a == b).
+    try std.testing.expectEqual(1, result.nulls_removed);
+}
+
+test "removeNullWords - null word followed by possessive" {
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    // This shows up as tom[.?] but then a proper 's.p, so it breaks the typical
+    // pattern of an unknown word after a null word.
+    const sentence = "them sound indiscipline or doesn't have i say or okay so and so he's creating like an excel spreadsheet was tom's and records or whatever it to translate by so that's how he and he could have come conversation about";
+    var tree = try parser.parse(sentence);
+    // Register cleanup right after the parse succeeds so the tree is released
+    // even when removeNullWords returns an error.
+    defer tree.deinit();
+
+    var final_buf: [1024]u8 = undefined;
+    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
+
+    try std.testing.expectEqualStrings("them sound or doesn't have say or okay so and so he's creating like an excel was and records or whatever it to translate by so that's how he and he could have come conversation about", result.sentence);
+    // expectEqual reports both values on failure, unlike expect(a == b).
+    try std.testing.expectEqual(4, result.nulls_removed);
+}
+
+test "removeNullWords - complex and long processing time" {
+    // Opt-in only (the long_tests build option): bail out before constructing
+    // the parser so a skipped run does no setup work at all.
+    if (!build_options.long_tests) return error.SkipZigTest;
+
+    var parser = try Parser.init(std.testing.allocator);
+    defer parser.deinit();
+
+    // This one takes a lot of processing. Also, "i" ends up as null, which
+    // forces stripping the word 'i' and not the letter 'i'.
+    const sentence = "i'm i seem to be breaking the website and so i'm training to or a multi city a reservation and it's telling me i can't get from seattle to portland or on monday september twenty ninth am and at that point i was like okay he something completely wrong effect";
+    var tree = try parser.parse(sentence);
+    // Register cleanup right after the parse succeeds so the tree is released
+    // even when removeNullWords returns an error.
+    defer tree.deinit();
+
+    var final_buf: [1024]u8 = undefined;
+    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
+
+    try std.testing.expectEqualStrings("seem to be breaking the website and so training to or a city a reservation and it's telling me can't get from to or on twenty ninth am and at that point was like okay he something completely wrong effect", result.sentence);
+    // expectEqual reports both values on failure, unlike expect(a == b).
+    try std.testing.expectEqual(7, result.nulls_removed);
+}
Author	SHA1	Message	Date
Emil Lerch	461ea3f50c	add controlData to gitignore	2025-10-09 11:26:23 -07:00
Emil Lerch	78be667a16	fix up paths	2025-10-09 11:25:18 -07:00
Emil Lerch	d66fdd5cae	more replacements	2025-10-09 09:31:19 -07:00
Emil Lerch	68b69e8275	more word replacements	2025-10-07 12:52:56 -07:00
Emil Lerch	11355886fe	refactor replacments and null removals out of adaptiveCommandParse Also includes new tests for speech captured in production	2025-09-29 14:41:44 -07:00
Emil Lerch	83a6824320	more resiliency based on samples from actual speech to text	2025-09-29 08:52:07 -07:00
Emil Lerch	036ddb7f49	less noisy in release mode	2025-09-24 10:56:00 -07:00
Emil Lerch	a41c94b2c9	be clear that adaptive parse assumes this is a command	2025-09-24 10:33:45 -07:00
Emil Lerch	46fd8f585c	avoid memory corruption with multiple replacements, deinit fixes, clean up output	2025-09-24 10:31:34 -07:00
Emil Lerch	c325012ce4	remove extraneous compilation units that introduce additional main symbols	2025-09-24 08:41:13 -07:00