refactor replacements and null removals out of adaptiveCommandParse

Also includes new tests for speech captured in production
This commit is contained in:
Emil Lerch 2025-09-29 14:41:44 -07:00
parent 83a6824320
commit 11355886fe
Signed by: lobo
GPG key ID: A7B62D657EF764F8
2 changed files with 159 additions and 46 deletions

View file

@ -16,6 +16,8 @@ pub fn build(b: *std.Build) !void {
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
const long_tests = b.option(bool, "long-tests", "Run long-running tests") orelse false;
// It's also possible to define more custom flags to toggle optional features
// of this build script using `b.option()`. All defined flags (including
// target and optimize options) will be listed when running `zig build --help`
@ -146,6 +148,9 @@ pub fn build(b: *std.Build) !void {
// which requires us to specify a target.
.target = target,
});
const options = b.addOptions();
options.addOption(bool, "long_tests", long_tests);
mod.addImport("build_options", options.createModule());
mod.linkLibrary(lib);
mod.addIncludePath(upstream.path("include"));

View file

@ -1,4 +1,6 @@
const builtin = @import("builtin");
const build_options = @import("build_options");
const std = @import("std");
const c = @cImport({
@cInclude("link-includes.h");
@ -360,20 +362,14 @@ pub const Parser = struct {
_ = c.dictionary_delete(self.dict);
}
/// Parses a sentence with an attempt to "fix" the sentence, assuming
/// the sentence is a command with an action and an object. If a valid
/// sentence is found, it will be returned, with the guarantee that
/// sentenceObject and sentenceAction will return non-zero results. If that
/// condition cannot be satisfied, error.NoValidParse will be returned
pub fn adaptiveCommandParse(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
var final_buf: [1024]u8 = undefined;
fn applyReplacements(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8), final_buf: []u8) ![]const u8 {
_ = self; // we don't want to remove this completely, as there
// could be a time when we need to re-parse after replacement
const replacement_keys = replacements.keys();
const replacement_values = replacements.values();
var altered = sentence;
// Step 1: Replacements
for (replacement_keys, replacement_values) |k, v| {
var k_buf: [256]u8 = undefined;
var v_buf: [256]u8 = undefined;
@ -426,32 +422,17 @@ pub const Parser = struct {
altered,
});
}
var tree = self.parse(altered) catch |err| {
if (shouldLog()) {
if (altered.len > 0)
std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered })
else
std.log.err("Sentence is empty: not parsing", .{});
}
// continue;
return err;
};
var tree_ptr: ?*ParseTree = &tree;
errdefer if (tree_ptr) |p| p.deinit();
return altered;
}
std.log.debug("adaptiveCommandParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
sentence,
altered,
tree,
});
// Step 2: replace null words
fn removeNullWords(self: *Parser, altered_sentence: []const u8, tree: *ParseTree, final_buf: []u8) !struct { sentence: []const u8, nulls_removed: usize } {
var altered = altered_sentence;
var this_pass_nulls_removed: usize = 1;
var total_nulls_removed: usize = 0;
var replacement_errors: usize = 0;
while (this_pass_nulls_removed - replacement_errors > 0) {
this_pass_nulls_removed = 0;
// var last_word_buf: [256]u8 = undefined;
var last_word: ?[]const u8 = null;
for (tree.words) |word| {
const had_last_word = last_word != null;
@ -543,24 +524,48 @@ pub const Parser = struct {
// Retry parsing with the word removed
tree.deinit();
tree = self.parse(altered) catch |err| {
tree_ptr = null;
tree.* = self.parse(altered) catch |err| {
if (shouldLog())
std.log.err("Failed to parse altered sentence: {}\n\t{s}", .{ err, altered });
// continue;
return err;
};
tree_ptr = &tree;
this_pass_nulls_removed += 1;
total_nulls_removed += 1;
break; // we will remove these words conservatively...
}
}
return .{ .sentence = altered, .nulls_removed = total_nulls_removed };
}
std.log.debug("{d} nulls removed ({d} replacement errors)", .{
total_nulls_removed,
replacement_errors,
/// Parses a sentence with an attempt to "fix" the sentence, assuming
/// the sentence is a command with an action and an object. If a valid
/// sentence is found, it will be returned, with the guarantee that
/// sentenceObject and sentenceAction will return non-zero results. If that
/// condition cannot be satisfied, error.NoValidParse will be returned
pub fn adaptiveCommandParse(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
var final_buf: [1024]u8 = undefined;
var altered = try self.applyReplacements(sentence, replacements, &final_buf);
var tree = self.parse(altered) catch |err| {
if (shouldLog()) {
if (altered.len > 0)
std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered })
else
std.log.err("Sentence is empty: not parsing", .{});
}
return err;
};
const tree_ptr: ?*ParseTree = &tree;
errdefer if (tree_ptr) |p| p.deinit();
std.log.debug("adaptiveCommandParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
sentence, altered, tree,
});
const result = try self.removeNullWords(altered, &tree, &final_buf);
altered = result.sentence;
std.log.debug("{d} nulls removed", .{result.nulls_removed});
// Bracketed words are "null"
// words with [?] are "unknown"
// If we have unknowns, I think we want to replace (or if no replacement
@ -866,17 +871,6 @@ test "adaptiveCommandParse with word replacement and null removal" {
const sentence = "alexa turn on the kitchen lake";
// These should all be unit tests for the replacement and null processing
// Right now that code is all in the adaptiveCommandParse but should be factored out
// const sentence = "ah the next level the out wednesday october first i want alaska eighteen from seattle to boston";
// const sentence = "for for big waiver yeah i'm be a masses";
//
// This shows up as tom[.?] but then a proper 's.p, so breaks the typical pattern of an unknown word after a null word
// const sentence = "them sound indiscipline or doesn't have i say or okay so and so he's creating like an excel spreadsheet was tom's and records or whatever it to translate by so that's how he and he could have come conversation about";
//
// This one takes a lot of processing. Also, "i" ends up as null, so forces us to strip the word 'i' and not the letter 'i'
// const sentence = "i'm i seem to be breaking the website and so i'm training to or a multi city a reservation and it's telling me i can't get from seattle to portland or on monday september twenty ninth am and at that point i was like okay he something completely wrong effect";
// const ll = std.testing.log_level;
// defer std.testing.log_level = ll;
// std.testing.log_level = .debug;
@ -895,3 +889,117 @@ test "adaptiveCommandParse with word replacement and null removal" {
try std.testing.expectEqualStrings("kitchen", object_words[0]);
try std.testing.expectEqualStrings("light", object_words[1]);
}
test "applyReplacements basic replacement" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // A single-entry map: the word "lake" should come back as "light".
    const word_map = std.StaticStringMap([]const u8).initComptime(.{
        .{ "lake", "light" },
    });

    const input = "turn on the kitchen lake";
    const expected = "turn on the kitchen light";

    // Caller-supplied scratch buffer handed to applyReplacements.
    var scratch: [1024]u8 = undefined;
    const actual = try parser.applyReplacements(input, word_map, &scratch);

    try std.testing.expectEqualStrings(expected, actual);
}
test "applyReplacements multiple replacements" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Two entries in the map; both substitutions are expected in the result.
    const word_map = std.StaticStringMap([]const u8).initComptime(.{
        .{ "lake", "light" },
        .{ "kitchen", "bedroom" },
    });

    const input = "turn on the kitchen lake";
    const expected = "turn on the bedroom light";

    // Caller-supplied scratch buffer handed to applyReplacements.
    var scratch: [1024]u8 = undefined;
    const actual = try parser.applyReplacements(input, word_map, &scratch);

    try std.testing.expectEqualStrings(expected, actual);
}
test "applyReplacements empty after replacement" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // The only word in the sentence is mapped to the empty string, so the
    // call is expected to fail with error.SentenceEmptyAfterReplacements.
    const word_map = std.StaticStringMap([]const u8).initComptime(.{
        .{ "test", "" },
    });

    var scratch: [1024]u8 = undefined;
    const outcome = parser.applyReplacements("test", word_map, &scratch);

    try std.testing.expectError(error.SentenceEmptyAfterReplacements, outcome);
}
test "removeNullWords no nulls" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Expectation: the sentence comes back unchanged and no removals are
    // reported.
    const sentence = "turn on the light";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // NOTE(review): the deinit is registered only after removeNullWords
    // succeeds. removeNullWords appears to deinit the tree itself before a
    // re-parse, so moving this defer earlier could double-free on a failed
    // re-parse -- confirm before reordering.
    defer tree.deinit();

    try std.testing.expectEqualStrings(sentence, result.sentence);
    // expectEqual reports the actual count on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 0), result.nulls_removed);
}
test "removeNullWords - 'i' as null word" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Sentence from unique samples in which "i" was identified as a null
    // word. An earlier implementation removed the letter "i" rather than the
    // whole word, which corrupted "eighteen"; this test guards the logic that
    // only whole words are removed.
    const sentence = "ah the next level the out wednesday october first i want alaska eighteen from seattle to boston";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // NOTE(review): the deinit is registered only after removeNullWords
    // succeeds. removeNullWords appears to deinit the tree itself before a
    // re-parse, so moving this defer earlier could double-free on a failed
    // re-parse -- confirm before reordering.
    defer tree.deinit();

    try std.testing.expectEqualStrings("ah the next level the out first want eighteen from to", result.sentence);
    // expectEqual reports the actual count on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 6), result.nulls_removed);
}
test "removeNullWords - was originally crashing" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Regression input: per the test name, this sentence used to crash
    // null-word removal. The expected output drops only "i'm".
    const sentence = "for for big waiver yeah i'm be a masses";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // NOTE(review): the deinit is registered only after removeNullWords
    // succeeds. removeNullWords appears to deinit the tree itself before a
    // re-parse, so moving this defer earlier could double-free on a failed
    // re-parse -- confirm before reordering.
    defer tree.deinit();

    try std.testing.expectEqualStrings("for for big waiver yeah be a masses", result.sentence);
    // expectEqual reports the actual count on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 1), result.nulls_removed);
}
test "removeNullWords - null word followed by possessive" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // "tom's" shows up as tom[.?] followed by a proper 's.p, which breaks the
    // typical pattern of an unknown word appearing after a null word.
    const sentence = "them sound indiscipline or doesn't have i say or okay so and so he's creating like an excel spreadsheet was tom's and records or whatever it to translate by so that's how he and he could have come conversation about";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // NOTE(review): the deinit is registered only after removeNullWords
    // succeeds. removeNullWords appears to deinit the tree itself before a
    // re-parse, so moving this defer earlier could double-free on a failed
    // re-parse -- confirm before reordering.
    defer tree.deinit();

    try std.testing.expectEqualStrings("them sound or doesn't have say or okay so and so he's creating like an excel was and records or whatever it to translate by so that's how he and he could have come conversation about", result.sentence);
    // expectEqual reports the actual count on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 4), result.nulls_removed);
}
test "removeNullWords - complex and long processing time" {
    // Check the opt-in flag before any setup so a skipped run neither pays
    // for nor can fail on Parser.init. Enabled via the "long-tests" build
    // option.
    if (!build_options.long_tests) return error.SkipZigTest;

    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // This one takes a lot of processing. Also, "i" ends up as null, which
    // forces us to strip the word 'i' and not the letter 'i'.
    const sentence = "i'm i seem to be breaking the website and so i'm training to or a multi city a reservation and it's telling me i can't get from seattle to portland or on monday september twenty ninth am and at that point i was like okay he something completely wrong effect";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // NOTE(review): the deinit is registered only after removeNullWords
    // succeeds. removeNullWords appears to deinit the tree itself before a
    // re-parse, so moving this defer earlier could double-free on a failed
    // re-parse -- confirm before reordering.
    defer tree.deinit();

    try std.testing.expectEqualStrings("seem to be breaking the website and so training to or a city a reservation and it's telling me can't get from to or on twenty ninth am and at that point was like okay he something completely wrong effect", result.sentence);
    // expectEqual reports the actual count on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 7), result.nulls_removed);
}