Compare commits

...

10 commits

4 changed files with 396 additions and 142 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
zig-out/
.zig-cache/
controlData.json

View file

@ -16,6 +16,8 @@ pub fn build(b: *std.Build) !void {
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
const long_tests = b.option(bool, "long-tests", "Run long-running tests") orelse false;
// It's also possible to define more custom flags to toggle optional features
// of this build script using `b.option()`. All defined flags (including
// target and optimize options) will be listed when running `zig build --help`
@ -95,7 +97,6 @@ pub fn build(b: *std.Build) !void {
.files = &.{
"analyze-linkage.c",
"and.c",
"api-example.c",
"api.c",
"build-disjuncts.c",
"command-line.c",
@ -107,7 +108,6 @@ pub fn build(b: *std.Build) !void {
"idiom.c",
"linkset.c",
"massage.c",
"parse.c",
"post-process.c",
"pp_knowledge.c",
"pp_lexer.c",
@ -122,7 +122,6 @@ pub fn build(b: *std.Build) !void {
"tokenize.c",
"utilities.c",
"word-file.c",
"www-parse.c",
},
.flags = &.{
"-O2",
@ -149,6 +148,9 @@ pub fn build(b: *std.Build) !void {
// which requires us to specify a target.
.target = target,
});
const options = b.addOptions();
options.addOption(bool, "long_tests", long_tests);
mod.addImport("build_options", options.createModule());
mod.linkLibrary(lib);
mod.addIncludePath(upstream.path("include"));
@ -197,8 +199,8 @@ pub fn build(b: *std.Build) !void {
// Copy data files to install directory
const install_data = b.addInstallDirectory(.{
.source_dir = upstream.path("data"),
.install_dir = .bin,
.install_subdir = "data",
.install_dir = .{ .custom = "share" },
.install_subdir = "link",
});
install_data.step.dependOn(&download_link_step.step);

View file

@ -3,8 +3,18 @@ const std = @import("std");
const pos = @import("pos");
/// Compile-time table of `.{ from, to }` word substitutions. This file hands
/// it to `parser.adaptiveCommandParse` as the `replacements` argument.
///
/// Keys may span several words (e.g. "me all", "a meal"), and a single key
/// may expand to more than one word (e.g. "another" -> "on the").
///
/// NOTE(review): the entries look like speech-recognition mishearings mapped
/// back to the intended command words ("light", "turn", "emil", ...) — confirm
/// against whatever produces the input sentences.
const word_replacements = std.StaticStringMap([]const u8).initComptime(.{
.{ "late", "light" },
.{ "lake", "light" },
.{ "like", "light" },
.{ "life", "light" },
.{ "another", "on the" },
.{ "better", "bedroom" },
.{ "my", "light" },
.{ "night", "light" },
.{ "way", "light" },
.{ "me all", "emil" },
.{ "a meal", "emil" },
.{ "her", "turn" },
});
const DeviceAction = enum {
@ -74,49 +84,68 @@ fn sendWemoCommand(allocator: std.mem.Allocator, device_entry: std.hash_map.Stri
}
/// Builds the device-name -> URL map from the first configuration source
/// that loads successfully.
///
/// Sources are tried in this order:
///   1. `controlData.json` relative to the current working directory
///   2. `<bin_dir>/controlData.json`
///   3. `<bin_dir>/../share/pos/controlData.json`
///   4. `loadDevicesFromTxt(allocator, bin_dir)` as the final fallback
///
/// A JSON candidate that fails to load is skipped and noted at debug level,
/// with one exception: `error.OutOfMemory` is returned immediately, because
/// trying further candidates cannot fix an allocation failure.
///
/// Errors from joining the candidate paths and from the `devices.txt`
/// fallback are propagated to the caller.
fn loadDeviceConfig(allocator: std.mem.Allocator, bin_dir: []const u8) !std.StringHashMap([]const u8) {
    // Try current directory first
    if (loadConfigFromPath(allocator, "controlData.json")) |config| {
        return config;
    } else |err| switch (err) {
        error.OutOfMemory => return err,
        else => std.log.debug(
            "controlData.json not loaded from current directory: {s}",
            .{@errorName(err)},
        ),
    }

    // Then the locations relative to the bin directory: controlData.json
    // directly inside it, followed by ../share/pos/controlData.json.
    const bin_relative_candidates = [_][]const u8{
        "controlData.json",
        "../share/pos/controlData.json",
    };
    for (bin_relative_candidates) |relative| {
        const candidate = try std.fs.path.join(allocator, &[_][]const u8{ bin_dir, relative });
        // Released at the end of each iteration; the loader does not keep it.
        defer allocator.free(candidate);

        if (loadConfigFromPath(allocator, candidate)) |config| {
            return config;
        } else |err| switch (err) {
            error.OutOfMemory => return err,
            else => std.log.debug(
                "controlData.json not loaded from '{s}': {s}",
                .{ candidate, @errorName(err) },
            ),
        }
    }

    return loadDevicesFromTxt(allocator, bin_dir);
}
fn loadConfigFromPath(allocator: std.mem.Allocator, path: []const u8) !std.StringHashMap([]const u8) {
var devices = std.StringHashMap([]const u8).init(allocator);
const file = if (std.fs.path.isAbsolute(path))
std.fs.openFileAbsolute(path, .{}) catch return error.FileNotFound
else
std.fs.cwd().openFile(path, .{}) catch return error.FileNotFound;
defer file.close();
const content = try file.readToEndAlloc(allocator, 1024 * 1024);
defer allocator.free(content);
var stderr_writer = std.fs.File.stderr().writer(&.{});
const stderr = &stderr_writer.interface;
// Try controlData.json first
const json_path = try std.fs.path.join(allocator, &[_][]const u8{ bin_dir, "controlData.json" });
defer allocator.free(json_path);
const parsed = std.json.parseFromSlice(std.json.Value, allocator, content, .{}) catch |err| {
try stderr.print(
"Failed to parse controlData.json: {}. Ignoring controlData.json, looking for devices.txt",
.{err},
);
return error.ParseError;
};
defer parsed.deinit();
if (std.fs.openFileAbsolute(json_path, .{})) |file| {
defer file.close();
const root = parsed.value.object;
const device_array = root.get("devices").?.array;
const content = try file.readToEndAlloc(allocator, 1024 * 1024);
defer allocator.free(content);
for (device_array.items) |device| {
const device_obj = device.object;
const name = device_obj.get("name").?.string;
const url = device_obj.get("url").?.string;
const parsed = std.json.parseFromSlice(std.json.Value, allocator, content, .{}) catch |err| {
try stderr.print(
"Failed to parse controlData.json: {}. Ignoring controlData.json, looking for devices.txt",
.{err},
);
return loadDevicesFromTxt(allocator, bin_dir);
};
defer parsed.deinit();
const root = parsed.value.object;
const device_array = root.get("devices").?.array;
for (device_array.items) |device| {
const device_obj = device.object;
const name = device_obj.get("name").?.string;
const url = device_obj.get("url").?.string;
if (name.len > 0) {
const name_copy = try allocator.alloc(u8, name.len);
_ = std.ascii.lowerString(name_copy, name);
try devices.put(name_copy, try allocator.dupe(u8, url));
std.log.debug("Loaded device: '{s}' -> {s}", .{ name, url });
}
if (name.len > 0) {
const name_copy = try allocator.alloc(u8, name.len);
_ = std.ascii.lowerString(name_copy, name);
try devices.put(name_copy, try allocator.dupe(u8, url));
std.log.debug("Loaded device: '{s}' -> {s}", .{ name, url });
}
return devices;
} else |_| {
return loadDevicesFromTxt(allocator, bin_dir);
}
return devices;
}
fn loadDevicesFromTxt(allocator: std.mem.Allocator, bin_dir: []const u8) !std.StringHashMap([]const u8) {
@ -231,15 +260,16 @@ pub fn main() !u8 {
defer std.process.argsFree(allocator, args);
// Check for --sentence-parse-only option
var sentence_parse_only = false;
var sentence_parse_only: enum { sentence, command, none } = .none;
var sentence_arg: ?[]const u8 = null;
for (args[1..]) |arg| {
if (std.mem.eql(u8, arg, "--sentence-parse-only")) {
sentence_parse_only = true;
} else if (sentence_arg == null) {
if (std.mem.eql(u8, arg, "--sentence-parse-only"))
sentence_parse_only = .sentence
else if (std.mem.eql(u8, arg, "--command-parse-only"))
sentence_parse_only = .command
else if (sentence_arg == null)
sentence_arg = arg;
}
}
var stdout_writer = std.fs.File.stdout().writer(&.{});
@ -248,7 +278,7 @@ pub fn main() !u8 {
const stderr = &stderr_writer.interface;
if (sentence_arg == null) {
try stderr.print("Usage: {s} [--sentence-parse-only] <sentence>\n", .{args[0]});
try stderr.print("Usage: {s} [--sentence-parse-only] [--command-parse-only] <sentence>\n", .{args[0]});
return 1;
}
const bin_dir = std.fs.selfExeDirPathAlloc(allocator) catch |err| {
@ -263,14 +293,20 @@ pub fn main() !u8 {
return 1;
};
defer parser.deinit();
if (sentence_parse_only) {
if (sentence_parse_only != .none) {
const sentence_z = try allocator.dupeZ(u8, sentence_arg.?);
defer allocator.free(sentence_z);
var tree = parser.adaptiveParse(sentence_z, word_replacements) catch |err| {
std.debug.print("Failed to parse sentence: {}\n", .{err});
return 1;
};
var tree = if (sentence_parse_only == .command)
parser.adaptiveCommandParse(sentence_z, word_replacements) catch {
std.log.err("Failed to parse sentence: {s}", .{sentence_z});
return 1;
}
else
parser.parse(sentence_z) catch {
std.log.err("Failed to parse sentence: {s}", .{sentence_z});
return 1;
};
defer tree.deinit();
try stdout.print("{f}", .{tree});
@ -304,7 +340,7 @@ pub fn main() !u8 {
}
fn processCommand(allocator: std.mem.Allocator, sentence: [:0]const u8, parser: *pos.Parser, devices: *std.StringHashMap([]const u8)) !void {
var tree = parser.adaptiveParse(sentence, word_replacements) catch |err| {
var tree = parser.adaptiveCommandParse(sentence, word_replacements) catch |err| {
std.log.err("Failed to parse sentence with all replacements: {}\n", .{err});
return error.UnrecognizedSentence;
};

View file

@ -1,4 +1,6 @@
const builtin = @import("builtin");
const build_options = @import("build_options");
const std = @import("std");
const c = @cImport({
@cInclude("link-includes.h");
@ -289,10 +291,10 @@ pub const Parser = struct {
pub fn init(allocator: std.mem.Allocator) !Parser {
const dict = c.dictionary_create(
@ptrCast(@constCast("data/4.0.dict")),
@ptrCast(@constCast("data/4.0.knowledge")),
@ptrCast(@constCast("data/4.0.constituent-knowledge")),
@ptrCast(@constCast("data/4.0.affix")),
@ptrCast(@constCast("../share/link/4.0.dict")),
@ptrCast(@constCast("../share/link/4.0.knowledge")),
@ptrCast(@constCast("../share/link/4.0.constituent-knowledge")),
@ptrCast(@constCast("../share/link/4.0.affix")),
);
if (dict == null) return error.DictionaryCreationFailed;
@ -317,13 +319,13 @@ pub const Parser = struct {
}
pub fn initWithDataDir(allocator: std.mem.Allocator, data_dir: []const u8) !Parser {
const dict_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.dict" });
const dict_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.dict" });
defer allocator.free(dict_path);
const knowledge_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.knowledge" });
const knowledge_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.knowledge" });
defer allocator.free(knowledge_path);
const constituent_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.constituent-knowledge" });
const constituent_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.constituent-knowledge" });
defer allocator.free(constituent_path);
const affix_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "data/4.0.affix" });
const affix_path = try std.fs.path.join(allocator, &[_][]const u8{ data_dir, "../share/link/4.0.affix" });
defer allocator.free(affix_path);
const dict_cstr = try allocator.dupeZ(u8, dict_path);
@ -360,23 +362,30 @@ pub const Parser = struct {
_ = c.dictionary_delete(self.dict);
}
/// Parses a sentence with an attempt to "fix" the sentence. If a valid
/// sentence is found, it will be returned, with the guarantee that
/// sentenceObject and sentenceAction will return non-zero results. If that
/// condition cannot be satisfied, error.NoValidParse will be returned
pub fn adaptiveParse(self: *Parser, sentence: [:0]const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
var altered_buf: [1024]u8 = undefined;
fn applyReplacements(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8), final_buf: []u8) ![]const u8 {
_ = self; // we don't want to remove this completely, as there
// could be a time when we need to re-parse after replacement
const replacement_keys = replacements.keys();
const replacement_values = replacements.values();
var altered = sentence;
// Step 1: Replacements
for (replacement_keys, replacement_values) |key, value| {
for (replacement_keys, replacement_values) |k, v| {
var k_buf: [256]u8 = undefined;
var v_buf: [256]u8 = undefined;
// add spaces on either side so we match words
const key = try std.fmt.bufPrint(&k_buf, " {s} ", .{k});
const value = try std.fmt.bufPrint(&v_buf, " {s} ", .{v});
// and then we need our sentence to have a space on either side
// so the replacement works
var sent_buf: [1024]u8 = undefined;
const sent = try std.fmt.bufPrint(&sent_buf, " {s} ", .{altered});
var altered_buf: [1024]u8 = undefined;
const altered_size = std.mem.replacementSize(
u8,
altered,
sent,
key,
value,
);
@ -387,75 +396,176 @@ pub const Parser = struct {
}
const replacement_count = std.mem.replace(
u8,
altered,
sent,
key,
value,
&altered_buf,
);
altered_buf[altered_size] = 0; // add sentinel
altered = altered_buf[0..altered_size :0];
if (std.mem.trimRight(u8, altered_buf[0..altered_size], " ").len == 0) {
std.log.debug("Sentence empty after replacements", .{});
return error.SentenceEmptyAfterReplacements;
}
const start: usize = if (altered_buf[0] == ' ') 1 else 0;
const last_is_space = altered_buf[altered_size - 1] == ' ';
const end: usize = if (last_is_space) altered_size - 1 else altered_size;
altered_buf[end] = 0; // add sentinel
@memcpy(final_buf[start .. end + 1], altered_buf[start .. end + 1]);
altered = final_buf[start..end :0];
if (replacement_count > 0)
// we have altered the deal. Pray we don't alter it further
std.log.info("Replaced '{s}' in sentence with replacement '{s}' {d} times. Sentence now:\n\t{s}", .{
key,
value,
k,
v,
replacement_count,
altered,
});
}
var tree = self.parse(altered) catch |err| {
if (shouldLog())
std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered });
// continue;
return err;
};
return altered;
}
std.log.debug("adaptiveParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
sentence,
altered,
tree,
});
fn removeNullWords(self: *Parser, altered_sentence: []const u8, tree: *ParseTree, final_buf: []u8) !struct { sentence: []const u8, nulls_removed: usize } {
var altered = altered_sentence;
var this_pass_nulls_removed: usize = 1;
var total_nulls_removed: usize = 0;
var replacement_errors: usize = 0;
// Step 2: replace null words
var nulls_removed = true;
while (nulls_removed) {
nulls_removed = false;
while (this_pass_nulls_removed - replacement_errors > 0) {
this_pass_nulls_removed = 0;
var last_word: ?[]const u8 = null;
for (tree.words) |word| {
if (std.mem.indexOf(u8, word, "[?]")) |i| {
nulls_removed = true;
// We need to alter this further
const trimmed = word[0..i];
const removals = std.mem.replace(
u8,
altered,
trimmed,
"",
&altered_buf,
);
const len = altered.len - (removals * trimmed.len);
altered_buf[len] = 0;
altered = altered_buf[0..len :0];
const had_last_word = last_word != null;
const is_null = if (last_word == null)
std.mem.indexOf(u8, word, "[?]") != null
else
(word[0] == '[' and word[word.len - 1] == ']') or
std.mem.startsWith(u8, word, "'s.");
std.log.info("Removed null word '{s}' in sentence {d} time(s). Sentence now:\n\t{s}", .{
trimmed,
removals,
altered,
});
// Retry parsing with the word removed
tree.deinit();
tree = self.parse(altered) catch |err| {
if (shouldLog())
std.log.err("Failed to parse altered sentence: {}\n\t{s}", .{ err, altered });
// continue;
return err;
};
break; // we will remove these words conservatively...
if (!is_null) {
if (last_word) |l| {
// We had no replacements, and this next word is unusable
std.log.warn("No replacements for word '{s}' and cannot combine with next word '{s}'. Continuing", .{ l, word });
last_word = null;
this_pass_nulls_removed += 1; // count as removal
replacement_errors += 1;
}
continue;
}
// we are on a null, but we might have to skip processing if
// there was an earlier failure
if (this_pass_nulls_removed < replacement_errors) {
this_pass_nulls_removed += 1; // skip and move on
continue;
}
// We need to alter this further
const trimmed = if (std.mem.indexOf(u8, word, "[?]")) |i|
word[0..i]
else if (std.mem.startsWith(u8, word, "'s."))
word[0..2]
else
word[1 .. word.len - 1];
var needle_buf: [256]u8 = undefined;
var first_part: []const u8 = "";
if (last_word) |w| {
if (std.mem.indexOf(u8, w, "[?]")) |i|
first_part = w[0..i]
else
@panic("first part of null word does not have [?]. programming error");
}
const needle = try std.fmt.bufPrint(&needle_buf, " {s}{s} ", .{
first_part,
trimmed,
});
if (last_word) |w|
std.log.debug("last word: {s}, needle: {s}", .{ w, needle[1 .. needle.len - 1] });
last_word = null;
// and then we need our sentence to have a space on either side
// so the replacement works
var sent_buf: [1024]u8 = undefined;
const sent = try std.fmt.bufPrint(&sent_buf, " {s} ", .{altered});
const removals = std.mem.replace(
u8,
sent,
needle,
" ",
&sent_buf,
);
const len = sent.len - (removals * needle.len) + removals;
if (std.mem.trimRight(u8, sent_buf[0..len], " ").len == 0) {
std.log.debug("Removed null word '{s}' in sentence {d} time(s). Sentence now empty", .{ needle[1 .. needle.len - 1], removals });
return error.SentenceEmptyAfterNullRemoval;
}
const start: usize = if (sent_buf[0] == ' ') 1 else 0;
const last_is_space = sent_buf[len - 1] == ' ';
const end: usize = if (last_is_space) len - 1 else len;
if (removals == 0) {
if (had_last_word) {
// giving up
std.log.info("Could not find word to remove after combining with next null. Giving up", .{});
break;
}
// contractions are sometimes split across words in the array
std.log.debug("Could not find word to remove. Combining next word with this one ({s})", .{trimmed});
// last_word = try std.fmt.bufPrint(&last_word_buf, "{s}", .{word});
last_word = word; // I think this should work
continue;
}
std.log.info("Removed null word '{s}' in sentence {d} time(s). Sentence before:\n\t{s}\nafter:\n\t{s}", .{
needle[1 .. needle.len - 1],
removals,
altered, // this is our before...we will copy memory around just below
sent_buf[start..end],
});
sent_buf[end] = 0; // add sentinal
@memcpy(final_buf[start .. end + 1], sent_buf[start .. end + 1]);
altered = final_buf[start..end :0];
// Retry parsing with the word removed
tree.deinit();
tree.* = self.parse(altered) catch |err| {
if (shouldLog())
std.log.err("Failed to parse altered sentence: {}\n\t{s}", .{ err, altered });
// continue;
return err;
};
this_pass_nulls_removed += 1;
total_nulls_removed += 1;
break; // we will remove these words conservatively...
}
}
return .{ .sentence = altered, .nulls_removed = total_nulls_removed };
}
/// Parses a sentence with an attempt to "fix" the sentence, assuming
/// the sentence is a command with an action and an object. If a valid
/// sentence is found, it will be returned, with the guarantee that
/// sentenceObject and sentenceAction will return non-zero results. If that
/// condition cannot be satisfied, error.NoValidParse will be returned
pub fn adaptiveCommandParse(self: *Parser, sentence: []const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
var final_buf: [1024]u8 = undefined;
var altered = try self.applyReplacements(sentence, replacements, &final_buf);
var tree = self.parse(altered) catch |err| {
if (shouldLog()) {
if (altered.len > 0)
std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered })
else
std.log.err("Sentence is empty: not parsing", .{});
}
return err;
};
const tree_ptr: ?*ParseTree = &tree;
errdefer if (tree_ptr) |p| p.deinit();
std.log.debug("adaptiveCommandParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
sentence, altered, tree,
});
const result = try self.removeNullWords(altered, &tree, &final_buf);
altered = result.sentence;
std.log.debug("{d} nulls removed", .{result.nulls_removed});
// Bracketed words are "null"
// words with [?] are "unknown"
// If we have unknowns, I think we want to replace (or if no replacement
@ -475,9 +585,9 @@ pub const Parser = struct {
// Validate that we can extract action and object before returning
const action_words = tree.sentenceAction() catch |err| {
if (shouldLog())
std.log.err("Failed to extract action: {}\n", .{err});
tree.deinit();
// This is the first catch, so we don't want to log here as it
// gets super noisy
std.log.debug("Failed to extract action: {}", .{err});
return err;
// continue;
};
@ -485,7 +595,6 @@ pub const Parser = struct {
if (action_words.len == 0) {
std.log.info("Failed to extract action from sentence", .{});
tree.deinit();
return error.SentenceActionNotFound;
// continue;
}
@ -493,7 +602,6 @@ pub const Parser = struct {
const object_words = tree.sentenceObject() catch |err| {
if (shouldLog())
std.log.err("Failed to extract object: {}\n", .{err});
tree.deinit();
// continue;
return err;
};
@ -501,7 +609,6 @@ pub const Parser = struct {
if (object_words.len == 0) {
std.log.info("Failed to extract object from sentence", .{});
tree.deinit();
// continue;
return error.SentenceObjectNotFound;
}
@ -677,7 +784,7 @@ test "real usage - jack" {
try std.testing.expectEqualStrings("bedroom", sentence_object[1]);
try std.testing.expectEqualStrings("light", sentence_object[2]);
}
test "adaptiveParse successful without replacements" {
test "adaptiveCommandParse successful without replacements" {
var parser = try Parser.init(std.testing.allocator);
defer parser.deinit();
@ -687,10 +794,7 @@ test "adaptiveParse successful without replacements" {
});
const sentence = "turn on the kitchen light";
const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
defer std.testing.allocator.free(sentence_z);
var tree = try parser.adaptiveParse(sentence_z, replacements);
var tree = try parser.adaptiveCommandParse(sentence, replacements);
defer tree.deinit();
const action_words = try tree.sentenceAction();
@ -706,7 +810,7 @@ test "adaptiveParse successful without replacements" {
try std.testing.expectEqualStrings("light", object_words[1]);
}
test "adaptiveParse with word replacement" {
test "adaptiveCommandParse with word replacement" {
var parser = try Parser.init(std.testing.allocator);
defer parser.deinit();
@ -716,10 +820,8 @@ test "adaptiveParse with word replacement" {
});
const sentence = "turn on the kitchen lake";
const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
defer std.testing.allocator.free(sentence_z);
var tree = try parser.adaptiveParse(sentence_z, replacements);
var tree = try parser.adaptiveCommandParse(sentence, replacements);
defer tree.deinit();
const action_words = try tree.sentenceAction();
@ -735,7 +837,7 @@ test "adaptiveParse with word replacement" {
try std.testing.expectEqualStrings("light", object_words[1]);
}
test "adaptiveParse no valid parse" {
test "adaptiveCommandParse no valid parse" {
var parser = try Parser.init(std.testing.allocator);
defer parser.deinit();
@ -745,22 +847,20 @@ test "adaptiveParse no valid parse" {
});
const sentence = "xyz abc def";
const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
defer std.testing.allocator.free(sentence_z);
// const ll = std.testing.log_level;
// defer std.testing.log_level = ll;
// std.testing.log_level = .debug;
try std.testing.expectError(
error.SentenceCreationFailed,
parser.adaptiveParse(
sentence_z,
error.SentenceEmptyAfterNullRemoval,
parser.adaptiveCommandParse(
sentence,
replacements,
),
);
}
test "adaptiveParse with word replacement and null removal" {
test "adaptiveCommandParse with word replacement and null removal" {
var parser = try Parser.init(std.testing.allocator);
defer parser.deinit();
@ -770,10 +870,11 @@ test "adaptiveParse with word replacement and null removal" {
});
const sentence = "alexa turn on the kitchen lake";
const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
defer std.testing.allocator.free(sentence_z);
var tree = try parser.adaptiveParse(sentence_z, replacements);
// const ll = std.testing.log_level;
// defer std.testing.log_level = ll;
// std.testing.log_level = .debug;
var tree = try parser.adaptiveCommandParse(sentence, replacements);
defer tree.deinit();
const action_words = try tree.sentenceAction();
@ -788,3 +889,117 @@ test "adaptiveParse with word replacement and null removal" {
try std.testing.expectEqualStrings("kitchen", object_words[0]);
try std.testing.expectEqualStrings("light", object_words[1]);
}
test "applyReplacements basic replacement" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // One-entry table: "lake" should come back as "light".
    const table = std.StaticStringMap([]const u8).initComptime(.{
        .{ "lake", "light" },
    });

    const input = "turn on the kitchen lake";
    var out_buf: [1024]u8 = undefined;
    const rewritten = try parser.applyReplacements(input, table, &out_buf);

    try std.testing.expectEqualStrings("turn on the kitchen light", rewritten);
}
test "applyReplacements multiple replacements" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Two independent entries; both must be applied to the same sentence.
    const table = std.StaticStringMap([]const u8).initComptime(.{
        .{ "lake", "light" },
        .{ "kitchen", "bedroom" },
    });

    const input = "turn on the kitchen lake";
    var out_buf: [1024]u8 = undefined;
    const rewritten = try parser.applyReplacements(input, table, &out_buf);

    try std.testing.expectEqualStrings("turn on the bedroom light", rewritten);
}
test "applyReplacements empty after replacement" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Replacing the only word with nothing must be reported as an error
    // rather than yielding an empty sentence.
    const table = std.StaticStringMap([]const u8).initComptime(.{
        .{ "test", "" },
    });

    var out_buf: [1024]u8 = undefined;
    const outcome = parser.applyReplacements("test", table, &out_buf);

    try std.testing.expectError(error.SentenceEmptyAfterReplacements, outcome);
}
test "removeNullWords no nulls" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    const sentence = "turn on the light";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // removeNullWords deinits and re-parses `tree` in place whenever it
    // removes a word, so the tree is only released here, after it returned.
    defer tree.deinit();

    // Nothing to strip: the sentence must come back unchanged.
    try std.testing.expectEqualStrings(sentence, result.sentence);
    // expectEqual reports expected vs. actual on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 0), result.nulls_removed);
}
test "removeNullWords - 'i' as null word" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // sentence from unique samples that originally identified "i" as a null word,
    // originally we were removing "i", which screwed up "eighteen", forcing logic
    // to make sure that we were processing whole words
    const sentence = "ah the next level the out wednesday october first i want alaska eighteen from seattle to boston";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // removeNullWords deinits and re-parses `tree` in place whenever it
    // removes a word, so the tree is only released here, after it returned.
    defer tree.deinit();

    try std.testing.expectEqualStrings("ah the next level the out first want eighteen from to", result.sentence);
    // expectEqual reports expected vs. actual on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 6), result.nulls_removed);
}
test "removeNullWords - was originally crashing" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // Regression input: per the test name, this sentence used to crash.
    const sentence = "for for big waiver yeah i'm be a masses";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // removeNullWords deinits and re-parses `tree` in place whenever it
    // removes a word, so the tree is only released here, after it returned.
    defer tree.deinit();

    try std.testing.expectEqualStrings("for for big waiver yeah be a masses", result.sentence);
    // expectEqual reports expected vs. actual on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 1), result.nulls_removed);
}
test "removeNullWords - null word followed by possessive" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // This shows up as tom[.?] but then a proper 's.p, so breaks the typical pattern of an unknown word after a null word
    const sentence = "them sound indiscipline or doesn't have i say or okay so and so he's creating like an excel spreadsheet was tom's and records or whatever it to translate by so that's how he and he could have come conversation about";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // removeNullWords deinits and re-parses `tree` in place whenever it
    // removes a word, so the tree is only released here, after it returned.
    defer tree.deinit();

    try std.testing.expectEqualStrings("them sound or doesn't have say or okay so and so he's creating like an excel was and records or whatever it to translate by so that's how he and he could have come conversation about", result.sentence);
    // expectEqual reports expected vs. actual on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 4), result.nulls_removed);
}
test "removeNullWords - complex and long processing time" {
    // Opt-in only (build option `long-tests`). Check before creating the
    // parser so a skipped run does not load the dictionary at all.
    if (!build_options.long_tests) return error.SkipZigTest;

    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    // This one takes a lot of processing. Also, "i" ends up as null, so forces us to strip the word 'i' and not the letter 'i'
    const sentence = "i'm i seem to be breaking the website and so i'm training to or a multi city a reservation and it's telling me i can't get from seattle to portland or on monday september twenty ninth am and at that point i was like okay he something completely wrong effect";
    var tree = try parser.parse(sentence);
    var final_buf: [1024]u8 = undefined;
    const result = try parser.removeNullWords(sentence, &tree, &final_buf);
    // removeNullWords deinits and re-parses `tree` in place whenever it
    // removes a word, so the tree is only released here, after it returned.
    defer tree.deinit();

    try std.testing.expectEqualStrings("seem to be breaking the website and so training to or a city a reservation and it's telling me can't get from to or on twenty ninth am and at that point was like okay he something completely wrong effect", result.sentence);
    // expectEqual reports expected vs. actual on failure, unlike expect(a == b).
    try std.testing.expectEqual(@as(usize, 7), result.nulls_removed);
}