// Post-patch excerpt of src/root.zig, reconstructed from a diff.
//
// `adaptiveParse`, `replaceIntoZ` and `shouldLog` are members of
// `pub const Parser = struct { ... }`; the struct header is outside this
// excerpt, as is the body of the `pub fn parse(self: *Parser, input: []const u8)
// !ParseTree` that follows them. The test at the bottom is file-level.

/// Parses `sentence`, adapting it until a usable parse is found.
///
/// Step 1 applies every entry of `replacements` (key -> value) to the
/// sentence. Step 2 parses the result and, for as long as the tree contains a
/// word tagged with "[?]", removes that word from the sentence and parses
/// again. Finally the tree is validated: both `sentenceAction` and
/// `sentenceObject` must return a non-empty result.
///
/// The caller owns the returned tree and must call `deinit` on it.
///
/// Errors:
/// - `error.SentenceTooLong` if a rewritten sentence would exceed 1023 bytes.
/// - `error.SentenceActionNotFound` / `error.SentenceObjectNotFound` if the
///   final tree yields no action / no object words.
/// - Any error returned by `parse`, `sentenceAction` or `sentenceObject`.
pub fn adaptiveParse(self: *Parser, sentence: [:0]const u8, replacements: std.StaticStringMap([]const u8)) !ParseTree {
    // Two scratch buffers used alternately. std.mem.replace must not be given
    // overlapping input and output, and after the first rewrite `altered`
    // lives inside one of these buffers.
    var buffers: [2][1024]u8 = undefined;
    var spare: usize = 0; // index of the buffer `altered` does NOT point into

    var altered: [:0]const u8 = sentence;

    // Step 1: replacements, in the order the map exposes its keys/values.
    for (replacements.keys(), replacements.values()) |key, value| {
        // std.mem.replace asserts that the needle is non-empty.
        if (key.len == 0) continue;

        var replacement_count: usize = 0;
        altered = replaceIntoZ(altered, key, value, &buffers[spare], &replacement_count) catch |err| {
            if (shouldLog())
                std.log.err("Sentence too long (>1023): {s}", .{altered});
            return err;
        };
        spare ^= 1;

        if (replacement_count > 0)
            std.log.info("Replaced '{s}' in sentence with replacement '{s}' {d} times. Sentence now:\n\t{s}", .{
                key,
                value,
                replacement_count,
                altered,
            });
    }

    var tree = self.parse(altered) catch |err| {
        if (shouldLog())
            std.log.err("Failed to parse sentence: {}\n\t{s}", .{ err, altered });
        return err;
    };

    std.log.debug("adaptiveParse (step 1 - replacements):\n\toriginal:\n\t\t{s}\n\taltered:\n\t\t{s}\n{f}", .{
        sentence,
        altered,
        tree,
    });

    // Step 2: strip words the parser tagged with "[?]" and re-parse.
    //
    // Progress is only recorded when text was actually removed, so every
    // iteration of the outer loop shortens `altered` and the loop terminates.
    //
    // NOTE(review): removal is a plain substring replace, so a short tagged
    // word is also removed from inside longer words, and the surrounding
    // spaces are left behind. Confirm this is acceptable for the parser.
    var made_progress = true;
    while (made_progress) {
        made_progress = false;
        for (tree.words) |word| {
            const tag_index = std.mem.indexOf(u8, word, "[?]") orelse continue;
            const unknown = word[0..tag_index];

            // A bare "[?]" leaves nothing to search for (and an empty needle
            // would trip the assertion in std.mem.replace).
            if (unknown.len == 0) continue;
            // If the word does not occur verbatim in the sentence, removing
            // it is a no-op and re-parsing would loop forever.
            if (std.mem.indexOf(u8, altered, unknown) == null) continue;

            var removals: usize = 0;
            altered = replaceIntoZ(altered, unknown, "", &buffers[spare], &removals) catch |err| {
                // Reachable when `altered` is still the caller's sentence and
                // is too large for the scratch buffer even after the removal.
                if (shouldLog())
                    std.log.err("Sentence too long (>1023): {s}", .{altered});
                tree.deinit();
                return err;
            };
            spare ^= 1;

            std.log.info("Removed null word '{s}' in sentence {d} time(s). Sentence now:\n\t{s}", .{
                unknown,
                removals,
                altered,
            });

            // Retry parsing with the word removed. `unknown` is a slice of
            // `word`, which comes from `tree`, so it is not used past this
            // point.
            tree.deinit();
            tree = self.parse(altered) catch |err| {
                if (shouldLog())
                    std.log.err("Failed to parse altered sentence: {}\n\t{s}", .{ err, altered });
                return err; // the old tree is already released
            };
            made_progress = true;
            break; // `tree.words` was replaced; rescan from the start
        }
    }

    // Design notes carried over from the original implementation:
    //   - Bracketed words are "null"; words with [?] are "unknown".
    //   - Unknown words should be replaced, or stripped when no replacement
    //     is available, followed by an immediate re-parse.
    //   - Null words could be walked and looked up in the replacement map,
    //     re-parsing if any replacement was performed.
    //   - This might all be best done in the library itself.
    // TODO: decide how null (bracketed) words should be handled.

    // From here on every error return must release the tree.
    errdefer tree.deinit();

    // Validate that we can extract action and object before returning.
    const action_words = tree.sentenceAction() catch |err| {
        if (shouldLog())
            std.log.err("Failed to extract action: {}\n", .{err});
        return err;
    };
    defer self.allocator.free(action_words);

    if (action_words.len == 0) {
        std.log.info("Failed to extract action from sentence", .{});
        return error.SentenceActionNotFound;
    }

    const object_words = tree.sentenceObject() catch |err| {
        if (shouldLog())
            std.log.err("Failed to extract object: {}\n", .{err});
        return err;
    };
    defer self.allocator.free(object_words);

    if (object_words.len == 0) {
        std.log.info("Failed to extract object from sentence", .{});
        return error.SentenceObjectNotFound;
    }

    return tree;
}

/// Replaces every occurrence of `needle` in `input` with `replacement`,
/// writing the result into `out` followed by a terminating 0 byte, and stores
/// the number of replacements in `count`.
///
/// `input` must not overlap `out`, and `needle` must not be empty. Returns
/// `error.SentenceTooLong` when the result plus its sentinel does not fit
/// in `out`; `count` is left untouched in that case.
fn replaceIntoZ(
    input: []const u8,
    needle: []const u8,
    replacement: []const u8,
    out: []u8,
    count: *usize,
) error{SentenceTooLong}![:0]const u8 {
    const size = std.mem.replacementSize(u8, input, needle, replacement);
    if (size >= out.len) return error.SentenceTooLong; // keep one byte for the sentinel
    count.* = std.mem.replace(u8, input, needle, replacement, out);
    out[size] = 0;
    return out[0..size :0];
}

/// Reports whether error-level diagnostics should be emitted: always outside
/// of tests, and inside tests only when `std.testing.log_level` differs from
/// `.warn` (the default testing log level).
inline fn shouldLog() bool {
    return !builtin.is_test or std.testing.log_level != .warn;
}

// The existing test "adaptiveParse no valid parse" (only its tail is visible in
// the diff) now expects `error.SentenceCreationFailed` instead of
// `error.NoValidParse` from `parser.adaptiveParse(sentence_z, replacements)`.

test "adaptiveParse with word replacement and null removal" {
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    const replacements = std.StaticStringMap([]const u8).initComptime(.{
        .{ "lake", "light" },
        .{ "like", "light" },
    });

    const sentence = "alexa turn on the kitchen lake";
    const sentence_z = try std.testing.allocator.dupeZ(u8, sentence);
    defer std.testing.allocator.free(sentence_z);

    var tree = try parser.adaptiveParse(sentence_z, replacements);
    defer tree.deinit();

    const action_words = try tree.sentenceAction();
    defer std.testing.allocator.free(action_words);
    try std.testing.expectEqual(@as(usize, 2), action_words.len);
    try std.testing.expectEqualStrings("turn", action_words[0]);
    try std.testing.expectEqualStrings("on", action_words[1]);

    const object_words = try tree.sentenceObject();
    defer std.testing.allocator.free(object_words);
    try std.testing.expectEqual(@as(usize, 2), object_words.len);
    try std.testing.expectEqualStrings("kitchen", object_words[0]);
    try std.testing.expectEqualStrings("light", object_words[1]);
}