use constituent tree to find the correct nouns
This commit is contained in:
parent
87ee0f9580
commit
4e1bf8b4b9
1 changed files with 131 additions and 48 deletions
179
src/root.zig
179
src/root.zig
|
@ -3,6 +3,26 @@ const c = @cImport({
|
|||
@cInclude("link-includes.h");
|
||||
});
|
||||
|
||||
/// One node of a constituent tree, stored in first-child / next-sibling form.
///
/// A node owns its `label` and every node reachable through `child` and
/// `next`; `deinit` releases all of them together with the node itself.
pub const ConstituentNode = struct {
    /// Constituent label (the code in this file matches on values such as
    /// "S", "VP", "PP" and "NP"). Freed with `allocator` in `deinit`.
    label: []const u8,
    /// Index of the first word covered by this constituent within the
    /// linkage. LEFT_WALL/RIGHT_WALL are not counted.
    start: usize,
    /// Index of the last word covered by this constituent within the
    /// linkage. LEFT_WALL/RIGHT_WALL are not counted.
    end: usize,
    /// First child of this node, or null when it has none.
    child: ?*ConstituentNode,
    /// Next sibling of this node, or null when it is the last one.
    next: ?*ConstituentNode,
    /// Allocator used to free `label` and to destroy this node.
    allocator: std.mem.Allocator,

    /// Recursively frees the sibling chain, the child subtree, the label and
    /// finally the node itself. `self` must not be used afterwards.
    pub fn deinit(self: *ConstituentNode) void {
        const gpa = self.allocator;

        if (self.next) |sibling| {
            sibling.deinit();
        }
        if (self.child) |first_child| {
            first_child.deinit();
        }

        gpa.free(self.label);
        gpa.destroy(self);
    }
};
|
||||
|
||||
pub const Link = struct {
|
||||
left_word: []const u8,
|
||||
right_word: []const u8,
|
||||
|
@ -14,6 +34,7 @@ pub const Link = struct {
|
|||
pub const ParseTree = struct {
|
||||
words: [][]const u8,
|
||||
links: []Link,
|
||||
constituent_tree: ?*ConstituentNode,
|
||||
allocator: std.mem.Allocator,
|
||||
|
||||
pub fn deinit(self: *ParseTree) void {
|
||||
|
@ -27,8 +48,28 @@ pub const ParseTree = struct {
|
|||
self.allocator.free(link.label);
|
||||
}
|
||||
self.allocator.free(self.links);
|
||||
if (self.constituent_tree) |tree| {
|
||||
tree.deinit();
|
||||
}
|
||||
}
|
||||
/// Writes a human-readable dump of the parse to `writer`: the indexed word
/// list, every link as `left --label--> right`, and the constituent tree
/// (or a placeholder line when there is none).
///
/// Any error reported by `writer` is propagated.
pub fn format(self: ParseTree, writer: *std.io.Writer) std.io.Writer.Error!void {
    // Section 1: words, each prefixed with its index.
    try writer.writeAll("Words: ");
    for (self.words, 0..) |text, word_index| {
        try writer.print("{d}: '{s}' ", .{ word_index, text });
    }

    // Section 2: links between words.
    const link_count = self.links.len;
    try writer.print("\n\nLinks ({} total):\n", .{link_count});
    for (self.links, 0..) |entry, link_index| {
        try writer.print(" [{d}] '{s}' --{s}--> '{s}'\n", .{
            link_index,
            entry.left_word,
            entry.label,
            entry.right_word,
        });
    }

    // Section 3: constituent tree, printed from the root at depth 0.
    try writer.writeAll("\nConstituent Tree:\n");
    const root = self.constituent_tree orelse {
        try writer.writeAll(" (no constituent tree)\n");
        return;
    };
    try self.printConstituentNode(writer, root, 0);
}
|
||||
pub fn firstVerb(self: *ParseTree) ?[]const u8 {
|
||||
for (self.words) |word| {
|
||||
if (std.mem.endsWith(u8, word, ".v")) {
|
||||
|
@ -72,46 +113,68 @@ pub const ParseTree = struct {
|
|||
pub fn sentenceObject(self: *ParseTree) ![][]const u8 {
|
||||
var al: std.ArrayList([]const u8) = .{};
|
||||
defer al.deinit(self.allocator);
|
||||
// var noun: ?usize = null;
|
||||
for (self.words, 0..) |word, i| {
|
||||
if (std.mem.endsWith(u8, word, ".n")) {
|
||||
// adjective or noun
|
||||
for (self.links) |l| {
|
||||
if (@as(usize, l.left_index) == i and
|
||||
std.mem.startsWith(u8, l.label, "AN"))
|
||||
{
|
||||
// this is an adjective. We need to add both it and the noun to the list
|
||||
// https://www.link.cs.cmu.edu/link/dict/section-AN.html
|
||||
try al.append(self.allocator, word[0 .. word.len - 2]);
|
||||
try al.append(
|
||||
self.allocator,
|
||||
l.right_word[0 .. std.mem.lastIndexOfScalar(u8, l.right_word, '.') orelse l.right_word.len],
|
||||
);
|
||||
return al.toOwnedSlice(self.allocator);
|
||||
}
|
||||
}
|
||||
|
||||
// no adjective, just this noun
|
||||
try al.append(self.allocator, word[0 .. word.len - 2]);
|
||||
return al.toOwnedSlice(self.allocator);
|
||||
}
|
||||
if (self.constituent_tree == null) return error.NoConstituentTree;
|
||||
var node = self.constituent_tree.?;
|
||||
// https://www.link.cs.cmu.edu/link/dict/section-S.html
|
||||
if (!std.mem.startsWith(u8, node.label, "S"))
|
||||
return error.NoSubjectNounFound;
|
||||
if (node.child == null) @panic("S node must have a child");
|
||||
node = node.child.?;
|
||||
|
||||
// I'm not entirely sure this will be universally true, but we'll see
|
||||
// in real life testing
|
||||
// https://www.link.cs.cmu.edu/link/dict/section-V.html
|
||||
// https://www.link.cs.cmu.edu/link/dict/section-P.html
|
||||
if (!std.mem.eql(u8, node.label, "VP"))
|
||||
return error.VerbBeLinkageNotFound;
|
||||
|
||||
if (node.child == null) @panic("VP node must have a child");
|
||||
node = node.child.?;
|
||||
|
||||
// We need the next node, which should be our PP node
|
||||
if (node.next == null) @panic("VP node must have a child with two members");
|
||||
node = node.next.?;
|
||||
|
||||
// https://opencog.github.io/link-grammar-website/dict/section-PP.html
|
||||
if (!std.mem.startsWith(u8, node.label, "PP"))
|
||||
return error.PastParticipleLinkageNotFound;
|
||||
|
||||
if (node.child == null) @panic("PP node must have a child");
|
||||
node = node.child.?;
|
||||
|
||||
// At this point we should be pointing to something like "on" or "off"
|
||||
// We need the next node, which is the money shot
|
||||
if (node.next == null) @panic("PP node must have a child with two members");
|
||||
node = node.next.?;
|
||||
|
||||
// N is "No", which doesn't make sense here, but there is no specific
|
||||
// NP section, so I'm not sure what we're here
|
||||
if (!std.mem.eql(u8, node.label, "NP"))
|
||||
return error.NPLinkageNotFound;
|
||||
for (node.start..node.end + 1) |i| {
|
||||
// we need to add 1 to this index so we can avoid LEFT-WALL
|
||||
const inx = i + 1;
|
||||
// we only want the nouns out of this...
|
||||
const word = self.words[inx];
|
||||
if (!std.mem.endsWith(u8, word, ".n")) continue;
|
||||
const trimmed = word[0 .. word.len - 2];
|
||||
try al.append(self.allocator, trimmed);
|
||||
}
|
||||
return al.toOwnedSlice(self.allocator);
|
||||
}
|
||||
|
||||
// TODO: This should be a format function
|
||||
pub fn printTree(self: *const ParseTree, writer: *std.Io.Writer) !void {
|
||||
try writer.writeAll("Parse Tree:\n");
|
||||
try writer.writeAll("Words: ");
|
||||
for (self.words) |word| {
|
||||
try writer.print("'{s}' ", .{word});
|
||||
fn printConstituentNode(self: *const ParseTree, writer: *std.Io.Writer, node: *const ConstituentNode, depth: usize) !void {
|
||||
for (0..depth) |_| {
|
||||
try writer.writeAll(" ");
|
||||
}
|
||||
try writer.print("\n\nLinks ({} total):\n", .{self.links.len});
|
||||
|
||||
for (self.links, 0..) |link, i| {
|
||||
try writer.print(" [{d}] '{s}' --{s}--> '{s}'\n", .{ i, link.left_word, link.label, link.right_word });
|
||||
try writer.print("[{s}] ({}-{})\n", .{ node.label, node.start, node.end });
|
||||
if (node.child) |child| {
|
||||
try self.printConstituentNode(writer, child, depth + 1);
|
||||
}
|
||||
if (node.next) |next| {
|
||||
try self.printConstituentNode(writer, next, depth);
|
||||
}
|
||||
try writer.writeAll("\n");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -243,12 +306,45 @@ pub const Parser = struct {
|
|||
};
|
||||
}
|
||||
|
||||
// Extract constituent tree structure
|
||||
const constituent_tree = try self.constituentTree(linkage);
|
||||
|
||||
return ParseTree{
|
||||
.words = words,
|
||||
.links = links,
|
||||
.constituent_tree = constituent_tree,
|
||||
.allocator = self.allocator,
|
||||
};
|
||||
}
|
||||
|
||||
/// Copies the link-grammar constituent tree of `linkage` into
/// `ConstituentNode`s allocated with `self.allocator`.
///
/// Returns null when `linkage_constituent_tree` yields no tree. Otherwise the
/// C tree is handed back to `linkage_free_constituent_tree` before this
/// function returns, whether the copy succeeded or failed.
///
/// For reference, the C node being copied is laid out as:
///
///     struct CNode_s {
///         char * label;
///         CNode * child;
///         CNode * next;
///         int start, end;
///     };
fn constituentTree(self: *Parser, linkage: c.Linkage) !?*ConstituentNode {
    const c_root = c.linkage_constituent_tree(linkage);

    if (c_root != null) {
        // The copy is evaluated first; the C tree is released on the way out.
        defer c.linkage_free_constituent_tree(c_root);
        return try parseCNode(self.allocator, c_root);
    }

    return null;
}
|
||||
/// Recursively deep-copies a link-grammar `CNode` (and everything reachable
/// through its `child` and `next` pointers) into allocator-owned
/// `ConstituentNode`s.
///
/// The caller owns the returned node and releases the whole copy with
/// `ConstituentNode.deinit`. If any allocation fails, everything copied so
/// far is freed before the error is returned.
fn parseCNode(allocator: std.mem.Allocator, cnode: *c.struct_CNode_s) !*ConstituentNode {
    const node = try allocator.create(ConstituentNode);
    errdefer allocator.destroy(node);

    const label = try allocator.dupe(u8, std.mem.span(cnode.label));
    errdefer allocator.free(label);

    // Build the child subtree on its own so it can be released if copying the
    // sibling chain fails afterwards; otherwise the already-copied children
    // would leak on that error path.
    const child: ?*ConstituentNode = if (cnode.child != null)
        try parseCNode(allocator, cnode.child)
    else
        null;
    errdefer if (child) |owned_child| owned_child.deinit();

    // Nothing can fail after this point, so `next` needs no errdefer.
    const next: ?*ConstituentNode = if (cnode.next != null)
        try parseCNode(allocator, cnode.next)
    else
        null;

    node.* = ConstituentNode{
        .label = label,
        .start = @intCast(cnode.start),
        .end = @intCast(cnode.end),
        .child = child,
        .next = next,
        .allocator = allocator,
    };
    return node;
}
|
||||
};
|
||||
|
||||
test "basic C API functionality" {
|
||||
|
@ -264,10 +360,6 @@ test "parser functionality" {
|
|||
var tree = try parser.parse("The cat sat on the mat");
|
||||
defer tree.deinit();
|
||||
|
||||
// for (tree.links) |w|
|
||||
// std.debug.print("l: '{s}', r: '{s}', label: '{s}'\n", .{ w.left_word, w.right_word, w.label });
|
||||
//
|
||||
// std.debug.print("{any}\n", .{tree.words.len});
|
||||
// LEFT-WALL
|
||||
// the
|
||||
// cat.n
|
||||
|
@ -293,12 +385,6 @@ test "real usage" {
|
|||
var tree = try parser.parse("turn on the bedroom light");
|
||||
defer tree.deinit();
|
||||
|
||||
// for (tree.links) |w|
|
||||
// std.debug.print("l: '{s}', r: '{s}', label: '{s}'\n", .{ w.left_word, w.right_word, w.label });
|
||||
//
|
||||
// for (tree.words) |w|
|
||||
// std.debug.print("{s}\n", .{w});
|
||||
// std.debug.print("{any}\n", .{tree.words.len});
|
||||
try std.testing.expect(tree.words.len == 7); // 5 + LEFT_WALL / RIGHT_WALL
|
||||
try std.testing.expectEqualStrings("turn", tree.firstVerb().?);
|
||||
const sentence_action = try tree.sentenceAction();
|
||||
|
@ -316,20 +402,17 @@ test "real usage - jack" {
|
|||
var parser = try Parser.init(std.testing.allocator);
|
||||
defer parser.deinit();
|
||||
|
||||
if (true) return error.SkipZigTest;
|
||||
var tree = try parser.parse("turn on jack bedroom light");
|
||||
defer tree.deinit();
|
||||
|
||||
var stderr_writer = std.fs.File.stderr().writer(&.{});
|
||||
const stderr = &stderr_writer.interface;
|
||||
try tree.printTree(stderr); // well, this is stupid. We shall fix later
|
||||
// std.debug.print("{f}\n", .{tree});
|
||||
try std.testing.expect(tree.words.len == 7); // 5 + LEFT_WALL / RIGHT_WALL
|
||||
try std.testing.expectEqualStrings("turn", tree.firstVerb().?);
|
||||
const sentence_action = try tree.sentenceAction();
|
||||
defer std.testing.allocator.free(sentence_action);
|
||||
try std.testing.expect(sentence_action.len == 2);
|
||||
try std.testing.expectEqualStrings("turn", sentence_action[0]);
|
||||
try std.testing.expectEqualStrings("on", sentence_action[1]);
|
||||
|
||||
const sentence_object = try tree.sentenceObject();
|
||||
defer std.testing.allocator.free(sentence_object);
|
||||
try std.testing.expect(sentence_object.len == 3);
|
||||
|
|
Loading…
Add table
Reference in a new issue