//! Zig wrapper around the C parsing API pulled in through `link-includes.h`
//! (the `dictionary_*`, `parse_options_*`, `sentence_*` and `linkage_*` calls).
//!
//! `Parser` owns the C dictionary and parse options; `Parser.parse` copies the
//! first linkage of a sentence into a `ParseTree` whose memory is owned by Zig.
const std = @import("std");
const c = @cImport({
    @cInclude("link-includes.h");
});

// NOTE(review): "is is" looks like a duplicated word; left untouched because it
// may be deliberate parser input. Confirm with the author.
/// Sample sentence exported for callers that want ready-made input.
pub const sentence = "When your back is is against the whiteboard, I'll be back to back you up";

/// Copies a NUL-terminated C string into memory owned by `allocator`.
///
/// Returns `error.NullCString` when `ptr` is null instead of handing a null
/// pointer to `std.mem.span`. Caller owns the returned slice.
fn dupeCString(
    allocator: std.mem.Allocator,
    ptr: [*c]const u8,
) error{ OutOfMemory, NullCString }![]u8 {
    if (ptr == null) return error.NullCString;
    return allocator.dupe(u8, std.mem.span(ptr));
}

/// One labelled connection between two words of a parsed sentence.
///
/// The three string fields are heap copies made by `Parser.parse`; they are
/// released by `ParseTree.deinit`, not individually.
pub const Link = struct {
    /// Text of the word at `left_index`.
    left_word: []const u8,
    /// Text of the word at `right_index`.
    right_word: []const u8,
    /// Label reported by the C library for this link.
    label: []const u8,
    /// Index of the left word, as reported by `linkage_get_link_lword`.
    left_index: u32,
    /// Index of the right word, as reported by `linkage_get_link_rword`.
    right_index: u32,
};

/// Owned snapshot of a single linkage: its words and the links between them.
pub const ParseTree = struct {
    /// Words of the linkage, each an individually allocated copy.
    words: [][]const u8,
    /// Links of the linkage; every string inside is individually allocated.
    links: []Link,
    /// Allocator that produced every slice above and is used to free them.
    allocator: std.mem.Allocator,

    /// Frees every word, every link string and both backing arrays.
    /// The tree must not be used afterwards.
    pub fn deinit(self: *ParseTree) void {
        const allocator = self.allocator;

        for (self.words) |word| allocator.free(word);
        allocator.free(self.words);

        for (self.links) |link| {
            allocator.free(link.left_word);
            allocator.free(link.right_word);
            allocator.free(link.label);
        }
        allocator.free(self.links);

        self.* = undefined;
    }
};

/// Owns a C dictionary plus parse options and turns sentences into `ParseTree`s.
pub const Parser = struct {
    dict: c.Dictionary,
    opts: c.Parse_Options,
    allocator: std.mem.Allocator,

    /// Null-link count used for the first (strict) parse attempt. `init`
    /// installs it and `parse` restores it after a relaxed retry.
    const strict_null_count = 0;

    /// Creates the dictionary from the `data/4.0.*` files and configures the
    /// parse options.
    ///
    /// Errors:
    /// - `error.DictionaryCreationFailed` when `dictionary_create` returns null.
    /// - `error.ParseOptionsCreationFailed` when `parse_options_create` returns
    ///   null; the dictionary created just before is released first.
    ///
    /// Pair with `deinit`.
    pub fn init(allocator: std.mem.Allocator) !Parser {
        // The casts drop `const` because the C prototype is called with
        // mutable `char *` arguments here; the literals are not written to
        // by this code.
        const dict = c.dictionary_create(
            @ptrCast(@constCast("data/4.0.dict")),
            @ptrCast(@constCast("data/4.0.knowledge")),
            @ptrCast(@constCast("data/4.0.constituent-knowledge")),
            @ptrCast(@constCast("data/4.0.affix")),
        );
        if (dict == null) return error.DictionaryCreationFailed;
        // Do not leak the dictionary if anything below fails.
        errdefer _ = c.dictionary_delete(dict);

        const opts = c.parse_options_create();
        if (opts == null) return error.ParseOptionsCreationFailed;

        c.parse_options_set_verbosity(opts, 0);
        c.parse_options_set_linkage_limit(opts, 100);
        c.parse_options_set_disjunct_cost(opts, 2);
        c.parse_options_set_min_null_count(opts, strict_null_count);
        c.parse_options_set_max_null_count(opts, strict_null_count);

        return Parser{
            .dict = dict,
            .opts = opts,
            .allocator = allocator,
        };
    }

    /// Releases the parse options and then the dictionary.
    /// The parser must not be used afterwards.
    pub fn deinit(self: *Parser) void {
        _ = c.parse_options_delete(self.opts);
        _ = c.dictionary_delete(self.dict);
        self.* = undefined;
    }

    /// Parses `input` and returns an owned copy of its first linkage.
    ///
    /// A strict parse (no null links) is tried first. If it yields zero
    /// linkages, the parse is retried with between one and `sentence_length`
    /// null links allowed. The strict settings are restored before returning,
    /// so one relaxed parse does not affect later calls.
    ///
    /// Errors:
    /// - `error.SentenceCreationFailed`, `error.LinkageCreationFailed` when the
    ///   corresponding C constructor returns null.
    /// - `error.NoLinkagesFound` when no linkage is available after the retry,
    ///   or the C parser reports a non-positive count.
    /// - `error.NullCString` when the C library hands back a null string.
    /// - `error.OutOfMemory`; everything allocated so far is freed.
    ///
    /// Caller owns the result and must call `ParseTree.deinit`.
    pub fn parse(self: *Parser, input: []const u8) !ParseTree {
        const allocator = self.allocator;

        // The C API needs a NUL-terminated string.
        const c_input = try allocator.dupeZ(u8, input);
        defer allocator.free(c_input);

        const sent = c.sentence_create(c_input.ptr, self.dict);
        if (sent == null) return error.SentenceCreationFailed;
        defer c.sentence_delete(sent);

        // Set once the options are loosened, so the defer below puts them back
        // on every exit path (including errors).
        var relaxed = false;
        defer {
            if (relaxed) {
                c.parse_options_set_min_null_count(self.opts, strict_null_count);
                c.parse_options_set_max_null_count(self.opts, strict_null_count);
            }
        }

        var num_linkages = c.sentence_parse(sent, self.opts);

        // If no linkages found, try with null links allowed.
        if (num_linkages == 0) {
            relaxed = true;
            c.parse_options_set_min_null_count(self.opts, 1);
            c.parse_options_set_max_null_count(self.opts, @intCast(c.sentence_length(sent)));
            num_linkages = c.sentence_parse(sent, self.opts);
        }

        // `<=` also rejects a negative (error) count instead of indexing
        // linkage 0 of a failed parse.
        if (num_linkages <= 0) return error.NoLinkagesFound;

        const linkage = c.linkage_create(0, sent, self.opts);
        if (linkage == null) return error.LinkageCreationFailed;
        defer c.linkage_delete(linkage);

        const word_count: usize = @intCast(c.linkage_get_num_words(linkage));
        const link_count: usize = @intCast(c.linkage_get_num_links(linkage));

        // Copy the words. `words_filled` tracks how many slots hold owned
        // strings so a mid-loop failure frees exactly those.
        const words = try allocator.alloc([]const u8, word_count);
        var words_filled: usize = 0;
        errdefer {
            for (words[0..words_filled]) |word| allocator.free(word);
            allocator.free(words);
        }
        for (words, 0..) |*slot, i| {
            slot.* = try dupeCString(allocator, c.linkage_get_word(linkage, @intCast(i)));
            words_filled += 1;
        }

        // Copy the links with the same partial-cleanup bookkeeping.
        const links = try allocator.alloc(Link, link_count);
        var links_filled: usize = 0;
        errdefer {
            for (links[0..links_filled]) |link| {
                allocator.free(link.left_word);
                allocator.free(link.right_word);
                allocator.free(link.label);
            }
            allocator.free(links);
        }
        for (links, 0..) |*slot, i| {
            const left = c.linkage_get_link_lword(linkage, @intCast(i));
            const right = c.linkage_get_link_rword(linkage, @intCast(i));

            // These errdefers cover only the current, not yet stored, link.
            const left_word = try dupeCString(allocator, c.linkage_get_word(linkage, left));
            errdefer allocator.free(left_word);
            const right_word = try dupeCString(allocator, c.linkage_get_word(linkage, right));
            errdefer allocator.free(right_word);
            const label = try dupeCString(allocator, c.linkage_get_link_label(linkage, @intCast(i)));

            slot.* = .{
                .left_word = left_word,
                .right_word = right_word,
                .label = label,
                .left_index = @intCast(left),
                .right_index = @intCast(right),
            };
            links_filled += 1;
        }

        return ParseTree{
            .words = words,
            .links = links,
            .allocator = allocator,
        };
    }
};

test "basic C API functionality" {
    const opts = c.parse_options_create();
    // Check before registering the delete so a null handle is never passed on.
    try std.testing.expect(opts != null);
    defer _ = c.parse_options_delete(opts);
}

test "parser functionality" {
    // `var`, not `const`: `deinit` and `parse` both take `*Parser`.
    var parser = try Parser.init(std.testing.allocator);
    defer parser.deinit();

    var tree = try parser.parse("The cat sat on the mat");
    defer tree.deinit();

    try std.testing.expect(tree.words.len > 0);
    for (tree.links) |link| {
        try std.testing.expect(link.left_index < tree.words.len);
        try std.testing.expect(link.right_index < tree.words.len);
    }
}