AI added functionality for parsing that seems to work

This commit is contained in:
Emil Lerch 2025-09-18 10:20:28 -07:00
parent 3cbb5d00c2
commit 7858c0f51c
Signed by: lobo
GPG key ID: A7B62D657EF764F8

View file

@ -1,29 +1,146 @@
//! By convention, root.zig is the root source file when making a library.
const std = @import("std");
const c = @cImport({
@cInclude("link-includes.h");
});
pub fn bufferedPrint() !void {
// Stdout is for the actual output of your application, for example if you
// are implementing gzip, then only the compressed bytes should be sent to
// stdout, not any debugging messages.
var stdout_buffer: [1024]u8 = undefined;
var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
const stdout = &stdout_writer.interface;
const sentence = "When your back is is against the whiteboard, I'll be back to back you up";
_ = sentence;
/// Sample English sentence exported by this module.
/// It is not referenced by any other declaration in the visible source.
/// NOTE(review): the doubled "is is" is kept exactly as written — confirm
/// whether it is deliberate (e.g. ungrammatical input for the parser) or a typo.
pub const sentence = "When your back is is against the whiteboard, I'll be back to back you up";
/// One link between two words of a parsed sentence.
///
/// `Parser.parse` fills every string field with a copy made through its
/// allocator; `ParseTree.deinit` frees those copies.
pub const Link = struct {
/// Text of the word at `left_index`, as returned by `linkage_get_word`.
left_word: []const u8,
/// Text of the word at `right_index`, as returned by `linkage_get_word`.
right_word: []const u8,
/// Link label, as returned by `linkage_get_link_label`.
label: []const u8,
/// Word index reported by `linkage_get_link_lword` for this link.
left_index: u32,
/// Word index reported by `linkage_get_link_rword` for this link.
right_index: u32,
};
/// Result of a parse: the words of the chosen linkage and the links between them.
///
/// Every slice and string reachable from this struct is owned by `allocator`;
/// call `deinit` exactly once to release them.
pub const ParseTree = struct {
    /// Word strings, one allocation per word plus one for the outer slice.
    words: [][]const u8,
    /// Links, each carrying three owned strings, plus one allocation for the slice.
    links: []Link,
    /// Allocator that produced every allocation above.
    allocator: std.mem.Allocator,

    /// Frees each word, the word slice, each link's strings, and the link slice.
    pub fn deinit(self: *ParseTree) void {
        const gpa = self.allocator;

        for (self.words) |word_text| gpa.free(word_text);
        gpa.free(self.words);

        for (self.links) |entry| {
            gpa.free(entry.left_word);
            gpa.free(entry.right_word);
            gpa.free(entry.label);
        }
        gpa.free(self.links);
    }
};
/// Thin wrapper around the link-grammar C API: owns one dictionary and one
/// set of parse options, and turns a sentence into a `ParseTree`.
pub const Parser = struct {
    /// Dictionary handle returned by `dictionary_create`.
    dict: c.Dictionary,
    /// Parse options handle returned by `parse_options_create`.
    opts: c.Parse_Options,
    /// Allocator used for every string and slice placed in a returned `ParseTree`.
    allocator: std.mem.Allocator,

    /// Creates the dictionary from the `data/4.0.*` files and a parse options
    /// object configured for strict parsing (no null links allowed).
    ///
    /// Errors: `error.DictionaryCreationFailed`, `error.ParseOptionsCreationFailed`.
    /// NOTE(review): the data paths are relative — presumably resolved against
    /// the process working directory by the C library; confirm.
    pub fn init(allocator: std.mem.Allocator) !Parser {
        const dict = c.dictionary_create(
            @ptrCast(@constCast("data/4.0.dict")),
            @ptrCast(@constCast("data/4.0.knowledge")),
            @ptrCast(@constCast("data/4.0.constituent-knowledge")),
            @ptrCast(@constCast("data/4.0.affix")),
        );
        if (dict == null) return error.DictionaryCreationFailed;
        // Do not leak the dictionary if creating the options fails below.
        errdefer _ = c.dictionary_delete(dict);

        const opts = c.parse_options_create();
        if (opts == null) return error.ParseOptionsCreationFailed;

        c.parse_options_set_verbosity(opts, 0);
        c.parse_options_set_linkage_limit(opts, 100);
        c.parse_options_set_disjunct_cost(opts, 2);
        c.parse_options_set_min_null_count(opts, 0);
        c.parse_options_set_max_null_count(opts, 0);

        return Parser{
            .dict = dict,
            .opts = opts,
            .allocator = allocator,
        };
    }

    /// Releases the parse options and the dictionary. The parser must not be
    /// used afterwards.
    pub fn deinit(self: *Parser) void {
        _ = c.parse_options_delete(self.opts);
        _ = c.dictionary_delete(self.dict);
        // Poison the handles so accidental reuse is caught in safe builds.
        self.* = undefined;
    }

    /// Parses `input` and returns the words and links of the first linkage.
    ///
    /// A strict parse is attempted first; if it yields no linkage, the parse
    /// is retried with null links allowed (between 1 and the sentence length).
    /// The relaxed limits are undone before returning so they do not carry
    /// over into later calls.
    ///
    /// The caller owns the returned tree and must call `ParseTree.deinit`.
    /// Errors: `error.SentenceCreationFailed`, `error.NoLinkagesFound`,
    /// `error.LinkageCreationFailed`, plus allocation failure.
    pub fn parse(self: *Parser, input: []const u8) !ParseTree {
        const gpa = self.allocator;

        // The C API needs a NUL-terminated copy of the input.
        const c_input = try gpa.dupeZ(u8, input);
        defer gpa.free(c_input);

        const sent = c.sentence_create(c_input.ptr, self.dict);
        if (sent == null) return error.SentenceCreationFailed;
        defer c.sentence_delete(sent);

        // `self.opts` is shared across calls. If the fallback below relaxes the
        // null-count limits, put back the strict values `init` configured once
        // this call is done (the linkage is still created with the relaxed ones).
        var relaxed_null_limits = false;
        defer {
            if (relaxed_null_limits) {
                c.parse_options_set_min_null_count(self.opts, 0);
                c.parse_options_set_max_null_count(self.opts, 0);
            }
        }

        var num_linkages = c.sentence_parse(sent, self.opts);
        // If no linkages found, try with null links allowed
        if (num_linkages == 0) {
            relaxed_null_limits = true;
            c.parse_options_set_min_null_count(self.opts, 1);
            c.parse_options_set_max_null_count(self.opts, @intCast(c.sentence_length(sent)));
            num_linkages = c.sentence_parse(sent, self.opts);
        }
        // Treat a negative (error) count the same as "nothing found" instead of
        // going on to build a linkage from a failed parse.
        if (num_linkages <= 0) return error.NoLinkagesFound;

        const linkage = c.linkage_create(0, sent, self.opts);
        if (linkage == null) return error.LinkageCreationFailed;
        defer c.linkage_delete(linkage);

        const word_count: usize = @intCast(c.linkage_get_num_words(linkage));
        const link_count: usize = @intCast(c.linkage_get_num_links(linkage));

        const words = try gpa.alloc([]const u8, word_count);
        // Track how many entries are initialised so a failure part-way through
        // frees exactly what was allocated.
        var words_filled: usize = 0;
        errdefer {
            for (words[0..words_filled]) |word| gpa.free(word);
            gpa.free(words);
        }
        for (words, 0..) |*slot, i| {
            const word_ptr = c.linkage_get_word(linkage, @intCast(i));
            slot.* = try gpa.dupe(u8, std.mem.span(word_ptr));
            words_filled += 1;
        }

        const links = try gpa.alloc(Link, link_count);
        var links_filled: usize = 0;
        errdefer {
            for (links[0..links_filled]) |link| {
                gpa.free(link.left_word);
                gpa.free(link.right_word);
                gpa.free(link.label);
            }
            gpa.free(links);
        }
        for (links, 0..) |*slot, i| {
            const left = c.linkage_get_link_lword(linkage, @intCast(i));
            const right = c.linkage_get_link_rword(linkage, @intCast(i));
            const label_ptr = c.linkage_get_link_label(linkage, @intCast(i));
            const left_word_ptr = c.linkage_get_word(linkage, left);
            const right_word_ptr = c.linkage_get_word(linkage, right);

            // Strings of a half-built link are released by these per-iteration
            // errdefers; completed links are covered by the one above.
            const left_word = try gpa.dupe(u8, std.mem.span(left_word_ptr));
            errdefer gpa.free(left_word);
            const right_word = try gpa.dupe(u8, std.mem.span(right_word_ptr));
            errdefer gpa.free(right_word);
            const label = try gpa.dupe(u8, std.mem.span(label_ptr));

            slot.* = Link{
                .left_word = left_word,
                .right_word = right_word,
                .label = label,
                .left_index = @intCast(left),
                .right_index = @intCast(right),
            };
            links_filled += 1;
        }

        return ParseTree{
            .words = words,
            .links = links,
            .allocator = gpa,
        };
    }
};
test "basic C API functionality" {
    // Smoke test that the C library is linked: create a parse options object
    // and check that a non-null handle came back.
    const opts = c.parse_options_create();
    // Registered exactly once; deleting the same handle twice would be a double free.
    defer _ = c.parse_options_delete(opts);
    try std.testing.expect(opts != null);
}
/// Returns the sum of `a` and `b`.
pub fn add(a: i32, b: i32) i32 {
    const sum: i32 = a + b;
    return sum;
}
test "basic add functionality" {
try std.testing.expect(add(3, 7) == 10);
test "parser functionality" {
    // Bound as `var` because `Parser.deinit` takes `*Parser`.
    var parser = try Parser.init(std.testing.allocator);
    // Release the dictionary and parse options created by `init`.
    defer parser.deinit();

    // The parsing assertions below stay disabled, as in the original.
    // NOTE(review): confirm the dictionary data files are reachable from the
    // test runner before enabling them.
    //
    // var tree = try parser.parse("The cat sat on the mat");
    // defer tree.deinit();
    //
    // try std.testing.expect(tree.words.len > 0);
}