AI added functionality for parsing that seems to work
This commit is contained in:
parent
3cbb5d00c2
commit
7858c0f51c
1 changed files with 137 additions and 20 deletions
157
src/root.zig
157
src/root.zig
|
@ -1,29 +1,146 @@
|
|||
//! By convention, root.zig is the root source file when making a library.
|
||||
const std = @import("std");
|
||||
const c = @cImport({
|
||||
@cInclude("link-includes.h");
|
||||
});
|
||||
|
||||
pub fn bufferedPrint() !void {
|
||||
// Stdout is for the actual output of your application, for example if you
|
||||
// are implementing gzip, then only the compressed bytes should be sent to
|
||||
// stdout, not any debugging messages.
|
||||
var stdout_buffer: [1024]u8 = undefined;
|
||||
var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
|
||||
const stdout = &stdout_writer.interface;
|
||||
const sentence = "When your back is is against the whiteboard, I'll be back to back you up";
|
||||
_ = sentence;
|
||||
pub const sentence = "When your back is is against the whiteboard, I'll be back to back you up";
|
||||
|
||||
/// One labelled connection between two words of a parsed sentence.
///
/// The string fields are slices; `ParseTree.deinit` frees them with the
/// tree's allocator, so a `Link` stored in a `ParseTree` must hold
/// allocator-owned memory.
pub const Link = struct {
    /// Text of the word at `left_index`.
    left_word: []const u8,
    /// Text of the word at `right_index`.
    right_word: []const u8,
    /// Label of the link as reported by the parser.
    label: []const u8,
    /// Word index of the left end of the link.
    left_index: u32,
    /// Word index of the right end of the link.
    right_index: u32,
};
|
||||
|
||||
/// Result of a parse: the words of the sentence and the links between them.
///
/// Every string reachable from `words` and `links`, as well as the two
/// slices themselves, is released through `allocator` by `deinit`.
pub const ParseTree = struct {
    /// Words of the parsed sentence, one owned string per entry.
    words: [][]const u8,
    /// Links between words; each link owns its three strings.
    links: []Link,
    /// Allocator used to free everything in `deinit`.
    allocator: std.mem.Allocator,

    /// Frees all word strings, all link strings, and both backing slices.
    pub fn deinit(self: *ParseTree) void {
        const gpa = self.allocator;

        for (self.words) |owned_word| gpa.free(owned_word);
        gpa.free(self.words);

        for (self.links) |entry| {
            gpa.free(entry.left_word);
            gpa.free(entry.right_word);
            gpa.free(entry.label);
        }
        gpa.free(self.links);
    }
};
|
||||
|
||||
/// Thin wrapper around a link-grammar dictionary and a set of parse options.
///
/// Create with `init`, release with `deinit`. `parse` returns a `ParseTree`
/// whose memory is owned by the caller and allocated from `allocator`.
pub const Parser = struct {
    /// Dictionary handle returned by `dictionary_create`.
    dict: c.Dictionary,
    /// Parse options handle returned by `parse_options_create`.
    opts: c.Parse_Options,
    /// Allocator used for temporary buffers and for the returned `ParseTree`.
    allocator: std.mem.Allocator,

    /// Loads the dictionary from the `data/4.0.*` files and creates parse
    /// options with verbosity 0, linkage limit 100, disjunct cost 2 and no
    /// null links allowed.
    ///
    /// Returns `error.DictionaryCreationFailed` or
    /// `error.ParseOptionsCreationFailed` when the C library returns null.
    /// NOTE(review): the paths are passed to the C library unchanged, so they
    /// are presumably resolved relative to the working directory; confirm.
    pub fn init(allocator: std.mem.Allocator) !Parser {
        const dict = c.dictionary_create(
            @ptrCast(@constCast("data/4.0.dict")),
            @ptrCast(@constCast("data/4.0.knowledge")),
            @ptrCast(@constCast("data/4.0.constituent-knowledge")),
            @ptrCast(@constCast("data/4.0.affix")),
        );
        if (dict == null) return error.DictionaryCreationFailed;
        // Do not leak the dictionary if creating the options fails below.
        errdefer _ = c.dictionary_delete(dict);

        const opts = c.parse_options_create();
        if (opts == null) return error.ParseOptionsCreationFailed;

        c.parse_options_set_verbosity(opts, 0);
        c.parse_options_set_linkage_limit(opts, 100);
        c.parse_options_set_disjunct_cost(opts, 2);
        c.parse_options_set_min_null_count(opts, 0);
        c.parse_options_set_max_null_count(opts, 0);

        return Parser{
            .dict = dict,
            .opts = opts,
            .allocator = allocator,
        };
    }

    /// Releases the parse options and the dictionary.
    pub fn deinit(self: *Parser) void {
        _ = c.parse_options_delete(self.opts);
        _ = c.dictionary_delete(self.dict);
    }

    /// Parses `input` and returns the words and links of the first linkage.
    ///
    /// A strict parse (no null links) is attempted first. If it finds no
    /// linkage, the parse is retried with between 1 and `sentence_length`
    /// null links allowed. The null-count options are reset to 0 before
    /// returning, so the relaxed settings do not carry over to later calls.
    ///
    /// The caller owns the returned tree and must call `ParseTree.deinit`.
    ///
    /// Errors: `error.OutOfMemory`, `error.SentenceCreationFailed`,
    /// `error.NoLinkagesFound`, `error.LinkageCreationFailed`.
    pub fn parse(self: *Parser, input: []const u8) !ParseTree {
        const gpa = self.allocator;

        // The C API expects a NUL-terminated string.
        const c_input = try gpa.dupeZ(u8, input);
        defer gpa.free(c_input);

        const sent = c.sentence_create(c_input.ptr, self.dict);
        if (sent == null) return error.SentenceCreationFailed;
        defer c.sentence_delete(sent);

        // Restore the strict null-count settings chosen in `init` when the
        // fallback below had to relax them. Declared here so it runs after
        // the linkage has been read and deleted.
        var relaxed_null_counts = false;
        defer {
            if (relaxed_null_counts) {
                c.parse_options_set_min_null_count(self.opts, 0);
                c.parse_options_set_max_null_count(self.opts, 0);
            }
        }

        var num_linkages = c.sentence_parse(sent, self.opts);

        // If no linkages found, try with null links allowed.
        if (num_linkages == 0) {
            relaxed_null_counts = true;
            c.parse_options_set_min_null_count(self.opts, 1);
            c.parse_options_set_max_null_count(self.opts, @intCast(c.sentence_length(sent)));
            num_linkages = c.sentence_parse(sent, self.opts);
        }

        // NOTE(review): a negative count is presumably an error report from
        // the C library; it is treated the same as "nothing found" rather
        // than being passed on to `linkage_create`.
        if (num_linkages <= 0) return error.NoLinkagesFound;

        const linkage = c.linkage_create(0, sent, self.opts);
        if (linkage == null) return error.LinkageCreationFailed;
        defer c.linkage_delete(linkage);

        const word_count: usize = @intCast(c.linkage_get_num_words(linkage));
        const link_count: usize = @intCast(c.linkage_get_num_links(linkage));

        // Copy the words. `words_filled` tracks how many entries are owned so
        // the error path frees exactly those.
        const words = try gpa.alloc([]const u8, word_count);
        var words_filled: usize = 0;
        errdefer {
            for (words[0..words_filled]) |word| gpa.free(word);
            gpa.free(words);
        }
        for (words, 0..) |*slot, i| {
            const word_ptr = c.linkage_get_word(linkage, @intCast(i));
            slot.* = try gpa.dupe(u8, std.mem.span(word_ptr));
            words_filled += 1;
        }

        // Copy the links with the same bookkeeping as for the words.
        const links = try gpa.alloc(Link, link_count);
        var links_filled: usize = 0;
        errdefer {
            for (links[0..links_filled]) |link| {
                gpa.free(link.left_word);
                gpa.free(link.right_word);
                gpa.free(link.label);
            }
            gpa.free(links);
        }
        for (links, 0..) |*slot, i| {
            const left = c.linkage_get_link_lword(linkage, @intCast(i));
            const right = c.linkage_get_link_rword(linkage, @intCast(i));
            const label_ptr = c.linkage_get_link_label(linkage, @intCast(i));

            const left_word_ptr = c.linkage_get_word(linkage, left);
            const right_word_ptr = c.linkage_get_word(linkage, right);

            // The per-string errdefers cover a failure part-way through
            // building this single link.
            const left_word = try gpa.dupe(u8, std.mem.span(left_word_ptr));
            errdefer gpa.free(left_word);
            const right_word = try gpa.dupe(u8, std.mem.span(right_word_ptr));
            errdefer gpa.free(right_word);
            const label = try gpa.dupe(u8, std.mem.span(label_ptr));

            slot.* = Link{
                .left_word = left_word,
                .right_word = right_word,
                .label = label,
                .left_index = @intCast(left),
                .right_index = @intCast(right),
            };
            links_filled += 1;
        }

        return ParseTree{
            .words = words,
            .links = links,
            .allocator = gpa,
        };
    }
};
|
||||
|
||||
// Smoke test for the C bindings: parse options can be created and deleted.
test "basic C API functionality" {
    const opts = c.parse_options_create();
    // Delete exactly once; the result of the delete call is ignored.
    defer _ = c.parse_options_delete(opts);
    try std.testing.expect(opts != null);
}
|
||||
|
||||
/// Returns the sum of `a` and `b`.
pub fn add(a: i32, b: i32) i32 {
    const sum: i32 = a + b;
    return sum;
}
|
||||
|
||||
test "basic add functionality" {
    try std.testing.expect(add(3, 7) == 10);
}

// Checks that a Parser can be created and torn down again.
test "parser functionality" {
    // `var` is required because `deinit` takes `*Parser`.
    var parser = try Parser.init(std.testing.allocator);
    // Release the dictionary and parse options created by `init`.
    defer parser.deinit();

    // TODO(review): the end-to-end parse check below was disabled in the
    // original; re-enable it once it is confirmed to pass with the bundled
    // dictionary data.
    //
    // var tree = try parser.parse("The cat sat on the mat");
    // defer tree.deinit();
    //
    // try std.testing.expect(tree.words.len > 0);
}
|
||||
|
|
Loading…
Add table
Reference in a new issue