use arena in parse method - no perf change
This commit is contained in:
parent
b37fb7fb1a
commit
415aa30f75
2 changed files with 64 additions and 55 deletions
|
|
@ -85,10 +85,7 @@ pub fn main() !void {
|
||||||
|
|
||||||
var reader = std.Io.Reader.fixed(data.items);
|
var reader = std.Io.Reader.fixed(data.items);
|
||||||
const records = try srf.parse(&reader, srf_allocator, .{});
|
const records = try srf.parse(&reader, srf_allocator, .{});
|
||||||
defer {
|
defer records.deinit();
|
||||||
for (records.items) |r| r.deinit(srf_allocator);
|
|
||||||
srf_allocator.free(records.items);
|
|
||||||
}
|
|
||||||
} else if (std.mem.eql(u8, format, "jsonl")) {
|
} else if (std.mem.eql(u8, format, "jsonl")) {
|
||||||
var lines = std.mem.splitScalar(u8, data.items, '\n');
|
var lines = std.mem.splitScalar(u8, data.items, '\n');
|
||||||
while (lines.next()) |line| {
|
while (lines.next()) |line| {
|
||||||
|
|
|
||||||
114
src/srf.zig
114
src/srf.zig
|
|
@ -16,6 +16,7 @@ pub const ParseLineError = struct {
|
||||||
pub const Diagnostics = struct {
|
pub const Diagnostics = struct {
|
||||||
errors: *std.ArrayList(ParseLineError),
|
errors: *std.ArrayList(ParseLineError),
|
||||||
stop_after: usize = 10,
|
stop_after: usize = 10,
|
||||||
|
arena: std.heap.ArenaAllocator,
|
||||||
|
|
||||||
pub fn addError(self: Diagnostics, allocator: std.mem.Allocator, err: ParseLineError) ParseError!void {
|
pub fn addError(self: Diagnostics, allocator: std.mem.Allocator, err: ParseLineError) ParseError!void {
|
||||||
if (self.errors.items.len >= self.stop_after) {
|
if (self.errors.items.len >= self.stop_after) {
|
||||||
|
|
@ -24,9 +25,14 @@ pub const Diagnostics = struct {
|
||||||
}
|
}
|
||||||
try self.errors.append(allocator, err);
|
try self.errors.append(allocator, err);
|
||||||
}
|
}
|
||||||
pub fn deinit(self: Diagnostics, allocator: std.mem.Allocator) void {
|
pub fn deinit(self: RecordList) void {
|
||||||
for (self.errors) |e| e.deinit(allocator);
|
// From parse, three things can happen:
|
||||||
self.errors.deinit(allocator);
|
// 1. Happy path - record comes back, deallocation happens on that deinit
|
||||||
|
// 2. Errors are returned, no diagnostics provided. Deallocation happens in parse on errdefer
|
||||||
|
// 3. Errors are returned, diagnostics provided. Deallocation happens here
|
||||||
|
const child_allocator = self.arena.child_allocator;
|
||||||
|
self.arena.deinit();
|
||||||
|
child_allocator.destroy(self.arena);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -260,12 +266,13 @@ pub const Record = struct {
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const RecordList = struct {
|
pub const RecordList = struct {
|
||||||
items: []Record,
|
list: std.ArrayList(Record),
|
||||||
|
arena: *std.heap.ArenaAllocator,
|
||||||
|
|
||||||
pub fn deinit(self: RecordList, allocator: std.mem.Allocator) void {
|
pub fn deinit(self: RecordList) void {
|
||||||
for (self.items) |r|
|
const child_allocator = self.arena.child_allocator;
|
||||||
r.deinit(allocator);
|
self.arena.deinit();
|
||||||
allocator.free(self.items);
|
child_allocator.destroy(self.arena);
|
||||||
}
|
}
|
||||||
pub fn format(self: RecordList, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
pub fn format(self: RecordList, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
||||||
_ = self;
|
_ = self;
|
||||||
|
|
@ -312,41 +319,46 @@ pub const ParseState = struct {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordList {
|
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordList {
|
||||||
|
// create an arena allocator for everything related to parsing
|
||||||
|
const arena: *std.heap.ArenaAllocator = try allocator.create(std.heap.ArenaAllocator);
|
||||||
|
errdefer if (options.diagnostics == null) allocator.destroy(arena);
|
||||||
|
arena.* = .init(allocator);
|
||||||
|
errdefer if (options.diagnostics == null) arena.deinit();
|
||||||
|
const aa = arena.allocator();
|
||||||
var long_format = false; // Default to compact format
|
var long_format = false; // Default to compact format
|
||||||
var require_eof = false; // Default to no eof required
|
var require_eof = false; // Default to no eof required
|
||||||
var eof_found: bool = false;
|
var eof_found: bool = false;
|
||||||
var state = ParseState{ .line = 0, .column = 0, .partial_line_column = 0, .reader = reader };
|
var state = ParseState{ .line = 0, .column = 0, .partial_line_column = 0, .reader = reader };
|
||||||
const first_line = nextLine(reader, &state) orelse return ParseError.ParseFailed;
|
const first_line = nextLine(reader, &state) orelse return ParseError.ParseFailed;
|
||||||
|
|
||||||
if (try Directive.parse(allocator, first_line, state, options)) |d| {
|
if (try Directive.parse(aa, first_line, state, options)) |d| {
|
||||||
if (d != .magic) try parseError(allocator, options, "Magic header not found on first line", state);
|
if (d != .magic) try parseError(aa, options, "Magic header not found on first line", state);
|
||||||
} else try parseError(allocator, options, "Magic header not found on first line", state);
|
} else try parseError(aa, options, "Magic header not found on first line", state);
|
||||||
|
|
||||||
// Loop through the header material and configure our main parsing
|
// Loop through the header material and configure our main parsing
|
||||||
var record_list: std.ArrayList(Record) = .empty;
|
var parsed: RecordList = .{
|
||||||
errdefer {
|
.list = .empty,
|
||||||
for (record_list.items) |i| i.deinit(allocator);
|
.arena = arena,
|
||||||
record_list.deinit(allocator);
|
};
|
||||||
}
|
|
||||||
const first_data = blk: {
|
const first_data = blk: {
|
||||||
while (nextLine(reader, &state)) |line| {
|
while (nextLine(reader, &state)) |line| {
|
||||||
if (try Directive.parse(allocator, line, state, options)) |d| {
|
if (try Directive.parse(aa, line, state, options)) |d| {
|
||||||
switch (d) {
|
switch (d) {
|
||||||
.magic => try parseError(allocator, options, "Found a duplicate magic header", state),
|
.magic => try parseError(aa, options, "Found a duplicate magic header", state),
|
||||||
.long_format => long_format = true,
|
.long_format => long_format = true,
|
||||||
.compact_format => long_format = false, // what if we have both?
|
.compact_format => long_format = false, // what if we have both?
|
||||||
.require_eof => require_eof = true,
|
.require_eof => require_eof = true,
|
||||||
.eof => {
|
.eof => {
|
||||||
// there needs to be an eof then
|
// there needs to be an eof then
|
||||||
if (nextLine(reader, &state)) |_| {
|
if (nextLine(reader, &state)) |_| {
|
||||||
try parseError(allocator, options, "Data found after #!eof", state);
|
try parseError(aa, options, "Data found after #!eof", state);
|
||||||
return ParseError.ParseFailed; // this is terminal
|
return ParseError.ParseFailed; // this is terminal
|
||||||
} else return .{ .items = try record_list.toOwnedSlice(allocator) };
|
} else return parsed;
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else break :blk line;
|
} else break :blk line;
|
||||||
}
|
}
|
||||||
return .{ .items = try record_list.toOwnedSlice(allocator) };
|
return parsed;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Main parsing. We already have the first line of data, which could
|
// Main parsing. We already have the first line of data, which could
|
||||||
|
|
@ -354,8 +366,8 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
var line: ?[]const u8 = first_data;
|
var line: ?[]const u8 = first_data;
|
||||||
var items: std.ArrayList(Item) = .empty;
|
var items: std.ArrayList(Item) = .empty;
|
||||||
errdefer {
|
errdefer {
|
||||||
for (items.items) |i| i.deinit(allocator);
|
for (items.items) |i| i.deinit(aa);
|
||||||
items.deinit(allocator);
|
items.deinit(aa);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Because in long format we don't have newline delimiter, that should really be a noop
|
// Because in long format we don't have newline delimiter, that should really be a noop
|
||||||
|
|
@ -372,19 +384,19 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
line = nextLine(reader, &state);
|
line = nextLine(reader, &state);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (try Directive.parse(allocator, l, state, options)) |d| {
|
if (try Directive.parse(aa, l, state, options)) |d| {
|
||||||
switch (d) {
|
switch (d) {
|
||||||
.eof => {
|
.eof => {
|
||||||
// there needs to be an eof then
|
// there needs to be an eof then
|
||||||
if (nextLine(reader, &state)) |_| {
|
if (nextLine(reader, &state)) |_| {
|
||||||
try parseError(allocator, options, "Data found after #!eof", state);
|
try parseError(aa, options, "Data found after #!eof", state);
|
||||||
return ParseError.ParseFailed; // this is terminal
|
return ParseError.ParseFailed; // this is terminal
|
||||||
} else {
|
} else {
|
||||||
eof_found = true;
|
eof_found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
else => try parseError(allocator, options, "Directive found after data started", state),
|
else => try parseError(aa, options, "Directive found after data started", state),
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -398,7 +410,7 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
state.column += key.len + 1;
|
state.column += key.len + 1;
|
||||||
state.partial_line_column += key.len + 1;
|
state.partial_line_column += key.len + 1;
|
||||||
const value = try ItemValue.parse(
|
const value = try ItemValue.parse(
|
||||||
allocator,
|
aa,
|
||||||
it.rest(),
|
it.rest(),
|
||||||
&state,
|
&state,
|
||||||
delimiter,
|
delimiter,
|
||||||
|
|
@ -407,7 +419,7 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
|
|
||||||
if (!value.error_parsing) {
|
if (!value.error_parsing) {
|
||||||
// std.debug.print("alloc on key: {s}, val: {?f}\n", .{ key, value.item_value });
|
// std.debug.print("alloc on key: {s}, val: {?f}\n", .{ key, value.item_value });
|
||||||
try items.append(allocator, .{ .key = try allocator.dupe(u8, key), .value = value.item_value });
|
try items.append(aa, .{ .key = try aa.dupe(u8, key), .value = value.item_value });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (value.reader_advanced and !long_format) {
|
if (value.reader_advanced and !long_format) {
|
||||||
|
|
@ -426,16 +438,16 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
const maybe_line = nextLine(reader, &state);
|
const maybe_line = nextLine(reader, &state);
|
||||||
if (maybe_line == null) {
|
if (maybe_line == null) {
|
||||||
// close out record, return
|
// close out record, return
|
||||||
try record_list.append(allocator, .{
|
try parsed.list.append(aa, .{
|
||||||
.items = try items.toOwnedSlice(allocator),
|
.items = try items.toOwnedSlice(aa),
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
line = maybe_line.?;
|
line = maybe_line.?;
|
||||||
if (line.?.len == 0) {
|
if (line.?.len == 0) {
|
||||||
// End of record
|
// End of record
|
||||||
try record_list.append(allocator, .{
|
try parsed.list.append(aa, .{
|
||||||
.items = try items.toOwnedSlice(allocator),
|
.items = try items.toOwnedSlice(aa),
|
||||||
});
|
});
|
||||||
line = nextLine(reader, &state);
|
line = nextLine(reader, &state);
|
||||||
}
|
}
|
||||||
|
|
@ -445,8 +457,8 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
state.partial_line_column = 0;
|
state.partial_line_column = 0;
|
||||||
if (line.?.len == 0) {
|
if (line.?.len == 0) {
|
||||||
// close out record
|
// close out record
|
||||||
try record_list.append(allocator, .{
|
try parsed.list.append(aa, .{
|
||||||
.items = try items.toOwnedSlice(allocator),
|
.items = try items.toOwnedSlice(aa),
|
||||||
});
|
});
|
||||||
line = nextLine(reader, &state);
|
line = nextLine(reader, &state);
|
||||||
state.partial_line_column = 0;
|
state.partial_line_column = 0;
|
||||||
|
|
@ -461,13 +473,13 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
}
|
}
|
||||||
// Parsing complete. Add final record to list. Then, if there are any parse errors, throw
|
// Parsing complete. Add final record to list. Then, if there are any parse errors, throw
|
||||||
if (items.items.len > 0)
|
if (items.items.len > 0)
|
||||||
try record_list.append(allocator, .{
|
try parsed.list.append(aa, .{
|
||||||
.items = try items.toOwnedSlice(allocator),
|
.items = try items.toOwnedSlice(aa),
|
||||||
});
|
});
|
||||||
if (options.diagnostics) |d|
|
if (options.diagnostics) |d|
|
||||||
if (d.errors.items.len > 0) return ParseError.ParseFailed;
|
if (d.errors.items.len > 0) return ParseError.ParseFailed;
|
||||||
if (require_eof and !eof_found) return ParseError.ParseFailed;
|
if (require_eof and !eof_found) return ParseError.ParseFailed;
|
||||||
return .{ .items = try record_list.toOwnedSlice(allocator) };
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Takes the next line, trimming leading whitespace and ignoring comments
|
/// Takes the next line, trimming leading whitespace and ignoring comments
|
||||||
|
|
@ -513,10 +525,10 @@ test "long format single record, no eof" {
|
||||||
const allocator = std.testing.allocator;
|
const allocator = std.testing.allocator;
|
||||||
var reader = std.Io.Reader.fixed(data);
|
var reader = std.Io.Reader.fixed(data);
|
||||||
const records = try parse(&reader, allocator, .{});
|
const records = try parse(&reader, allocator, .{});
|
||||||
defer records.deinit(allocator);
|
defer records.deinit();
|
||||||
try std.testing.expectEqual(@as(usize, 1), records.items.len);
|
try std.testing.expectEqual(@as(usize, 1), records.list.items.len);
|
||||||
try std.testing.expectEqual(@as(usize, 1), records.items[0].items.len);
|
try std.testing.expectEqual(@as(usize, 1), records.list.items[0].items.len);
|
||||||
const kvps = records.items[0].items;
|
const kvps = records.list.items[0].items;
|
||||||
try std.testing.expectEqualStrings("key", kvps[0].key);
|
try std.testing.expectEqualStrings("key", kvps[0].key);
|
||||||
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", kvps[0].value.?.string);
|
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", kvps[0].value.?.string);
|
||||||
}
|
}
|
||||||
|
|
@ -535,8 +547,8 @@ test "long format from README - generic data structures, first record only" {
|
||||||
const allocator = std.testing.allocator;
|
const allocator = std.testing.allocator;
|
||||||
var reader = std.Io.Reader.fixed(data);
|
var reader = std.Io.Reader.fixed(data);
|
||||||
const records = try parse(&reader, allocator, .{});
|
const records = try parse(&reader, allocator, .{});
|
||||||
defer records.deinit(allocator);
|
defer records.deinit();
|
||||||
try std.testing.expectEqual(@as(usize, 1), records.items.len);
|
try std.testing.expectEqual(@as(usize, 1), records.list.items.len);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "long format from README - generic data structures" {
|
test "long format from README - generic data structures" {
|
||||||
|
|
@ -567,9 +579,9 @@ test "long format from README - generic data structures" {
|
||||||
const allocator = std.testing.allocator;
|
const allocator = std.testing.allocator;
|
||||||
var reader = std.Io.Reader.fixed(data);
|
var reader = std.Io.Reader.fixed(data);
|
||||||
const records = try parse(&reader, allocator, .{});
|
const records = try parse(&reader, allocator, .{});
|
||||||
defer records.deinit(allocator);
|
defer records.deinit();
|
||||||
try std.testing.expectEqual(@as(usize, 2), records.items.len);
|
try std.testing.expectEqual(@as(usize, 2), records.list.items.len);
|
||||||
const first = records.items[0];
|
const first = records.list.items[0];
|
||||||
try std.testing.expectEqual(@as(usize, 6), first.items.len);
|
try std.testing.expectEqual(@as(usize, 6), first.items.len);
|
||||||
try std.testing.expectEqualStrings("key", first.items[0].key);
|
try std.testing.expectEqualStrings("key", first.items[0].key);
|
||||||
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", first.items[0].value.?.string);
|
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", first.items[0].value.?.string);
|
||||||
|
|
@ -584,7 +596,7 @@ test "long format from README - generic data structures" {
|
||||||
try std.testing.expectEqualStrings("boolean value", first.items[5].key);
|
try std.testing.expectEqualStrings("boolean value", first.items[5].key);
|
||||||
try std.testing.expect(!first.items[5].value.?.boolean);
|
try std.testing.expect(!first.items[5].value.?.boolean);
|
||||||
|
|
||||||
const second = records.items[1];
|
const second = records.list.items[1];
|
||||||
try std.testing.expectEqual(@as(usize, 5), second.items.len);
|
try std.testing.expectEqual(@as(usize, 5), second.items.len);
|
||||||
try std.testing.expectEqualStrings("key", second.items[0].key);
|
try std.testing.expectEqualStrings("key", second.items[0].key);
|
||||||
try std.testing.expectEqualStrings("this is the second record", second.items[0].value.?.string);
|
try std.testing.expectEqualStrings("this is the second record", second.items[0].value.?.string);
|
||||||
|
|
@ -610,9 +622,9 @@ test "compact format from README - generic data structures" {
|
||||||
var reader = std.Io.Reader.fixed(data);
|
var reader = std.Io.Reader.fixed(data);
|
||||||
// We want "parse" and "parseLeaky" probably. Second parameter is a diagnostics
|
// We want "parse" and "parseLeaky" probably. Second parameter is a diagnostics
|
||||||
const records = try parse(&reader, allocator, .{});
|
const records = try parse(&reader, allocator, .{});
|
||||||
defer records.deinit(allocator);
|
defer records.deinit();
|
||||||
try std.testing.expectEqual(@as(usize, 2), records.items.len);
|
try std.testing.expectEqual(@as(usize, 2), records.list.items.len);
|
||||||
const first = records.items[0];
|
const first = records.list.items[0];
|
||||||
try std.testing.expectEqual(@as(usize, 6), first.items.len);
|
try std.testing.expectEqual(@as(usize, 6), first.items.len);
|
||||||
try std.testing.expectEqualStrings("key", first.items[0].key);
|
try std.testing.expectEqualStrings("key", first.items[0].key);
|
||||||
try std.testing.expectEqualStrings("string value must have a length between colons or end with a comma", first.items[0].value.?.string);
|
try std.testing.expectEqualStrings("string value must have a length between colons or end with a comma", first.items[0].value.?.string);
|
||||||
|
|
@ -627,7 +639,7 @@ test "compact format from README - generic data structures" {
|
||||||
try std.testing.expectEqualStrings("boolean value", first.items[5].key);
|
try std.testing.expectEqualStrings("boolean value", first.items[5].key);
|
||||||
try std.testing.expect(!first.items[5].value.?.boolean);
|
try std.testing.expect(!first.items[5].value.?.boolean);
|
||||||
|
|
||||||
const second = records.items[1];
|
const second = records.list.items[1];
|
||||||
try std.testing.expectEqual(@as(usize, 1), second.items.len);
|
try std.testing.expectEqual(@as(usize, 1), second.items.len);
|
||||||
try std.testing.expectEqualStrings("key", second.items[0].key);
|
try std.testing.expectEqualStrings("key", second.items[0].key);
|
||||||
try std.testing.expectEqualStrings("this is the second record", second.items[0].value.?.string);
|
try std.testing.expectEqualStrings("this is the second record", second.items[0].value.?.string);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue