Compare commits
No commits in common. "master" and "0.15.2" have entirely different histories.
5 changed files with 68 additions and 400 deletions
|
|
@ -1,5 +1,5 @@
|
|||
[tools]
|
||||
prek = "0.3.1"
|
||||
"ubi:DonIsaac/zlint" = "0.7.9"
|
||||
zig = "0.16.0"
|
||||
zig = "0.15.2"
|
||||
zls = "0.15.1"
|
||||
|
|
|
|||
41
build.zig
41
build.zig
|
|
@ -250,7 +250,6 @@ const BenchmarkStep = struct {
|
|||
const b = step.owner;
|
||||
const self: *BenchmarkStep = @fieldParentPtr("step", step);
|
||||
|
||||
const io = b.graph.io;
|
||||
const gen_path = b.getInstallPath(.bin, self.gen_exe.name);
|
||||
const exe_path = b.getInstallPath(.bin, self.srf_exe.name);
|
||||
const count_str = b.fmt("{d}", .{self.record_count});
|
||||
|
|
@ -272,32 +271,29 @@ const BenchmarkStep = struct {
|
|||
|
||||
const hash_str = b.fmt("{x}", .{hash});
|
||||
const cache_dir = b.cache_root.join(b.allocator, &.{ "o", hash_str }) catch @panic("OOM");
|
||||
b.cache_root.handle.createDirPath(io, cache_dir) catch @panic("Could not create cache path");
|
||||
std.fs.cwd().makePath(cache_dir) catch {};
|
||||
|
||||
const filename = b.fmt("test-{s}.{s}", .{ fmt.name, fmt.ext });
|
||||
const filepath = b.pathJoin(&.{ cache_dir, filename });
|
||||
test_files[i] = filepath;
|
||||
|
||||
// Check if file exists
|
||||
if (b.cache_root.handle.access(io, filepath, .{})) {
|
||||
if (std.fs.cwd().access(filepath, .{})) {
|
||||
continue; // File exists, skip generation
|
||||
} else |_| {}
|
||||
|
||||
// Generate file
|
||||
var child = try std.process.spawn(io, .{
|
||||
.argv = &.{ gen_path, fmt.name, count_str },
|
||||
.stdout = .pipe,
|
||||
});
|
||||
var child = std.process.Child.init(&.{ gen_path, fmt.name, count_str }, b.allocator);
|
||||
child.stdout_behavior = .Pipe;
|
||||
try child.spawn();
|
||||
|
||||
var buf: [4096]u8 = undefined;
|
||||
var file_reader = child.stdout.?.reader(io, &buf);
|
||||
var reader = &file_reader.interface;
|
||||
const output = try reader.allocRemaining(b.allocator, .unlimited);
|
||||
const output = try child.stdout.?.readToEndAlloc(b.allocator, 100 * 1024 * 1024);
|
||||
defer b.allocator.free(output);
|
||||
const term = try child.wait(io);
|
||||
if (term != .exited or term.exited != 0) return error.GenerationFailed;
|
||||
|
||||
try b.cache_root.handle.writeFile(io, .{ .sub_path = filepath, .data = output });
|
||||
const term = try child.wait();
|
||||
if (term != .Exited or term.Exited != 0) return error.GenerationFailed;
|
||||
|
||||
try std.fs.cwd().writeFile(.{ .sub_path = filepath, .data = output });
|
||||
}
|
||||
|
||||
// Run hyperfine
|
||||
|
|
@ -312,19 +308,16 @@ const BenchmarkStep = struct {
|
|||
try argv.append(b.allocator, b.fmt("{s} jsonl <{s}", .{ exe_path, test_files[2] }));
|
||||
}
|
||||
|
||||
var child = std.process.Child.init(argv.items, b.allocator);
|
||||
|
||||
// We need to lock stderr so hyperfine can output progress in place
|
||||
// SAFETY: buffer for locking
|
||||
var buf: [1024]u8 = undefined; // I have no idea what the right size buffer should be
|
||||
_ = try io.lockStderr(&buf, null);
|
||||
defer io.unlockStderr();
|
||||
std.debug.lockStdErr();
|
||||
defer std.debug.unlockStdErr();
|
||||
|
||||
var child = try std.process.spawn(io, .{
|
||||
.argv = argv.items,
|
||||
});
|
||||
try child.spawn();
|
||||
const term = try child.wait();
|
||||
|
||||
const term = try child.wait(io);
|
||||
|
||||
if (term != .exited or term.exited != 0)
|
||||
if (term != .Exited or term.Exited != 0)
|
||||
return error.BenchmarkFailed;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
.fingerprint = 0x102ed002eff998a9, // Changing this has security and trust implications.
|
||||
// Tracks the earliest Zig version that the package considers to be a
|
||||
// supported use case.
|
||||
.minimum_zig_version = "0.16.0",
|
||||
.minimum_zig_version = "0.15.2",
|
||||
// This field is optional.
|
||||
// Each dependency must either provide a `url` and `hash`, or a `path`.
|
||||
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
|
||||
|
|
|
|||
23
src/main.zig
23
src/main.zig
|
|
@ -46,10 +46,13 @@ const CountingAllocator = struct {
|
|||
}
|
||||
};
|
||||
|
||||
pub fn main(init: std.process.Init) !void {
|
||||
const gpa = init.gpa;
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
const base_allocator = gpa.allocator();
|
||||
|
||||
const args = try init.minimal.args.toSlice(init.arena.allocator());
|
||||
const args = try std.process.argsAlloc(base_allocator);
|
||||
defer std.process.argsFree(base_allocator, args);
|
||||
|
||||
if (args.len < 2) {
|
||||
std.debug.print("Usage: {s} <srf|json|jsonl>\n", .{args[0]});
|
||||
|
|
@ -58,23 +61,23 @@ pub fn main(init: std.process.Init) !void {
|
|||
|
||||
const format = args[1];
|
||||
|
||||
const debug_allocs = init.environ_map.contains("DEBUG_ALLOCATIONS");
|
||||
const debug_allocs = std.process.hasEnvVarConstant("DEBUG_ALLOCATIONS");
|
||||
|
||||
var counting = CountingAllocator{ .child_allocator = gpa };
|
||||
const allocator = if (debug_allocs) counting.allocator() else gpa;
|
||||
var counting = CountingAllocator{ .child_allocator = base_allocator };
|
||||
const allocator = if (debug_allocs) counting.allocator() else base_allocator;
|
||||
|
||||
var stdin_buffer: [1024]u8 = undefined;
|
||||
var stdin_reader = std.Io.File.stdin().reader(init.io, &stdin_buffer);
|
||||
var stdin_reader = std.fs.File.stdin().reader(&stdin_buffer);
|
||||
const stdin = &stdin_reader.interface;
|
||||
|
||||
// Load all data into memory first for fair comparison
|
||||
var data: std.ArrayList(u8) = .empty;
|
||||
defer data.deinit(gpa);
|
||||
try stdin.appendRemaining(gpa, &data, @enumFromInt(100 * 1024 * 1024));
|
||||
defer data.deinit(base_allocator);
|
||||
try stdin.appendRemaining(base_allocator, &data, @enumFromInt(100 * 1024 * 1024));
|
||||
|
||||
if (std.mem.eql(u8, format, "srf")) {
|
||||
var reader = std.Io.Reader.fixed(data.items);
|
||||
const records = try srf.parse(&reader, allocator, .{ .parse_allocator = .none });
|
||||
const records = try srf.parse(&reader, allocator, .{ .alloc_strings = false });
|
||||
defer records.deinit();
|
||||
} else if (std.mem.eql(u8, format, "jsonl")) {
|
||||
var lines = std.mem.splitScalar(u8, data.items, '\n');
|
||||
|
|
|
|||
400
src/srf.zig
400
src/srf.zig
|
|
@ -95,7 +95,6 @@ pub const ParseError = error{
|
|||
ReadFailed,
|
||||
StreamTooLong,
|
||||
OutOfMemory,
|
||||
AllocationRequired,
|
||||
EndOfStream,
|
||||
};
|
||||
|
||||
|
|
@ -141,7 +140,7 @@ pub const Value = union(enum) {
|
|||
/// as well as multi-line strings. Metadata is returned to assist in tracking
|
||||
///
|
||||
/// This function is intended to be used by the SRF parser
|
||||
pub fn parse(str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
|
||||
pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
|
||||
const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':');
|
||||
if (type_val_sep_raw == null) {
|
||||
try parseError("no type data or value after key", state);
|
||||
|
|
@ -160,11 +159,7 @@ pub const Value = union(enum) {
|
|||
state.column += total_chars;
|
||||
state.partial_line_column += total_chars;
|
||||
return .{
|
||||
.item_value = .{ .string = try dupe(
|
||||
state.*,
|
||||
val,
|
||||
.value,
|
||||
) },
|
||||
.item_value = .{ .string = try dupe(allocator, state.options, val) },
|
||||
};
|
||||
}
|
||||
if (std.mem.eql(u8, "binary", trimmed_meta)) {
|
||||
|
|
@ -184,13 +179,11 @@ pub const Value = union(enum) {
|
|||
.error_parsing = true,
|
||||
};
|
||||
};
|
||||
const alloc = findAllocator(state.*, .value) orelse
|
||||
try fallbackAllocatorFor(state);
|
||||
const data = try alloc.alloc(u8, size);
|
||||
errdefer alloc.free(data);
|
||||
const data = try allocator.alloc(u8, size);
|
||||
errdefer allocator.free(data);
|
||||
Decoder.decode(data, val) catch {
|
||||
try parseError("error parsing base64 value", state);
|
||||
alloc.free(data);
|
||||
allocator.free(data);
|
||||
return .{
|
||||
.item_value = null,
|
||||
.error_parsing = true,
|
||||
|
|
@ -278,18 +271,12 @@ pub const Value = union(enum) {
|
|||
// We fit on this line, everything is "normal"
|
||||
const val = rest_of_data[0..size];
|
||||
return .{
|
||||
.item_value = .{ .string = try dupe(
|
||||
state.*,
|
||||
val,
|
||||
.value,
|
||||
) },
|
||||
.item_value = .{ .string = val },
|
||||
};
|
||||
}
|
||||
// This is not enough, we need more data from the reader
|
||||
const alloc = findAllocator(state.*, .value) orelse
|
||||
try fallbackAllocatorFor(state);
|
||||
const buf = try alloc.alloc(u8, size);
|
||||
errdefer alloc.free(buf);
|
||||
const buf = try allocator.alloc(u8, size);
|
||||
errdefer allocator.free(buf);
|
||||
@memcpy(buf[0..rest_of_data.len], rest_of_data);
|
||||
// add back the newline we are skipping
|
||||
buf[rest_of_data.len] = '\n';
|
||||
|
|
@ -309,13 +296,6 @@ pub const Value = union(enum) {
|
|||
.reader_advanced = true,
|
||||
};
|
||||
}
|
||||
inline fn fallbackAllocatorFor(state: *RecordIterator.State) !std.mem.Allocator {
|
||||
if (state.fallback_arena) |f| return f.allocator();
|
||||
if (state.options.parse_allocator == .none) return error.AllocationRequired;
|
||||
state.fallback_arena = try state.allocator.create(std.heap.ArenaAllocator);
|
||||
state.fallback_arena.?.* = .init(state.allocator);
|
||||
return state.fallback_arena.?.allocator();
|
||||
}
|
||||
};
|
||||
|
||||
/// A single key-value pair within a record. The key is always a string.
|
||||
|
|
@ -432,7 +412,6 @@ pub const Record = struct {
|
|||
fields_allocated: [fields_len]bool = .{false} ** fields_len,
|
||||
allocator: std.mem.Allocator,
|
||||
source_value: T,
|
||||
format_options: FormatOptions,
|
||||
cached_record: ?Record = null,
|
||||
|
||||
const Self = @This();
|
||||
|
|
@ -440,13 +419,12 @@ pub const Record = struct {
|
|||
|
||||
pub const SourceType = T;
|
||||
|
||||
pub fn init(allocator: std.mem.Allocator, source: T, options: FormatOptions) Self {
|
||||
pub fn init(allocator: std.mem.Allocator, source: T) Self {
|
||||
return .{
|
||||
// SAFETY: fields_buf is set by record() and is guarded by fields_set
|
||||
.fields_buf = undefined,
|
||||
.allocator = allocator,
|
||||
.source_value = source,
|
||||
.format_options = options,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -464,7 +442,7 @@ pub const Record = struct {
|
|||
) !usize {
|
||||
if (default_value_ptr) |d| {
|
||||
const default_val: *const field_type = @ptrCast(@alignCast(d));
|
||||
if (!self.format_options.emit_default_values and std.meta.eql(val, default_val.*)) return inx;
|
||||
if (std.meta.eql(val, default_val.*)) return inx;
|
||||
}
|
||||
const value = try self.formatField(field_type, field_name, val);
|
||||
self.fields_buf[inx] = .{
|
||||
|
|
@ -534,7 +512,7 @@ pub const Record = struct {
|
|||
const key = if (@hasDecl(U, "srf_tag_field"))
|
||||
U.srf_tag_field
|
||||
else
|
||||
"type";
|
||||
"active_tag";
|
||||
self.fields_buf[inx] = .{
|
||||
.key = key,
|
||||
.value = .{ .string = active_tag_name },
|
||||
|
|
@ -584,13 +562,7 @@ pub const Record = struct {
|
|||
///
|
||||
/// Call `deinit()` to free any allocations made for custom-formatted fields.
|
||||
pub fn from(comptime T: type, allocator: std.mem.Allocator, val: T) !OwnedRecord(T) {
|
||||
return OwnedRecord(T).init(allocator, val, .{});
|
||||
}
|
||||
|
||||
/// Internal function to allow an OwnedRecord to see format options necessary
|
||||
/// to emit default values
|
||||
fn fromWithOptions(comptime T: type, allocator: std.mem.Allocator, val: T, options: FormatOptions) !OwnedRecord(T) {
|
||||
return OwnedRecord(T).init(allocator, val, options);
|
||||
return OwnedRecord(T).init(allocator, val);
|
||||
}
|
||||
|
||||
/// Coerce a `Record` to a Zig struct or tagged union. For each field in `T`,
|
||||
|
|
@ -601,7 +573,7 @@ pub const Record = struct {
|
|||
/// first value silently ignored.
|
||||
///
|
||||
/// For tagged unions, the active variant is determined by a field named
|
||||
/// `"type"` (or the value of `T.srf_tag_field` if declared). The
|
||||
/// `"active_tag"` (or the value of `T.srf_tag_field` if declared). The
|
||||
/// remaining fields are coerced into the payload struct of that variant.
|
||||
///
|
||||
/// For streaming data without collecting fields first, prefer
|
||||
|
|
@ -635,7 +607,7 @@ pub const Record = struct {
|
|||
const active_tag_name = if (@hasDecl(T, "srf_tag_field"))
|
||||
T.srf_tag_field
|
||||
else
|
||||
"type";
|
||||
"active_tag";
|
||||
if (self.firstFieldByName(active_tag_name)) |srf_field| {
|
||||
if (srf_field.value == null or srf_field.value.? != .string)
|
||||
return error.ActiveTagValueMustBeAString;
|
||||
|
|
@ -719,10 +691,6 @@ pub const RecordIterator = struct {
|
|||
end_of_record_reached: bool = false,
|
||||
field_iterator: ?FieldIterator = null,
|
||||
|
||||
aa: std.mem.Allocator,
|
||||
allocator: std.mem.Allocator,
|
||||
fallback_arena: ?*std.heap.ArenaAllocator = null,
|
||||
|
||||
/// Takes the next line, trimming leading whitespace and ignoring comments
|
||||
/// Directives (comments starting with #!) are preserved
|
||||
pub fn nextLine(state: *State) ?[]const u8 {
|
||||
|
|
@ -822,6 +790,7 @@ pub const RecordIterator = struct {
|
|||
/// subsequent calls continue to return `null`.
|
||||
pub fn next(self: FieldIterator) !?Field {
|
||||
const state = self.state;
|
||||
const aa = self.arena.allocator();
|
||||
// Main parsing. We already have the first line of data, which could
|
||||
// be a record (compact format) or a key/value pair (long format)
|
||||
|
||||
|
|
@ -860,6 +829,7 @@ pub const RecordIterator = struct {
|
|||
state.column += key.len + 1;
|
||||
state.partial_line_column += key.len + 1;
|
||||
const value = try Value.parse(
|
||||
aa,
|
||||
it.rest(),
|
||||
state,
|
||||
state.field_delimiter,
|
||||
|
|
@ -867,10 +837,7 @@ pub const RecordIterator = struct {
|
|||
|
||||
var field: ?Field = null;
|
||||
if (!value.error_parsing) {
|
||||
field = .{
|
||||
.key = try dupe(state.*, key, .key),
|
||||
.value = value.item_value,
|
||||
};
|
||||
field = .{ .key = try dupe(aa, state.options, key), .value = value.item_value };
|
||||
}
|
||||
|
||||
if (value.reader_advanced and state.field_delimiter == ',') {
|
||||
|
|
@ -933,7 +900,7 @@ pub const RecordIterator = struct {
|
|||
///
|
||||
/// For tagged unions, the active tag field must appear first in the
|
||||
/// stream (unlike `Record.to` which can do random access). The tag
|
||||
/// field name defaults to `"type"` or `T.srf_tag_field` if
|
||||
/// field name defaults to `"active_tag"` or `T.srf_tag_field` if
|
||||
/// declared.
|
||||
pub fn to(self: FieldIterator, comptime T: type) !T {
|
||||
const ti = @typeInfo(T);
|
||||
|
|
@ -990,7 +957,7 @@ pub const RecordIterator = struct {
|
|||
const active_tag_name = if (@hasDecl(T, "srf_tag_field"))
|
||||
T.srf_tag_field
|
||||
else
|
||||
"type";
|
||||
"active_tag";
|
||||
const first_try = try self.next();
|
||||
if (first_try == null) return error.ActiveTagFieldNotFound;
|
||||
const f = first_try.?;
|
||||
|
|
@ -1051,9 +1018,9 @@ pub const RecordIterator = struct {
|
|||
/// use or refresh cached data. Note that data will be returned by parse/
|
||||
/// iterator regardless of freshness. This enables callers to use cached
|
||||
/// data temporarily while refreshing it
|
||||
pub fn isFresh(self: RecordIterator, io: std.Io) bool {
|
||||
pub fn isFresh(self: RecordIterator) bool {
|
||||
if (self.expires) |exp|
|
||||
return std.Io.Timestamp.now(io, .real).toSeconds() < exp;
|
||||
return std.time.timestamp() < exp;
|
||||
|
||||
// no expiry: always fresh, never frozen
|
||||
return true;
|
||||
|
|
@ -1071,7 +1038,7 @@ pub const RecordIterator = struct {
|
|||
defer ri.deinit();
|
||||
|
||||
// No expiry set, so always fresh
|
||||
try std.testing.expect(ri.isFresh(std.testing.io));
|
||||
try std.testing.expect(ri.isFresh());
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -1082,99 +1049,10 @@ pub const ParseOptions = struct {
|
|||
diagnostics: ?*Diagnostics = null,
|
||||
|
||||
/// By default, the parser will copy data so it is safe to free the original
|
||||
/// buffer or use with streaming readers. This will impose about 8% overhead,
|
||||
/// and ties the lifetime of any strings to the deinit() method. For
|
||||
/// fixed buffer parsing, consider using .none, which will not allocate
|
||||
/// strings. More complex use cases can use their own allocator for control
|
||||
/// over string lifetime
|
||||
parse_allocator: ParseAllocator = .parse_arena,
|
||||
};
|
||||
|
||||
/// Allocator to use for parsing data
|
||||
pub const ParseAllocator = union(enum) {
|
||||
/// Absolutely no allocation allowed. This will fail with AllocationRequired under the following circumstances:
|
||||
///
|
||||
/// * binary data is encountered (needs decoding)
|
||||
/// * multi-line string literals are encountered (allocation needed to accommodate streaming readers)
|
||||
none,
|
||||
/// No allocator. Lifetime of any data parsed is tied to the underlying
|
||||
/// data passed to the reader. This is most appropriate when the caller
|
||||
/// uses a fixed buffer, and is the equivalent of the "Leaky" parsing
|
||||
/// functions of std.json. IMPORTANT: This will NOT avoid all allocations.
|
||||
/// Specifically binary data is base64 encoded per the spec and we need
|
||||
/// to allocate space for the decode. Also, multi-line data can not be
|
||||
/// assumed to be available post-reader advance, and therefore allocation
|
||||
/// is performed in that case.
|
||||
///
|
||||
/// For ABSOLUTELY NO ALLOCATION, use none. Otherwise, the Parsed
|
||||
/// struct has a deinit function that frees everything, and toOwnedFallback
|
||||
/// which will deinit the arena for parsing and return the fallback arena
|
||||
/// that can be released at a later time
|
||||
none_with_fallback,
|
||||
/// Use the arena allocator created by the parser to copy any strings.
|
||||
/// This ties the lifetime of any data parsed to the parser deinit()
|
||||
/// function. Imposes about 8% overhead compared to "none".
|
||||
parse_arena,
|
||||
/// Parser will use the caller-supplied allocator, providing the most
|
||||
/// flexibility over lifetime. Overhead will be contingent on the allocator
|
||||
/// used. If the allocator is an arena allocator, assume 8% overhead over
|
||||
/// "none". It is likely a fixed buffer allocator would be somewhat less.
|
||||
custom: CustomParseAllocator,
|
||||
};
|
||||
|
||||
/// Allocator to use for a specific scope (either keys or values). Different
|
||||
/// from ParseAllocator because the custom variant here has to be a std.mem.Allocator
|
||||
pub const ScopeAllocator = union(enum) {
|
||||
/// Absolutely no allocation allowed. This will fail with AllocationRequired under the following circumstances:
|
||||
///
|
||||
/// * binary data is encountered (needs decoding)
|
||||
/// * multi-line string literals are encountered (allocation needed to accommodate streaming readers)
|
||||
none,
|
||||
/// No allocator. Lifetime of any data parsed is tied to the underlying
|
||||
/// data passed to the reader. This is most appropriate when the caller
|
||||
/// uses a fixed buffer, and is the equivalent of the "Leaky" parsing
|
||||
/// functions of std.json. IMPORTANT: This will NOT avoid all allocations.
|
||||
/// Specifically binary data is base64 encoded per the spec and we need
|
||||
/// to allocate space for the decode. Also, multi-line data can not be
|
||||
/// assumed to be available post-reader advance, and therefore allocation
|
||||
/// is performed in that case.
|
||||
///
|
||||
/// For ABSOLUTELY NO ALLOCATION, use none. Otherwise, the Parsed
|
||||
/// struct has a deinit function that frees everything, and toOwnedFallback
|
||||
/// which will deinit the arena for parsing and return the fallback arena
|
||||
/// that can be released at a later time
|
||||
none_with_fallback,
|
||||
/// Use the arena allocator created by the parser to copy any strings.
|
||||
/// This ties the lifetime of any data parsed to the parser deinit()
|
||||
/// function. Imposes about 8% overhead compared to "none".
|
||||
parse_arena,
|
||||
/// Parser will use the caller-supplied allocator, providing the most
|
||||
/// flexibility over lifetime. Overhead will be contingent on the allocator
|
||||
/// used. If the allocator is an arena allocator, assume 8% overhead over
|
||||
/// "none". It is likely a fixed buffer allocator would be somewhat less.
|
||||
allocator: std.mem.Allocator,
|
||||
};
|
||||
pub const CustomParseAllocator = struct {
|
||||
key_allocator: ScopeAllocator,
|
||||
value_allocator: ScopeAllocator,
|
||||
|
||||
/// Initializes a custom parse allocator suitable for use in common workflows
|
||||
/// where you iterate each record, then iterate through fields with full control
|
||||
pub fn initIterator(allocator: std.mem.Allocator) CustomParseAllocator {
|
||||
return .{
|
||||
.key_allocator = .{ .allocator = allocator },
|
||||
.value_allocator = .{ .allocator = allocator },
|
||||
};
|
||||
}
|
||||
|
||||
/// Initializes a custom parse allocator suitable for use in common workflows
|
||||
/// where you iterate each record and call RecordIterator.to() on the result
|
||||
pub fn initTo(allocator: std.mem.Allocator) CustomParseAllocator {
|
||||
return .{
|
||||
.key_allocator = .{ .none = {} },
|
||||
.value_allocator = .{ .allocator = allocator },
|
||||
};
|
||||
}
|
||||
/// This will impose about 8% overhead, but be safer. If you do not require
|
||||
/// this safety, set alloc_strings to false. Setting this to false is the
|
||||
/// equivalent of the "Leaky" parsing functions of std.json
|
||||
alloc_strings: bool = true,
|
||||
};
|
||||
|
||||
const Directive = union(enum) {
|
||||
|
|
@ -1240,9 +1118,6 @@ pub const FormatOptions = struct {
|
|||
/// and just format the record. This is useful for appending to an existing
|
||||
/// srf file rather than overwriting all the data
|
||||
emit_directives: bool = true,
|
||||
|
||||
/// When set to true, this will output all values, even if they are the default values
|
||||
emit_default_values: bool = false,
|
||||
};
|
||||
|
||||
/// Returns a `Formatter` for writing pre-built `Record` values to a writer.
|
||||
|
|
@ -1275,12 +1150,7 @@ pub fn FromFormatter(comptime T: type) type {
|
|||
for (self.value) |item| {
|
||||
if (!first and self.options.long_format) try writer.writeByte('\n');
|
||||
first = false;
|
||||
var owned_record = Record.fromWithOptions(
|
||||
T,
|
||||
self.allocator,
|
||||
item,
|
||||
self.options,
|
||||
) catch
|
||||
var owned_record = Record.from(T, self.allocator, item) catch
|
||||
return std.Io.Writer.Error.WriteFailed;
|
||||
defer owned_record.deinit();
|
||||
const record = owned_record.record() catch return std.Io.Writer.Error.WriteFailed;
|
||||
|
|
@ -1356,9 +1226,8 @@ pub const RecordFormatter = struct {
|
|||
switch (f.value.?) {
|
||||
.string => |s| {
|
||||
const newlines = std.mem.containsAtLeastScalar(u8, s, 1, '\n');
|
||||
const commas = !self.options.long_format and std.mem.containsAtLeastScalar(u8, s, 1, ',');
|
||||
// Output the count if newlines exist
|
||||
const count = if (newlines or commas) s.len else null;
|
||||
const count = if (newlines) s.len else null;
|
||||
if (count) |c| try writer.print("{d}", .{c});
|
||||
try writer.writeByte(':');
|
||||
try writer.writeAll(s);
|
||||
|
|
@ -1384,7 +1253,6 @@ pub const RecordFormatter = struct {
|
|||
pub const Parsed = struct {
|
||||
records: []Record,
|
||||
arena: *std.heap.ArenaAllocator,
|
||||
fallback_arena: ?*std.heap.ArenaAllocator,
|
||||
|
||||
/// optional expiry time for the data. Useful for caching
|
||||
/// Note that on a parse, data will always be returned and it will be up
|
||||
|
|
@ -1403,26 +1271,10 @@ pub const Parsed = struct {
|
|||
/// record and field data. After calling `deinit`, any slices or string
|
||||
/// pointers obtained from `records` are invalid.
|
||||
pub fn deinit(self: Parsed) void {
|
||||
self.toOwnedFallback().deinit();
|
||||
}
|
||||
|
||||
pub fn toOwnedFallback(self: Parsed) FallbackArena {
|
||||
const ca = self.arena.child_allocator;
|
||||
self.arena.deinit();
|
||||
ca.destroy(self.arena);
|
||||
return .{ .fallback_arena = self.fallback_arena };
|
||||
}
|
||||
|
||||
pub const FallbackArena = struct {
|
||||
fallback_arena: ?*std.heap.ArenaAllocator,
|
||||
|
||||
pub fn deinit(self: FallbackArena) void {
|
||||
if (self.fallback_arena) |f| {
|
||||
f.deinit();
|
||||
f.child_allocator.destroy(f);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
/// Parses all records from the reader into memory, returning a `Parsed` struct
|
||||
|
|
@ -1460,7 +1312,6 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
.expires = it.expires,
|
||||
.created = it.created,
|
||||
.modified = it.modified,
|
||||
.fallback_arena = it.state.fallback_arena,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -1493,13 +1344,12 @@ pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: P
|
|||
errdefer allocator.destroy(arena);
|
||||
arena.* = .init(allocator);
|
||||
errdefer arena.deinit();
|
||||
const state = try arena.allocator().create(RecordIterator.State);
|
||||
const aa = arena.allocator();
|
||||
const state = try aa.create(RecordIterator.State);
|
||||
state.* = .{
|
||||
.reader = reader,
|
||||
.current_line = null,
|
||||
.options = options,
|
||||
.aa = arena.allocator(),
|
||||
.allocator = allocator,
|
||||
};
|
||||
var it: RecordIterator = .{
|
||||
.arena = arena,
|
||||
|
|
@ -1540,32 +1390,12 @@ pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: P
|
|||
};
|
||||
return it; // with current_line
|
||||
}
|
||||
const DataScope = enum {
|
||||
key,
|
||||
value,
|
||||
};
|
||||
inline fn dupe(state: RecordIterator.State, data: []const u8, scope: DataScope) ParseError![]const u8 {
|
||||
if (findAllocator(state, scope)) |a|
|
||||
return try a.dupe(u8, data);
|
||||
|
||||
inline fn dupe(allocator: std.mem.Allocator, options: ParseOptions, data: []const u8) ParseError![]const u8 {
|
||||
if (options.alloc_strings)
|
||||
return try allocator.dupe(u8, data);
|
||||
return data;
|
||||
}
|
||||
inline fn findAllocator(state: RecordIterator.State, scope: DataScope) ?std.mem.Allocator {
|
||||
switch (state.options.parse_allocator) {
|
||||
.none, .none_with_fallback => return null,
|
||||
.parse_arena => return state.aa,
|
||||
.custom => |a| {
|
||||
const alloc = switch (scope) {
|
||||
.key => a.key_allocator,
|
||||
.value => a.value_allocator,
|
||||
};
|
||||
switch (alloc) {
|
||||
.none, .none_with_fallback => return null,
|
||||
.parse_arena => return state.aa,
|
||||
.allocator => |c| return c,
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
/// Logs a parse error to diagnostics. Note that the allocator provided should
|
||||
/// *NOT* be an arena, as the message must outlive the parse results, which will
|
||||
/// be otherwise cleaned up in the arena deinit
|
||||
|
|
@ -1926,48 +1756,6 @@ test "serialize/deserialize" {
|
|||
;
|
||||
try std.testing.expectEqualStrings(expect, compact_from);
|
||||
}
|
||||
test "serialize/deserialize allows overflow lifetime semantics" {
|
||||
const Data = struct {
|
||||
foo: []const u8,
|
||||
bar: u8,
|
||||
qux: ?TestRecType = .foo,
|
||||
b: bool = false,
|
||||
f: f32 = 4.2,
|
||||
custom: ?TestCustomType = null,
|
||||
};
|
||||
|
||||
const compact =
|
||||
\\#!srfv1
|
||||
\\foo:binary:YmFy,bar:num:42
|
||||
\\foo:binary:YmFy,bar:num:42
|
||||
\\foo:binary:YmFy,bar:num:42,qux::bar
|
||||
\\foo:binary:YmFy,bar:num:42,qux::bar,b:bool:true,f:num:6.9,custom:string:hi
|
||||
\\
|
||||
;
|
||||
// Round trip and make sure we get equivalent objects back
|
||||
var compact_reader = std.Io.Reader.fixed(compact);
|
||||
const parsed = try parse(
|
||||
&compact_reader,
|
||||
std.testing.allocator,
|
||||
.{ .parse_allocator = .none_with_fallback },
|
||||
);
|
||||
try std.testing.expect(parsed.fallback_arena != null);
|
||||
|
||||
const rec1 = try parsed.records[0].to(Data);
|
||||
const fallback = parsed.toOwnedFallback();
|
||||
defer fallback.deinit();
|
||||
// This would not be possible otherwise
|
||||
try std.testing.expectEqualStrings("bar", rec1.foo);
|
||||
try std.testing.expectEqual(@as(u8, 42), rec1.bar);
|
||||
try std.testing.expectEqual(@as(TestRecType, .foo), rec1.qux);
|
||||
|
||||
var another_reader = std.Io.Reader.fixed(compact);
|
||||
try std.testing.expectError(error.AllocationRequired, parse(
|
||||
&another_reader,
|
||||
std.testing.allocator,
|
||||
.{ .parse_allocator = .none },
|
||||
));
|
||||
}
|
||||
test "conversion from string true/false to proper type" {
|
||||
const Data = struct {
|
||||
foo: []const u8,
|
||||
|
|
@ -2054,8 +1842,8 @@ test "unions" {
|
|||
);
|
||||
const expect =
|
||||
\\#!srfv1
|
||||
\\type::foo,number:num:42,true_or_false:bool:true
|
||||
\\type::bar,sentence::foobar,decimal:num:6.9
|
||||
\\active_tag::foo,number:num:42,true_or_false:bool:true
|
||||
\\active_tag::bar,sentence::foobar,decimal:num:6.9
|
||||
\\
|
||||
;
|
||||
try std.testing.expectEqualStrings(expect, compact_from);
|
||||
|
|
@ -2178,80 +1966,6 @@ test iterator {
|
|||
// No more records
|
||||
try std.testing.expect(try ri.next() == null);
|
||||
}
|
||||
test "iterator with custom allocator" {
|
||||
// Example: streaming through records and fields using the iterator API.
|
||||
// This is the preferred parsing approach -- no intermediate slices are
|
||||
// allocated for fields or records.
|
||||
const data =
|
||||
\\#!srfv1
|
||||
\\name::alice,desc:5:world
|
||||
;
|
||||
const allocator = std.testing.allocator;
|
||||
var reader = std.Io.Reader.fixed(data);
|
||||
var ri = try iterator(
|
||||
&reader,
|
||||
allocator,
|
||||
.{
|
||||
.parse_allocator = .{ .custom = .initIterator(std.testing.allocator) },
|
||||
},
|
||||
);
|
||||
defer ri.deinit();
|
||||
|
||||
// Advance to the first (and only) record
|
||||
const fi = (try ri.next()).?;
|
||||
|
||||
// Iterate fields within the record
|
||||
const field1 = (try fi.next()).?;
|
||||
defer allocator.free(field1.key);
|
||||
defer allocator.free(field1.value.?.string);
|
||||
try std.testing.expectEqualStrings("name", field1.key);
|
||||
try std.testing.expectEqualStrings("alice", field1.value.?.string);
|
||||
const field2 = (try fi.next()).?;
|
||||
defer allocator.free(field2.key);
|
||||
defer allocator.free(field2.value.?.string);
|
||||
try std.testing.expectEqualStrings("desc", field2.key);
|
||||
try std.testing.expectEqualStrings("world", field2.value.?.string);
|
||||
|
||||
// No more fields in this record
|
||||
try std.testing.expect(try fi.next() == null);
|
||||
// No more records
|
||||
try std.testing.expect(try ri.next() == null);
|
||||
}
|
||||
test "iterator with custom allocator - to() pattern" {
|
||||
// const ll = std.testing.log_level;
|
||||
// std.testing.log_level = .debug;
|
||||
// defer std.testing.log_level = ll;
|
||||
// Example: streaming through records and fields using the iterator API.
|
||||
// This is the preferred parsing approach -- no intermediate slices are
|
||||
// allocated for fields or records.
|
||||
const data =
|
||||
\\#!srfv1
|
||||
\\name::alice,desc:5:world
|
||||
;
|
||||
const allocator = std.testing.allocator;
|
||||
var reader = std.Io.Reader.fixed(data);
|
||||
var ri = try iterator(
|
||||
&reader,
|
||||
allocator,
|
||||
.{
|
||||
.parse_allocator = .{ .custom = .initTo(std.testing.allocator) },
|
||||
},
|
||||
);
|
||||
defer ri.deinit();
|
||||
|
||||
// Advance to the first (and only) record
|
||||
const fi = (try ri.next()).?;
|
||||
const rec = try fi.to(struct { name: []const u8, desc: []const u8 });
|
||||
defer allocator.free(rec.name);
|
||||
defer allocator.free(rec.desc);
|
||||
try std.testing.expectEqualStrings("alice", rec.name);
|
||||
try std.testing.expectEqualStrings("world", rec.desc);
|
||||
|
||||
// No more fields in this record
|
||||
try std.testing.expect(try fi.next() == null);
|
||||
// No more records
|
||||
try std.testing.expect(try ri.next() == null);
|
||||
}
|
||||
test parse {
|
||||
// Example: batch parsing collects all records and fields into slices.
|
||||
// Prefer `iterator` for streaming; use `parse` when random access to
|
||||
|
|
@ -2302,48 +2016,6 @@ test fmtFrom {
|
|||
\\
|
||||
, result);
|
||||
}
|
||||
test "fmtFrom commas" {
|
||||
// Example: serialize typed Zig values directly to SRF format.
|
||||
const Data = struct {
|
||||
name: []const u8 = "bob",
|
||||
age: u8,
|
||||
};
|
||||
const values: []const Data = &.{
|
||||
.{ .name = "alice, yo", .age = 30 },
|
||||
};
|
||||
var buf: [4096]u8 = undefined;
|
||||
const result = try std.fmt.bufPrint(
|
||||
&buf,
|
||||
"{f}",
|
||||
.{fmtFrom(Data, std.testing.allocator, values, .{})},
|
||||
);
|
||||
try std.testing.expectEqualStrings(
|
||||
\\#!srfv1
|
||||
\\name:9:alice, yo,age:num:30
|
||||
\\
|
||||
, result);
|
||||
}
|
||||
test "fmtFrom outputs defaults with option" {
|
||||
// Example: serialize typed Zig values directly to SRF format.
|
||||
const Data = struct {
|
||||
name: []const u8 = "bob",
|
||||
age: u8,
|
||||
};
|
||||
const values: []const Data = &.{
|
||||
.{ .age = 30 },
|
||||
};
|
||||
var buf: [4096]u8 = undefined;
|
||||
const result = try std.fmt.bufPrint(
|
||||
&buf,
|
||||
"{f}",
|
||||
.{fmtFrom(Data, std.testing.allocator, values, .{ .emit_default_values = true })},
|
||||
);
|
||||
try std.testing.expectEqualStrings(
|
||||
\\#!srfv1
|
||||
\\name::bob,age:num:30
|
||||
\\
|
||||
, result);
|
||||
}
|
||||
test "parse with diagnostics" {
|
||||
// Example: batch parsing collects all records and fields into slices.
|
||||
// Prefer `iterator` for streaming; use `parse` when random access to
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue