initial implementation with long format tests passing
This commit is contained in:
parent
1262322d18
commit
040f84cdef
6 changed files with 590 additions and 50 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
.zig-cache/
|
||||||
|
zig-out/
|
||||||
|
|
@ -36,7 +36,8 @@ array::array's don't exist. Use json or toml or something
|
||||||
data with newlines must have a length:7:foo
|
data with newlines must have a length:7:foo
|
||||||
bar
|
bar
|
||||||
boolean value:bool:false
|
boolean value:bool:false
|
||||||
# Empty line separates records
|
|
||||||
|
# Empty line separates records, but comments don't count as empty
|
||||||
key::this is the second record
|
key::this is the second record
|
||||||
this is a number:num:42
|
this is a number:num:42
|
||||||
null value:null:
|
null value:null:
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ pub fn build(b: *std.Build) void {
|
||||||
// intend to expose to consumers that were defined in other files part
|
// intend to expose to consumers that were defined in other files part
|
||||||
// of this module, you will have to make sure to re-export them from
|
// of this module, you will have to make sure to re-export them from
|
||||||
// the root file.
|
// the root file.
|
||||||
.root_source_file = b.path("src/root.zig"),
|
.root_source_file = b.path("src/srf.zig"),
|
||||||
// Later on we'll use this module as the root module of a test executable
|
// Later on we'll use this module as the root module of a test executable
|
||||||
// which requires us to specify a target.
|
// which requires us to specify a target.
|
||||||
.target = target,
|
.target = target,
|
||||||
|
|
|
||||||
26
src/main.zig
26
src/main.zig
|
|
@ -1,27 +1,3 @@
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const srf = @import("srf");
|
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {}
|
||||||
// Prints to stderr, ignoring potential errors.
|
|
||||||
std.debug.print("All your {s} are belong to us.\n", .{"codebase"});
|
|
||||||
try srf.bufferedPrint();
|
|
||||||
}
|
|
||||||
|
|
||||||
test "simple test" {
|
|
||||||
const gpa = std.testing.allocator;
|
|
||||||
var list: std.ArrayList(i32) = .empty;
|
|
||||||
defer list.deinit(gpa); // Try commenting this out and see if zig detects the memory leak!
|
|
||||||
try list.append(gpa, 42);
|
|
||||||
try std.testing.expectEqual(@as(i32, 42), list.pop());
|
|
||||||
}
|
|
||||||
|
|
||||||
test "fuzz example" {
|
|
||||||
const Context = struct {
|
|
||||||
fn testOne(context: @This(), input: []const u8) anyerror!void {
|
|
||||||
_ = context;
|
|
||||||
// Try passing `--fuzz` to `zig build test` and see if it manages to fail this test case!
|
|
||||||
try std.testing.expect(!std.mem.eql(u8, "canyoufindme", input));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
try std.testing.fuzz(Context{}, Context.testOne, .{});
|
|
||||||
}
|
|
||||||
|
|
|
||||||
23
src/root.zig
23
src/root.zig
|
|
@ -1,23 +0,0 @@
|
||||||
//! By convention, root.zig is the root source file when making a library.
|
|
||||||
const std = @import("std");
|
|
||||||
|
|
||||||
pub fn bufferedPrint() !void {
|
|
||||||
// Stdout is for the actual output of your application, for example if you
|
|
||||||
// are implementing gzip, then only the compressed bytes should be sent to
|
|
||||||
// stdout, not any debugging messages.
|
|
||||||
var stdout_buffer: [1024]u8 = undefined;
|
|
||||||
var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
|
|
||||||
const stdout = &stdout_writer.interface;
|
|
||||||
|
|
||||||
try stdout.print("Run `zig build test` to run the tests.\n", .{});
|
|
||||||
|
|
||||||
try stdout.flush(); // Don't forget to flush!
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add(a: i32, b: i32) i32 {
|
|
||||||
return a + b;
|
|
||||||
}
|
|
||||||
|
|
||||||
test "basic add functionality" {
|
|
||||||
try std.testing.expect(add(3, 7) == 10);
|
|
||||||
}
|
|
||||||
584
src/srf.zig
Normal file
584
src/srf.zig
Normal file
|
|
@ -0,0 +1,584 @@
|
||||||
|
//! Parser for the srf record format ("#!srfv1"): records made of typed key/value items.
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
const log = std.log.scoped(.srf);
|
||||||
|
|
||||||
|
/// One diagnostic produced while parsing, together with the position it refers to.
pub const ParseLineError = struct {
    /// Text of the diagnostic. Released by `deinit`.
    message: []const u8,
    /// Severity of the diagnostic.
    level: std.log.Level,
    /// Line number taken from the parse state when the diagnostic was recorded.
    line: usize,
    /// Column number taken from the parse state when the diagnostic was recorded.
    column: usize,

    /// Frees `message` with `allocator`.
    pub fn deinit(self: ParseLineError, allocator: std.mem.Allocator) void {
        const owned_message = self.message;
        allocator.free(owned_message);
    }
};
|
||||||
|
/// Collects parse diagnostics into a caller-provided list.
pub const Diagnostics = struct {
    /// List that receives the diagnostics.
    errors: *std.ArrayList(ParseLineError),
    /// Once this many diagnostics are stored, `addError` refuses further
    /// entries and returns `ParseError.ParseFailed`.
    stop_after: usize = 10,

    /// Stores `err` in `errors`, taking ownership of it.
    ///
    /// On any failure (limit reached or allocation failure) `err` is
    /// released before the error is returned, so the caller never has to
    /// clean it up.
    pub fn addError(self: Diagnostics, allocator: std.mem.Allocator, err: ParseLineError) ParseError!void {
        errdefer err.deinit(allocator);
        if (self.errors.items.len >= self.stop_after) return ParseError.ParseFailed;
        try self.errors.append(allocator, err);
    }

    /// Releases every stored diagnostic and then the list itself.
    pub fn deinit(self: Diagnostics, allocator: std.mem.Allocator) void {
        // `errors` is a pointer to the list; the stored entries live in `.items`.
        for (self.errors.items) |e| e.deinit(allocator);
        self.errors.deinit(allocator);
    }
};
|
||||||
|
|
||||||
|
/// Errors that the parsing functions in this file can return.
pub const ParseError = error{
    /// The input could not be parsed, or diagnostics were recorded.
    ParseFailed,
    /// The underlying reader reported a failure.
    ReadFailed,
    StreamTooLong,
    /// An allocation failed.
    OutOfMemory,
    /// The input ended before a length-prefixed value was complete.
    EndOfStream,
};
|
||||||
|
|
||||||
|
/// Result of `ItemValue.parse`: the value plus bookkeeping for the caller.
const ItemValueWithMetaData = struct {
    /// Parsed value; null for `null`-typed items and when parsing failed.
    item_value: ?ItemValue,
    /// True when the value could not be parsed and a diagnostic was recorded.
    error_parsing: bool = false,
    /// Offset reported by the value parser, relative to the start of the
    /// string that was handed to it.
    ending_byte: usize,
    /// True when the parser already updated the parse state's column itself.
    column_set: bool = false,
};
|
||||||
|
/// A typed value belonging to one item of a record.
///
/// `bytes` and `string` payloads returned by `parse` are always allocated
/// with the allocator passed to `parse` and must be released with `deinit`.
pub const ItemValue = union(enum) {
    number: f128,

    /// Bytes are converted to/from base64, string is not
    bytes: []const u8,

    /// String is not touched in any way
    string: []const u8,

    boolean: bool,

    /// Writes a short human-readable rendering of the value.
    pub fn format(self: ItemValue, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        switch (self) {
            .number => |n| try writer.print("num: {d}", .{n}),
            .bytes => |b| try writer.print("bytes: {x}", .{b}),
            .string => |s| try writer.print("string: {s}", .{s}),
            .boolean => |b| try writer.print("boolean: {}", .{b}),
        }
    }

    /// Frees the payload of `bytes` and `string` values; other variants own nothing.
    pub fn deinit(self: ItemValue, allocator: std.mem.Allocator) void {
        switch (self) {
            .number, .boolean => {},
            .bytes, .string => |buf| allocator.free(buf),
        }
    }

    /// Parses the `type:value` portion of an item (everything after the key's colon).
    ///
    /// `str` is the text following the key separator, `delimiter` ends
    /// delimiter-terminated values, and `state` supplies the reader and the
    /// line/column bookkeeping. Recognized type tags are the empty tag and
    /// `string`, `binary` (base64), `num`, `bool`, `null`, or an integer
    /// byte count for a length-prefixed string that may span lines.
    ///
    /// Value-level problems are reported through `parseError` and returned
    /// with `error_parsing = true`; a missing type separator, allocation
    /// failures and reader failures are returned as errors.
    pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *ParseState, delimiter: u8, options: ParseOptions) ParseError!ItemValueWithMetaData {
        const type_val_sep = std.mem.indexOfScalar(u8, str, ':') orelse {
            try parseError(allocator, options, "no type data or value after key", state.*);
            return ParseError.ParseFailed;
        };

        const metadata = str[0..type_val_sep];
        const trimmed_meta = std.mem.trim(u8, metadata, &std.ascii.whitespace);
        const rest_of_data = str[type_val_sep + 1 ..];

        // Delimiter-terminated view of the value, used by every type tag
        // except the explicit byte count.
        const val = if (std.mem.indexOfScalar(u8, rest_of_data, delimiter)) |stop|
            rest_of_data[0..stop]
        else
            rest_of_data;
        const val_end = metadata.len + 1 + val.len;

        if (trimmed_meta.len == 0 or std.mem.eql(u8, "string", trimmed_meta)) {
            // delimiter ended string
            return .{
                .item_value = .{ .string = try allocator.dupe(u8, val) },
                .ending_byte = val_end,
            };
        }

        if (std.mem.eql(u8, "binary", trimmed_meta)) {
            // binary is base64 encoded, so we need to decode it, but we don't
            // risk delimiter collision, so we don't need a length for this
            const Decoder = std.base64.standard.Decoder;
            const size = Decoder.calcSizeForSlice(val) catch {
                try parseError(allocator, options, "error parsing base64 value", state.*);
                return .{
                    .item_value = null,
                    .ending_byte = val_end,
                    .error_parsing = true,
                };
            };
            const data = try allocator.alloc(u8, size);
            // Covers the case where recording the diagnostic below fails.
            errdefer allocator.free(data);
            Decoder.decode(data, val) catch {
                try parseError(allocator, options, "error parsing base64 value", state.*);
                allocator.free(data);
                return .{
                    .item_value = null,
                    .ending_byte = val_end,
                    .error_parsing = true,
                };
            };
            return .{
                .item_value = .{ .bytes = data },
                .ending_byte = val_end,
            };
        }

        if (std.mem.eql(u8, "num", trimmed_meta)) {
            const val_trimmed = std.mem.trim(u8, val, &std.ascii.whitespace);
            const number = std.fmt.parseFloat(@FieldType(ItemValue, "number"), val_trimmed) catch {
                // TODO: in compact format we really need a column number here
                try parseError(allocator, options, "error parsing numeric value", state.*);
                return .{
                    .item_value = null,
                    .ending_byte = val_end,
                    .error_parsing = true,
                };
            };
            return .{
                .item_value = .{ .number = number },
                .ending_byte = val_end,
            };
        }

        if (std.mem.eql(u8, "bool", trimmed_meta)) {
            const val_trimmed = std.mem.trim(u8, val, &std.ascii.whitespace);
            const boolean = blk: {
                if (std.mem.eql(u8, "false", val_trimmed)) break :blk false;
                if (std.mem.eql(u8, "true", val_trimmed)) break :blk true;

                // TODO: in compact format we really need a column number here
                try parseError(allocator, options, "error parsing boolean value", state.*);
                return .{
                    .item_value = null,
                    .ending_byte = val_end,
                    .error_parsing = true,
                };
            };
            return .{
                .item_value = .{ .boolean = boolean },
                .ending_byte = val_end,
            };
        }

        if (std.mem.eql(u8, "null", trimmed_meta)) {
            return .{
                .item_value = null,
                .ending_byte = metadata.len + 2,
            };
        }

        // Last chance...the thing between these colons is a usize indicating
        // the number of bytes to grab for a string
        const size = std.fmt.parseInt(usize, trimmed_meta, 0) catch {
            log.debug("parseInt fail, trimmed_data: '{s}'", .{trimmed_meta});
            try parseError(allocator, options, "unrecognized metadata for key", state.*);
            return .{
                .item_value = null,
                .ending_byte = metadata.len + 1,
                .error_parsing = true,
            };
        };

        // If we are being asked specifically for bytes, we no longer care about
        // delimiters. We just want raw bytes. This might adjust our line/column
        // in the parse state
        if (rest_of_data.len >= size) {
            // The value fits on this line (an exact fit included). Copy it so
            // the result owns its memory like every other string value and
            // can be released by `deinit`.
            const fitted = rest_of_data[0..size];
            return .{
                .item_value = .{ .string = try allocator.dupe(u8, fitted) },
                .ending_byte = metadata.len + 1 + fitted.len,
            };
        }

        // This is not enough, we need more data from the reader
        log.debug("item value includes newlines {f}", .{state});

        // We need to advance the reader, so we need a copy of what we have so far
        const start = try allocator.dupe(u8, rest_of_data);
        defer allocator.free(start);

        // We won't do a parseError here. If we have an allocation error, read
        // error, or end of stream, all of these are fatal. Our reader is currently
        // past the newline, so we have to remove a character from size to account.
        // `size > rest_of_data.len` holds here, so the subtraction cannot underflow.
        const end = try state.reader.readAlloc(allocator, size - rest_of_data.len - 1);
        defer allocator.free(end);

        // However, we want to be past the end of the *next* newline too (in long
        // format mode). The value may be the last thing in the input, so a
        // missing byte is tolerated rather than asserted.
        if (delimiter == '\n') _ = try state.reader.discardShort(1);

        // This \n is because the reader state will have advanced beyond the next newline, so end
        // really should start with the newline. This only applies to long mode, because otherwise the
        // entire record is a single line
        const final = try std.mem.concat(allocator, u8, &.{ start, "\n", end });
        errdefer allocator.free(final);
        log.debug("full val: {s}", .{final});
        std.debug.assert(final.len == size);

        // Now we need to get the parse state correct
        state.line += std.mem.count(u8, final, "\n");
        state.column = final.len - std.mem.lastIndexOf(u8, final, "\n").?;
        return .{
            .item_value = .{ .string = final },
            .ending_byte = metadata.len + 1 + final.len, // This is useless here
            .column_set = true,
        };
    }
};
|
||||||
|
|
||||||
|
/// A single key/value pair inside a record.
pub const Item = struct {
    /// Key text; released by `deinit`.
    key: []const u8,
    /// Value of the item, or null when the item carries no value.
    value: ?ItemValue,

    /// Frees the key and, when present, the value.
    pub fn deinit(self: Item, allocator: std.mem.Allocator) void {
        allocator.free(self.key);
        const owned_value = self.value orelse return;
        owned_value.deinit(allocator);
    }
};
|
||||||
|
|
||||||
|
/// An ordered group of items.
pub const Record = struct {
    items: []Item,

    /// Releases every item and then the slice holding them.
    pub fn deinit(self: Record, allocator: std.mem.Allocator) void {
        var idx: usize = 0;
        while (idx < self.items.len) : (idx += 1) {
            self.items[idx].deinit(allocator);
        }
        allocator.free(self.items);
    }
};
|
||||||
|
|
||||||
|
/// The records produced by a call to `parse`.
pub const RecordList = struct {
    items: []Record,

    /// Releases every record and then the slice holding them.
    pub fn deinit(self: RecordList, allocator: std.mem.Allocator) void {
        var idx: usize = 0;
        while (idx < self.items.len) : (idx += 1) {
            self.items[idx].deinit(allocator);
        }
        allocator.free(self.items);
    }

    /// Placeholder formatter: currently writes nothing.
    pub fn format(self: RecordList, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        _ = writer;
        _ = self;
    }
};
|
||||||
|
|
||||||
|
/// Options accepted by the parsing functions.
pub const ParseOptions = struct {
    /// Optional collector for parse diagnostics. When null, the first
    /// reported problem makes parsing fail with `ParseError.ParseFailed`.
    diagnostics: ?*Diagnostics = null,
};
|
||||||
|
|
||||||
|
/// Parser instructions, written as lines that start with `#!`.
const Directive = union(enum) {
    magic,
    long_format,
    compact_format,
    require_eof,
    eof,

    /// Interprets `str` as a directive line.
    ///
    /// Returns null when `str` does not start with `#!` or names no known
    /// directive. Anything from the next `#` onwards is treated as a comment
    /// and trailing whitespace is ignored. The misspelling `#!requireof` is
    /// reported through `parseError` and then treated as unknown.
    pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: ParseState, options: ParseOptions) ParseError!?Directive {
        if (!std.mem.startsWith(u8, str, "#!")) return null;

        // strip any comments off
        const body = str[2..];
        const comment_at = std.mem.indexOfScalar(u8, body, '#') orelse body.len;
        const line = std.mem.trimEnd(u8, body[0..comment_at], &std.ascii.whitespace);

        if (std.mem.eql(u8, "requireof", line)) {
            try parseError(allocator, options, "#!requireof found. Did you mean #!requireeof?", state);
            return null;
        }

        const Known = struct { text: []const u8, directive: Directive };
        const known = [_]Known{
            .{ .text = "srfv1", .directive = .magic },
            .{ .text = "requireeof", .directive = .require_eof },
            .{ .text = "eof", .directive = .eof },
            .{ .text = "compact", .directive = .compact_format },
            .{ .text = "long", .directive = .long_format },
        };
        for (known) |candidate| {
            if (std.mem.eql(u8, candidate.text, line)) return candidate.directive;
        }
        return null;
    }
};
|
||||||
|
/// Reader plus the position bookkeeping shared by the parsing functions.
pub const ParseState = struct {
    /// Source of the input being parsed.
    reader: *std.Io.Reader,
    /// Current line counter.
    line: usize,
    /// Current column counter.
    column: usize,

    /// Writes the position as `line: N, col: M`.
    pub fn format(self: ParseState, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        const position = .{ self.line, self.column };
        return writer.print("line: {}, col: {}", position);
    }
};
|
||||||
|
/// Parses a complete srf document from `reader`.
///
/// The first line is expected to be the `#!srfv1` magic header. Further
/// directive lines configure the format (`#!long`, `#!compact`) and whether
/// a trailing `#!eof` is mandatory (`#!requireeof`). Everything after that
/// is item data, grouped into records.
///
/// The caller owns the returned list and releases it with
/// `RecordList.deinit` using the same `allocator`.
///
/// Returns `ParseError.ParseFailed` when the input is empty, when any
/// diagnostic was recorded in `options.diagnostics`, when a problem occurs
/// and no diagnostics collector was supplied, or when `#!requireeof` was
/// given without a final `#!eof`.
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordList {
    var long_format = false; // Default to compact format
    var require_eof = false; // Default to no eof required
    var eof_found: bool = false;
    var state = ParseState{ .line = 0, .column = 0, .reader = reader };
    const first_line = nextLine(reader, &state, '\n') orelse return ParseError.ParseFailed;

    const has_magic = if (try Directive.parse(allocator, first_line, state, options)) |d| d == .magic else false;
    if (!has_magic) try parseError(allocator, options, "Magic header not found on first line", state);

    // Loop through the header material and configure our main parsing
    var record_list: std.ArrayList(Record) = .empty;
    errdefer {
        for (record_list.items) |record| record.deinit(allocator);
        record_list.deinit(allocator);
    }
    const first_data = blk: {
        while (nextLine(reader, &state, '\n')) |header_line| {
            if (try Directive.parse(allocator, header_line, state, options)) |d| {
                switch (d) {
                    .magic => try parseError(allocator, options, "Found a duplicate magic header", state),
                    .long_format => long_format = true,
                    .compact_format => long_format = false, // what if we have both?
                    .require_eof => require_eof = true,
                    .eof => {
                        // there needs to be an eof then
                        if (nextLine(reader, &state, '\n')) |_| {
                            try parseError(allocator, options, "Data found after #!eof", state);
                            return ParseError.ParseFailed; // this is terminal
                        } else return .{ .items = try record_list.toOwnedSlice(allocator) };
                    },
                }
            } else break :blk header_line;
        }
        return .{ .items = try record_list.toOwnedSlice(allocator) };
    };

    // Main parsing. We already have the first line of data, which could
    // be a record (compact format) or a key/value pair (long format)
    var line: ?[]const u8 = first_data;
    var items: std.ArrayList(Item) = .empty;
    errdefer {
        for (items.items) |item| item.deinit(allocator);
        items.deinit(allocator);
    }

    // Because in long format we don't have newline delimiter, that should really be a noop
    // but we need this for compact format
    const delimiter: u8 = if (long_format) '\n' else ',';
    while (line) |l| {
        if (std.mem.trim(u8, l, &std.ascii.whitespace).len == 0) {
            // empty lines can be significant (to indicate a new record), but only once
            // a record is processed, which requires data first. That record processing
            // is at the bottom of the loop, so if an empty line is detected here, we can
            // safely ignore it
            line = nextLine(reader, &state, '\n');
            continue;
        }
        if (try Directive.parse(allocator, l, state, options)) |d| {
            switch (d) {
                .eof => {
                    // there needs to be an eof then
                    if (nextLine(reader, &state, '\n')) |_| {
                        try parseError(allocator, options, "Data found after #!eof", state);
                        return ParseError.ParseFailed; // this is terminal
                    } else {
                        eof_found = true;
                        break;
                    }
                },
                else => try parseError(allocator, options, "Directive found after data started", state),
            }
            // Move past the offending directive; otherwise the same line
            // would be examined (and reported) again on every iteration.
            line = nextLine(reader, &state, '\n');
            continue;
        }

        // Real data: lfg
        // Whatever the format, the beginning will always be the key data
        // key:stuff:value
        var it = std.mem.splitScalar(u8, l, ':');
        const raw_key = it.next().?; // first one we get for free
        const key_len = raw_key.len;

        // `raw_key` points into the reader's buffer and parsing the value may
        // read further from the reader, so take our copy of the key first.
        const key = try allocator.dupe(u8, raw_key);
        const value = ItemValue.parse(
            allocator,
            it.rest(),
            &state,
            delimiter,
            options,
        ) catch |err| {
            allocator.free(key);
            return err;
        };
        if (value.error_parsing) {
            allocator.free(key);
        } else {
            items.append(allocator, .{ .key = key, .value = value.item_value }) catch |err| {
                // The item never made it into the list, so release its parts here.
                allocator.free(key);
                if (value.item_value) |v| v.deinit(allocator);
                return err;
            };
        }

        if (!value.column_set)
            state.column = key_len + value.ending_byte;

        // The difference between compact and line here is that compact we will instead of
        // line = try nextLine, we will do something like line = line[42..]
        if (long_format) {
            const next = nextLine(reader, &state, '\n') orelse {
                // close out record, return
                try appendRecord(allocator, &record_list, &items);
                break;
            };
            line = next;
            if (next.len == 0) {
                // End of record
                try appendRecord(allocator, &record_list, &items);
                line = nextLine(reader, &state, '\n');
            }
        } else {
            const remainder = l[state.column..];
            line = remainder;
            if (remainder.len == 0) {
                // close out record
                try appendRecord(allocator, &record_list, &items);
                line = nextLine(reader, &state, '\n');
            }
        }
    }

    // Parsing complete. Add final record to list. Then, if there are any parse errors, throw
    log.debug(
        "Parse complete. Records parsed so far: {d}, Items in array (>0 means final record): {d}",
        .{ record_list.items.len, items.items.len },
    );
    if (items.items.len > 0) try appendRecord(allocator, &record_list, &items);
    if (options.diagnostics) |d| {
        if (d.errors.items.len > 0) return ParseError.ParseFailed;
    }
    if (require_eof and !eof_found) return ParseError.ParseFailed;
    return .{ .items = try record_list.toOwnedSlice(allocator) };
}

/// Moves the pending `items` into a new record at the end of `record_list`.
/// Space in `record_list` is reserved first so that the slice taken from
/// `items` can never be dropped by a failing append.
fn appendRecord(allocator: std.mem.Allocator, record_list: *std.ArrayList(Record), items: *std.ArrayList(Item)) ParseError!void {
    try record_list.ensureUnusedCapacity(allocator, 1);
    const owned_items = try items.toOwnedSlice(allocator);
    record_list.appendAssumeCapacity(.{ .items = owned_items });
}
|
||||||
|
|
||||||
|
/// Takes the next line, trimming leading whitespace and ignoring comments
/// Directives (comments starting with #!) are preserved
///
/// `state.line` is incremented and `state.column` reset for every line
/// attempted, including skipped comment lines. Returns null at end of input.
/// A reader failure also yields null, but is logged so that it is not
/// mistaken for a clean end of input.
fn nextLine(reader: *std.Io.Reader, state: *ParseState, delimiter: u8) ?[]const u8 {
    while (true) {
        state.line += 1;
        state.column = 0;
        const maybe_raw = reader.takeDelimiter(delimiter) catch |err| {
            log.warn("read failed at {f}, treating as end of input: {s}", .{ state.*, @errorName(err) });
            return null;
        };
        const raw_line = maybe_raw orelse return null;
        // we don't want to trim the end, as there might be a key/value field
        // with a string including important trailing whitespace
        const trimmed_line = std.mem.trimStart(u8, raw_line, &std.ascii.whitespace);
        const is_comment = std.mem.startsWith(u8, trimmed_line, "#");
        const is_directive = std.mem.startsWith(u8, trimmed_line, "#!");
        if (is_comment and !is_directive) continue;
        return trimmed_line;
    }
}
|
||||||
|
|
||||||
|
/// Reports a parse problem at the position held in `state`.
///
/// With a diagnostics collector in `options`, a copy of `message` is stored
/// there and the call succeeds unless the collector itself fails. Without
/// one, `ParseError.ParseFailed` is returned.
inline fn parseError(allocator: std.mem.Allocator, options: ParseOptions, message: []const u8, state: ParseState) ParseError!void {
    log.debug("Parse error. Parse state {f}, message: {s}", .{ state, message });
    const diagnostics = options.diagnostics orelse return ParseError.ParseFailed;
    const entry: ParseLineError = .{
        .message = try allocator.dupe(u8, message),
        .level = .err,
        .line = state.line,
        .column = state.column,
    };
    try diagnostics.addError(allocator, entry);
}
|
||||||
|
|
||||||
|
test "long format single record, no eof" {
    // One string item in long format, with no #!eof marker.
    const data =
        \\#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
        \\#!long # Mandatory to use multiline records, compact format is optional #!compact
        \\# A comment
        \\# empty lines ignored
        \\
        \\key::string value, with any data except a \n. an optional string length between the colons
    ;

    const gpa = std.testing.allocator;
    var input = std.Io.Reader.fixed(data);
    const parsed = try parse(&input, gpa, .{});
    defer parsed.deinit(gpa);

    try std.testing.expectEqual(@as(usize, 1), parsed.items.len);
    const only_record = parsed.items[0];
    try std.testing.expectEqual(@as(usize, 1), only_record.items.len);

    const item = only_record.items[0];
    try std.testing.expectEqualStrings("key", item.key);
    try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", item.value.?.string);
}
|
||||||
|
test "long format from README - generic data structures, first record only" {
    // A single numeric item, with #!requireeof satisfied by a final #!eof.
    const data =
        \\#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
        \\#!requireeof # Set this if you want parsing to fail when #!eof not present on last line
        \\#!long # Mandatory to use multiline records, compact format is optional #!compact
        \\# A comment
        \\# empty lines ignored
        \\
        \\this is a number:num: 5
        \\#!eof
    ;

    const gpa = std.testing.allocator;
    var input = std.Io.Reader.fixed(data);
    const parsed = try parse(&input, gpa, .{});
    defer parsed.deinit(gpa);

    const record_count: usize = parsed.items.len;
    try std.testing.expectEqual(@as(usize, 1), record_count);
}
|
||||||
|
|
||||||
|
test "long format from README - generic data structures" {
    // Two records in long format covering every value type, including a
    // length-prefixed string that spans two lines.
    const data =
        \\#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
        \\#!requireeof # Set this if you want parsing to fail when #!eof not present on last line
        \\#!long # Mandatory to use multiline records, compact format is optional #!compact
        \\# A comment
        \\# empty lines ignored
        \\
        \\key::string value, with any data except a \n. an optional string length between the colons
        \\this is a number:num: 5
        \\null value:null:
        \\array::array's don't exist. Use json or toml or something
        \\data with newlines must have a length:7:foo
        \\bar
        \\boolean value:bool:false
        \\ # Empty line separates records
        \\
        \\key::this is the second record
        \\this is a number:num:42
        \\null value:null:
        \\array::array's still don't exist
        \\data with newlines must have a length::single line
        \\#!eof # eof marker, useful to make sure your file wasn't cut in half. Only considered if requireeof set at top
    ;

    const t = std.testing;
    const gpa = t.allocator;
    var input = std.Io.Reader.fixed(data);
    const parsed = try parse(&input, gpa, .{});
    defer parsed.deinit(gpa);
    try t.expectEqual(@as(usize, 2), parsed.items.len);

    // First record
    const first = parsed.items[0].items;
    try t.expectEqual(@as(usize, 6), first.len);
    try t.expectEqualStrings("key", first[0].key);
    try t.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", first[0].value.?.string);
    try t.expectEqualStrings("this is a number", first[1].key);
    try t.expectEqual(@as(f128, 5), first[1].value.?.number);
    try t.expectEqualStrings("null value", first[2].key);
    try t.expect(first[2].value == null);
    try t.expectEqualStrings("array", first[3].key);
    try t.expectEqualStrings("array's don't exist. Use json or toml or something", first[3].value.?.string);
    try t.expectEqualStrings("data with newlines must have a length", first[4].key);
    try t.expectEqualStrings("foo\nbar", first[4].value.?.string);
    try t.expectEqualStrings("boolean value", first[5].key);
    try t.expect(!first[5].value.?.boolean);

    // Second record
    const second = parsed.items[1].items;
    try t.expectEqual(@as(usize, 5), second.len);
    try t.expectEqualStrings("key", second[0].key);
    try t.expectEqualStrings("this is the second record", second[0].value.?.string);
    try t.expectEqualStrings("this is a number", second[1].key);
    try t.expectEqual(@as(f128, 42), second[1].value.?.number);
    try t.expectEqualStrings("null value", second[2].key);
    try t.expect(second[2].value == null);
    try t.expectEqualStrings("array", second[3].key);
    try t.expectEqualStrings("array's still don't exist", second[3].value.?.string);
    try t.expectEqualStrings("data with newlines must have a length", second[4].key);
    try t.expectEqualStrings("single line", second[4].value.?.string);
}
|
||||||
|
|
||||||
|
test "compact format from README - generic data structures" {
    // Raise the log level for this test only; the previous level is restored on exit.
    const previous_level = std.testing.log_level;
    defer std.testing.log_level = previous_level;
    std.testing.log_level = .debug;

    // Currently skipped unconditionally.
    if (true) return error.SkipZigTest;

    const data =
        \\#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
        \\key::string value must have a length between colons or end with a comma,this is a number:num:5 ,null value:null:,array::array's don't exist. Use json or toml or something,data with newlines must have a length:7:foo
        \\bar,boolean value:bool:false
        \\key::this is the second record
    ;

    const gpa = std.testing.allocator;
    var input = std.Io.Reader.fixed(data);
    // We want "parse" and "parseLeaky" probably. Second parameter is a diagnostics
    const parsed = try parse(&input, gpa, .{});
    defer parsed.deinit(gpa);

    try std.testing.expectEqual(@as(usize, 2), parsed.items.len);
}
|
||||||
Loading…
Add table
Reference in a new issue