Compare commits
10 commits
0aacc8b37b
...
c4a59cfbd3
| Author | SHA1 | Date | |
|---|---|---|---|
| c4a59cfbd3 | |||
| 1ef493f282 | |||
| 0b8ec4aa89 | |||
| bbb65e08b9 | |||
| d5f6266e7c | |||
| 2846ee1cff | |||
| 4fbc08230e | |||
| 0e1d8bd424 | |||
| 415aa30f75 | |||
| b37fb7fb1a |
7 changed files with 428 additions and 175 deletions
27
.forgejo/workflows/zig-build.yaml
Normal file
27
.forgejo/workflows/zig-build.yaml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
name: Generic zig build
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- '*'
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Zig
|
||||
uses: https://codeberg.org/mlugg/setup-zig@v2.2.1
|
||||
- name: Build project
|
||||
run: zig build --summary all
|
||||
- name: Run tests
|
||||
run: zig build test --summary all
|
||||
- name: Notify
|
||||
uses: https://git.lerch.org/lobo/action-notify-ntfy@v2
|
||||
if: always() && env.GITEA_ACTIONS == 'true'
|
||||
with:
|
||||
host: ${{ secrets.NTFY_HOST }}
|
||||
topic: ${{ secrets.NTFY_TOPIC }}
|
||||
status: ${{ job.status }}
|
||||
user: ${{ secrets.NTFY_USER }}
|
||||
password: ${{ secrets.NTFY_PASSWORD }}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[tools]
|
||||
pre-commit = "4.2.0"
|
||||
"ubi:DonIsaac/zlint" = "0.7.6"
|
||||
prek = "0.3.1"
|
||||
"ubi:DonIsaac/zlint" = "0.7.9"
|
||||
zig = "0.15.2"
|
||||
zls = "0.15.0"
|
||||
zls = "0.15.1"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v3.2.0
|
||||
rev: v6.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
|
|
|
|||
19
README.md
19
README.md
|
|
@ -1,6 +1,6 @@
|
|||
# SRF (Simple Record Format)
|
||||
|
||||
SRF is a minimal data format designed for L2 caches and simple structured storage suitable for simple configuration as well. It provides human-readable key-value records with basic type hints, while avoiding the parsing complexity and escaping requirements of JSON.
|
||||
SRF is a minimal data format designed for L2 caches and simple structured storage suitable for simple configuration as well. It provides human-readable key-value records with basic type hints, while avoiding the parsing complexity and escaping requirements of JSON. Current benchmarking with hyperfine demonstrates approximately twice the performance of JSON parsing, though for L2 caches, JSON may be a poor choice. Compared to jsonl, it is approximately 40x faster. Performance also improves by 8% if you instruct the library not to copy strings around (ParseOptions alloc_strings = false).
|
||||
|
||||
**Features:**
|
||||
- No escaping required - use length-prefixed strings for complex data
|
||||
|
|
@ -54,23 +54,6 @@ bar,boolean value:bool:false
|
|||
key::this is the second record
|
||||
```
|
||||
|
||||
Second record problem...these
|
||||
|
||||
## Implementation
|
||||
|
||||
**Note:** Long format may be tabled for later development. Initial implementation will focus on compact format only.
|
||||
|
||||
Unrecognized `#!<keyword>` should be an error:
|
||||
requireof -> requireeof will probably be a common problem.
|
||||
#! anywhere other than the beginning or end is an error
|
||||
|
||||
newline separates records in compact format. An empty line is required in long format
|
||||
|
||||
comma separates fields in compact format
|
||||
newline separates fields in long format
|
||||
|
||||
Should we have a #!hash directive to include all data not starting with `#!` ?
|
||||
|
||||
## Implementation Concerns
|
||||
|
||||
**Parser robustness:**
|
||||
|
|
|
|||
16
build.zig
16
build.zig
|
|
@ -142,6 +142,22 @@ pub fn build(b: *std.Build) void {
|
|||
test_step.dependOn(&run_mod_tests.step);
|
||||
test_step.dependOn(&run_exe_tests.step);
|
||||
|
||||
const lib = b.addLibrary(.{
|
||||
.name = "srf",
|
||||
.root_module = b.createModule(.{
|
||||
.root_source_file = b.path("src/srf.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
}),
|
||||
});
|
||||
const install_docs = b.addInstallDirectory(.{
|
||||
.source_dir = lib.getEmittedDocs(),
|
||||
.install_dir = .prefix,
|
||||
.install_subdir = "docs",
|
||||
});
|
||||
|
||||
const docs_step = b.step("docs", "Generate documentation");
|
||||
docs_step.dependOn(&install_docs.step);
|
||||
// Just like flags, top level steps are also listed in the `--help` menu.
|
||||
//
|
||||
// The Zig build system is entirely implemented in userland, which means
|
||||
|
|
|
|||
14
src/main.zig
14
src/main.zig
|
|
@ -76,19 +76,9 @@ pub fn main() !void {
|
|||
try stdin.appendRemaining(base_allocator, &data, @enumFromInt(100 * 1024 * 1024));
|
||||
|
||||
if (std.mem.eql(u8, format, "srf")) {
|
||||
// TODO: Remove this code. SRF should be using an Arena allocator instead
|
||||
const buffer = try base_allocator.alloc(u8, 200 * 1024 * 1024);
|
||||
defer base_allocator.free(buffer);
|
||||
var fba = std.heap.FixedBufferAllocator.init(buffer);
|
||||
const srf_allocator = fba.allocator();
|
||||
// remove ^^
|
||||
|
||||
var reader = std.Io.Reader.fixed(data.items);
|
||||
const records = try srf.parse(&reader, srf_allocator, .{});
|
||||
defer {
|
||||
for (records.items) |r| r.deinit(srf_allocator);
|
||||
srf_allocator.free(records.items);
|
||||
}
|
||||
const records = try srf.parse(&reader, allocator, .{ .alloc_strings = false });
|
||||
defer records.deinit();
|
||||
} else if (std.mem.eql(u8, format, "jsonl")) {
|
||||
var lines = std.mem.splitScalar(u8, data.items, '\n');
|
||||
while (lines.next()) |line| {
|
||||
|
|
|
|||
519
src/srf.zig
519
src/srf.zig
|
|
@ -1,4 +1,22 @@
|
|||
//! By convention, root.zig is the root source file when making a library.
|
||||
//!SRF is a minimal data format designed for L2 caches and simple structured storage suitable for simple configuration as well. It provides human-readable key-value records with basic type hints, while avoiding the parsing complexity and escaping requirements of JSON. Current benchmarking with hyperfine demonstrates approximately twice the performance of JSON parsing, though for L2 caches, JSON may be a poor choice. Compared to jsonl, it is approximately 40x faster. Performance also improves by 8% if you instruct the library not to copy strings around (ParseOptions alloc_strings = false).
|
||||
//!
|
||||
//!**Features:**
|
||||
//!- No escaping required - use length-prefixed strings for complex data
|
||||
//!- Single-pass parsing with minimal memory allocation
|
||||
//!- Basic type system (string, num, bool, null, binary) with explicit type hints
|
||||
//!- Compact format for machine generation, long format for human editing
|
||||
//!- Built-in corruption detection with optional EOF markers
|
||||
//!
|
||||
//!**When to use SRF:**
|
||||
//!- L2 caches that need occasional human inspection
|
||||
//!- Simple configuration files with mixed data types
|
||||
//!- Data exchange where JSON escaping is problematic
|
||||
//!- Applications requiring fast, predictable parsing
|
||||
//!
|
||||
//!**When not to use SRF:**
|
||||
//!- Complex nested data structures (use JSON/TOML instead)
|
||||
//!- Schema validation requirements
|
||||
//!- Arrays or object hierarchies (arrays can be managed in the data itself, however)
|
||||
const std = @import("std");
|
||||
|
||||
const log = std.log.scoped(.srf);
|
||||
|
|
@ -16,6 +34,7 @@ pub const ParseLineError = struct {
|
|||
pub const Diagnostics = struct {
|
||||
errors: *std.ArrayList(ParseLineError),
|
||||
stop_after: usize = 10,
|
||||
arena: std.heap.ArenaAllocator,
|
||||
|
||||
pub fn addError(self: Diagnostics, allocator: std.mem.Allocator, err: ParseLineError) ParseError!void {
|
||||
if (self.errors.items.len >= self.stop_after) {
|
||||
|
|
@ -24,9 +43,14 @@ pub const Diagnostics = struct {
|
|||
}
|
||||
try self.errors.append(allocator, err);
|
||||
}
|
||||
pub fn deinit(self: Diagnostics, allocator: std.mem.Allocator) void {
|
||||
for (self.errors) |e| e.deinit(allocator);
|
||||
self.errors.deinit(allocator);
|
||||
pub fn deinit(self: Parsed) void {
|
||||
// From parse, three things can happen:
|
||||
// 1. Happy path - record comes back, deallocation happens on that deinit
|
||||
// 2. Errors are returned, no diagnostics provided. Deallocation happens in parse on errdefer
|
||||
// 3. Errors are returned, diagnostics provided. Deallocation happens here
|
||||
const child_allocator = self.arena.child_allocator;
|
||||
self.arena.deinit();
|
||||
child_allocator.destroy(self.arena);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -38,13 +62,13 @@ pub const ParseError = error{
|
|||
EndOfStream,
|
||||
};
|
||||
|
||||
const ItemValueWithMetaData = struct {
|
||||
item_value: ?ItemValue,
|
||||
const ValueWithMetaData = struct {
|
||||
item_value: ?Value,
|
||||
error_parsing: bool = false,
|
||||
reader_advanced: bool = false,
|
||||
};
|
||||
pub const ItemValue = union(enum) {
|
||||
number: f128,
|
||||
pub const Value = union(enum) {
|
||||
number: f64,
|
||||
|
||||
/// Bytes are converted to/from base64, string is not
|
||||
bytes: []const u8,
|
||||
|
|
@ -54,22 +78,17 @@ pub const ItemValue = union(enum) {
|
|||
|
||||
boolean: bool,
|
||||
|
||||
pub fn format(self: ItemValue, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
||||
switch (self) {
|
||||
.number => try writer.print("num: {d}", .{self.number}),
|
||||
.bytes => try writer.print("bytes: {x}", .{self.bytes}),
|
||||
.string => try writer.print("string: {s}", .{self.string}),
|
||||
.boolean => try writer.print("boolean: {}", .{self.boolean}),
|
||||
}
|
||||
}
|
||||
pub fn deinit(self: ItemValue, allocator: std.mem.Allocator) void {
|
||||
switch (self) {
|
||||
.number, .boolean => {},
|
||||
.bytes => |b| allocator.free(b),
|
||||
.string => |s| allocator.free(s),
|
||||
}
|
||||
}
|
||||
pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *ParseState, delimiter: u8, options: ParseOptions) ParseError!ItemValueWithMetaData {
|
||||
// pub fn format(self: Value, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
||||
// switch (self) {
|
||||
// .number => try writer.print("num: {d}", .{self.number}),
|
||||
// .bytes => try writer.print("bytes: {x}", .{self.bytes}),
|
||||
// .string => try writer.print("string: {s}", .{self.string}),
|
||||
// .boolean => try writer.print("boolean: {}", .{self.boolean}),
|
||||
// }
|
||||
// }
|
||||
pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *ParseState, delimiter: u8, options: ParseOptions) ParseError!ValueWithMetaData {
|
||||
const debug = str.len > 2 and str[0] == '1' and str[1] == '1';
|
||||
if (debug) log.debug("parsing {s}", .{str});
|
||||
const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':');
|
||||
if (type_val_sep_raw == null) {
|
||||
try parseError(allocator, options, "no type data or value after key", state.*);
|
||||
|
|
@ -88,7 +107,7 @@ pub const ItemValue = union(enum) {
|
|||
state.column += total_chars;
|
||||
state.partial_line_column += total_chars;
|
||||
return .{
|
||||
.item_value = .{ .string = try allocator.dupe(u8, val) },
|
||||
.item_value = .{ .string = try dupe(allocator, options, val) },
|
||||
};
|
||||
}
|
||||
if (std.mem.eql(u8, "binary", trimmed_meta)) {
|
||||
|
|
@ -127,11 +146,11 @@ pub const ItemValue = union(enum) {
|
|||
const val = it.first();
|
||||
// we need to advance the column/partial_line_column of our parsing state
|
||||
const total_chars = metadata.len + 1 + val.len;
|
||||
log.debug("num total_chars: {d}", .{total_chars});
|
||||
// log.debug("num total_chars: {d}", .{total_chars});
|
||||
state.column += total_chars;
|
||||
state.partial_line_column += total_chars;
|
||||
const val_trimmed = std.mem.trim(u8, val, &std.ascii.whitespace);
|
||||
const number = std.fmt.parseFloat(@FieldType(ItemValue, "number"), val_trimmed) catch {
|
||||
const number = std.fmt.parseFloat(@FieldType(Value, "number"), val_trimmed) catch {
|
||||
try parseError(allocator, options, "error parsing numeric value", state.*);
|
||||
return .{
|
||||
.item_value = null,
|
||||
|
|
@ -187,18 +206,21 @@ pub const ItemValue = union(enum) {
|
|||
.error_parsing = true,
|
||||
};
|
||||
};
|
||||
if (debug) log.debug("found fixed string size {d}. State {f}", .{ size, state });
|
||||
// Update again for number of bytes. All failures beyond this point are
|
||||
// fatal, so this is safe.
|
||||
state.column += size;
|
||||
state.partial_line_column += size;
|
||||
if (debug) log.debug("New state {f}", .{state});
|
||||
|
||||
// If we are being asked specifically for bytes, we no longer care about
|
||||
// delimiters. We just want raw bytes. This might adjust our line/column
|
||||
// in the parse state
|
||||
const rest_of_data = str[type_val_sep + 1 ..];
|
||||
if (rest_of_data.len > size) {
|
||||
if (rest_of_data.len >= size) {
|
||||
// We fit on this line, everything is "normal"
|
||||
const val = rest_of_data[0..size];
|
||||
if (debug) log.debug("val {s}", .{val});
|
||||
return .{
|
||||
.item_value = .{ .string = val },
|
||||
};
|
||||
|
|
@ -206,7 +228,7 @@ pub const ItemValue = union(enum) {
|
|||
// This is not enough, we need more data from the reader
|
||||
log.debug("item value includes newlines {f}", .{state});
|
||||
// We need to advance the reader, so we need a copy of what we have so far
|
||||
const start = try allocator.dupe(u8, rest_of_data);
|
||||
const start = try dupe(allocator, options, rest_of_data);
|
||||
defer allocator.free(start);
|
||||
// We won't do a parseError here. If we have an allocation error, read
|
||||
// error, or end of stream, all of these are fatal. Our reader is currently
|
||||
|
|
@ -238,36 +260,58 @@ pub const ItemValue = union(enum) {
|
|||
}
|
||||
};
|
||||
|
||||
pub const Item = struct {
|
||||
// A field has a key and a value, but the value may be null
|
||||
pub const Field = struct {
|
||||
key: []const u8,
|
||||
value: ?ItemValue,
|
||||
|
||||
pub fn deinit(self: Item, allocator: std.mem.Allocator) void {
|
||||
// std.debug.print("item deinit, key {s}, val: {?f}\n", .{ self.key, self.value });
|
||||
allocator.free(self.key);
|
||||
if (self.value) |v|
|
||||
v.deinit(allocator);
|
||||
}
|
||||
value: ?Value,
|
||||
};
|
||||
|
||||
// A record has a list of fields, with no assumptions regarding duplication,
|
||||
// etc. This is for parsing speed, but also for more flexibility in terms of
|
||||
// use cases. One can make a de facto array out of this structure by having
|
||||
// something like:
|
||||
//
|
||||
// arr:string:foo
|
||||
// arr:string:bar
|
||||
//
|
||||
// and when you coerce to zig struct have an array .arr that gets populated
|
||||
// with strings "foo" and "bar".
|
||||
pub const Record = struct {
|
||||
items: []Item,
|
||||
fields: []const Field,
|
||||
|
||||
pub fn deinit(self: Record, allocator: std.mem.Allocator) void {
|
||||
for (self.items) |i| i.deinit(allocator);
|
||||
allocator.free(self.items);
|
||||
pub fn fmt(value: Record, options: FormatOptions) RecordFormatter {
|
||||
return .{ .value = value, .options = options };
|
||||
}
|
||||
};
|
||||
|
||||
pub const RecordList = struct {
|
||||
items: []Record,
|
||||
/// The Parsed struct is equivalent to Parsed(T) in std.json. Since most are
|
||||
/// familiar with std.json, it differs in the following ways:
|
||||
///
|
||||
/// * There is a records field instead of a value field. In json, one type of
|
||||
/// value is an array. SRF does not have an array data type, but the set of
|
||||
/// records is an array. json as a format is structured as a single object at
|
||||
/// the outermost level
|
||||
///
|
||||
/// * This is not generic. In SRF, it is a separate function to bind the list
|
||||
/// of records to a specific data type. This will add some (hopefully minimal)
|
||||
/// overhead, but also avoid conflating parsing from the coercion from general
|
||||
/// type to specifics, and avoids answering questions like "what if I have
|
||||
/// 15 values for the same key" until you're actually dealing with that problem
|
||||
/// (see std.json.ParseOptions duplicate_field_behavior and ignore_unknown_fields)
|
||||
///
|
||||
/// When implemented, this will include a pub fn bind(self: Parsed, comptime T: type, options: BindOptions) BindError![]T
|
||||
/// function. The options will include things related to duplicate handling and
|
||||
/// missing fields
|
||||
pub const Parsed = struct {
|
||||
records: std.ArrayList(Record),
|
||||
arena: *std.heap.ArenaAllocator,
|
||||
|
||||
pub fn deinit(self: RecordList, allocator: std.mem.Allocator) void {
|
||||
for (self.items) |r|
|
||||
r.deinit(allocator);
|
||||
allocator.free(self.items);
|
||||
pub fn deinit(self: Parsed) void {
|
||||
const child_allocator = self.arena.child_allocator;
|
||||
self.arena.deinit();
|
||||
child_allocator.destroy(self.arena);
|
||||
}
|
||||
pub fn format(self: RecordList, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
||||
pub fn format(self: Parsed, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
||||
_ = self;
|
||||
_ = writer;
|
||||
}
|
||||
|
|
@ -275,6 +319,12 @@ pub const RecordList = struct {
|
|||
|
||||
/// Options controlling how `parse` behaves.
pub const ParseOptions = struct {
    /// Optional collector for parse errors. When non-null, each parse error
    /// is appended to it; when null, errors are only logged.
    diagnostics: ?*Diagnostics = null,

    /// Controls whether parsed strings are copied. The default (true) copies
    /// the data, so the original input can be freed safely, at a cost of
    /// roughly 8% overhead. Set to false if that safety is not needed; this
    /// is the equivalent of the "Leaky" parsing functions of std.json.
    alloc_strings: bool = true,
};
|
||||
|
||||
const Directive = union(enum) {
|
||||
|
|
@ -301,6 +351,85 @@ const Directive = union(enum) {
|
|||
return null;
|
||||
}
|
||||
};
|
||||
/// Options controlling how records are rendered as SRF text.
pub const FormatOptions = struct {
    /// Emit the `#!long` directive and separate fields with newlines
    /// instead of commas.
    long_format: bool = false,

    /// Emit the `#!requireeof` directive in the header and a closing
    /// `#!eof` directive after the last record.
    emit_eof: bool = false,
};
|
||||
|
||||
/// Bundles `value` and `options` into a `Formatter`, which renders the
/// records as SRF text when printed with the `{f}` format specifier.
pub fn fmt(value: []const Record, options: FormatOptions) Formatter {
    return .{
        .value = value,
        .options = options,
    };
}
|
||||
test fmt {
    // Expected long-format rendering of one record with one string field.
    const expected =
        \\#!srfv1
        \\#!long
        \\foo::bar
        \\
    ;
    const input: []const Record = &.{
        .{ .fields = &.{.{ .key = "foo", .value = .{ .string = "bar" } }} },
    };

    var scratch: [1024]u8 = undefined;
    const rendered = try std.fmt.bufPrint(
        &scratch,
        "{f}",
        .{fmt(input, .{ .long_format = true })},
    );
    try std.testing.expectEqualStrings(expected, rendered);
}
|
||||
/// Renders a slice of records as a complete SRF document (header
/// directives, records, and optional EOF marker).
pub const Formatter = struct {
    value: []const Record,
    options: FormatOptions,

    /// Writes the magic header, any requested directives, each record
    /// followed by a newline, and finally `#!eof` when `emit_eof` is set.
    pub fn format(self: Formatter, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        const opts = self.options;

        try writer.writeAll("#!srfv1\n");
        if (opts.long_format) try writer.writeAll("#!long\n");
        if (opts.emit_eof) try writer.writeAll("#!requireeof\n");

        for (self.value, 0..) |record, index| {
            // Long format separates consecutive records with an empty line.
            if (index != 0 and opts.long_format) try writer.writeByte('\n');
            try writer.print("{f}\n", .{Record.fmt(record, opts)});
        }

        if (opts.emit_eof) try writer.writeAll("#!eof\n");
    }
};
|
||||
/// Renders a single record as SRF text: fields are written as
/// `key:<type or length>:<value>` and separated by ',' (compact format) or
/// '\n' (long format). No trailing delimiter is written.
pub const RecordFormatter = struct {
    value: Record,
    options: FormatOptions,

    /// Writes every field of the record to `writer`.
    ///
    /// Strings are written without a length prefix (`key::text`) unless the
    /// text contains a newline or the active field delimiter, in which case
    /// the byte length is emitted (`key:<len>:text`) so the value is not
    /// split at the embedded delimiter when read back.
    pub fn format(self: RecordFormatter, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        const delimiter: u8 = if (self.options.long_format) '\n' else ',';

        for (self.value.fields, 0..) |field, i| {
            // Delimiter goes between fields only, never after the last one.
            if (i != 0) try writer.writeByte(delimiter);

            try writer.writeAll(field.key);
            const value = field.value orelse {
                try writer.writeAll(":null:");
                continue;
            };

            try writer.writeByte(':');
            switch (value) {
                .string => |s| {
                    // A newline always needs a length prefix; in compact
                    // format an embedded ',' needs one too, otherwise the
                    // value would end at the comma.
                    const needs_length =
                        std.mem.containsAtLeastScalar(u8, s, 1, '\n') or
                        std.mem.containsAtLeastScalar(u8, s, 1, delimiter);
                    if (needs_length) try writer.print("{d}", .{s.len});
                    try writer.writeByte(':');
                    try writer.writeAll(s);
                },
                .number => |n| try writer.print("num:{d}", .{n}),
                .boolean => |b| try writer.print("bool:{}", .{b}),
                .bytes => |b| try writer.print("binary:{b64}", .{b}),
            }
        }
    }
};
|
||||
pub const ParseState = struct {
|
||||
reader: *std.Io.Reader,
|
||||
line: usize,
|
||||
|
|
@ -311,52 +440,53 @@ pub const ParseState = struct {
|
|||
try writer.print("line: {}, col: {}", .{ self.line, self.column });
|
||||
}
|
||||
};
|
||||
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordList {
|
||||
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!Parsed {
|
||||
// create an arena allocator for everything related to parsing
|
||||
const arena: *std.heap.ArenaAllocator = try allocator.create(std.heap.ArenaAllocator);
|
||||
errdefer if (options.diagnostics == null) allocator.destroy(arena);
|
||||
arena.* = .init(allocator);
|
||||
errdefer if (options.diagnostics == null) arena.deinit();
|
||||
const aa = arena.allocator();
|
||||
var long_format = false; // Default to compact format
|
||||
var require_eof = false; // Default to no eof required
|
||||
var eof_found: bool = false;
|
||||
var state = ParseState{ .line = 0, .column = 0, .partial_line_column = 0, .reader = reader };
|
||||
const first_line = nextLine(reader, &state) orelse return ParseError.ParseFailed;
|
||||
|
||||
if (try Directive.parse(allocator, first_line, state, options)) |d| {
|
||||
if (d != .magic) try parseError(allocator, options, "Magic header not found on first line", state);
|
||||
} else try parseError(allocator, options, "Magic header not found on first line", state);
|
||||
if (try Directive.parse(aa, first_line, state, options)) |d| {
|
||||
if (d != .magic) try parseError(aa, options, "Magic header not found on first line", state);
|
||||
} else try parseError(aa, options, "Magic header not found on first line", state);
|
||||
|
||||
// Loop through the header material and configure our main parsing
|
||||
var record_list: std.ArrayList(Record) = .empty;
|
||||
errdefer {
|
||||
for (record_list.items) |i| i.deinit(allocator);
|
||||
record_list.deinit(allocator);
|
||||
}
|
||||
var parsed: Parsed = .{
|
||||
.records = .empty,
|
||||
.arena = arena,
|
||||
};
|
||||
const first_data = blk: {
|
||||
while (nextLine(reader, &state)) |line| {
|
||||
if (try Directive.parse(allocator, line, state, options)) |d| {
|
||||
if (try Directive.parse(aa, line, state, options)) |d| {
|
||||
switch (d) {
|
||||
.magic => try parseError(allocator, options, "Found a duplicate magic header", state),
|
||||
.magic => try parseError(aa, options, "Found a duplicate magic header", state),
|
||||
.long_format => long_format = true,
|
||||
.compact_format => long_format = false, // what if we have both?
|
||||
.require_eof => require_eof = true,
|
||||
.eof => {
|
||||
// there needs to be an eof then
|
||||
if (nextLine(reader, &state)) |_| {
|
||||
try parseError(allocator, options, "Data found after #!eof", state);
|
||||
try parseError(aa, options, "Data found after #!eof", state);
|
||||
return ParseError.ParseFailed; // this is terminal
|
||||
} else return .{ .items = try record_list.toOwnedSlice(allocator) };
|
||||
} else return parsed;
|
||||
},
|
||||
}
|
||||
} else break :blk line;
|
||||
}
|
||||
return .{ .items = try record_list.toOwnedSlice(allocator) };
|
||||
return parsed;
|
||||
};
|
||||
|
||||
// Main parsing. We already have the first line of data, which could
|
||||
// be a record (compact format) or a key/value pair (long format)
|
||||
var line: ?[]const u8 = first_data;
|
||||
var items: std.ArrayList(Item) = .empty;
|
||||
errdefer {
|
||||
for (items.items) |i| i.deinit(allocator);
|
||||
items.deinit(allocator);
|
||||
}
|
||||
var items: std.ArrayList(Field) = .empty;
|
||||
|
||||
// Because in long format we don't have newline delimiter, that should really be a noop
|
||||
// but we need this for compact format
|
||||
|
|
@ -372,19 +502,19 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
line = nextLine(reader, &state);
|
||||
continue;
|
||||
}
|
||||
if (try Directive.parse(allocator, l, state, options)) |d| {
|
||||
if (try Directive.parse(aa, l, state, options)) |d| {
|
||||
switch (d) {
|
||||
.eof => {
|
||||
// there needs to be an eof then
|
||||
if (nextLine(reader, &state)) |_| {
|
||||
try parseError(allocator, options, "Data found after #!eof", state);
|
||||
try parseError(aa, options, "Data found after #!eof", state);
|
||||
return ParseError.ParseFailed; // this is terminal
|
||||
} else {
|
||||
eof_found = true;
|
||||
break;
|
||||
}
|
||||
},
|
||||
else => try parseError(allocator, options, "Directive found after data started", state),
|
||||
else => try parseError(aa, options, "Directive found after data started", state),
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
|
@ -397,8 +527,8 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
if (key.len > 0) std.debug.assert(key[0] != delimiter);
|
||||
state.column += key.len + 1;
|
||||
state.partial_line_column += key.len + 1;
|
||||
const value = try ItemValue.parse(
|
||||
allocator,
|
||||
const value = try Value.parse(
|
||||
aa,
|
||||
it.rest(),
|
||||
&state,
|
||||
delimiter,
|
||||
|
|
@ -407,7 +537,7 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
|
||||
if (!value.error_parsing) {
|
||||
// std.debug.print("alloc on key: {s}, val: {?f}\n", .{ key, value.item_value });
|
||||
try items.append(allocator, .{ .key = try allocator.dupe(u8, key), .value = value.item_value });
|
||||
try items.append(aa, .{ .key = try aa.dupe(u8, key), .value = value.item_value });
|
||||
}
|
||||
|
||||
if (value.reader_advanced and !long_format) {
|
||||
|
|
@ -426,16 +556,16 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
const maybe_line = nextLine(reader, &state);
|
||||
if (maybe_line == null) {
|
||||
// close out record, return
|
||||
try record_list.append(allocator, .{
|
||||
.items = try items.toOwnedSlice(allocator),
|
||||
try parsed.records.append(aa, .{
|
||||
.fields = try items.toOwnedSlice(aa),
|
||||
});
|
||||
break;
|
||||
}
|
||||
line = maybe_line.?;
|
||||
if (line.?.len == 0) {
|
||||
// End of record
|
||||
try record_list.append(allocator, .{
|
||||
.items = try items.toOwnedSlice(allocator),
|
||||
try parsed.records.append(aa, .{
|
||||
.fields = try items.toOwnedSlice(aa),
|
||||
});
|
||||
line = nextLine(reader, &state);
|
||||
}
|
||||
|
|
@ -445,8 +575,8 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
state.partial_line_column = 0;
|
||||
if (line.?.len == 0) {
|
||||
// close out record
|
||||
try record_list.append(allocator, .{
|
||||
.items = try items.toOwnedSlice(allocator),
|
||||
try parsed.records.append(aa, .{
|
||||
.fields = try items.toOwnedSlice(aa),
|
||||
});
|
||||
line = nextLine(reader, &state);
|
||||
state.partial_line_column = 0;
|
||||
|
|
@ -461,13 +591,13 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
|||
}
|
||||
// Parsing complete. Add final record to list. Then, if there are any parse errors, throw
|
||||
if (items.items.len > 0)
|
||||
try record_list.append(allocator, .{
|
||||
.items = try items.toOwnedSlice(allocator),
|
||||
try parsed.records.append(aa, .{
|
||||
.fields = try items.toOwnedSlice(aa),
|
||||
});
|
||||
if (options.diagnostics) |d|
|
||||
if (d.errors.items.len > 0) return ParseError.ParseFailed;
|
||||
if (require_eof and !eof_found) return ParseError.ParseFailed;
|
||||
return .{ .items = try record_list.toOwnedSlice(allocator) };
|
||||
return parsed;
|
||||
}
|
||||
|
||||
/// Takes the next line, trimming leading whitespace and ignoring comments
|
||||
|
|
@ -486,11 +616,16 @@ fn nextLine(reader: *std.Io.Reader, state: *ParseState) ?[]const u8 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns a copy of `data` made with `allocator` when
/// `options.alloc_strings` is set; otherwise returns `data` itself,
/// uncopied.
inline fn dupe(allocator: std.mem.Allocator, options: ParseOptions, data: []const u8) ParseError![]const u8 {
    return if (options.alloc_strings) try allocator.dupe(u8, data) else data;
}
|
||||
inline fn parseError(allocator: std.mem.Allocator, options: ParseOptions, message: []const u8, state: ParseState) ParseError!void {
|
||||
log.debug("Parse error. Parse state {f}, message: {s}", .{ state, message });
|
||||
if (options.diagnostics) |d| {
|
||||
try d.addError(allocator, .{
|
||||
.message = try allocator.dupe(u8, message),
|
||||
.message = try dupe(allocator, options, message),
|
||||
.level = .err,
|
||||
.line = state.line,
|
||||
.column = state.column,
|
||||
|
|
@ -513,10 +648,10 @@ test "long format single record, no eof" {
|
|||
const allocator = std.testing.allocator;
|
||||
var reader = std.Io.Reader.fixed(data);
|
||||
const records = try parse(&reader, allocator, .{});
|
||||
defer records.deinit(allocator);
|
||||
try std.testing.expectEqual(@as(usize, 1), records.items.len);
|
||||
try std.testing.expectEqual(@as(usize, 1), records.items[0].items.len);
|
||||
const kvps = records.items[0].items;
|
||||
defer records.deinit();
|
||||
try std.testing.expectEqual(@as(usize, 1), records.records.items.len);
|
||||
try std.testing.expectEqual(@as(usize, 1), records.records.items[0].fields.len);
|
||||
const kvps = records.records.items[0].fields;
|
||||
try std.testing.expectEqualStrings("key", kvps[0].key);
|
||||
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", kvps[0].value.?.string);
|
||||
}
|
||||
|
|
@ -528,15 +663,15 @@ test "long format from README - generic data structures, first record only" {
|
|||
\\# A comment
|
||||
\\# empty lines ignored
|
||||
\\
|
||||
\\this is a number:num: 5
|
||||
\\this is a number:num: 5
|
||||
\\#!eof
|
||||
;
|
||||
|
||||
const allocator = std.testing.allocator;
|
||||
var reader = std.Io.Reader.fixed(data);
|
||||
const records = try parse(&reader, allocator, .{});
|
||||
defer records.deinit(allocator);
|
||||
try std.testing.expectEqual(@as(usize, 1), records.items.len);
|
||||
defer records.deinit();
|
||||
try std.testing.expectEqual(@as(usize, 1), records.records.items.len);
|
||||
}
|
||||
|
||||
test "long format from README - generic data structures" {
|
||||
|
|
@ -548,7 +683,7 @@ test "long format from README - generic data structures" {
|
|||
\\# empty lines ignored
|
||||
\\
|
||||
\\key::string value, with any data except a \n. an optional string length between the colons
|
||||
\\this is a number:num: 5
|
||||
\\this is a number:num: 5
|
||||
\\null value:null:
|
||||
\\array::array's don't exist. Use json or toml or something
|
||||
\\data with newlines must have a length:7:foo
|
||||
|
|
@ -557,7 +692,7 @@ test "long format from README - generic data structures" {
|
|||
\\ # Empty line separates records
|
||||
\\
|
||||
\\key::this is the second record
|
||||
\\this is a number:num:42
|
||||
\\this is a number:num:42
|
||||
\\null value:null:
|
||||
\\array::array's still don't exist
|
||||
\\data with newlines must have a length::single line
|
||||
|
|
@ -567,35 +702,35 @@ test "long format from README - generic data structures" {
|
|||
const allocator = std.testing.allocator;
|
||||
var reader = std.Io.Reader.fixed(data);
|
||||
const records = try parse(&reader, allocator, .{});
|
||||
defer records.deinit(allocator);
|
||||
try std.testing.expectEqual(@as(usize, 2), records.items.len);
|
||||
const first = records.items[0];
|
||||
try std.testing.expectEqual(@as(usize, 6), first.items.len);
|
||||
try std.testing.expectEqualStrings("key", first.items[0].key);
|
||||
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", first.items[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("this is a number", first.items[1].key);
|
||||
try std.testing.expectEqual(@as(f128, 5), first.items[1].value.?.number);
|
||||
try std.testing.expectEqualStrings("null value", first.items[2].key);
|
||||
try std.testing.expect(first.items[2].value == null);
|
||||
try std.testing.expectEqualStrings("array", first.items[3].key);
|
||||
try std.testing.expectEqualStrings("array's don't exist. Use json or toml or something", first.items[3].value.?.string);
|
||||
try std.testing.expectEqualStrings("data with newlines must have a length", first.items[4].key);
|
||||
try std.testing.expectEqualStrings("foo\nbar", first.items[4].value.?.string);
|
||||
try std.testing.expectEqualStrings("boolean value", first.items[5].key);
|
||||
try std.testing.expect(!first.items[5].value.?.boolean);
|
||||
defer records.deinit();
|
||||
try std.testing.expectEqual(@as(usize, 2), records.records.items.len);
|
||||
const first = records.records.items[0];
|
||||
try std.testing.expectEqual(@as(usize, 6), first.fields.len);
|
||||
try std.testing.expectEqualStrings("key", first.fields[0].key);
|
||||
try std.testing.expectEqualStrings("string value, with any data except a \\n. an optional string length between the colons", first.fields[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("this is a number", first.fields[1].key);
|
||||
try std.testing.expectEqual(@as(f64, 5), first.fields[1].value.?.number);
|
||||
try std.testing.expectEqualStrings("null value", first.fields[2].key);
|
||||
try std.testing.expect(first.fields[2].value == null);
|
||||
try std.testing.expectEqualStrings("array", first.fields[3].key);
|
||||
try std.testing.expectEqualStrings("array's don't exist. Use json or toml or something", first.fields[3].value.?.string);
|
||||
try std.testing.expectEqualStrings("data with newlines must have a length", first.fields[4].key);
|
||||
try std.testing.expectEqualStrings("foo\nbar", first.fields[4].value.?.string);
|
||||
try std.testing.expectEqualStrings("boolean value", first.fields[5].key);
|
||||
try std.testing.expect(!first.fields[5].value.?.boolean);
|
||||
|
||||
const second = records.items[1];
|
||||
try std.testing.expectEqual(@as(usize, 5), second.items.len);
|
||||
try std.testing.expectEqualStrings("key", second.items[0].key);
|
||||
try std.testing.expectEqualStrings("this is the second record", second.items[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("this is a number", second.items[1].key);
|
||||
try std.testing.expectEqual(@as(f128, 42), second.items[1].value.?.number);
|
||||
try std.testing.expectEqualStrings("null value", second.items[2].key);
|
||||
try std.testing.expect(second.items[2].value == null);
|
||||
try std.testing.expectEqualStrings("array", second.items[3].key);
|
||||
try std.testing.expectEqualStrings("array's still don't exist", second.items[3].value.?.string);
|
||||
try std.testing.expectEqualStrings("data with newlines must have a length", second.items[4].key);
|
||||
try std.testing.expectEqualStrings("single line", second.items[4].value.?.string);
|
||||
const second = records.records.items[1];
|
||||
try std.testing.expectEqual(@as(usize, 5), second.fields.len);
|
||||
try std.testing.expectEqualStrings("key", second.fields[0].key);
|
||||
try std.testing.expectEqualStrings("this is the second record", second.fields[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("this is a number", second.fields[1].key);
|
||||
try std.testing.expectEqual(@as(f64, 42), second.fields[1].value.?.number);
|
||||
try std.testing.expectEqualStrings("null value", second.fields[2].key);
|
||||
try std.testing.expect(second.fields[2].value == null);
|
||||
try std.testing.expectEqualStrings("array", second.fields[3].key);
|
||||
try std.testing.expectEqualStrings("array's still don't exist", second.fields[3].value.?.string);
|
||||
try std.testing.expectEqualStrings("data with newlines must have a length", second.fields[4].key);
|
||||
try std.testing.expectEqualStrings("single line", second.fields[4].value.?.string);
|
||||
}
|
||||
|
||||
test "compact format from README - generic data structures" {
|
||||
|
|
@ -610,25 +745,127 @@ test "compact format from README - generic data structures" {
|
|||
var reader = std.Io.Reader.fixed(data);
|
||||
// We want "parse" and "parseLeaky" probably. Second parameter is a diagnostics
|
||||
const records = try parse(&reader, allocator, .{});
|
||||
defer records.deinit(allocator);
|
||||
try std.testing.expectEqual(@as(usize, 2), records.items.len);
|
||||
const first = records.items[0];
|
||||
try std.testing.expectEqual(@as(usize, 6), first.items.len);
|
||||
try std.testing.expectEqualStrings("key", first.items[0].key);
|
||||
try std.testing.expectEqualStrings("string value must have a length between colons or end with a comma", first.items[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("this is a number", first.items[1].key);
|
||||
try std.testing.expectEqual(@as(f128, 5), first.items[1].value.?.number);
|
||||
try std.testing.expectEqualStrings("null value", first.items[2].key);
|
||||
try std.testing.expect(first.items[2].value == null);
|
||||
try std.testing.expectEqualStrings("array", first.items[3].key);
|
||||
try std.testing.expectEqualStrings("array's don't exist. Use json or toml or something", first.items[3].value.?.string);
|
||||
try std.testing.expectEqualStrings("data with newlines must have a length", first.items[4].key);
|
||||
try std.testing.expectEqualStrings("foo\nbar", first.items[4].value.?.string);
|
||||
try std.testing.expectEqualStrings("boolean value", first.items[5].key);
|
||||
try std.testing.expect(!first.items[5].value.?.boolean);
|
||||
defer records.deinit();
|
||||
try std.testing.expectEqual(@as(usize, 2), records.records.items.len);
|
||||
const first = records.records.items[0];
|
||||
try std.testing.expectEqual(@as(usize, 6), first.fields.len);
|
||||
try std.testing.expectEqualStrings("key", first.fields[0].key);
|
||||
try std.testing.expectEqualStrings("string value must have a length between colons or end with a comma", first.fields[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("this is a number", first.fields[1].key);
|
||||
try std.testing.expectEqual(@as(f64, 5), first.fields[1].value.?.number);
|
||||
try std.testing.expectEqualStrings("null value", first.fields[2].key);
|
||||
try std.testing.expect(first.fields[2].value == null);
|
||||
try std.testing.expectEqualStrings("array", first.fields[3].key);
|
||||
try std.testing.expectEqualStrings("array's don't exist. Use json or toml or something", first.fields[3].value.?.string);
|
||||
try std.testing.expectEqualStrings("data with newlines must have a length", first.fields[4].key);
|
||||
try std.testing.expectEqualStrings("foo\nbar", first.fields[4].value.?.string);
|
||||
try std.testing.expectEqualStrings("boolean value", first.fields[5].key);
|
||||
try std.testing.expect(!first.fields[5].value.?.boolean);
|
||||
|
||||
const second = records.items[1];
|
||||
try std.testing.expectEqual(@as(usize, 1), second.items.len);
|
||||
try std.testing.expectEqualStrings("key", second.items[0].key);
|
||||
try std.testing.expectEqualStrings("this is the second record", second.items[0].value.?.string);
|
||||
const second = records.records.items[1];
|
||||
try std.testing.expectEqual(@as(usize, 1), second.fields.len);
|
||||
try std.testing.expectEqualStrings("key", second.fields[0].key);
|
||||
try std.testing.expectEqualStrings("this is the second record", second.fields[0].value.?.string);
|
||||
}
|
||||
// Formats the same records three ways (long format with an EOF marker, long
// format without one, and compact format), checks the exact text produced,
// and parses the long and compact output back to confirm a round trip.
test "format all the things" {
|
||||
// Two identical records, each holding a string, a null, a bytes and a number value.
const records: []const Record = &.{
|
||||
.{ .fields = &.{
|
||||
.{ .key = "foo", .value = .{ .string = "bar" } },
|
||||
.{ .key = "foo", .value = null },
|
||||
.{ .key = "foo", .value = .{ .bytes = "bar" } },
|
||||
.{ .key = "foo", .value = .{ .number = 42 } },
|
||||
} },
|
||||
.{ .fields = &.{
|
||||
.{ .key = "foo", .value = .{ .string = "bar" } },
|
||||
.{ .key = "foo", .value = null },
|
||||
.{ .key = "foo", .value = .{ .bytes = "bar" } },
|
||||
.{ .key = "foo", .value = .{ .number = 42 } },
|
||||
} },
|
||||
};
|
||||
// One scratch buffer is shared by all three bufPrint calls below, so each
// formatted slice is only valid until the next bufPrint overwrites it.
var buf: [1024]u8 = undefined;
|
||||
// Long format with emit_eof set.
const formatted_eof = try std.fmt.bufPrint(
|
||||
&buf,
|
||||
"{f}",
|
||||
.{fmt(records, .{ .long_format = true, .emit_eof = true })},
|
||||
);
|
||||
// Expected text: the #!requireeof header and #!eof trailer are present, and
// the bytes value "bar" is written as "binary:YmFy" (base64 of "bar").
try std.testing.expectEqualStrings(
|
||||
\\#!srfv1
|
||||
\\#!long
|
||||
\\#!requireeof
|
||||
\\foo::bar
|
||||
\\foo:null:
|
||||
\\foo:binary:YmFy
|
||||
\\foo:num:42
|
||||
\\
|
||||
\\foo::bar
|
||||
\\foo:null:
|
||||
\\foo:binary:YmFy
|
||||
\\foo:num:42
|
||||
\\#!eof
|
||||
\\
|
||||
, formatted_eof);
|
||||
|
||||
// Long format without emit_eof: formatted_eof is overwritten from here on.
const formatted = try std.fmt.bufPrint(
|
||||
&buf,
|
||||
"{f}",
|
||||
.{fmt(records, .{ .long_format = true })},
|
||||
);
|
||||
// Expected text: same records, but no #!requireeof header and no #!eof trailer.
try std.testing.expectEqualStrings(
|
||||
\\#!srfv1
|
||||
\\#!long
|
||||
\\foo::bar
|
||||
\\foo:null:
|
||||
\\foo:binary:YmFy
|
||||
\\foo:num:42
|
||||
\\
|
||||
\\foo::bar
|
||||
\\foo:null:
|
||||
\\foo:binary:YmFy
|
||||
\\foo:num:42
|
||||
\\
|
||||
, formatted);
|
||||
|
||||
// Round trip and make sure we get equivalent objects back
var formatted_reader = std.Io.Reader.fixed(formatted);
|
||||
const parsed = try parse(&formatted_reader, std.testing.allocator, .{});
|
||||
defer parsed.deinit();
|
||||
// Compared before buf is reused for the compact output below.
try std.testing.expectEqualDeep(records, parsed.records.items);
|
||||
|
||||
// Default options: the expected text below has no #!long header and puts
// each record on one line with comma-separated fields.
const compact = try std.fmt.bufPrint(
|
||||
&buf,
|
||||
"{f}",
|
||||
.{fmt(records, .{})},
|
||||
);
|
||||
try std.testing.expectEqualStrings(
|
||||
\\#!srfv1
|
||||
\\foo::bar,foo:null:,foo:binary:YmFy,foo:num:42
|
||||
\\foo::bar,foo:null:,foo:binary:YmFy,foo:num:42
|
||||
\\
|
||||
, compact);
|
||||
// Round trip and make sure we get equivalent objects back
var compact_reader = std.Io.Reader.fixed(compact);
|
||||
const parsed_compact = try parse(&compact_reader, std.testing.allocator, .{});
|
||||
defer parsed_compact.deinit();
|
||||
try std.testing.expectEqualDeep(records, parsed_compact.records.items);
|
||||
}
|
||||
// Regression test for a length-prefixed value that ends exactly at the end of
// the input in compact format.
test "compact format length-prefixed string as last field" {
|
||||
// When a length-prefixed value is the last field on the line,
|
||||
// rest_of_data.len == size exactly. If the parser's length check (line 216
|
||||
// when this test was written) uses strict > instead of >=, parsing falls
|
||||
// through to the multi-line path where size - rest_of_data.len - 1 underflows.
|
||||
// No trailing newline after the last line: "desc:5:world" declares a length
// of 5 and "world" is the final 5 bytes of the data.
const data =
|
||||
\\#!srfv1
|
||||
\\name::alice,desc:5:world
|
||||
;
|
||||
const allocator = std.testing.allocator;
|
||||
var reader = std.Io.Reader.fixed(data);
|
||||
const records = try parse(&reader, allocator, .{});
|
||||
defer records.deinit();
|
||||
// Expect one record with both fields intact, the second read via its length prefix.
try std.testing.expectEqual(@as(usize, 1), records.records.items.len);
|
||||
const rec = records.records.items[0];
|
||||
try std.testing.expectEqual(@as(usize, 2), rec.fields.len);
|
||||
try std.testing.expectEqualStrings("name", rec.fields[0].key);
|
||||
try std.testing.expectEqualStrings("alice", rec.fields[0].value.?.string);
|
||||
try std.testing.expectEqualStrings("desc", rec.fields[1].key);
|
||||
try std.testing.expectEqualStrings("world", rec.fields[1].value.?.string);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue