update readme and docs
This commit is contained in:
parent
fcce8e48d4
commit
1f922c0b33
3 changed files with 397 additions and 85 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
||||||
.zig-cache/
|
.zig-cache/
|
||||||
zig-out/
|
zig-out/
|
||||||
|
docs/
|
||||||
|
|
|
||||||
111
README.md
111
README.md
|
|
@ -4,10 +4,12 @@ SRF is a minimal data format designed for L2 caches and simple structured storag
|
||||||
|
|
||||||
**Features:**
|
**Features:**
|
||||||
- No escaping required - use length-prefixed strings for complex data
|
- No escaping required - use length-prefixed strings for complex data
|
||||||
- Single-pass parsing with minimal memory allocation
|
- Single-pass streaming parser with minimal memory allocation
|
||||||
- Basic type system (string, num, bool, null, binary) with explicit type hints
|
- Basic type system (string, num, bool, null, binary) with explicit type hints
|
||||||
- Compact format for machine generation, long format for human editing
|
- Compact format for machine generation, long format for human editing
|
||||||
- Built-in corruption detection with optional EOF markers
|
- Built-in corruption detection with optional EOF markers
|
||||||
|
- Iterator-based API for zero-copy, low-allocation streaming
|
||||||
|
- Comptime type coercion directly from the iterator (no intermediate collections)
|
||||||
|
|
||||||
**When to use SRF:**
|
**When to use SRF:**
|
||||||
- L2 caches that need occasional human inspection
|
- L2 caches that need occasional human inspection
|
||||||
|
|
@ -20,7 +22,70 @@ SRF is a minimal data format designed for L2 caches and simple structured storag
|
||||||
- Schema validation requirements
|
- Schema validation requirements
|
||||||
- Arrays or object hierarchies (arrays can be managed in the data itself, however)
|
- Arrays or object hierarchies (arrays can be managed in the data itself, however)
|
||||||
|
|
||||||
Long format:
|
## Parsing API
|
||||||
|
|
||||||
|
SRF provides two parsing APIs. The **iterator API is preferred** for most use cases
|
||||||
|
as it avoids collecting all records and fields into memory at once.
|
||||||
|
|
||||||
|
### Iterator (preferred)
|
||||||
|
|
||||||
|
The `iterator` function returns a `RecordIterator` that streams records lazily.
|
||||||
|
Each call to `RecordIterator.next` yields a `FieldIterator` for the next record,
|
||||||
|
and each call to `FieldIterator.next` yields individual `Field` values. No
|
||||||
|
intermediate slices or ArrayLists are allocated -- fields are yielded one at a
|
||||||
|
time directly from the parser state.
|
||||||
|
|
||||||
|
For type coercion, `FieldIterator.to(T)` consumes the remaining fields in the
|
||||||
|
current record and maps them into a Zig struct or tagged union at comptime,
|
||||||
|
with zero additional allocations beyond what field parsing itself requires. Those
|
||||||
|
allocations can be reduced further with the parsing option `.alloc_strings = false`.
|
||||||
|
|
||||||
|
```zig
|
||||||
|
const srf = @import("srf");
|
||||||
|
|
||||||
|
const Data = struct {
|
||||||
|
name: []const u8,
|
||||||
|
age: u8,
|
||||||
|
active: bool = false,
|
||||||
|
};
|
||||||
|
|
||||||
|
var reader = std.Io.Reader.fixed(raw_data);
|
||||||
|
var ri = try srf.iterator(&reader, allocator, .{});
|
||||||
|
defer ri.deinit();
|
||||||
|
|
||||||
|
while (try ri.next()) |fi| {
|
||||||
|
const record = try fi.to(Data);
|
||||||
|
// process record...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Batch parse
|
||||||
|
|
||||||
|
The `parse` function collects all records into memory at once, returning a
|
||||||
|
`Parsed` struct with a `records: []Record` slice. This is built on top of
|
||||||
|
the iterator internally. It is convenient when you need random access to all
|
||||||
|
records, but costs more memory since every field is collected into ArrayLists
|
||||||
|
before being converted to owned slices.
|
||||||
|
|
||||||
|
```zig
|
||||||
|
const srf = @import("srf");
|
||||||
|
|
||||||
|
var reader = std.Io.Reader.fixed(raw_data);
|
||||||
|
const parsed = try srf.parse(&reader, allocator, .{});
|
||||||
|
defer parsed.deinit();
|
||||||
|
|
||||||
|
for (parsed.records) |record| {
|
||||||
|
const data = try record.to(Data);
|
||||||
|
// process data...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Formats
|
||||||
|
|
||||||
|
### Long format
|
||||||
|
|
||||||
|
Long format uses newlines to delimit fields and blank lines to separate records.
|
||||||
|
It is human-friendly and suitable for hand-edited configuration files.
|
||||||
|
|
||||||
```
|
```
|
||||||
#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
|
#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
|
||||||
|
|
@ -46,7 +111,11 @@ data with newlines must have a length::single line
|
||||||
#!eof # eof marker, useful to make sure your file wasn't cut in half. Only considered if requireeof set at top
|
#!eof # eof marker, useful to make sure your file wasn't cut in half. Only considered if requireeof set at top
|
||||||
```
|
```
|
||||||
|
|
||||||
compact format:
|
### Compact format
|
||||||
|
|
||||||
|
Compact format uses commas to delimit fields and newlines to separate records.
|
||||||
|
It is designed for machine generation where space efficiency matters.
|
||||||
|
|
||||||
```
|
```
|
||||||
#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
|
#!srfv1 # mandatory comment with format and version. Parser instructions start with #!
|
||||||
key::string value must have a length between colons or end with a comma,this is a number:num:5 ,null value:null:,array::array's don't exist. Use json or toml or something,data with newlines must have a length:7:foo
|
key::string value must have a length between colons or end with a comma,this is a number:num:5 ,null value:null:,array::array's don't exist. Use json or toml or something,data with newlines must have a length:7:foo
|
||||||
|
|
@ -54,6 +123,38 @@ bar,boolean value:bool:false
|
||||||
key::this is the second record
|
key::this is the second record
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Serialization
|
||||||
|
|
||||||
|
SRF supports serializing Zig structs, unions, and enums back to SRF format.
|
||||||
|
Use `Record.from` to create a record from a typed value, or `fmtFrom` to
|
||||||
|
format a slice of values directly to a writer.
|
||||||
|
|
||||||
|
```zig
|
||||||
|
const srf = @import("srf");
|
||||||
|
|
||||||
|
const all_data: []const Data = &.{
|
||||||
|
.{ .name = "alice", .age = 30, .active = true },
|
||||||
|
.{ .name = "bob", .age = 25 },
|
||||||
|
};
|
||||||
|
var buf: [4096]u8 = undefined;
|
||||||
|
const formatted = try std.fmt.bufPrint(&buf, "{f}", .{
|
||||||
|
srf.fmtFrom(Data, allocator, all_data, .{ .long_format = true }),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Type System
|
||||||
|
|
||||||
|
Fields follow the format `key:type_hint:value`:
|
||||||
|
|
||||||
|
| Type | Hint | Example |
|
||||||
|
|------------------------|-----------------------|-------------------------|
|
||||||
|
| String | *(empty)* or `string` | `name::alice` |
|
||||||
|
| Number (internally f64)| `num` | `age:num:30` |
|
||||||
|
| Boolean | `bool` | `active:bool:true` |
|
||||||
|
| Null | `null` | `missing:null:` |
|
||||||
|
| Binary | `binary` | `data:binary:base64...` |
|
||||||
|
| Length-prefixed string | *(byte count)* | `bio:12:hello\nworld!` |
|
||||||
|
|
||||||
## Implementation Concerns
|
## Implementation Concerns
|
||||||
|
|
||||||
**Parser robustness:**
|
**Parser robustness:**
|
||||||
|
|
@ -87,5 +188,5 @@ key::this is the second record
|
||||||
|
|
||||||
## AI Use
|
## AI Use
|
||||||
|
|
||||||
AI was used in this project for comments, parts of the README, and unit test
|
AI was used in this project for comments, parts of the README, benchmarking code,
|
||||||
generation. All other code is human generated.
|
build.zig and unit test generation. All other code is human generated.
|
||||||
|
|
|
||||||
370
src/srf.zig
370
src/srf.zig
|
|
@ -67,25 +67,43 @@ const ValueWithMetaData = struct {
|
||||||
error_parsing: bool = false,
|
error_parsing: bool = false,
|
||||||
reader_advanced: bool = false,
|
reader_advanced: bool = false,
|
||||||
};
|
};
|
||||||
|
/// A parsed SRF value. Each field in a record has a key and an optional `Value`.
|
||||||
pub const Value = union(enum) {
|
pub const Value = union(enum) {
|
||||||
|
/// A numeric value, parsed from the `num` type hint.
|
||||||
number: f64,
|
number: f64,
|
||||||
|
|
||||||
/// Bytes are converted to/from base64, string is not
|
/// Raw bytes decoded from base64, parsed from the `binary` type hint.
|
||||||
bytes: []const u8,
|
bytes: []const u8,
|
||||||
|
|
||||||
/// String is not touched in any way
|
/// A string value, either delimiter-terminated or length-prefixed.
|
||||||
|
/// Not transformed during parsing (no escaping/unescaping), but will be
|
||||||
|
/// allocated if .alloc_strings = true is passed during parsing, or if
|
||||||
|
/// a multi-line string is found in the data
|
||||||
string: []const u8,
|
string: []const u8,
|
||||||
|
|
||||||
|
/// A boolean value, parsed from the `bool` type hint (`true` or `false`).
|
||||||
boolean: bool,
|
boolean: bool,
|
||||||
|
|
||||||
// pub fn format(self: Value, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
/// Parses a single SRF value, without the key. The whole field is:
|
||||||
// switch (self) {
|
///
|
||||||
// .number => try writer.print("num: {d}", .{self.number}),
|
/// SRF Field: 'foo:3:bar'
|
||||||
// .bytes => try writer.print("bytes: {x}", .{self.bytes}),
|
///
|
||||||
// .string => try writer.print("string: {s}", .{self.string}),
|
/// The value we expect to be sent to this function is:
|
||||||
// .boolean => try writer.print("boolean: {}", .{self.boolean}),
|
///
|
||||||
// }
|
/// SRF Value: '3:bar'
|
||||||
// }
|
///
|
||||||
|
/// The value is allowed to have extra data...for instance, in compact format
|
||||||
|
/// the value above can be represented by:
|
||||||
|
///
|
||||||
|
/// SRF Value: '3:bar,next_field::foobar'
|
||||||
|
///
|
||||||
|
/// and the next field will be ignored
|
||||||
|
///
|
||||||
|
/// This function may need to advance the reader in the case of multi-line
|
||||||
|
/// strings. It may also allocate data in the case of base64 (binary) values
|
||||||
|
/// as well as multi-line strings. Metadata is returned to assist in tracking parse errors and whether the reader was advanced.
|
||||||
|
///
|
||||||
|
/// This function is intended to be used by the SRF parser
|
||||||
pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
|
pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
|
||||||
const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':');
|
const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':');
|
||||||
if (type_val_sep_raw == null) {
|
if (type_val_sep_raw == null) {
|
||||||
|
|
@ -244,22 +262,15 @@ pub const Value = union(enum) {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// A field has a key and a value, but the value may be null
|
/// A single key-value pair within a record. The key is always a string.
|
||||||
|
/// The value may be `null` (from the `null` type hint) or one of the
|
||||||
|
/// `Value` variants. Yielded by `RecordIterator.FieldIterator.next`.
|
||||||
pub const Field = struct {
|
pub const Field = struct {
|
||||||
key: []const u8,
|
key: []const u8,
|
||||||
value: ?Value,
|
value: ?Value,
|
||||||
};
|
};
|
||||||
|
|
||||||
fn coerce(name: []const u8, comptime T: type, val: ?Value) !T {
|
fn coerce(name: []const u8, comptime T: type, val: ?Value) !T {
|
||||||
// Here's the deduplicated set of field types that coerce needs to handle:
|
|
||||||
// Direct from SRF values:
|
|
||||||
// Need parsing from string:
|
|
||||||
// - Date, ?Date -- Date.parse(string)
|
|
||||||
//
|
|
||||||
// Won't work with Record.to(T) generically:
|
|
||||||
// - []const OptionContract -- nested sub-records (OptionsChain has calls/puts arrays)
|
|
||||||
// - ?[]const Holding, ?[]const SectorWeight -- nested sub-records in EtfProfile
|
|
||||||
//
|
|
||||||
const ti = @typeInfo(T);
|
const ti = @typeInfo(T);
|
||||||
if (val == null and ti != .optional)
|
if (val == null and ti != .optional)
|
||||||
return error.NullValueCannotBeAssignedToNonNullField;
|
return error.NullValueCannotBeAssignedToNonNullField;
|
||||||
|
|
@ -307,23 +318,31 @@ fn coerce(name: []const u8, comptime T: type, val: ?Value) !T {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A record has a list of fields, with no assumptions regarding duplication,
|
/// A record is an ordered list of `Field` values, with no uniqueness constraints
|
||||||
// etc. This is for parsing speed, but also for more flexibility in terms of
|
/// on keys. This allows flexible use cases such as encoding arrays by repeating
|
||||||
// use cases. One can make a defacto array out of this structure by having
|
/// the same key. In long form, this could look like:
|
||||||
// something like:
|
///
|
||||||
//
|
/// ```txt
|
||||||
// arr:string:foo
|
/// arr:string:foo
|
||||||
// arr:string:bar
|
/// arr:string:bar
|
||||||
//
|
/// ```
|
||||||
// and when you coerce to zig struct have an array .arr that gets populated
|
///
|
||||||
// with strings "foo" and "bar".
|
/// Records are returned by the batch `parse` function. For streaming, prefer
|
||||||
|
/// `iterator` which yields fields one at a time via `RecordIterator.FieldIterator`
|
||||||
|
/// without collecting them into a slice.
|
||||||
pub const Record = struct {
|
pub const Record = struct {
|
||||||
fields: []const Field,
|
fields: []const Field,
|
||||||
|
|
||||||
|
/// Returns a `RecordFormatter` suitable for use with `std.fmt.bufPrint`
|
||||||
|
/// or any `std.Io.Writer`. Use `FormatOptions` to control compact vs
|
||||||
|
/// long output format.
|
||||||
pub fn fmt(value: Record, options: FormatOptions) RecordFormatter {
|
pub fn fmt(value: Record, options: FormatOptions) RecordFormatter {
|
||||||
return .{ .value = value, .options = options };
|
return .{ .value = value, .options = options };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Looks up the first `Field` whose key matches `field_name`, or returns
|
||||||
|
/// `null` if no such field exists. Only the first occurrence is returned;
|
||||||
|
/// duplicate keys are not considered.
|
||||||
pub fn firstFieldByName(self: Record, field_name: []const u8) ?Field {
|
pub fn firstFieldByName(self: Record, field_name: []const u8) ?Field {
|
||||||
for (self.fields) |f|
|
for (self.fields) |f|
|
||||||
if (std.mem.eql(u8, f.key, field_name)) return f;
|
if (std.mem.eql(u8, f.key, field_name)) return f;
|
||||||
|
|
@ -501,7 +520,20 @@ pub const Record = struct {
|
||||||
return OwnedRecord(T).init(allocator, val);
|
return OwnedRecord(T).init(allocator, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Coerce Record to a type. Does not handle fields with arrays
|
/// Coerce a `Record` to a Zig struct or tagged union. For each field in `T`,
|
||||||
|
/// the first matching `Field` by name is coerced to the target type. Fields
|
||||||
|
/// with default values in `T` that are not present in the data use their
|
||||||
|
/// defaults. Missing fields without defaults return an error. Note that
|
||||||
|
/// by this logic, multiple fields with the same name will have all but the
|
||||||
|
/// first value silently ignored.
|
||||||
|
///
|
||||||
|
/// For tagged unions, the active variant is determined by a field named
|
||||||
|
/// `"active_tag"` (or the value of `T.srf_tag_field` if declared). The
|
||||||
|
/// remaining fields are coerced into the payload struct of that variant.
|
||||||
|
///
|
||||||
|
/// For streaming data without collecting fields first, prefer
|
||||||
|
/// `RecordIterator.FieldIterator.to` which avoids the intermediate
|
||||||
|
/// `[]Field` allocation entirely.
|
||||||
pub fn to(self: Record, comptime T: type) !T {
|
pub fn to(self: Record, comptime T: type) !T {
|
||||||
const ti = @typeInfo(T);
|
const ti = @typeInfo(T);
|
||||||
|
|
||||||
|
|
@ -547,26 +579,39 @@ pub const Record = struct {
|
||||||
}
|
}
|
||||||
return error.CoercionNotPossible;
|
return error.CoercionNotPossible;
|
||||||
}
|
}
|
||||||
|
test to {
|
||||||
|
// Example: coerce a batch-parsed Record into a Zig struct.
|
||||||
|
const Data = struct {
|
||||||
|
city: []const u8,
|
||||||
|
pop: u8,
|
||||||
|
};
|
||||||
|
const data =
|
||||||
|
\\#!srfv1
|
||||||
|
\\city::springfield,pop:num:30
|
||||||
|
;
|
||||||
|
const allocator = std.testing.allocator;
|
||||||
|
var reader = std.Io.Reader.fixed(data);
|
||||||
|
const parsed = try parse(&reader, allocator, .{});
|
||||||
|
defer parsed.deinit();
|
||||||
|
|
||||||
|
const result = try parsed.records[0].to(Data);
|
||||||
|
try std.testing.expectEqualStrings("springfield", result.city);
|
||||||
|
try std.testing.expectEqual(@as(u8, 30), result.pop);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The Parsed struct is equivalent to Parsed(T) in std.json. Since most are
|
/// A streaming record iterator for parsing SRF data. This is the preferred
|
||||||
/// familiar with std.json, it differs in the following ways:
|
/// parsing API because it avoids collecting all records and fields into memory
|
||||||
|
/// at once. Created by calling `iterator`.
|
||||||
///
|
///
|
||||||
/// * There is a records field instead of a value field. In json, one type of
|
/// Each call to `next` yields a `FieldIterator` for one record. Fields within
|
||||||
/// value is an array. SRF does not have an array data type, but the set of
|
/// that record are consumed lazily via `FieldIterator.next` or coerced directly
|
||||||
/// records is an array. json as a format is structred as a single object at
|
/// into a Zig type via `FieldIterator.to`. All allocations go through an
|
||||||
/// the outermost
|
/// internal arena; call `deinit` to release everything when done.
|
||||||
///
|
///
|
||||||
/// * This is not generic. In SRF, it is a separate function to bind the list
|
/// If `RecordIterator.next` is called before the previous `FieldIterator` has
|
||||||
/// of records to a specific data type. This will add some (hopefully minimal)
|
/// been fully consumed, the remaining fields are automatically drained to keep
|
||||||
/// overhead, but also avoid conflating parsing from the coercion from general
|
/// the parser state consistent.
|
||||||
/// type to specifics, and avoids answering questions like "what if I have
|
|
||||||
/// 15 values for the same key" until you're actually dealing with that problem
|
|
||||||
/// (see std.json.ParseOptions duplicate_field_behavior and ignore_unknown_fields)
|
|
||||||
///
|
|
||||||
/// When implemented, there will include a pub fn bind(self: Parsed, comptime T: type, options, BindOptions) BindError![]T
|
|
||||||
/// function. The options will include things related to duplicate handling and
|
|
||||||
/// missing fields
|
|
||||||
pub const RecordIterator = struct {
|
pub const RecordIterator = struct {
|
||||||
arena: *std.heap.ArenaAllocator,
|
arena: *std.heap.ArenaAllocator,
|
||||||
/// optional expiry time for the data. Useful for caching
|
/// optional expiry time for the data. Useful for caching
|
||||||
|
|
@ -611,9 +656,19 @@ pub const RecordIterator = struct {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Advances to the next record in the stream, returning a `FieldIterator`
|
||||||
|
/// for accessing its fields. Returns `null` when all records have been
|
||||||
|
/// consumed.
|
||||||
|
///
|
||||||
|
/// If the previous `FieldIterator` was not fully drained, its remaining
|
||||||
|
/// fields are consumed automatically to keep the reader positioned
|
||||||
|
/// correctly. It is safe (but unnecessary) to fully consume the
|
||||||
|
/// `FieldIterator` before calling `next` again.
|
||||||
|
///
|
||||||
|
/// Note that all state is stored in a shared area accessible to both
|
||||||
|
/// the `RecordIterator` and the `FieldIterator`, so there is no need to
|
||||||
|
/// store the return value as a variable.
|
||||||
pub fn next(self: RecordIterator) !?FieldIterator {
|
pub fn next(self: RecordIterator) !?FieldIterator {
|
||||||
// TODO: we need to capture the fieldIterator here and make sure it's run
|
|
||||||
// to the ground to keep our state intact
|
|
||||||
const state = self.state;
|
const state = self.state;
|
||||||
if (state.field_iterator) |f| {
|
if (state.field_iterator) |f| {
|
||||||
// We need to finish the fields on the previous record
|
// We need to finish the fields on the previous record
|
||||||
|
|
@ -666,18 +721,24 @@ pub const RecordIterator = struct {
|
||||||
return state.field_iterator.?;
|
return state.field_iterator.?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Iterates over the fields within a single record. Yielded by
|
||||||
|
/// `RecordIterator.next`. Each call to `next` returns the next `Field`
|
||||||
|
/// in the record, or `null` when the record boundary is reached.
|
||||||
|
///
|
||||||
|
/// For direct type coercion without manually iterating fields, use `to`.
|
||||||
pub const FieldIterator = struct {
|
pub const FieldIterator = struct {
|
||||||
state: *State,
|
state: *State,
|
||||||
arena: *std.heap.ArenaAllocator,
|
arena: *std.heap.ArenaAllocator,
|
||||||
|
|
||||||
|
/// Returns the next `Field` in the current record, or `null` when
|
||||||
|
/// the record boundary has been reached. After `null` is returned,
|
||||||
|
/// subsequent calls continue to return `null`.
|
||||||
pub fn next(self: FieldIterator) !?Field {
|
pub fn next(self: FieldIterator) !?Field {
|
||||||
const state = self.state;
|
const state = self.state;
|
||||||
const aa = self.arena.allocator();
|
const aa = self.arena.allocator();
|
||||||
// Main parsing. We already have the first line of data, which could
|
// Main parsing. We already have the first line of data, which could
|
||||||
// be a record (compact format) or a key/value pair (long format)
|
// be a record (compact format) or a key/value pair (long format)
|
||||||
|
|
||||||
// log.debug("", .{});
|
|
||||||
log.debug("current line:{?s}", .{state.current_line});
|
|
||||||
if (state.current_line == null) return null;
|
if (state.current_line == null) return null;
|
||||||
if (state.end_of_record_reached) return null;
|
if (state.end_of_record_reached) return null;
|
||||||
// non-blank line, but we could have an eof marker
|
// non-blank line, but we could have an eof marker
|
||||||
|
|
@ -771,7 +832,21 @@ pub const RecordIterator = struct {
|
||||||
return field;
|
return field;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Coerce Record to a type. Does not handle fields with arrays
|
/// Consumes remaining fields in this record and coerces them into a
|
||||||
|
/// Zig struct or tagged union `T`. This is the streaming equivalent of
|
||||||
|
/// `Record.to` -- it performs the same field-name matching and default
|
||||||
|
/// value logic, but reads directly from the parser without building an
|
||||||
|
/// intermediate `[]Field` slice.
|
||||||
|
///
|
||||||
|
/// For structs, fields are matched by name. Only the first occurrence
|
||||||
|
/// of each field name is used; duplicates are ignored. Fields in `T`
|
||||||
|
/// that have default values and are not present in the data use those
|
||||||
|
/// defaults. Missing fields without defaults return an error.
|
||||||
|
///
|
||||||
|
/// For tagged unions, the active tag field must appear first in the
|
||||||
|
/// stream (unlike `Record.to` which can do random access). The tag
|
||||||
|
/// field name defaults to `"active_tag"` or `T.srf_tag_field` if
|
||||||
|
/// declared.
|
||||||
pub fn to(self: FieldIterator, comptime T: type) !T {
|
pub fn to(self: FieldIterator, comptime T: type) !T {
|
||||||
const ti = @typeInfo(T);
|
const ti = @typeInfo(T);
|
||||||
|
|
||||||
|
|
@ -847,13 +922,47 @@ pub const RecordIterator = struct {
|
||||||
}
|
}
|
||||||
return error.CoercionNotPossible;
|
return error.CoercionNotPossible;
|
||||||
}
|
}
|
||||||
|
test to {
|
||||||
|
// Example: coerce fields directly into a Zig struct from the iterator,
|
||||||
|
// without collecting into an intermediate Record. This is the most
|
||||||
|
// allocation-efficient path for typed deserialization.
|
||||||
|
const Data = struct {
|
||||||
|
name: []const u8,
|
||||||
|
score: u8,
|
||||||
|
active: bool = true,
|
||||||
};
|
};
|
||||||
|
const data =
|
||||||
|
\\#!srfv1
|
||||||
|
\\name::alice,score:num:99
|
||||||
|
;
|
||||||
|
const allocator = std.testing.allocator;
|
||||||
|
var reader = std.Io.Reader.fixed(data);
|
||||||
|
var ri = try iterator(&reader, allocator, .{});
|
||||||
|
defer ri.deinit();
|
||||||
|
|
||||||
|
const result = try (try ri.next()).?.to(Data);
|
||||||
|
try std.testing.expectEqualStrings("alice", result.name);
|
||||||
|
try std.testing.expectEqual(@as(u8, 99), result.score);
|
||||||
|
// `active` was not in the data, so the default value is used
|
||||||
|
try std.testing.expect(result.active);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
/// Releases all memory owned by this iterator. This frees the internal
|
||||||
|
/// arena (and all parsed data allocated from it), then frees the arena
|
||||||
|
/// struct itself. After calling `deinit`, any slices or string pointers
|
||||||
|
/// obtained from `FieldIterator.next` or `FieldIterator.to` are invalid.
|
||||||
pub fn deinit(self: RecordIterator) void {
|
pub fn deinit(self: RecordIterator) void {
|
||||||
const child_allocator = self.arena.child_allocator;
|
const child_allocator = self.arena.child_allocator;
|
||||||
self.arena.deinit();
|
self.arena.deinit();
|
||||||
child_allocator.destroy(self.arena);
|
child_allocator.destroy(self.arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if the data has not expired based on the `#!expires`
|
||||||
|
/// directive. If no expiry was specified, the data is always considered
|
||||||
|
/// fresh. Callers should check this after parsing to decide whether to
|
||||||
|
/// use or refresh cached data. Note that data will be returned by parse/
|
||||||
|
/// iterator regardless of freshness. This enables callers to use cached
|
||||||
|
/// data temporarily while refreshing it.
|
||||||
pub fn isFresh(self: RecordIterator) bool {
|
pub fn isFresh(self: RecordIterator) bool {
|
||||||
if (self.expires) |exp|
|
if (self.expires) |exp|
|
||||||
return std.time.timestamp() < exp;
|
return std.time.timestamp() < exp;
|
||||||
|
|
@ -861,8 +970,25 @@ pub const RecordIterator = struct {
|
||||||
// no expiry: always fresh, never frozen
|
// no expiry: always fresh, never frozen
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
test isFresh {
|
||||||
|
// Example: check expiry on parsed data. Data without an #!expires
|
||||||
|
// directive is always considered fresh.
|
||||||
|
const data =
|
||||||
|
\\#!srfv1
|
||||||
|
\\key::value
|
||||||
|
;
|
||||||
|
const allocator = std.testing.allocator;
|
||||||
|
var reader = std.Io.Reader.fixed(data);
|
||||||
|
var ri = try iterator(&reader, allocator, .{});
|
||||||
|
defer ri.deinit();
|
||||||
|
|
||||||
|
// No expiry set, so always fresh
|
||||||
|
try std.testing.expect(ri.isFresh());
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Options controlling SRF parsing behavior. Passed to both `iterator` and
|
||||||
|
/// `parse`.
|
||||||
pub const ParseOptions = struct {
|
pub const ParseOptions = struct {
|
||||||
diagnostics: ?*Diagnostics = null,
|
diagnostics: ?*Diagnostics = null,
|
||||||
|
|
||||||
|
|
@ -903,7 +1029,12 @@ const Directive = union(enum) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
/// Options controlling SRF output formatting. Used by `fmt`, `fmtFrom`,
|
||||||
|
/// `Record.fmt`, and related formatters.
|
||||||
pub const FormatOptions = struct {
|
pub const FormatOptions = struct {
|
||||||
|
/// When `true`, fields are separated by newlines and records by blank
|
||||||
|
/// lines (`#!long` format). When `false` (default), fields are
|
||||||
|
/// comma-separated and records are newline-separated (compact format).
|
||||||
long_format: bool = false,
|
long_format: bool = false,
|
||||||
|
|
||||||
/// Will emit the eof directive as well as requireeof
|
/// Will emit the eof directive as well as requireeof
|
||||||
|
|
@ -918,14 +1049,19 @@ pub const FormatOptions = struct {
|
||||||
emit_directives: bool = true,
|
emit_directives: bool = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Returns a formatter that formats the given value
|
/// Returns a `Formatter` for writing pre-built `Record` values to a writer.
|
||||||
|
/// Suitable for use with `std.fmt.bufPrint` or any `std.Io.Writer` via the
|
||||||
|
/// `{f}` format specifier.
|
||||||
pub fn fmt(value: []const Record, options: FormatOptions) Formatter {
|
pub fn fmt(value: []const Record, options: FormatOptions) Formatter {
|
||||||
return .{ .value = value, .options = options };
|
return .{ .value = value, .options = options };
|
||||||
}
|
}
|
||||||
/// Returns a formatter that formats the given value. This will take a concrete
|
/// Returns a formatter for writing typed Zig values directly to SRF format.
|
||||||
/// type, convert it to the SRF record format automatically (using srfFormat if
|
/// Each value is converted to a `Record` via `Record.from` and written to
|
||||||
/// found), and output to the writer. It is recommended to use a FixedBufferAllocator
|
/// the output. Custom serialization is supported via the `srfFormat` method
|
||||||
/// for the allocator, which is only used for custom srfFormat functions (I think - what about enum tag names?)
|
/// convention on struct/union fields.
|
||||||
|
///
|
||||||
|
/// The `allocator` is used only for fields that require custom formatting
|
||||||
|
/// (via `srfFormat`). A `FixedBufferAllocator` is recommended for this purpose.
|
||||||
pub fn fmtFrom(comptime T: type, allocator: std.mem.Allocator, value: []const T, options: FormatOptions) FromFormatter(T) {
|
pub fn fmtFrom(comptime T: type, allocator: std.mem.Allocator, value: []const T, options: FormatOptions) FromFormatter(T) {
|
||||||
return .{ .value = value, .options = options, .allocator = allocator };
|
return .{ .value = value, .options = options, .allocator = allocator };
|
||||||
}
|
}
|
||||||
|
|
@ -1033,11 +1169,20 @@ pub const RecordFormatter = struct {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// The result of a batch `parse` call. Contains all records collected into a
|
||||||
|
/// single slice. All data is owned by the internal arena; call `deinit` to
|
||||||
|
/// release everything.
|
||||||
|
///
|
||||||
|
/// For streaming without collecting all records, prefer `iterator` which
|
||||||
|
/// returns a `RecordIterator` instead.
|
||||||
pub const Parsed = struct {
|
pub const Parsed = struct {
|
||||||
records: []Record,
|
records: []Record,
|
||||||
arena: *std.heap.ArenaAllocator,
|
arena: *std.heap.ArenaAllocator,
|
||||||
expires: ?i64,
|
expires: ?i64,
|
||||||
|
|
||||||
|
/// Releases all memory owned by this `Parsed` result, including all
|
||||||
|
/// record and field data. After calling `deinit`, any slices or string
|
||||||
|
/// pointers obtained from `records` are invalid.
|
||||||
pub fn deinit(self: Parsed) void {
|
pub fn deinit(self: Parsed) void {
|
||||||
const ca = self.arena.child_allocator;
|
const ca = self.arena.child_allocator;
|
||||||
self.arena.deinit();
|
self.arena.deinit();
|
||||||
|
|
@ -1045,7 +1190,15 @@ pub const Parsed = struct {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// parse function
|
/// Parses all records from the reader into memory, returning a `Parsed` struct
|
||||||
|
/// with a `records` slice. This is a convenience wrapper around `iterator` that
|
||||||
|
/// collects all fields and records into arena-allocated slices.
|
||||||
|
///
|
||||||
|
/// For most use cases, prefer `iterator` instead -- it streams records lazily
|
||||||
|
/// and avoids the cost of collecting all fields into intermediate `ArrayList`s.
|
||||||
|
///
|
||||||
|
/// All returned data is owned by the `Parsed` arena. Call `Parsed.deinit` to
|
||||||
|
/// free everything at once.
|
||||||
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!Parsed {
|
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!Parsed {
|
||||||
var records = std.ArrayList(Record).empty;
|
var records = std.ArrayList(Record).empty;
|
||||||
var it = try iterator(reader, allocator, options);
|
var it = try iterator(reader, allocator, options);
|
||||||
|
|
@ -1073,7 +1226,21 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets an iterator to stream through the data
|
/// Creates a streaming `RecordIterator` for the given reader. This is the
|
||||||
|
/// preferred entry point for parsing SRF data, as it yields records and
|
||||||
|
/// fields lazily without collecting them into slices.
|
||||||
|
///
|
||||||
|
/// The returned iterator owns an arena allocator that holds all parsed data
|
||||||
|
/// (string values, keys, etc.). Call `RecordIterator.deinit` to free
|
||||||
|
/// everything when done. Parsed field data remains valid until `deinit` is
|
||||||
|
/// called.
|
||||||
|
///
|
||||||
|
/// The iterator handles SRF header directives (`#!srfv1`, `#!long`,
|
||||||
|
/// `#!compact`, `#!requireeof`, `#!expires`) automatically during
|
||||||
|
/// construction. Notably, this means you can check `isFresh()` immediately.
|
||||||
|
///
|
||||||
|
/// Also note that, as state is allocated and stored within the `RecordIterator`,
|
||||||
|
/// callers can assign the return value to a constant.
|
||||||
pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordIterator {
|
pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordIterator {
|
||||||
|
|
||||||
// The arena and state are heap-allocated because RecordIterator is returned
|
// The arena and state are heap-allocated because RecordIterator is returned
|
||||||
|
|
@ -1615,11 +1782,10 @@ test "compact format length-prefixed string as last field" {
|
||||||
try std.testing.expectEqualStrings("desc", rec.fields[1].key);
|
try std.testing.expectEqualStrings("desc", rec.fields[1].key);
|
||||||
try std.testing.expectEqualStrings("world", rec.fields[1].value.?.string);
|
try std.testing.expectEqualStrings("world", rec.fields[1].value.?.string);
|
||||||
}
|
}
|
||||||
test "iterator" {
|
test iterator {
|
||||||
// When a length-prefixed value is the last field on the line,
|
// Example: streaming through records and fields using the iterator API.
|
||||||
// rest_of_data.len == size exactly. The check on line 216 uses
|
// This is the preferred parsing approach -- no intermediate slices are
|
||||||
// strict > instead of >=, falling through to the multi-line path
|
// allocated for fields or records.
|
||||||
// where size - rest_of_data.len - 1 underflows.
|
|
||||||
const data =
|
const data =
|
||||||
\\#!srfv1
|
\\#!srfv1
|
||||||
\\name::alice,desc:5:world
|
\\name::alice,desc:5:world
|
||||||
|
|
@ -1629,21 +1795,65 @@ test "iterator" {
|
||||||
var ri = try iterator(&reader, allocator, .{});
|
var ri = try iterator(&reader, allocator, .{});
|
||||||
defer ri.deinit();
|
defer ri.deinit();
|
||||||
|
|
||||||
const nfi = try ri.next();
|
// Advance to the first (and only) record
|
||||||
try std.testing.expect(nfi != null);
|
const fi = (try ri.next()).?;
|
||||||
const fi = nfi.?;
|
|
||||||
// defer fi.deinit();
|
|
||||||
const field1 = try fi.next();
|
|
||||||
try std.testing.expect(field1 != null);
|
|
||||||
try std.testing.expectEqualStrings("name", field1.?.key);
|
|
||||||
try std.testing.expectEqualStrings("alice", field1.?.value.?.string);
|
|
||||||
const field2 = try fi.next();
|
|
||||||
try std.testing.expect(field2 != null);
|
|
||||||
try std.testing.expectEqualStrings("desc", field2.?.key);
|
|
||||||
try std.testing.expectEqualStrings("world", field2.?.value.?.string);
|
|
||||||
const field3 = try fi.next();
|
|
||||||
try std.testing.expect(field3 == null);
|
|
||||||
|
|
||||||
const next = try ri.next();
|
// Iterate fields within the record
|
||||||
try std.testing.expect(next == null);
|
const field1 = (try fi.next()).?;
|
||||||
|
try std.testing.expectEqualStrings("name", field1.key);
|
||||||
|
try std.testing.expectEqualStrings("alice", field1.value.?.string);
|
||||||
|
const field2 = (try fi.next()).?;
|
||||||
|
try std.testing.expectEqualStrings("desc", field2.key);
|
||||||
|
try std.testing.expectEqualStrings("world", field2.value.?.string);
|
||||||
|
|
||||||
|
// No more fields in this record
|
||||||
|
try std.testing.expect(try fi.next() == null);
|
||||||
|
// No more records
|
||||||
|
try std.testing.expect(try ri.next() == null);
|
||||||
|
}
|
||||||
|
test parse {
|
||||||
|
// Example: batch parsing collects all records and fields into slices.
|
||||||
|
// Prefer `iterator` for streaming; use `parse` when random access to
|
||||||
|
// all records is needed.
|
||||||
|
const data =
|
||||||
|
\\#!srfv1
|
||||||
|
\\#!long
|
||||||
|
\\name::alice
|
||||||
|
\\age:num:30
|
||||||
|
\\
|
||||||
|
\\name::bob
|
||||||
|
\\age:num:25
|
||||||
|
\\#!eof
|
||||||
|
;
|
||||||
|
const allocator = std.testing.allocator;
|
||||||
|
var reader = std.Io.Reader.fixed(data);
|
||||||
|
const parsed = try parse(&reader, allocator, .{});
|
||||||
|
defer parsed.deinit();
|
||||||
|
|
||||||
|
try std.testing.expectEqual(@as(usize, 2), parsed.records.len);
|
||||||
|
try std.testing.expectEqualStrings("alice", parsed.records[0].fields[0].value.?.string);
|
||||||
|
try std.testing.expectEqualStrings("bob", parsed.records[1].fields[0].value.?.string);
|
||||||
|
}
|
||||||
|
test fmtFrom {
|
||||||
|
// Example: serialize typed Zig values directly to SRF format.
|
||||||
|
const Data = struct {
|
||||||
|
name: []const u8,
|
||||||
|
age: u8,
|
||||||
|
};
|
||||||
|
const values: []const Data = &.{
|
||||||
|
.{ .name = "alice", .age = 30 },
|
||||||
|
.{ .name = "bob", .age = 25 },
|
||||||
|
};
|
||||||
|
var buf: [4096]u8 = undefined;
|
||||||
|
const result = try std.fmt.bufPrint(
|
||||||
|
&buf,
|
||||||
|
"{f}",
|
||||||
|
.{fmtFrom(Data, std.testing.allocator, values, .{})},
|
||||||
|
);
|
||||||
|
try std.testing.expectEqualStrings(
|
||||||
|
\\#!srfv1
|
||||||
|
\\name::alice,age:num:30
|
||||||
|
\\name::bob,age:num:25
|
||||||
|
\\
|
||||||
|
, result);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue