update readme and docs

This commit is contained in:
Emil Lerch 2026-03-09 13:39:54 -07:00
parent fcce8e48d4
commit 1f922c0b33
Signed by: lobo
GPG key ID: A7B62D657EF764F8
3 changed files with 397 additions and 85 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
.zig-cache/ .zig-cache/
zig-out/ zig-out/
docs/

111
README.md
View file

@ -4,10 +4,12 @@ SRF is a minimal data format designed for L2 caches and simple structured storag
**Features:** **Features:**
- No escaping required - use length-prefixed strings for complex data - No escaping required - use length-prefixed strings for complex data
- Single-pass parsing with minimal memory allocation - Single-pass streaming parser with minimal memory allocation
- Basic type system (string, num, bool, null, binary) with explicit type hints - Basic type system (string, num, bool, null, binary) with explicit type hints
- Compact format for machine generation, long format for human editing - Compact format for machine generation, long format for human editing
- Built-in corruption detection with optional EOF markers - Built-in corruption detection with optional EOF markers
- Iterator-based API for zero-copy, low-allocation streaming
- Comptime type coercion directly from the iterator (no intermediate collections)
**When to use SRF:** **When to use SRF:**
- L2 caches that need occasional human inspection - L2 caches that need occasional human inspection
@ -20,7 +22,70 @@ SRF is a minimal data format designed for L2 caches and simple structured storag
- Schema validation requirements - Schema validation requirements
- Arrays or object hierarchies (arrays can be managed in the data itself, however) - Arrays or object hierarchies (arrays can be managed in the data itself, however)
Long format: ## Parsing API
SRF provides two parsing APIs. The **iterator API is preferred** for most use cases
as it avoids collecting all records and fields into memory at once.
### Iterator (preferred)
The `iterator` function returns a `RecordIterator` that streams records lazily.
Each call to `RecordIterator.next` yields a `FieldIterator` for the next record,
and each call to `FieldIterator.next` yields individual `Field` values. No
intermediate slices or ArrayLists are allocated -- fields are yielded one at a
time directly from the parser state.
For type coercion, `FieldIterator.to(T)` consumes the remaining fields in the
current record and maps them into a Zig struct or tagged union at comptime,
with zero additional allocations beyond what field parsing itself requires. Those
allocations can be reduced further with the parsing option `.alloc_strings = false`.
```zig
const srf = @import("srf");
const Data = struct {
name: []const u8,
age: u8,
active: bool = false,
};
var reader = std.Io.Reader.fixed(raw_data);
var ri = try srf.iterator(&reader, allocator, .{});
defer ri.deinit();
while (try ri.next()) |fi| {
const record = try fi.to(Data);
// process record...
}
```
### Batch parse
The `parse` function collects all records into memory at once, returning a
`Parsed` struct with a `records: []Record` slice. This is built on top of
the iterator internally. It is convenient when you need random access to all
records, but costs more memory since every field is collected into ArrayLists
before being converted to owned slices.
```zig
const srf = @import("srf");
var reader = std.Io.Reader.fixed(raw_data);
const parsed = try srf.parse(&reader, allocator, .{});
defer parsed.deinit();
for (parsed.records) |record| {
const data = try record.to(Data);
// process data...
}
```
## Data Formats
### Long format
Long format uses newlines to delimit fields and blank lines to separate records.
It is human-friendly and suitable for hand-edited configuration files.
``` ```
#!srfv1 # mandatory comment with format and version. Parser instructions start with #! #!srfv1 # mandatory comment with format and version. Parser instructions start with #!
@ -46,7 +111,11 @@ data with newlines must have a length::single line
#!eof # eof marker, useful to make sure your file wasn't cut in half. Only considered if requireeof set at top #!eof # eof marker, useful to make sure your file wasn't cut in half. Only considered if requireeof set at top
``` ```
compact format: ### Compact format
Compact format uses commas to delimit fields and newlines to separate records.
It is designed for machine generation where space efficiency matters.
``` ```
#!srfv1 # mandatory comment with format and version. Parser instructions start with #! #!srfv1 # mandatory comment with format and version. Parser instructions start with #!
key::string value must have a length between colons or end with a comma,this is a number:num:5 ,null value:null:,array::array's don't exist. Use json or toml or something,data with newlines must have a length:7:foo key::string value must have a length between colons or end with a comma,this is a number:num:5 ,null value:null:,array::array's don't exist. Use json or toml or something,data with newlines must have a length:7:foo
@ -54,6 +123,38 @@ bar,boolean value:bool:false
key::this is the second record key::this is the second record
``` ```
## Serialization
SRF supports serializing Zig structs, unions, and enums back to SRF format.
Use `Record.from` to create a record from a typed value, or `fmtFrom` to
format a slice of values directly to a writer.
```zig
const srf = @import("srf");
const all_data: []const Data = &.{
.{ .name = "alice", .age = 30, .active = true },
.{ .name = "bob", .age = 25 },
};
var buf: [4096]u8 = undefined;
const formatted = try std.fmt.bufPrint(&buf, "{f}", .{
srf.fmtFrom(Data, allocator, all_data, .{ .long_format = true }),
});
```
## Type System
Fields follow the format `key:type_hint:value`:
| Type | Hint | Example |
|------------------------|-----------------------|-------------------------|
| String | *(empty)* or `string` | `name::alice` |
| Number (internally f64)| `num` | `age:num:30` |
| Boolean | `bool` | `active:bool:true` |
| Null | `null` | `missing:null:` |
| Binary | `binary` | `data:binary:base64...` |
| Length-prefixed string | *(byte count)* | `bio:12:hello\nworld!` |
## Implementation Concerns ## Implementation Concerns
**Parser robustness:** **Parser robustness:**
@ -87,5 +188,5 @@ key::this is the second record
## AI Use ## AI Use
AI was used in this project for comments, parts of the README, and unit test AI was used in this project for comments, parts of the README, benchmarking code,
generation. All other code is human generated. build.zig and unit test generation. All other code is human generated.

View file

@ -67,25 +67,43 @@ const ValueWithMetaData = struct {
error_parsing: bool = false, error_parsing: bool = false,
reader_advanced: bool = false, reader_advanced: bool = false,
}; };
/// A parsed SRF value. Each field in a record has a key and an optional `Value`.
pub const Value = union(enum) { pub const Value = union(enum) {
/// A numeric value, parsed from the `num` type hint.
number: f64, number: f64,
/// Bytes are converted to/from base64, string is not /// Raw bytes decoded from base64, parsed from the `binary` type hint.
bytes: []const u8, bytes: []const u8,
/// String is not touched in any way /// A string value, either delimiter-terminated or length-prefixed.
/// Not transformed during parsing (no escaping/unescaping), but will be
/// allocated if .alloc_strings = true is passed during parsing, or if
/// a multi-line string is found in the data
string: []const u8, string: []const u8,
/// A boolean value, parsed from the `bool` type hint (`true` or `false`).
boolean: bool, boolean: bool,
// pub fn format(self: Value, writer: *std.Io.Writer) std.Io.Writer.Error!void { /// parses a single srf value, without the key. The whole field is:
// switch (self) { ///
// .number => try writer.print("num: {d}", .{self.number}), /// SRF Field: 'foo:3:bar'
// .bytes => try writer.print("bytes: {x}", .{self.bytes}), ///
// .string => try writer.print("string: {s}", .{self.string}), /// The value we expect to be sent to this function is:
// .boolean => try writer.print("boolean: {}", .{self.boolean}), ///
// } /// SRF Value: '3:bar'
// } ///
/// The value is allowed to have extra data...for instance, in compact format
/// the value above can be represented by:
///
/// SRF Value: '3:bar,next_field::foobar'
///
/// and the next field will be ignored
///
/// This function may need to advance the reader in the case of multi-line
/// strings. It may also allocate data in the case of base64 (binary) values
/// as well as multi-line strings. Metadata is returned to assist in tracking
/// this (for example, whether the reader was advanced).
///
/// This function is intended to be used by the SRF parser
pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData { pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':'); const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':');
if (type_val_sep_raw == null) { if (type_val_sep_raw == null) {
@ -244,22 +262,15 @@ pub const Value = union(enum) {
} }
}; };
// A field has a key and a value, but the value may be null /// A single key-value pair within a record. The key is always a string.
/// The value may be `null` (from the `null` type hint) or one of the
/// `Value` variants. Yielded by `RecordIterator.FieldIterator.next`.
pub const Field = struct { pub const Field = struct {
key: []const u8, key: []const u8,
value: ?Value, value: ?Value,
}; };
fn coerce(name: []const u8, comptime T: type, val: ?Value) !T { fn coerce(name: []const u8, comptime T: type, val: ?Value) !T {
// Here's the deduplicated set of field types that coerce needs to handle:
// Direct from SRF values:
// Need parsing from string:
// - Date, ?Date -- Date.parse(string)
//
// Won't work with Record.to(T) generically:
// - []const OptionContract -- nested sub-records (OptionsChain has calls/puts arrays)
// - ?[]const Holding, ?[]const SectorWeight -- nested sub-records in EtfProfile
//
const ti = @typeInfo(T); const ti = @typeInfo(T);
if (val == null and ti != .optional) if (val == null and ti != .optional)
return error.NullValueCannotBeAssignedToNonNullField; return error.NullValueCannotBeAssignedToNonNullField;
@ -307,23 +318,31 @@ fn coerce(name: []const u8, comptime T: type, val: ?Value) !T {
return null; return null;
} }
// A record has a list of fields, with no assumptions regarding duplication, /// A record is an ordered list of `Field` values, with no uniqueness constraints
// etc. This is for parsing speed, but also for more flexibility in terms of /// on keys. This allows flexible use cases such as encoding arrays by repeating
// use cases. One can make a defacto array out of this structure by having /// the same key. In long form, this could look like:
// something like: ///
// /// ```txt
// arr:string:foo /// arr:string:foo
// arr:string:bar /// arr:string:bar
// /// ```
// and when you coerce to zig struct have an array .arr that gets populated ///
// with strings "foo" and "bar". /// Records are returned by the batch `parse` function. For streaming, prefer
/// `iterator` which yields fields one at a time via `RecordIterator.FieldIterator`
/// without collecting them into a slice.
pub const Record = struct { pub const Record = struct {
fields: []const Field, fields: []const Field,
/// Returns a `RecordFormatter` suitable for use with `std.fmt.bufPrint`
/// or any `std.Io.Writer`. Use `FormatOptions` to control compact vs
/// long output format.
pub fn fmt(value: Record, options: FormatOptions) RecordFormatter { pub fn fmt(value: Record, options: FormatOptions) RecordFormatter {
return .{ .value = value, .options = options }; return .{ .value = value, .options = options };
} }
/// Looks up the first `Field` whose key matches `field_name`, or returns
/// `null` if no such field exists. Only the first occurrence is returned;
/// duplicate keys are not considered.
pub fn firstFieldByName(self: Record, field_name: []const u8) ?Field { pub fn firstFieldByName(self: Record, field_name: []const u8) ?Field {
for (self.fields) |f| for (self.fields) |f|
if (std.mem.eql(u8, f.key, field_name)) return f; if (std.mem.eql(u8, f.key, field_name)) return f;
@ -501,7 +520,20 @@ pub const Record = struct {
return OwnedRecord(T).init(allocator, val); return OwnedRecord(T).init(allocator, val);
} }
/// Coerce Record to a type. Does not handle fields with arrays /// Coerce a `Record` to a Zig struct or tagged union. For each field in `T`,
/// the first matching `Field` by name is coerced to the target type. Fields
/// with default values in `T` that are not present in the data use their
/// defaults. Missing fields without defaults return an error. Note that
/// by this logic, multiple fields with the same name will have all but the
/// first value silently ignored.
///
/// For tagged unions, the active variant is determined by a field named
/// `"active_tag"` (or the value of `T.srf_tag_field` if declared). The
/// remaining fields are coerced into the payload struct of that variant.
///
/// For streaming data without collecting fields first, prefer
/// `RecordIterator.FieldIterator.to` which avoids the intermediate
/// `[]Field` allocation entirely.
pub fn to(self: Record, comptime T: type) !T { pub fn to(self: Record, comptime T: type) !T {
const ti = @typeInfo(T); const ti = @typeInfo(T);
@ -547,26 +579,39 @@ pub const Record = struct {
} }
return error.CoercionNotPossible; return error.CoercionNotPossible;
} }
test to {
// Example: coerce a batch-parsed Record into a Zig struct.
const Data = struct {
city: []const u8,
pop: u8,
};
const data =
\\#!srfv1
\\city::springfield,pop:num:30
;
const allocator = std.testing.allocator;
var reader = std.Io.Reader.fixed(data);
const parsed = try parse(&reader, allocator, .{});
defer parsed.deinit();
const result = try parsed.records[0].to(Data);
try std.testing.expectEqualStrings("springfield", result.city);
try std.testing.expectEqual(@as(u8, 30), result.pop);
}
}; };
/// The Parsed struct is equivalent to Parsed(T) in std.json. Since most are /// A streaming record iterator for parsing SRF data. This is the preferred
/// familiar with std.json, it differs in the following ways: /// parsing API because it avoids collecting all records and fields into memory
/// at once. Created by calling `iterator`.
/// ///
/// * There is a records field instead of a value field. In json, one type of /// Each call to `next` yields a `FieldIterator` for one record. Fields within
/// value is an array. SRF does not have an array data type, but the set of /// that record are consumed lazily via `FieldIterator.next` or coerced directly
/// records is an array. json as a format is structred as a single object at /// into a Zig type via `FieldIterator.to`. All allocations go through an
/// the outermost /// internal arena; call `deinit` to release everything when done.
/// ///
/// * This is not generic. In SRF, it is a separate function to bind the list /// If `RecordIterator.next` is called before the previous `FieldIterator` has
/// of records to a specific data type. This will add some (hopefully minimal) /// been fully consumed, the remaining fields are automatically drained to keep
/// overhead, but also avoid conflating parsing from the coercion from general /// the parser state consistent.
/// type to specifics, and avoids answering questions like "what if I have
/// 15 values for the same key" until you're actually dealing with that problem
/// (see std.json.ParseOptions duplicate_field_behavior and ignore_unknown_fields)
///
/// When implemented, there will include a pub fn bind(self: Parsed, comptime T: type, options, BindOptions) BindError![]T
/// function. The options will include things related to duplicate handling and
/// missing fields
pub const RecordIterator = struct { pub const RecordIterator = struct {
arena: *std.heap.ArenaAllocator, arena: *std.heap.ArenaAllocator,
/// optional expiry time for the data. Useful for caching /// optional expiry time for the data. Useful for caching
@ -611,9 +656,19 @@ pub const RecordIterator = struct {
} }
}; };
/// Advances to the next record in the stream, returning a `FieldIterator`
/// for accessing its fields. Returns `null` when all records have been
/// consumed.
///
/// If the previous `FieldIterator` was not fully drained, its remaining
/// fields are consumed automatically to keep the reader positioned
/// correctly. It is safe (but unnecessary) to fully consume the
/// `FieldIterator` before calling `next` again.
///
/// Note that all state is stored in a shared area accessible to both
/// the `RecordIterator` and the `FieldIterator`, so there is no need to
/// store the return value as a variable
pub fn next(self: RecordIterator) !?FieldIterator { pub fn next(self: RecordIterator) !?FieldIterator {
// TODO: we need to capture the fieldIterator here and make sure it's run
// to the ground to keep our state intact
const state = self.state; const state = self.state;
if (state.field_iterator) |f| { if (state.field_iterator) |f| {
// We need to finish the fields on the previous record // We need to finish the fields on the previous record
@ -666,18 +721,24 @@ pub const RecordIterator = struct {
return state.field_iterator.?; return state.field_iterator.?;
} }
/// Iterates over the fields within a single record. Yielded by
/// `RecordIterator.next`. Each call to `next` returns the next `Field`
/// in the record, or `null` when the record boundary is reached.
///
/// For direct type coercion without manually iterating fields, use `to`.
pub const FieldIterator = struct { pub const FieldIterator = struct {
state: *State, state: *State,
arena: *std.heap.ArenaAllocator, arena: *std.heap.ArenaAllocator,
/// Returns the next `Field` in the current record, or `null` when
/// the record boundary has been reached. After `null` is returned,
/// subsequent calls continue to return `null`.
pub fn next(self: FieldIterator) !?Field { pub fn next(self: FieldIterator) !?Field {
const state = self.state; const state = self.state;
const aa = self.arena.allocator(); const aa = self.arena.allocator();
// Main parsing. We already have the first line of data, which could // Main parsing. We already have the first line of data, which could
// be a record (compact format) or a key/value pair (long format) // be a record (compact format) or a key/value pair (long format)
// log.debug("", .{});
log.debug("current line:{?s}", .{state.current_line});
if (state.current_line == null) return null; if (state.current_line == null) return null;
if (state.end_of_record_reached) return null; if (state.end_of_record_reached) return null;
// non-blank line, but we could have an eof marker // non-blank line, but we could have an eof marker
@ -771,7 +832,21 @@ pub const RecordIterator = struct {
return field; return field;
} }
/// Coerce Record to a type. Does not handle fields with arrays /// Consumes remaining fields in this record and coerces them into a
/// Zig struct or tagged union `T`. This is the streaming equivalent of
/// `Record.to` -- it performs the same field-name matching and default
/// value logic, but reads directly from the parser without building an
/// intermediate `[]Field` slice.
///
/// For structs, fields are matched by name. Only the first occurrence
/// of each field name is used; duplicates are ignored. Fields in `T`
/// that have default values and are not present in the data use those
/// defaults. Missing fields without defaults return an error.
///
/// For tagged unions, the active tag field must appear first in the
/// stream (unlike `Record.to` which can do random access). The tag
/// field name defaults to `"active_tag"` or `T.srf_tag_field` if
/// declared.
pub fn to(self: FieldIterator, comptime T: type) !T { pub fn to(self: FieldIterator, comptime T: type) !T {
const ti = @typeInfo(T); const ti = @typeInfo(T);
@ -847,13 +922,47 @@ pub const RecordIterator = struct {
} }
return error.CoercionNotPossible; return error.CoercionNotPossible;
} }
test to {
// Example: coerce fields directly into a Zig struct from the iterator,
// without collecting into an intermediate Record. This is the most
// allocation-efficient path for typed deserialization.
const Data = struct {
name: []const u8,
score: u8,
active: bool = true,
}; };
const data =
\\#!srfv1
\\name::alice,score:num:99
;
const allocator = std.testing.allocator;
var reader = std.Io.Reader.fixed(data);
var ri = try iterator(&reader, allocator, .{});
defer ri.deinit();
const result = try (try ri.next()).?.to(Data);
try std.testing.expectEqualStrings("alice", result.name);
try std.testing.expectEqual(@as(u8, 99), result.score);
// `active` was not in the data, so the default value is used
try std.testing.expect(result.active);
}
};
/// Releases all memory owned by this iterator. This frees the internal
/// arena (and all parsed data allocated from it), then frees the arena
/// struct itself. After calling `deinit`, any slices or string pointers
/// obtained from `FieldIterator.next` or `FieldIterator.to` are invalid.
pub fn deinit(self: RecordIterator) void { pub fn deinit(self: RecordIterator) void {
const child_allocator = self.arena.child_allocator; const child_allocator = self.arena.child_allocator;
self.arena.deinit(); self.arena.deinit();
child_allocator.destroy(self.arena); child_allocator.destroy(self.arena);
} }
/// Returns `true` if the data has not expired based on the `#!expires`
/// directive. If no expiry was specified, the data is always considered
/// fresh. Callers should check this after parsing to decide whether to
/// use or refresh cached data. Note that data will be returned by `parse` and
/// `iterator` regardless of freshness. This enables callers to use cached
/// data temporarily while refreshing it.
pub fn isFresh(self: RecordIterator) bool { pub fn isFresh(self: RecordIterator) bool {
if (self.expires) |exp| if (self.expires) |exp|
return std.time.timestamp() < exp; return std.time.timestamp() < exp;
@ -861,8 +970,25 @@ pub const RecordIterator = struct {
// no expiry: always fresh, never frozen // no expiry: always fresh, never frozen
return true; return true;
} }
test isFresh {
// Example: check expiry on parsed data. Data without an #!expires
// directive is always considered fresh.
const data =
\\#!srfv1
\\key::value
;
const allocator = std.testing.allocator;
var reader = std.Io.Reader.fixed(data);
var ri = try iterator(&reader, allocator, .{});
defer ri.deinit();
// No expiry set, so always fresh
try std.testing.expect(ri.isFresh());
}
}; };
/// Options controlling SRF parsing behavior. Passed to both `iterator` and
/// `parse`.
pub const ParseOptions = struct { pub const ParseOptions = struct {
diagnostics: ?*Diagnostics = null, diagnostics: ?*Diagnostics = null,
@ -903,7 +1029,12 @@ const Directive = union(enum) {
return null; return null;
} }
}; };
/// Options controlling SRF output formatting. Used by `fmt`, `fmtFrom`,
/// `Record.fmt`, and related formatters.
pub const FormatOptions = struct { pub const FormatOptions = struct {
/// When `true`, fields are separated by newlines and records by blank
/// lines (`#!long` format). When `false` (default), fields are
/// comma-separated and records are newline-separated (compact format).
long_format: bool = false, long_format: bool = false,
/// Will emit the eof directive as well as requireeof /// Will emit the eof directive as well as requireeof
@ -918,14 +1049,19 @@ pub const FormatOptions = struct {
emit_directives: bool = true, emit_directives: bool = true,
}; };
/// Returns a formatter that formats the given value /// Returns a `Formatter` for writing pre-built `Record` values to a writer.
/// Suitable for use with `std.fmt.bufPrint` or any `std.Io.Writer` via the
/// `{f}` format specifier.
pub fn fmt(value: []const Record, options: FormatOptions) Formatter { pub fn fmt(value: []const Record, options: FormatOptions) Formatter {
return .{ .value = value, .options = options }; return .{ .value = value, .options = options };
} }
/// Returns a formatter that formats the given value. This will take a concrete /// Returns a formatter for writing typed Zig values directly to SRF format.
/// type, convert it to the SRF record format automatically (using srfFormat if /// Each value is converted to a `Record` via `Record.from` and written to
/// found), and output to the writer. It is recommended to use a FixedBufferAllocator /// the output. Custom serialization is supported via the `srfFormat` method
/// for the allocator, which is only used for custom srfFormat functions (I think - what about enum tag names?) /// convention on struct/union fields.
///
/// The `allocator` is used only for fields that require custom formatting
/// (via `srfFormat`). A `FixedBufferAllocator` is recommended for this purpose.
pub fn fmtFrom(comptime T: type, allocator: std.mem.Allocator, value: []const T, options: FormatOptions) FromFormatter(T) { pub fn fmtFrom(comptime T: type, allocator: std.mem.Allocator, value: []const T, options: FormatOptions) FromFormatter(T) {
return .{ .value = value, .options = options, .allocator = allocator }; return .{ .value = value, .options = options, .allocator = allocator };
} }
@ -1033,11 +1169,20 @@ pub const RecordFormatter = struct {
} }
}; };
/// The result of a batch `parse` call. Contains all records collected into a
/// single slice. All data is owned by the internal arena; call `deinit` to
/// release everything.
///
/// For streaming without collecting all records, prefer `iterator` which
/// returns a `RecordIterator` instead.
pub const Parsed = struct { pub const Parsed = struct {
records: []Record, records: []Record,
arena: *std.heap.ArenaAllocator, arena: *std.heap.ArenaAllocator,
expires: ?i64, expires: ?i64,
/// Releases all memory owned by this `Parsed` result, including all
/// record and field data. After calling `deinit`, any slices or string
/// pointers obtained from `records` are invalid.
pub fn deinit(self: Parsed) void { pub fn deinit(self: Parsed) void {
const ca = self.arena.child_allocator; const ca = self.arena.child_allocator;
self.arena.deinit(); self.arena.deinit();
@ -1045,7 +1190,15 @@ pub const Parsed = struct {
} }
}; };
/// parse function /// Parses all records from the reader into memory, returning a `Parsed` struct
/// with a `records` slice. This is a convenience wrapper around `iterator` that
/// collects all fields and records into arena-allocated slices.
///
/// For most use cases, prefer `iterator` instead -- it streams records lazily
/// and avoids the cost of collecting all fields into intermediate `ArrayList`s.
///
/// All returned data is owned by the `Parsed` arena. Call `Parsed.deinit` to
/// free everything at once.
pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!Parsed { pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!Parsed {
var records = std.ArrayList(Record).empty; var records = std.ArrayList(Record).empty;
var it = try iterator(reader, allocator, options); var it = try iterator(reader, allocator, options);
@ -1073,7 +1226,21 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
}; };
} }
/// Gets an iterator to stream through the data /// Creates a streaming `RecordIterator` for the given reader. This is the
/// preferred entry point for parsing SRF data, as it yields records and
/// fields lazily without collecting them into slices.
///
/// The returned iterator owns an arena allocator that holds all parsed data
/// (string values, keys, etc.). Call `RecordIterator.deinit` to free
/// everything when done. Parsed field data remains valid until `deinit` is
/// called.
///
/// The iterator handles SRF header directives (`#!srfv1`, `#!long`,
/// `#!compact`, `#!requireeof`, `#!expires`) automatically during
/// construction. Notably this means you can check isFresh() immediately.
///
/// Also note that because state is allocated and stored within the `RecordIterator`,
/// callers can assign the return value to a constant.
pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordIterator { pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: ParseOptions) ParseError!RecordIterator {
// The arena and state are heap-allocated because RecordIterator is returned // The arena and state are heap-allocated because RecordIterator is returned
@ -1615,11 +1782,10 @@ test "compact format length-prefixed string as last field" {
try std.testing.expectEqualStrings("desc", rec.fields[1].key); try std.testing.expectEqualStrings("desc", rec.fields[1].key);
try std.testing.expectEqualStrings("world", rec.fields[1].value.?.string); try std.testing.expectEqualStrings("world", rec.fields[1].value.?.string);
} }
test "iterator" { test iterator {
// When a length-prefixed value is the last field on the line, // Example: streaming through records and fields using the iterator API.
// rest_of_data.len == size exactly. The check on line 216 uses // This is the preferred parsing approach -- no intermediate slices are
// strict > instead of >=, falling through to the multi-line path // allocated for fields or records.
// where size - rest_of_data.len - 1 underflows.
const data = const data =
\\#!srfv1 \\#!srfv1
\\name::alice,desc:5:world \\name::alice,desc:5:world
@ -1629,21 +1795,65 @@ test "iterator" {
var ri = try iterator(&reader, allocator, .{}); var ri = try iterator(&reader, allocator, .{});
defer ri.deinit(); defer ri.deinit();
const nfi = try ri.next(); // Advance to the first (and only) record
try std.testing.expect(nfi != null); const fi = (try ri.next()).?;
const fi = nfi.?;
// defer fi.deinit();
const field1 = try fi.next();
try std.testing.expect(field1 != null);
try std.testing.expectEqualStrings("name", field1.?.key);
try std.testing.expectEqualStrings("alice", field1.?.value.?.string);
const field2 = try fi.next();
try std.testing.expect(field2 != null);
try std.testing.expectEqualStrings("desc", field2.?.key);
try std.testing.expectEqualStrings("world", field2.?.value.?.string);
const field3 = try fi.next();
try std.testing.expect(field3 == null);
const next = try ri.next(); // Iterate fields within the record
try std.testing.expect(next == null); const field1 = (try fi.next()).?;
try std.testing.expectEqualStrings("name", field1.key);
try std.testing.expectEqualStrings("alice", field1.value.?.string);
const field2 = (try fi.next()).?;
try std.testing.expectEqualStrings("desc", field2.key);
try std.testing.expectEqualStrings("world", field2.value.?.string);
// No more fields in this record
try std.testing.expect(try fi.next() == null);
// No more records
try std.testing.expect(try ri.next() == null);
}
test parse {
// Example: batch parsing collects all records and fields into slices.
// Prefer `iterator` for streaming; use `parse` when random access to
// all records is needed.
const data =
\\#!srfv1
\\#!long
\\name::alice
\\age:num:30
\\
\\name::bob
\\age:num:25
\\#!eof
;
const allocator = std.testing.allocator;
var reader = std.Io.Reader.fixed(data);
const parsed = try parse(&reader, allocator, .{});
defer parsed.deinit();
try std.testing.expectEqual(@as(usize, 2), parsed.records.len);
try std.testing.expectEqualStrings("alice", parsed.records[0].fields[0].value.?.string);
try std.testing.expectEqualStrings("bob", parsed.records[1].fields[0].value.?.string);
}
test fmtFrom {
// Example: serialize typed Zig values directly to SRF format.
const Data = struct {
name: []const u8,
age: u8,
};
const values: []const Data = &.{
.{ .name = "alice", .age = 30 },
.{ .name = "bob", .age = 25 },
};
var buf: [4096]u8 = undefined;
const result = try std.fmt.bufPrint(
&buf,
"{f}",
.{fmtFrom(Data, std.testing.allocator, values, .{})},
);
try std.testing.expectEqualStrings(
\\#!srfv1
\\name::alice,age:num:30
\\name::bob,age:num:25
\\
, result);
} }