address all allocation edge cases

allow consumers to control allocation more granularly
allow outputting default values
2026-05-27 17:04:26 -07:00 · 2026-05-27 13:38:38 -07:00 · 2026-05-25 13:13:36 -07:00 · 2026-05-25 13:03:59 -07:00 · 2026-05-25 13:02:41 -07:00 · 2026-04-14 14:30:55 -07:00
5 changed files with 400 additions and 68 deletions
--- a/.mise.toml
+++ b/.mise.toml
@ -1,5 +1,5 @@
 [tools]
 prek = "0.3.1"
 "ubi:DonIsaac/zlint" = "0.7.9"
-zig = "0.15.2"
+zig = "0.16.0"
 zls = "0.15.1"
--- a/build.zig
+++ b/build.zig
@ -250,6 +250,7 @@ const BenchmarkStep = struct {
        const b = step.owner;
        const self: *BenchmarkStep = @fieldParentPtr("step", step);

+        const io = b.graph.io;
        const gen_path = b.getInstallPath(.bin, self.gen_exe.name);
        const exe_path = b.getInstallPath(.bin, self.srf_exe.name);
        const count_str = b.fmt("{d}", .{self.record_count});
@ -271,29 +272,32 @@ const BenchmarkStep = struct {

            const hash_str = b.fmt("{x}", .{hash});
            const cache_dir = b.cache_root.join(b.allocator, &.{ "o", hash_str }) catch @panic("OOM");
-            std.fs.cwd().makePath(cache_dir) catch {};
+            b.cache_root.handle.createDirPath(io, cache_dir) catch @panic("Could not create cache path");

            const filename = b.fmt("test-{s}.{s}", .{ fmt.name, fmt.ext });
            const filepath = b.pathJoin(&.{ cache_dir, filename });
            test_files[i] = filepath;

            // Check if file exists
-            if (std.fs.cwd().access(filepath, .{})) {
+            if (b.cache_root.handle.access(io, filepath, .{})) {
                continue; // File exists, skip generation
            } else |_| {}

            // Generate file
-            var child = std.process.Child.init(&.{ gen_path, fmt.name, count_str }, b.allocator);
-            child.stdout_behavior = .Pipe;
-            try child.spawn();
+            var child = try std.process.spawn(io, .{
+                .argv = &.{ gen_path, fmt.name, count_str },
+                .stdout = .pipe,
+            });

-            const output = try child.stdout.?.readToEndAlloc(b.allocator, 100 * 1024 * 1024);
+            var buf: [4096]u8 = undefined;
+            var file_reader = child.stdout.?.reader(io, &buf);
+            var reader = &file_reader.interface;
+            const output = try reader.allocRemaining(b.allocator, .unlimited);
            defer b.allocator.free(output);
+            const term = try child.wait(io);
+            if (term != .exited or term.exited != 0) return error.GenerationFailed;

-            const term = try child.wait();
-            if (term != .Exited or term.Exited != 0) return error.GenerationFailed;
-
-            try std.fs.cwd().writeFile(.{ .sub_path = filepath, .data = output });
+            try b.cache_root.handle.writeFile(io, .{ .sub_path = filepath, .data = output });
        }

        // Run hyperfine
@ -308,16 +312,19 @@ const BenchmarkStep = struct {
            try argv.append(b.allocator, b.fmt("{s} jsonl <{s}", .{ exe_path, test_files[2] }));
        }

-        var child = std.process.Child.init(argv.items, b.allocator);
-
        // We need to lock stderror so hyperfine can output progress in place
-        std.debug.lockStdErr();
-        defer std.debug.unlockStdErr();
+        // SAFETY: buffer for locking
+        var buf: [1024]u8 = undefined; // I have no idea what the right size buffer should be
+        _ = try io.lockStderr(&buf, null);
+        defer io.unlockStderr();

-        try child.spawn();
-        const term = try child.wait();
+        var child = try std.process.spawn(io, .{
+            .argv = argv.items,
+        });

-        if (term != .Exited or term.Exited != 0)
+        const term = try child.wait(io);
+
+        if (term != .exited or term.exited != 0)
            return error.BenchmarkFailed;
    }
 };
--- a/build.zig.zon
+++ b/build.zig.zon
@ -25,7 +25,7 @@
    .fingerprint = 0x102ed002eff998a9, // Changing this has security and trust implications.
    // Tracks the earliest Zig version that the package considers to be a
    // supported use case.
-    .minimum_zig_version = "0.15.2",
+    .minimum_zig_version = "0.16.0",
    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
--- a/src/main.zig
+++ b/src/main.zig
@ -46,13 +46,10 @@ const CountingAllocator = struct {
    }
 };

-pub fn main() !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
-    defer _ = gpa.deinit();
-    const base_allocator = gpa.allocator();
+pub fn main(init: std.process.Init) !void {
+    const gpa = init.gpa;

-    const args = try std.process.argsAlloc(base_allocator);
-    defer std.process.argsFree(base_allocator, args);
+    const args = try init.minimal.args.toSlice(init.arena.allocator());

    if (args.len < 2) {
        std.debug.print("Usage: {s} <srf|json|jsonl>\n", .{args[0]});
@ -61,23 +58,23 @@ pub fn main() !void {

    const format = args[1];

-    const debug_allocs = std.process.hasEnvVarConstant("DEBUG_ALLOCATIONS");
+    const debug_allocs = init.environ_map.contains("DEBUG_ALLOCATIONS");

-    var counting = CountingAllocator{ .child_allocator = base_allocator };
-    const allocator = if (debug_allocs) counting.allocator() else base_allocator;
+    var counting = CountingAllocator{ .child_allocator = gpa };
+    const allocator = if (debug_allocs) counting.allocator() else gpa;

    var stdin_buffer: [1024]u8 = undefined;
-    var stdin_reader = std.fs.File.stdin().reader(&stdin_buffer);
+    var stdin_reader = std.Io.File.stdin().reader(init.io, &stdin_buffer);
    const stdin = &stdin_reader.interface;

    // Load all data into memory first for fair comparison
    var data: std.ArrayList(u8) = .empty;
-    defer data.deinit(base_allocator);
-    try stdin.appendRemaining(base_allocator, &data, @enumFromInt(100 * 1024 * 1024));
+    defer data.deinit(gpa);
+    try stdin.appendRemaining(gpa, &data, @enumFromInt(100 * 1024 * 1024));

    if (std.mem.eql(u8, format, "srf")) {
        var reader = std.Io.Reader.fixed(data.items);
-        const records = try srf.parse(&reader, allocator, .{ .alloc_strings = false });
+        const records = try srf.parse(&reader, allocator, .{ .parse_allocator = .none });
        defer records.deinit();
    } else if (std.mem.eql(u8, format, "jsonl")) {
        var lines = std.mem.splitScalar(u8, data.items, '\n');
--- a/src/srf.zig
+++ b/src/srf.zig
@ -95,6 +95,7 @@ pub const ParseError = error{
    ReadFailed,
    StreamTooLong,
    OutOfMemory,
+    AllocationRequired,
    EndOfStream,
 };

@ -140,7 +141,7 @@ pub const Value = union(enum) {
    /// as well as multi-line strings. Metadata is returned to assist in tracking
    ///
    /// This function is intended to be used by the SRF parser
-    pub fn parse(allocator: std.mem.Allocator, str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
+    pub fn parse(str: []const u8, state: *RecordIterator.State, delimiter: u8) ParseError!ValueWithMetaData {
        const type_val_sep_raw = std.mem.indexOfScalar(u8, str, ':');
        if (type_val_sep_raw == null) {
            try parseError("no type data or value after key", state);
@ -159,7 +160,11 @@ pub const Value = union(enum) {
            state.column += total_chars;
            state.partial_line_column += total_chars;
            return .{
-                .item_value = .{ .string = try dupe(allocator, state.options, val) },
+                .item_value = .{ .string = try dupe(
+                    state.*,
+                    val,
+                    .value,
+                ) },
            };
        }
        if (std.mem.eql(u8, "binary", trimmed_meta)) {
@ -179,11 +184,13 @@ pub const Value = union(enum) {
                    .error_parsing = true,
                };
            };
-            const data = try allocator.alloc(u8, size);
-            errdefer allocator.free(data);
+            const alloc = findAllocator(state.*, .value) orelse
+                try fallbackAllocatorFor(state);
+            const data = try alloc.alloc(u8, size);
+            errdefer alloc.free(data);
            Decoder.decode(data, val) catch {
                try parseError("error parsing base64 value", state);
-                allocator.free(data);
+                alloc.free(data);
                return .{
                    .item_value = null,
                    .error_parsing = true,
@ -271,12 +278,18 @@ pub const Value = union(enum) {
            // We fit on this line, everything is "normal"
            const val = rest_of_data[0..size];
            return .{
-                .item_value = .{ .string = val },
+                .item_value = .{ .string = try dupe(
+                    state.*,
+                    val,
+                    .value,
+                ) },
            };
        }
        // This is not enough, we need more data from the reader
-        const buf = try allocator.alloc(u8, size);
-        errdefer allocator.free(buf);
+        const alloc = findAllocator(state.*, .value) orelse
+            try fallbackAllocatorFor(state);
+        const buf = try alloc.alloc(u8, size);
+        errdefer alloc.free(buf);
        @memcpy(buf[0..rest_of_data.len], rest_of_data);
        // add back the newline we are skipping
        buf[rest_of_data.len] = '\n';
@ -296,6 +309,13 @@ pub const Value = union(enum) {
            .reader_advanced = true,
        };
    }
+    inline fn fallbackAllocatorFor(state: *RecordIterator.State) !std.mem.Allocator {
+        if (state.fallback_arena) |f| return f.allocator();
+        if (state.options.parse_allocator == .none) return error.AllocationRequired;
+        state.fallback_arena = try state.allocator.create(std.heap.ArenaAllocator);
+        state.fallback_arena.?.* = .init(state.allocator);
+        return state.fallback_arena.?.allocator();
+    }
 };

 /// A single key-value pair within a record. The key is always a string.
@ -412,6 +432,7 @@ pub const Record = struct {
            fields_allocated: [fields_len]bool = .{false} ** fields_len,
            allocator: std.mem.Allocator,
            source_value: T,
+            format_options: FormatOptions,
            cached_record: ?Record = null,

            const Self = @This();
@ -419,12 +440,13 @@ pub const Record = struct {

            pub const SourceType = T;

-            pub fn init(allocator: std.mem.Allocator, source: T) Self {
+            pub fn init(allocator: std.mem.Allocator, source: T, options: FormatOptions) Self {
                return .{
                    // SAFETY: fields_buf is set by record() and is guarded by fields_set
                    .fields_buf = undefined,
                    .allocator = allocator,
                    .source_value = source,
+                    .format_options = options,
                };
            }

@ -442,7 +464,7 @@ pub const Record = struct {
            ) !usize {
                if (default_value_ptr) |d| {
                    const default_val: *const field_type = @ptrCast(@alignCast(d));
-                    if (std.meta.eql(val, default_val.*)) return inx;
+                    if (!self.format_options.emit_default_values and std.meta.eql(val, default_val.*)) return inx;
                }
                const value = try self.formatField(field_type, field_name, val);
                self.fields_buf[inx] = .{
@ -512,7 +534,7 @@ pub const Record = struct {
                        const key = if (@hasDecl(U, "srf_tag_field"))
                            U.srf_tag_field
                        else
-                            "active_tag";
+                            "type";
                        self.fields_buf[inx] = .{
                            .key = key,
                            .value = .{ .string = active_tag_name },
@ -562,7 +584,13 @@ pub const Record = struct {
    ///
    /// Call `deinit()` to free any allocations made for custom-formatted fields.
    pub fn from(comptime T: type, allocator: std.mem.Allocator, val: T) !OwnedRecord(T) {
-        return OwnedRecord(T).init(allocator, val);
+        return OwnedRecord(T).init(allocator, val, .{});
+    }
+
+    /// Internal function to allow an OwnedRecord to see format options necessary
+    /// to emit default values
+    fn fromWithOptions(comptime T: type, allocator: std.mem.Allocator, val: T, options: FormatOptions) !OwnedRecord(T) {
+        return OwnedRecord(T).init(allocator, val, options);
    }

    /// Coerce a `Record` to a Zig struct or tagged union. For each field in `T`,
@ -573,7 +601,7 @@ pub const Record = struct {
    /// first value silently ignored.
    ///
    /// For tagged unions, the active variant is determined by a field named
-    /// `"active_tag"` (or the value of `T.srf_tag_field` if declared). The
+    /// `"type"` (or the value of `T.srf_tag_field` if declared). The
    /// remaining fields are coerced into the payload struct of that variant.
    ///
    /// For streaming data without collecting fields first, prefer
@ -607,7 +635,7 @@ pub const Record = struct {
                const active_tag_name = if (@hasDecl(T, "srf_tag_field"))
                    T.srf_tag_field
                else
-                    "active_tag";
+                    "type";
                if (self.firstFieldByName(active_tag_name)) |srf_field| {
                    if (srf_field.value == null or srf_field.value.? != .string)
                        return error.ActiveTagValueMustBeAString;
@ -691,6 +719,10 @@ pub const RecordIterator = struct {
        end_of_record_reached: bool = false,
        field_iterator: ?FieldIterator = null,

+        aa: std.mem.Allocator,
+        allocator: std.mem.Allocator,
+        fallback_arena: ?*std.heap.ArenaAllocator = null,
+
        /// Takes the next line, trimming leading whitespace and ignoring comments
        /// Directives (comments starting with #!) are preserved
        pub fn nextLine(state: *State) ?[]const u8 {
@ -790,7 +822,6 @@ pub const RecordIterator = struct {
        /// subsequent calls continue to return `null`.
        pub fn next(self: FieldIterator) !?Field {
            const state = self.state;
-            const aa = self.arena.allocator();
            // Main parsing. We already have the first line of data, which could
            // be a record (compact format) or a key/value pair (long format)

@ -829,7 +860,6 @@ pub const RecordIterator = struct {
            state.column += key.len + 1;
            state.partial_line_column += key.len + 1;
            const value = try Value.parse(
-                aa,
                it.rest(),
                state,
                state.field_delimiter,
@ -837,7 +867,10 @@ pub const RecordIterator = struct {

            var field: ?Field = null;
            if (!value.error_parsing) {
-                field = .{ .key = try dupe(aa, state.options, key), .value = value.item_value };
+                field = .{
+                    .key = try dupe(state.*, key, .key),
+                    .value = value.item_value,
+                };
            }

            if (value.reader_advanced and state.field_delimiter == ',') {
@ -900,7 +933,7 @@ pub const RecordIterator = struct {
        ///
        /// For tagged unions, the active tag field must appear first in the
        /// stream (unlike `Record.to` which can do random access). The tag
-        /// field name defaults to `"active_tag"` or `T.srf_tag_field` if
+        /// field name defaults to `"type"` or `T.srf_tag_field` if
        /// declared.
        pub fn to(self: FieldIterator, comptime T: type) !T {
            const ti = @typeInfo(T);
@ -957,7 +990,7 @@ pub const RecordIterator = struct {
                    const active_tag_name = if (@hasDecl(T, "srf_tag_field"))
                        T.srf_tag_field
                    else
-                        "active_tag";
+                        "type";
                    const first_try = try self.next();
                    if (first_try == null) return error.ActiveTagFieldNotFound;
                    const f = first_try.?;
@ -1018,9 +1051,9 @@ pub const RecordIterator = struct {
    /// use or refresh cached data. Note that data will be returned by parse/
    /// iterator regardless of freshness. This enables callers to use cached
    /// data temporarily while refreshing it
-    pub fn isFresh(self: RecordIterator) bool {
+    pub fn isFresh(self: RecordIterator, io: std.Io) bool {
        if (self.expires) |exp|
-            return std.time.timestamp() < exp;
+            return std.Io.Timestamp.now(io, .real).toSeconds() < exp;

        // no expiry: always fresh, never frozen
        return true;
@ -1038,7 +1071,7 @@ pub const RecordIterator = struct {
        defer ri.deinit();

        // No expiry set, so always fresh
-        try std.testing.expect(ri.isFresh());
+        try std.testing.expect(ri.isFresh(std.testing.io));
    }
 };

@ -1049,10 +1082,99 @@ pub const ParseOptions = struct {
    diagnostics: ?*Diagnostics = null,

    /// By default, the parser will copy data so it is safe to free the original
-    /// This will impose about 8% overhead, but be safer. If you do not require
-    /// this safety, set alloc_strings to false. Setting this to false is the
-    /// equivalent of the "Leaky" parsing functions of std.json
-    alloc_strings: bool = true,
+    /// buffer or use with streaming readers. This will impose about 8% overhead,
+    /// and ties the lifetime of any strings to the deinit() method. For
+    /// fixed buffer parsing, consider using .none, which will not allocate
+    /// strings. More complex use cases can use their own allocator for control
+    /// over string lifetime
+    parse_allocator: ParseAllocator = .parse_arena,
+};
+
+/// Allocator to use for parsing data
+pub const ParseAllocator = union(enum) {
+    /// Absolutely no allocation allowed. This will fail with AllocationRequired under the following circumstances:
+    ///
+    /// * binary data is encountered (needs decoding)
+    /// * multi-line string literals are encountered (allocation needed to accommodate streaming readers)
+    none,
+    /// No allocator. Lifetime of any data parsed is tied to the underlying
+    /// data passed to the reader. This is most appropriate when the caller
+    /// uses a fixed buffer, and is the equivalent of the "Leaky" parsing
+    /// functions of std.json. IMPORTANT: This will NOT avoid all allocations.
+    /// Specifically binary data is base64 encoded per the spec and we need
+    /// to allocate space for the decode. Also, multi-line data can not be
+    /// assumed to be available post-reader advance, and therefore allocation
+    /// is performed in that case.
+    ///
+    /// For ABSOLUTELY NO ALLOCATION, use none. Otherwise, the Parsed
+    /// struct has a deinit function that frees everything, and toOwnedFallback
+    /// which will deinit the arena for parsing and return the fallback arena
+    /// that can be released at a later time
+    none_with_fallback,
+    /// Use the arena allocator created by the parser to copy any strings.
+    /// This ties the lifetime of any data parsed to the parser deinit()
+    /// function. Imposes about 8% overhead compared to "none".
+    parse_arena,
+    /// Parser will use the caller-supplied allocator, providing the most
+    /// flexibility over lifetime. Overhead will be contingent on the allocator
+    /// used. If the allocator is an arena allocator, assume 8% overhead over
+    /// "none". It is likely a fixed buffer allocator would be somewhat less.
+    custom: CustomParseAllocator,
+};
+
+/// Allocator to use for a specific scope (either keys or values). Different
+/// from ParseAllocator because the custom variant here has to be a std.mem.Allocator
+pub const ScopeAllocator = union(enum) {
+    /// Absolutely no allocation allowed. This will fail with AllocationRequired under the following circumstances:
+    ///
+    /// * binary data is encountered (needs decoding)
+    /// * multi-line string literals are encountered (allocation needed to accommodate streaming readers)
+    none,
+    /// No allocator. Lifetime of any data parsed is tied to the underlying
+    /// data passed to the reader. This is most appropriate when the caller
+    /// uses a fixed buffer, and is the equivalent of the "Leaky" parsing
+    /// functions of std.json. IMPORTANT: This will NOT avoid all allocations.
+    /// Specifically binary data is base64 encoded per the spec and we need
+    /// to allocate space for the decode. Also, multi-line data can not be
+    /// assumed to be available post-reader advance, and therefore allocation
+    /// is performed in that case.
+    ///
+    /// For ABSOLUTELY NO ALLOCATION, use none. Otherwise, the Parsed
+    /// struct has a deinit function that frees everything, and toOwnedFallback
+    /// which will deinit the arena for parsing and return the fallback arena
+    /// that can be released at a later time
+    none_with_fallback,
+    /// Use the arena allocator created by the parser to copy any strings.
+    /// This ties the lifetime of any data parsed to the parser deinit()
+    /// function. Imposes about 8% overhead compared to "none".
+    parse_arena,
+    /// Parser will use the caller-supplied allocator, providing the most
+    /// flexibility over lifetime. Overhead will be contingent on the allocator
+    /// used. If the allocator is an arena allocator, assume 8% overhead over
+    /// "none". It is likely a fixed buffer allocator would be somewhat less.
+    allocator: std.mem.Allocator,
+};
+pub const CustomParseAllocator = struct {
+    key_allocator: ScopeAllocator,
+    value_allocator: ScopeAllocator,
+
+    /// Initializes a custom parse allocator suitable for use in common workflows
+    /// where you iterate each record, then iterate through fields with full control
+    pub fn initIterator(allocator: std.mem.Allocator) CustomParseAllocator {
+        return .{
+            .key_allocator = .{ .allocator = allocator },
+            .value_allocator = .{ .allocator = allocator },
+        };
+    }
+
+    /// Initializes a custom parse allocator suitable for use in common workflows
+    /// where you iterate each record and call RecordIterator.to() on the result
+    pub fn initTo(allocator: std.mem.Allocator) CustomParseAllocator {
+        return .{
+            .key_allocator = .{ .none = {} },
+            .value_allocator = .{ .allocator = allocator },
+        };
+    }
 };

 const Directive = union(enum) {
@ -1118,6 +1240,9 @@ pub const FormatOptions = struct {
    /// and just format the record. This is useful for appending to an existing
    /// srf file rather than overwriting all the data
    emit_directives: bool = true,
+
+    /// When set to true, this will output all values, even if they are the default values
+    emit_default_values: bool = false,
 };

 /// Returns a `Formatter` for writing pre-built `Record` values to a writer.
@ -1150,7 +1275,12 @@ pub fn FromFormatter(comptime T: type) type {
            for (self.value) |item| {
                if (!first and self.options.long_format) try writer.writeByte('\n');
                first = false;
-                var owned_record = Record.from(T, self.allocator, item) catch
+                var owned_record = Record.fromWithOptions(
+                    T,
+                    self.allocator,
+                    item,
+                    self.options,
+                ) catch
                    return std.Io.Writer.Error.WriteFailed;
                defer owned_record.deinit();
                const record = owned_record.record() catch return std.Io.Writer.Error.WriteFailed;
@ -1226,8 +1356,9 @@ pub const RecordFormatter = struct {
                switch (f.value.?) {
                    .string => |s| {
                        const newlines = std.mem.containsAtLeastScalar(u8, s, 1, '\n');
+                        const commas = !self.options.long_format and std.mem.containsAtLeastScalar(u8, s, 1, ',');
                        // Output the count if newlines exist
-                        const count = if (newlines) s.len else null;
+                        const count = if (newlines or commas) s.len else null;
                        if (count) |c| try writer.print("{d}", .{c});
                        try writer.writeByte(':');
                        try writer.writeAll(s);
@ -1253,6 +1384,7 @@ pub const RecordFormatter = struct {
 pub const Parsed = struct {
    records: []Record,
    arena: *std.heap.ArenaAllocator,
+    fallback_arena: ?*std.heap.ArenaAllocator,

    /// optional expiry time for the data. Useful for caching
    /// Note that on a parse, data will always be returned and it will be up
@ -1271,10 +1403,26 @@ pub const Parsed = struct {
    /// record and field data. After calling `deinit`, any slices or string
    /// pointers obtained from `records` are invalid.
    pub fn deinit(self: Parsed) void {
+        self.toOwnedFallback().deinit();
+    }
+
+    pub fn toOwnedFallback(self: Parsed) FallbackArena {
        const ca = self.arena.child_allocator;
        self.arena.deinit();
        ca.destroy(self.arena);
+        return .{ .fallback_arena = self.fallback_arena };
    }
+
+    pub const FallbackArena = struct {
+        fallback_arena: ?*std.heap.ArenaAllocator,
+
+        pub fn deinit(self: FallbackArena) void {
+            if (self.fallback_arena) |f| {
+                f.deinit();
+                f.child_allocator.destroy(f);
+            }
+        }
+    };
 };

 /// Parses all records from the reader into memory, returning a `Parsed` struct
@ -1312,6 +1460,7 @@ pub fn parse(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: Pars
        .expires = it.expires,
        .created = it.created,
        .modified = it.modified,
+        .fallback_arena = it.state.fallback_arena,
    };
 }

@ -1344,12 +1493,13 @@ pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: P
    errdefer allocator.destroy(arena);
    arena.* = .init(allocator);
    errdefer arena.deinit();
-    const aa = arena.allocator();
-    const state = try aa.create(RecordIterator.State);
+    const state = try arena.allocator().create(RecordIterator.State);
    state.* = .{
        .reader = reader,
        .current_line = null,
        .options = options,
+        .aa = arena.allocator(),
+        .allocator = allocator,
    };
    var it: RecordIterator = .{
        .arena = arena,
@ -1390,12 +1540,32 @@ pub fn iterator(reader: *std.Io.Reader, allocator: std.mem.Allocator, options: P
    };
    return it; // with current_line
 }
-
-inline fn dupe(allocator: std.mem.Allocator, options: ParseOptions, data: []const u8) ParseError![]const u8 {
-    if (options.alloc_strings)
-        return try allocator.dupe(u8, data);
+const DataScope = enum {
+    key,
+    value,
+};
+inline fn dupe(state: RecordIterator.State, data: []const u8, scope: DataScope) ParseError![]const u8 {
+    if (findAllocator(state, scope)) |a|
+        return try a.dupe(u8, data);
    return data;
 }
+inline fn findAllocator(state: RecordIterator.State, scope: DataScope) ?std.mem.Allocator {
+    switch (state.options.parse_allocator) {
+        .none, .none_with_fallback => return null,
+        .parse_arena => return state.aa,
+        .custom => |a| {
+            const alloc = switch (scope) {
+                .key => a.key_allocator,
+                .value => a.value_allocator,
+            };
+            switch (alloc) {
+                .none, .none_with_fallback => return null,
+                .parse_arena => return state.aa,
+                .allocator => |c| return c,
+            }
+        },
+    }
+}
 /// Logs a parse error to diagnostics. Note that the allocator provided should
 /// *NOT* be an arena, as the message must outlive the parse results, which will
 /// be otherwise cleaned up in the arena deinit
@ -1756,6 +1926,48 @@ test "serialize/deserialize" {
    ;
    try std.testing.expectEqualStrings(expect, compact_from);
 }
+test "serialize/deserialize allows overflow lifetime semantics" {
+    const Data = struct {
+        foo: []const u8,
+        bar: u8,
+        qux: ?TestRecType = .foo,
+        b: bool = false,
+        f: f32 = 4.2,
+        custom: ?TestCustomType = null,
+    };
+
+    const compact =
+        \\#!srfv1
+        \\foo:binary:YmFy,bar:num:42
+        \\foo:binary:YmFy,bar:num:42
+        \\foo:binary:YmFy,bar:num:42,qux::bar
+        \\foo:binary:YmFy,bar:num:42,qux::bar,b:bool:true,f:num:6.9,custom:string:hi
+        \\
+    ;
+    // Round trip and make sure we get equivalent objects back
+    var compact_reader = std.Io.Reader.fixed(compact);
+    const parsed = try parse(
+        &compact_reader,
+        std.testing.allocator,
+        .{ .parse_allocator = .none_with_fallback },
+    );
+    try std.testing.expect(parsed.fallback_arena != null);
+
+    const rec1 = try parsed.records[0].to(Data);
+    const fallback = parsed.toOwnedFallback();
+    defer fallback.deinit();
+    // This would not be possible otherwise
+    try std.testing.expectEqualStrings("bar", rec1.foo);
+    try std.testing.expectEqual(@as(u8, 42), rec1.bar);
+    try std.testing.expectEqual(@as(TestRecType, .foo), rec1.qux);
+
+    var another_reader = std.Io.Reader.fixed(compact);
+    try std.testing.expectError(error.AllocationRequired, parse(
+        &another_reader,
+        std.testing.allocator,
+        .{ .parse_allocator = .none },
+    ));
+}
 test "conversion from string true/false to proper type" {
    const Data = struct {
        foo: []const u8,
@ -1842,8 +2054,8 @@ test "unions" {
    );
    const expect =
        \\#!srfv1
-        \\active_tag::foo,number:num:42,true_or_false:bool:true
-        \\active_tag::bar,sentence::foobar,decimal:num:6.9
+        \\type::foo,number:num:42,true_or_false:bool:true
+        \\type::bar,sentence::foobar,decimal:num:6.9
        \\
    ;
    try std.testing.expectEqualStrings(expect, compact_from);
@ -1966,6 +2178,80 @@ test iterator {
    // No more records
    try std.testing.expect(try ri.next() == null);
 }
+test "iterator with custom allocator" {
+    // Example: streaming through records and fields using the iterator API.
+    // This is the preferred parsing approach -- no intermediate slices are
+    // allocated for fields or records.
+    const data =
+        \\#!srfv1
+        \\name::alice,desc:5:world
+    ;
+    const allocator = std.testing.allocator;
+    var reader = std.Io.Reader.fixed(data);
+    var ri = try iterator(
+        &reader,
+        allocator,
+        .{
+            .parse_allocator = .{ .custom = .initIterator(std.testing.allocator) },
+        },
+    );
+    defer ri.deinit();
+
+    // Advance to the first (and only) record
+    const fi = (try ri.next()).?;
+
+    // Iterate fields within the record
+    const field1 = (try fi.next()).?;
+    defer allocator.free(field1.key);
+    defer allocator.free(field1.value.?.string);
+    try std.testing.expectEqualStrings("name", field1.key);
+    try std.testing.expectEqualStrings("alice", field1.value.?.string);
+    const field2 = (try fi.next()).?;
+    defer allocator.free(field2.key);
+    defer allocator.free(field2.value.?.string);
+    try std.testing.expectEqualStrings("desc", field2.key);
+    try std.testing.expectEqualStrings("world", field2.value.?.string);
+
+    // No more fields in this record
+    try std.testing.expect(try fi.next() == null);
+    // No more records
+    try std.testing.expect(try ri.next() == null);
+}
+test "iterator with custom allocator - to() pattern" {
+    // const ll = std.testing.log_level;
+    // std.testing.log_level = .debug;
+    // defer std.testing.log_level = ll;
+    // Example: streaming through records and fields using the iterator API.
+    // This is the preferred parsing approach -- no intermediate slices are
+    // allocated for fields or records.
+    const data =
+        \\#!srfv1
+        \\name::alice,desc:5:world
+    ;
+    const allocator = std.testing.allocator;
+    var reader = std.Io.Reader.fixed(data);
+    var ri = try iterator(
+        &reader,
+        allocator,
+        .{
+            .parse_allocator = .{ .custom = .initTo(std.testing.allocator) },
+        },
+    );
+    defer ri.deinit();
+
+    // Advance to the first (and only) record
+    const fi = (try ri.next()).?;
+    const rec = try fi.to(struct { name: []const u8, desc: []const u8 });
+    defer allocator.free(rec.name);
+    defer allocator.free(rec.desc);
+    try std.testing.expectEqualStrings("alice", rec.name);
+    try std.testing.expectEqualStrings("world", rec.desc);
+
+    // No more fields in this record
+    try std.testing.expect(try fi.next() == null);
+    // No more records
+    try std.testing.expect(try ri.next() == null);
+}
 test parse {
    // Example: batch parsing collects all records and fields into slices.
    // Prefer `iterator` for streaming; use `parse` when random access to
@ -2016,6 +2302,48 @@ test fmtFrom {
        \\
    , result);
 }
+test "fmtFrom commas" {
+    // Example: serialize typed Zig values directly to SRF format.
+    const Data = struct {
+        name: []const u8 = "bob",
+        age: u8,
+    };
+    const values: []const Data = &.{
+        .{ .name = "alice, yo", .age = 30 },
+    };
+    var buf: [4096]u8 = undefined;
+    const result = try std.fmt.bufPrint(
+        &buf,
+        "{f}",
+        .{fmtFrom(Data, std.testing.allocator, values, .{})},
+    );
+    try std.testing.expectEqualStrings(
+        \\#!srfv1
+        \\name:9:alice, yo,age:num:30
+        \\
+    , result);
+}
+test "fmtFrom outputs defaults with option" {
+    // Example: serialize typed Zig values directly to SRF format.
+    const Data = struct {
+        name: []const u8 = "bob",
+        age: u8,
+    };
+    const values: []const Data = &.{
+        .{ .age = 30 },
+    };
+    var buf: [4096]u8 = undefined;
+    const result = try std.fmt.bufPrint(
+        &buf,
+        "{f}",
+        .{fmtFrom(Data, std.testing.allocator, values, .{ .emit_default_values = true })},
+    );
+    try std.testing.expectEqualStrings(
+        \\#!srfv1
+        \\name::bob,age:num:30
+        \\
+    , result);
+}
 test "parse with diagnostics" {
    // Example: batch parsing collects all records and fields into slices.
    // Prefer `iterator` for streaming; use `parse` when random access to
Author	SHA1	Message	Date
Emil Lerch	e21a7308c3	address all allocation edge cases All checks were successful Generic zig build / build (push) Successful in 59s Details	2026-05-27 17:04:26 -07:00
Emil Lerch	e6b6691a05	allow consumers to control allocation more granularly All checks were successful Generic zig build / build (push) Successful in 21s Details	2026-05-27 13:38:38 -07:00
Emil Lerch	12b755660e	allow outputting default values All checks were successful Generic zig build / build (push) Successful in 30s Details	2026-05-25 13:13:36 -07:00
Emil Lerch	a99adb0b29	handle commas in compact form	2026-05-25 13:03:59 -07:00
Emil Lerch	19246e5f83	switch active_tag default to type, which makes more sense from user perspective	2026-05-25 13:02:41 -07:00
Emil Lerch	512eab0db0	upgrade to zig 0.16.0 All checks were successful Generic zig build / build (push) Successful in 25s Details	2026-04-14 14:30:55 -07:00