zfin/src/history.zig

550 lines
22 KiB
Zig

//! History IO — read `history/<date>-portfolio.srf` files produced by
//! `zfin snapshot` back into typed `Snapshot` structs.
//!
//! Two layers, both free of rendering concerns:
//!
//! - `parseSnapshotBytes(bytes)` — parse an SRF blob into a `Snapshot`.
//! The snapshot's string fields slice directly into `bytes`, so the
//! caller MUST keep that buffer alive as long as the snapshot.
//! - `loadHistoryDir(dir)` — enumerate `*-portfolio.srf` in a directory
//! and parse each. The returned `LoadedHistory` owns both the
//! snapshots and their backing byte buffers as matched pairs.
//!
//! The snapshot reader is discriminator-driven: every record must carry
//! a `kind::<meta|total|tax_type|account|lot>` field. Records whose
//! `kind` is set to something this version doesn't recognize are
//! skipped (forward-compatibility). Malformed records — missing `kind`,
//! missing required fields within a known kind, coercion failures — are
//! treated as parse errors, not silently dropped.
//!
//! Lives at `src/history.zig` rather than `src/commands/history.zig`
//! because the IO is used by more than the CLI history command (the
//! rollup builder, future TUI history tab, and any external consumer
//! all go through here). The command module stays a thin CLI wrapper.
const std = @import("std");
const srf = @import("srf");
const snapshot_mod = @import("models/snapshot.zig");
const Date = @import("models/date.zig").Date;
const timeline = @import("analytics/timeline.zig");
/// Error set returned by `parseSnapshotBytes`.
pub const Error = error{
/// The input didn't open with a `#!srfv1` directive or couldn't be
/// iterated as SRF. Also returned when a record fails to decode for
/// any reason other than carrying an unrecognized `kind`.
InvalidSrf,
/// The input parsed as SRF but had no `kind::meta` record, so we
/// can't identify it as a snapshot.
NoMetaRecord,
/// The allocator reported out-of-memory while collecting rows.
OutOfMemory,
};
/// File-name suffix that identifies snapshot files in the `history/`
/// directory. `loadHistoryDir` only considers entries whose name ends
/// with this string.
pub const snapshot_suffix = "-portfolio.srf";
// ── Single-file parsing ──────────────────────────────────────
/// Parse an SRF blob into a `Snapshot`.
///
/// The snapshot's string fields borrow directly from `bytes`
/// (zero-copy), so the caller MUST keep `bytes` alive for at least as
/// long as the returned snapshot. The row slices are allocated with
/// `allocator`; release them with `snap.deinit(allocator)`.
///
/// Record handling:
/// - Records whose `kind` is not one of the `SnapshotRecord` variants
///   are skipped (forward-compatibility).
/// - Only the first `kind::meta` record is kept; later ones are ignored.
///
/// Errors:
/// - `error.InvalidSrf` — the SRF iterator could not be created or
///   advanced, or a record failed to decode for any reason other than
///   an unrecognized `kind`.
/// - `error.NoMetaRecord` — the input contained no `kind::meta` record.
/// - `error.OutOfMemory` — growing or finalizing a row list failed.
///
/// Typical call pattern:
/// ```
/// const bytes = try readFileAlloc(...);
/// defer allocator.free(bytes);
/// var snap = try parseSnapshotBytes(allocator, bytes);
/// defer snap.deinit(allocator);
/// ```
pub fn parseSnapshotBytes(
    allocator: std.mem.Allocator,
    bytes: []const u8,
) Error!snapshot_mod.Snapshot {
    var reader = std.Io.Reader.fixed(bytes);
    // `alloc_strings = false` tells srf to return string values as
    // slices into `bytes` rather than duping into its own arena.
    var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidSrf;
    defer it.deinit();

    var meta_opt: ?snapshot_mod.MetaRow = null;
    var totals: std.ArrayList(snapshot_mod.TotalRow) = .empty;
    errdefer totals.deinit(allocator);
    var taxes: std.ArrayList(snapshot_mod.TaxTypeRow) = .empty;
    errdefer taxes.deinit(allocator);
    var accounts: std.ArrayList(snapshot_mod.AccountRow) = .empty;
    errdefer accounts.deinit(allocator);
    var lots: std.ArrayList(snapshot_mod.LotRow) = .empty;
    errdefer lots.deinit(allocator);

    while (it.next() catch return error.InvalidSrf) |field_it| {
        // `to(SnapshotRecord)` reads the `kind` discriminator first, then
        // coerces the remaining fields into the matching variant struct.
        //
        // We skip ONLY `ActiveTagDoesNotExist` — that's the genuine
        // forward-compatibility case (a future snapshot version wrote a
        // record kind we don't know about). Every other srf error
        // indicates malformed data in a record we SHOULD understand, so
        // we propagate it up rather than silently losing rows.
        const rec = field_it.to(SnapshotRecord) catch |err| switch (err) {
            error.ActiveTagDoesNotExist => continue,
            else => return error.InvalidSrf,
        };
        switch (rec) {
            .meta => |m| {
                // First meta record wins; duplicates are ignored.
                if (meta_opt == null) meta_opt = m;
            },
            .total => |r| try totals.append(allocator, r),
            .tax_type => |r| try taxes.append(allocator, r),
            .account => |r| try accounts.append(allocator, r),
            .lot => |r| try lots.append(allocator, r),
        }
    }

    const meta = meta_opt orelse return error.NoMetaRecord;

    // Detach the lists one at a time. A successful `toOwnedSlice` empties
    // its list, so the list-level `errdefer`s above no longer cover that
    // memory; each detached slice gets its own `errdefer` so a failure in
    // a later conversion cannot leak the earlier ones.
    const owned_totals = try totals.toOwnedSlice(allocator);
    errdefer allocator.free(owned_totals);
    const owned_taxes = try taxes.toOwnedSlice(allocator);
    errdefer allocator.free(owned_taxes);
    const owned_accounts = try accounts.toOwnedSlice(allocator);
    errdefer allocator.free(owned_accounts);
    const owned_lots = try lots.toOwnedSlice(allocator);

    return .{
        .meta = meta,
        .totals = owned_totals,
        .tax_types = owned_taxes,
        .accounts = owned_accounts,
        .lots = owned_lots,
    };
}
/// Discriminated snapshot record. SRF's `FieldIterator.to(T)` dispatches
/// on the `kind` field (per `srf_tag_field`), consumes it, and then
/// coerces remaining fields into the matching variant struct. Variant
/// names here MUST match the wire-format `kind` values literally.
///
/// `parseSnapshotBytes` switches on the active variant to route each
/// record into the matching part of the `Snapshot`.
const SnapshotRecord = union(enum) {
/// Payload of a `kind::meta` record.
meta: snapshot_mod.MetaRow,
/// Payload of a `kind::total` record.
total: snapshot_mod.TotalRow,
/// Payload of a `kind::tax_type` record.
tax_type: snapshot_mod.TaxTypeRow,
/// Payload of a `kind::account` record.
account: snapshot_mod.AccountRow,
/// Payload of a `kind::lot` record.
lot: snapshot_mod.LotRow,
/// Name of the wire field that carries the variant discriminator.
pub const srf_tag_field = "kind";
};
// ── Directory loading ────────────────────────────────────────
/// Owned result of `loadHistoryDir`.
///
/// `snapshots` and `buffers` are parallel slices: entry `i` of
/// `snapshots` borrows its strings from entry `i` of `buffers`, which
/// is why the raw file bytes are kept alive alongside the parsed data.
/// Call `deinit` exactly once to release everything.
pub const LoadedHistory = struct {
    snapshots: []snapshot_mod.Snapshot,
    /// Backing bytes for each snapshot, matched by index with
    /// `snapshots`. Empty whenever `snapshots` is empty.
    buffers: [][]u8,
    allocator: std.mem.Allocator,

    /// Release the snapshots first, then the byte buffers they borrow
    /// from, then the two outer slices.
    pub fn deinit(self: *LoadedHistory) void {
        const gpa = self.allocator;

        for (self.snapshots) |*snap| {
            snap.deinit(gpa);
        }
        gpa.free(self.snapshots);

        for (self.buffers) |raw| {
            gpa.free(raw);
        }
        gpa.free(self.buffers);
    }
};
/// Enumerate `*-portfolio.srf` in `history_dir` and parse each into a
/// `Snapshot`.
///
/// - A missing `history_dir` is not an error; the result is simply empty.
/// - Entries that are not regular files, or whose name does not end in
///   `snapshot_suffix`, are ignored.
/// - Files that fail to read (reads are capped at 16 MiB per file) or
///   fail to parse are skipped with a `std.log.warn`; callers get back
///   only the ones that loaded cleanly.
///
/// Returned snapshots are in filesystem enumeration order — NOT sorted.
/// Consumers that want chronological order should feed through
/// `analytics.timeline.buildSeries` (which sorts) rather than relying
/// on the loader's order.
///
/// The caller owns the result and must call `LoadedHistory.deinit`.
/// Errors opening the directory (other than `FileNotFound`), errors
/// while iterating it, and allocation failures for the result lists or
/// path strings are propagated; on any such error everything loaded so
/// far is released before returning.
pub fn loadHistoryDir(
    allocator: std.mem.Allocator,
    history_dir: []const u8,
) !LoadedHistory {
    var dir = std.fs.cwd().openDir(history_dir, .{ .iterate = true }) catch |err| switch (err) {
        error.FileNotFound => {
            // Missing history dir isn't fatal — it just means no
            // snapshots captured yet.
            return .{ .snapshots = &.{}, .buffers = &.{}, .allocator = allocator };
        },
        else => return err,
    };
    defer dir.close();

    var snapshots: std.ArrayList(snapshot_mod.Snapshot) = .empty;
    var buffers: std.ArrayList([]u8) = .empty;
    errdefer {
        for (snapshots.items) |*s| s.deinit(allocator);
        snapshots.deinit(allocator);
        for (buffers.items) |b| allocator.free(b);
        buffers.deinit(allocator);
    }

    var it = dir.iterate();
    while (try it.next()) |entry| {
        if (entry.kind != .file) continue;
        if (!std.mem.endsWith(u8, entry.name, snapshot_suffix)) continue;

        // Reserve one slot in each list BEFORE acquiring the file bytes
        // and the snapshot. That way handing them over below cannot fail,
        // and there is never a moment where `snap`/`bytes` exist but are
        // owned by neither list (which would leak them on OOM).
        try snapshots.ensureUnusedCapacity(allocator, 1);
        try buffers.ensureUnusedCapacity(allocator, 1);

        const full_path = try std.fs.path.join(allocator, &.{ history_dir, entry.name });
        defer allocator.free(full_path);

        const bytes = std.fs.cwd().readFileAlloc(allocator, full_path, 16 * 1024 * 1024) catch |err| {
            std.log.warn("history: failed to read {s}: {s}", .{ full_path, @errorName(err) });
            continue;
        };
        // `bytes` is freed either by LoadedHistory.deinit on success or
        // by the branch below on parse failure — no defer-free here.
        const snap = parseSnapshotBytes(allocator, bytes) catch |err| {
            std.log.warn("history: failed to parse {s}: {s}", .{ full_path, @errorName(err) });
            allocator.free(bytes);
            continue;
        };

        // Infallible thanks to the reservation above; keeps the two lists
        // the same length at all times.
        snapshots.appendAssumeCapacity(snap);
        buffers.appendAssumeCapacity(bytes);
    }

    // Detach the lists one at a time. Once `snapshots` has been detached
    // the function-level `errdefer` only sees an empty list, so the
    // detached slice needs its own cleanup in case the second conversion
    // fails.
    const owned_snapshots = try snapshots.toOwnedSlice(allocator);
    errdefer {
        for (owned_snapshots) |*s| s.deinit(allocator);
        allocator.free(owned_snapshots);
    }
    const owned_buffers = try buffers.toOwnedSlice(allocator);

    return .{
        .snapshots = owned_snapshots,
        .buffers = owned_buffers,
        .allocator = allocator,
    };
}
/// Build the path of the `history` directory that sits next to
/// `portfolio_path`, i.e. `<dirname(portfolio_path)>/history`.
///
/// When `portfolio_path` has no directory component, `.` is used as the
/// parent. The returned slice is allocated with `allocator` and owned
/// by the caller. Exposed so the CLI and TUI agree on the convention
/// that `history/` is always a sibling of the portfolio file.
pub fn deriveHistoryDir(
    allocator: std.mem.Allocator,
    portfolio_path: []const u8,
) ![]u8 {
    const parent: []const u8 = if (std.fs.path.dirname(portfolio_path)) |dir_name| dir_name else ".";
    const parts = [_][]const u8{ parent, "history" };
    return std.fs.path.join(allocator, &parts);
}
/// Owned result of `loadTimeline`: the raw snapshot collection together
/// with the timeline series derived from it, so callers can use either
/// without re-parsing.
///
/// `series` is built by `timeline.buildSeries` from `loaded.snapshots`
/// (documented as sorted ascending by date), while `loaded.snapshots`
/// stays in filesystem enumeration order. The two are kept alive and
/// released together; callers may still use `loaded.snapshots` directly
/// for non-timeline purposes such as rollup building.
pub const LoadedTimeline = struct {
    loaded: LoadedHistory,
    series: timeline.TimelineSeries,
    /// Directory the snapshots were loaded from. Owned by this struct
    /// and freed in `deinit`; kept so callers can print diagnostics or
    /// locate sibling files (rollup.srf, etc.).
    history_dir: []u8,
    allocator: std.mem.Allocator,

    /// Tear down the series before the snapshots it was built from,
    /// then release the directory path.
    pub fn deinit(self: *LoadedTimeline) void {
        self.series.deinit();
        self.loaded.deinit();

        const gpa = self.allocator;
        gpa.free(self.history_dir);
    }
};
/// One-stop timeline loader: derive the `history/` directory from
/// `portfolio_path`, load every `*-portfolio.srf` file in it, and build
/// the timeline series from the loaded snapshots.
///
/// Intended as the single entry point for both the CLI `zfin history`
/// command and the TUI history tab, so the two cannot drift apart in
/// how they split the directory or order the empty state.
///
/// An empty or missing history directory is not an error: the result
/// has `loaded.snapshots.len == 0` and callers produce their own
/// "no snapshots" message. Files that fail to read or parse are logged
/// and skipped by `loadHistoryDir`. The caller owns the result and must
/// call `LoadedTimeline.deinit`.
pub fn loadTimeline(
    allocator: std.mem.Allocator,
    portfolio_path: []const u8,
) !LoadedTimeline {
    const dir_path = try deriveHistoryDir(allocator, portfolio_path);
    errdefer allocator.free(dir_path);

    var history = try loadHistoryDir(allocator, dir_path);
    errdefer history.deinit();

    return LoadedTimeline{
        .loaded = history,
        .series = try timeline.buildSeries(allocator, history.snapshots),
        .history_dir = dir_path,
        .allocator = allocator,
    };
}
// ── Tests ────────────────────────────────────────────────────
const testing = std.testing;
/// Test fixture returned by `parseLiteral`: a parsed snapshot plus the
/// `testing.allocator`-owned copy of the input it borrows strings from.
const ParsedLiteral = struct {
    snap: snapshot_mod.Snapshot,
    bytes: []u8,

    /// Free in dependency order: the snapshot first, then the bytes it
    /// slices into.
    fn deinit(self: *ParsedLiteral) void {
        const gpa = testing.allocator;
        self.snap.deinit(gpa);
        gpa.free(self.bytes);
    }
};
/// Test helper: copy `input` into `testing.allocator` and parse the
/// copy. On success the caller owns both the snapshot and the copied
/// bytes via the returned `ParsedLiteral`; on parse failure the copy is
/// freed before the error is returned.
fn parseLiteral(input: []const u8) !ParsedLiteral {
    const owned = try testing.allocator.dupe(u8, input);
    errdefer testing.allocator.free(owned);

    return .{
        .snap = try parseSnapshotBytes(testing.allocator, owned),
        .bytes = owned,
    };
}
test "parseSnapshotBytes: minimal meta + totals round-trip" {
    // One meta record plus three totals; no tax_type/account/lot rows.
    const srf_text =
        \\#!srfv1
        \\#!created=1700000000
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:1700000000,zfin_version::v0.1.0,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1000
        \\kind::total,scope::liquid,value:num:800
        \\kind::total,scope::illiquid,value:num:200
        \\
    ;
    var fixture = try parseLiteral(srf_text);
    defer fixture.deinit();

    // Note: `meta.kind` is `""` post-parse — the `kind` discriminator is
    // consumed by union dispatch (see `SnapshotRecord`). The union tag is
    // the source of truth for record type, not `.kind`.
    const meta = fixture.snap.meta;
    try testing.expectEqual(@as(u32, 1), meta.snapshot_version);
    try testing.expect(meta.as_of_date.eql(Date.fromYmd(2026, 4, 17)));
    try testing.expectEqualStrings("v0.1.0", meta.zfin_version);
    try testing.expectEqual(@as(i64, 1_700_000_000), meta.captured_at);
    try testing.expectEqual(@as(usize, 0), meta.stale_count);
    try testing.expect(meta.quote_date_min == null);
    try testing.expect(meta.quote_date_max == null);

    // Totals must come back in file order with their values intact.
    const expected_totals = [_]struct { scope: []const u8, value: f64 }{
        .{ .scope = "net_worth", .value = 1000 },
        .{ .scope = "liquid", .value = 800 },
        .{ .scope = "illiquid", .value = 200 },
    };
    const totals = fixture.snap.totals;
    try testing.expectEqual(@as(usize, 3), totals.len);
    for (expected_totals, totals) |want, got| {
        try testing.expectEqualStrings(want.scope, got.scope);
        try testing.expectEqual(want.value, got.value);
    }

    // Sections with no records come back empty.
    try testing.expectEqual(@as(usize, 0), fixture.snap.tax_types.len);
    try testing.expectEqual(@as(usize, 0), fixture.snap.accounts.len);
    try testing.expectEqual(@as(usize, 0), fixture.snap.lots.len);
}
test "parseSnapshotBytes: with tax_type, account, and lot records" {
    // Exercises every record kind other than meta/total.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1500
        \\kind::tax_type,label::Taxable,value:num:1000
        \\kind::tax_type,label::Roth (Post-Tax),value:num:500
        \\kind::account,name::Emil Roth,value:num:800
        \\kind::lot,symbol::VTI,lot_symbol::VTI,account::Emil Roth,security_type::Stock,shares:num:10,open_price:num:200,cost_basis:num:2000,value:num:2500,price:num:250,quote_date::2026-04-17
        \\
    ;
    var fixture = try parseLiteral(srf_text);
    defer fixture.deinit();

    const tax_types = fixture.snap.tax_types;
    try testing.expectEqual(@as(usize, 2), tax_types.len);
    try testing.expectEqualStrings("Taxable", tax_types[0].label);
    try testing.expectEqualStrings("Roth (Post-Tax)", tax_types[1].label);

    const accounts = fixture.snap.accounts;
    try testing.expectEqual(@as(usize, 1), accounts.len);
    try testing.expectEqualStrings("Emil Roth", accounts[0].name);
    try testing.expectEqual(@as(f64, 800), accounts[0].value);

    const lots = fixture.snap.lots;
    try testing.expectEqual(@as(usize, 1), lots.len);
    const lot = lots[0];
    try testing.expectEqualStrings("VTI", lot.symbol);
    try testing.expect(lot.price != null);
    try testing.expectEqual(@as(f64, 250), lot.price.?);
    try testing.expect(lot.quote_date != null);
    try testing.expect(lot.quote_date.?.eql(Date.fromYmd(2026, 4, 17)));
    // `quote_stale` was not written, so it must read back as false.
    try testing.expect(!lot.quote_stale);
}
test "parseSnapshotBytes: lot with stale flag and optional price absent" {
    // First lot omits price/quote_date/quote_stale; second sets all three.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:1
        \\kind::lot,symbol::CASH,lot_symbol::CASH,account::Checking,security_type::Cash,shares:num:500,open_price:num:0,cost_basis:num:0,value:num:500
        \\kind::lot,symbol::OLDQ,lot_symbol::OLDQ,account::IRA,security_type::Stock,shares:num:1,open_price:num:100,cost_basis:num:100,value:num:95,price:num:95,quote_date::2026-04-15,quote_stale:bool:true
        \\
    ;
    var fixture = try parseLiteral(srf_text);
    defer fixture.deinit();

    const lots = fixture.snap.lots;
    try testing.expectEqual(@as(usize, 2), lots.len);

    // Cash: no price, no quote_date, no stale flag
    const cash_lot = lots[0];
    try testing.expect(cash_lot.price == null);
    try testing.expect(cash_lot.quote_date == null);
    try testing.expect(!cash_lot.quote_stale);

    // Stale stock lot
    const stale_lot = lots[1];
    try testing.expect(stale_lot.quote_stale);
    try testing.expect(stale_lot.quote_date != null);
    try testing.expect(stale_lot.quote_date.?.eql(Date.fromYmd(2026, 4, 15)));
}
test "parseSnapshotBytes: quote_date_min/max in meta round-trip" {
    // Meta record carrying both optional quote-date bounds.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,quote_date_min::2026-04-14,quote_date_max::2026-04-17,stale_count:num:3
        \\
    ;
    var fixture = try parseLiteral(srf_text);
    defer fixture.deinit();

    const meta = fixture.snap.meta;

    const earliest = meta.quote_date_min;
    try testing.expect(earliest != null);
    try testing.expect(earliest.?.eql(Date.fromYmd(2026, 4, 14)));

    const latest = meta.quote_date_max;
    try testing.expect(latest != null);
    try testing.expect(latest.?.eql(Date.fromYmd(2026, 4, 17)));

    try testing.expectEqual(@as(usize, 3), meta.stale_count);
}
test "parseSnapshotBytes: unknown kind is silently skipped" {
    // The middle record uses a `kind` this version does not know; parsing
    // must carry on and still pick up the total that follows it.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::future_extension,some_field::some_value
        \\kind::total,scope::net_worth,value:num:100
        \\
    ;
    var fixture = try parseLiteral(srf_text);
    defer fixture.deinit();

    const total_count = fixture.snap.totals.len;
    try testing.expectEqual(@as(usize, 1), total_count);
}
test "parseSnapshotBytes: record without kind field is a parse error" {
    // A record with no `kind` discriminator is malformed data rather than
    // a forward-compat case, so it must fail the parse instead of being
    // dropped quietly.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\random_field::random_value
        \\kind::total,scope::net_worth,value:num:100
        \\
    ;
    const outcome = parseLiteral(srf_text);
    try testing.expectError(error.InvalidSrf, outcome);
}
test "parseSnapshotBytes: missing meta record returns error" {
    // Well-formed SRF, but nothing identifies it as a snapshot.
    const srf_text =
        \\#!srfv1
        \\kind::total,scope::net_worth,value:num:100
        \\
    ;
    const outcome = parseLiteral(srf_text);
    try testing.expectError(error.NoMetaRecord, outcome);
}
test "parseSnapshotBytes: totally malformed input returns error" {
    // Not valid srf at all. Whether this surfaces as InvalidSrf (the
    // iterator failed) or NoMetaRecord (the iterator yielded nothing) is
    // not pinned down here; the only requirement is that parsing neither
    // panics nor succeeds.
    const garbage = "this is not srf data\x00\xff\x00";
    const outcome = parseLiteral(garbage);
    try testing.expect(std.meta.isError(outcome));
}
test "loadHistoryDir: missing directory returns empty result" {
    // The directory is never created; the loader should hand back an
    // empty result instead of surfacing FileNotFound.
    const absent_dir = "/nonexistent/path/for/testing";
    var history = try loadHistoryDir(testing.allocator, absent_dir);
    defer history.deinit();

    try testing.expectEqual(@as(usize, 0), history.snapshots.len);
}
test "loadHistoryDir: loads snapshots and skips non-matching files" {
    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    const first_snapshot =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1000
        \\
    ;
    const second_snapshot =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-18,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1100
        \\
    ;

    // Seed three files: two valid snapshots and one file whose name does
    // not carry the snapshot suffix, which the loader should ignore.
    const seeds = [_]struct { name: []const u8, contents: []const u8 }{
        .{ .name = "2026-04-17-portfolio.srf", .contents = first_snapshot },
        .{ .name = "2026-04-18-portfolio.srf", .contents = second_snapshot },
        .{ .name = "readme.txt", .contents = "not a snapshot" },
    };
    for (seeds) |seed| {
        var f = try tmp_dir.dir.createFile(seed.name, .{});
        try f.writeAll(seed.contents);
        f.close();
    }

    var path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const dir_path = try tmp_dir.dir.realpath(".", &path_buf);

    var history = try loadHistoryDir(testing.allocator, dir_path);
    defer history.deinit();
    try testing.expectEqual(@as(usize, 2), history.snapshots.len);

    // Each loaded snapshot has a meta and one total. The totals differ,
    // so both files can be recognized regardless of enumeration order.
    var found_first = false;
    var found_second = false;
    for (history.snapshots) |snap| {
        try testing.expectEqual(@as(usize, 1), snap.totals.len);
        const net_worth = snap.totals[0].value;
        if (net_worth == 1000) found_first = true;
        if (net_worth == 1100) found_second = true;
    }
    try testing.expect(found_first);
    try testing.expect(found_second);
}
test "loadHistoryDir: corrupt files are skipped, others still load" {
    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    const valid_snapshot =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\
    ;

    // Both names carry the snapshot suffix, but only the first file holds
    // parseable content.
    const seeds = [_]struct { name: []const u8, contents: []const u8 }{
        .{ .name = "2026-04-17-portfolio.srf", .contents = valid_snapshot },
        .{ .name = "2026-04-18-portfolio.srf", .contents = "totally-not-srf\n" },
    };
    for (seeds) |seed| {
        var f = try tmp_dir.dir.createFile(seed.name, .{});
        try f.writeAll(seed.contents);
        f.close();
    }

    var path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const dir_path = try tmp_dir.dir.realpath(".", &path_buf);

    var history = try loadHistoryDir(testing.allocator, dir_path);
    defer history.deinit();

    // Only the good one lands.
    try testing.expectEqual(@as(usize, 1), history.snapshots.len);
}