zfin/src/history.zig

//! History IO — read `history/<date>-portfolio.srf` files produced by
//! `zfin snapshot` back into typed `Snapshot` structs.
//!
//! Two layers, both free of rendering concerns:
//!
//! - `parseSnapshotBytes(bytes)` — parse an SRF blob into a `Snapshot`.
//!   The snapshot's string fields slice directly into `bytes`, so the
//!   caller MUST keep that buffer alive as long as the snapshot.
//! - `loadHistoryDir(dir)` — enumerate `*-portfolio.srf` in a directory
//!   and parse each. The returned `LoadedHistory` owns both the
//!   snapshots and their backing byte buffers as matched pairs.
//!
//! The snapshot reader is discriminator-driven: every record must carry
//! a `kind::<meta|total|tax_type|account|lot>` field. Records whose
//! `kind` is set to something this version doesn't recognize are
//! skipped (forward-compatibility). Malformed records — missing `kind`,
//! missing required fields within a known kind, coercion failures — are
//! treated as parse errors, not silently dropped.
//!
//! Lives at `src/history.zig` rather than `src/commands/history.zig`
//! because the IO is used by more than the CLI history command (the
//! rollup builder, future TUI history tab, and any external consumer
//! all go through here). The command module stays a thin CLI wrapper.

const std = @import("std");
const srf = @import("srf");
const snapshot_mod = @import("models/snapshot.zig");
const Date = @import("models/date.zig").Date;
const timeline = @import("analytics/timeline.zig");

/// Everything `parseSnapshotBytes` can fail with.
pub const Error = error{
    /// An allocation made while collecting records failed.
    OutOfMemory,
    /// The bytes could not be iterated as SRF (for example, no usable
    /// `#!srfv1` header), or a record could not be coerced into one of
    /// the known snapshot record kinds.
    InvalidSrf,
    /// The input iterated fine as SRF but contained no `kind::meta`
    /// record, so it cannot be identified as a snapshot.
    NoMetaRecord,
};

/// Filename suffix that identifies snapshot files in the `history/`
/// directory. `loadHistoryDir` only reads directory entries whose name
/// ends with this suffix.
pub const snapshot_suffix = "-portfolio.srf";

// ── Single-file parsing ──────────────────────────────────────

/// Parse an SRF blob into a `Snapshot`.
///
/// The snapshot's string fields borrow directly from `bytes` (zero-copy,
/// because the SRF iterator is created with `alloc_strings = false`), so
/// the caller MUST keep `bytes` alive for at least as long as the
/// returned snapshot. The row slices themselves are allocated with
/// `allocator` and are released by `Snapshot.deinit(allocator)`.
///
/// Records whose `kind` is not recognized are skipped. If the input
/// carries more than one `kind::meta` record, the first one wins.
///
/// Errors:
/// - `error.InvalidSrf`: the SRF iterator could not be created or
///   advanced, or a record failed to coerce for any reason other than an
///   unrecognized `kind`.
/// - `error.NoMetaRecord`: no `kind::meta` record was present.
/// - `error.OutOfMemory`: an allocation failed. Every slice allocated so
///   far is released before returning.
///
/// Typical call pattern:
/// ```
/// const bytes = try readFileAlloc(...);
/// defer allocator.free(bytes);
/// var snap = try parseSnapshotBytes(allocator, bytes);
/// defer snap.deinit(allocator);
/// ```
pub fn parseSnapshotBytes(
    allocator: std.mem.Allocator,
    bytes: []const u8,
) Error!snapshot_mod.Snapshot {
    var reader = std.Io.Reader.fixed(bytes);
    // `alloc_strings = false` tells srf to return string values as
    // slices into `bytes` rather than duping into its own arena.
    var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidSrf;
    defer it.deinit();

    var meta_opt: ?snapshot_mod.MetaRow = null;
    var totals: std.ArrayList(snapshot_mod.TotalRow) = .empty;
    errdefer totals.deinit(allocator);
    var taxes: std.ArrayList(snapshot_mod.TaxTypeRow) = .empty;
    errdefer taxes.deinit(allocator);
    var accounts: std.ArrayList(snapshot_mod.AccountRow) = .empty;
    errdefer accounts.deinit(allocator);
    var lots: std.ArrayList(snapshot_mod.LotRow) = .empty;
    errdefer lots.deinit(allocator);

    while (it.next() catch return error.InvalidSrf) |field_it| {
        // `to(SnapshotRecord)` reads the `kind` discriminator first, then
        // coerces the remaining fields into the matching variant struct.
        //
        // We skip ONLY `ActiveTagDoesNotExist` — that's the genuine
        // forward-compatibility case (a future snapshot version wrote a
        // record kind we don't know about). Every other srf error
        // indicates malformed data in a record we SHOULD understand, so
        // we propagate it up rather than silently losing rows.
        const rec = field_it.to(SnapshotRecord) catch |err| switch (err) {
            error.ActiveTagDoesNotExist => continue,
            else => return error.InvalidSrf,
        };
        switch (rec) {
            .meta => |m| {
                // First meta record wins; later ones are ignored.
                if (meta_opt == null) meta_opt = m;
            },
            .total => |r| try totals.append(allocator, r),
            .tax_type => |r| try taxes.append(allocator, r),
            .account => |r| try accounts.append(allocator, r),
            .lot => |r| try lots.append(allocator, r),
        }
    }

    const meta = meta_opt orelse return error.NoMetaRecord;

    // `toOwnedSlice` empties its list on success, so the list-level
    // `errdefer`s above no longer cover a slice once it has been taken.
    // Take the slices one at a time and guard each with its own
    // `errdefer`, so a later OOM cannot leak an earlier slice.
    const owned_totals = try totals.toOwnedSlice(allocator);
    errdefer allocator.free(owned_totals);
    const owned_taxes = try taxes.toOwnedSlice(allocator);
    errdefer allocator.free(owned_taxes);
    const owned_accounts = try accounts.toOwnedSlice(allocator);
    errdefer allocator.free(owned_accounts);
    const owned_lots = try lots.toOwnedSlice(allocator);

    return .{
        .meta = meta,
        .totals = owned_totals,
        .tax_types = owned_taxes,
        .accounts = owned_accounts,
        .lots = owned_lots,
    };
}

/// Discriminated snapshot record. SRF's `FieldIterator.to(T)` dispatches
/// on the `kind` field (per `srf_tag_field`), consumes it, and then
/// coerces remaining fields into the matching variant struct. Variant
/// names here MUST match the wire-format `kind` values literally.
///
/// A record whose `kind` names no variant below is skipped by
/// `parseSnapshotBytes`; adding a variant here therefore also requires a
/// matching arm in that function's `switch`.
const SnapshotRecord = union(enum) {
    // One variant per record kind; the payload is the row struct the
    // remaining fields are coerced into.
    meta: snapshot_mod.MetaRow,
    total: snapshot_mod.TotalRow,
    tax_type: snapshot_mod.TaxTypeRow,
    account: snapshot_mod.AccountRow,
    lot: snapshot_mod.LotRow,

    /// Name of the wire field that selects the active variant.
    pub const srf_tag_field = "kind";
};

// ── Directory loading ────────────────────────────────────────

/// What `loadHistoryDir` hands back. The caller owns it and must call
/// `deinit` exactly once.
///
/// `snapshots` and `buffers` are parallel: entry `i` of `buffers` is the
/// file content that entry `i` of `snapshots` was parsed from. Snapshots
/// borrow their strings from those buffers, which is why the buffers are
/// retained here instead of being freed right after parsing.
pub const LoadedHistory = struct {
    snapshots: []snapshot_mod.Snapshot,
    /// Backing bytes, one entry per snapshot and in the same order. An
    /// empty slice whenever `snapshots` is empty.
    buffers: [][]u8,
    allocator: std.mem.Allocator,

    /// Release every snapshot and then every backing buffer, using the
    /// allocator stored at load time.
    pub fn deinit(self: *LoadedHistory) void {
        const gpa = self.allocator;

        for (self.snapshots) |*snap| {
            snap.deinit(gpa);
        }
        gpa.free(self.snapshots);

        for (self.buffers) |backing| {
            gpa.free(backing);
        }
        gpa.free(self.buffers);
    }
};

/// Enumerate `*-portfolio.srf` in `history_dir` and parse each into a
/// `Snapshot`. Files that cannot be read or parsed are skipped with a
/// logged warning; callers get back only the ones that loaded cleanly.
///
/// A missing `history_dir` is not an error: it yields an empty
/// `LoadedHistory`. Any other failure to open or iterate the directory,
/// and any allocation failure while building the result lists, is
/// returned to the caller with everything loaded so far released.
///
/// Returned snapshots are in filesystem enumeration order — NOT sorted.
/// Consumers that want chronological order should feed through
/// `analytics.timeline.buildSeries` (which sorts) rather than relying
/// on the loader's order.
///
/// The caller owns the result and must call `LoadedHistory.deinit`.
pub fn loadHistoryDir(
    allocator: std.mem.Allocator,
    history_dir: []const u8,
) !LoadedHistory {
    var dir = std.fs.cwd().openDir(history_dir, .{ .iterate = true }) catch |err| switch (err) {
        error.FileNotFound => {
            // Missing history dir isn't fatal — it just means no
            // snapshots captured yet.
            return .{ .snapshots = &.{}, .buffers = &.{}, .allocator = allocator };
        },
        else => return err,
    };
    defer dir.close();

    var snapshots: std.ArrayList(snapshot_mod.Snapshot) = .empty;
    var buffers: std.ArrayList([]u8) = .empty;
    errdefer {
        for (snapshots.items) |*s| s.deinit(allocator);
        snapshots.deinit(allocator);
        for (buffers.items) |b| allocator.free(b);
        buffers.deinit(allocator);
    }

    var it = dir.iterate();
    while (try it.next()) |entry| {
        if (entry.kind != .file) continue;
        if (!std.mem.endsWith(u8, entry.name, snapshot_suffix)) continue;

        // Reserve a slot in BOTH lists before acquiring the snapshot and
        // its buffer. The appends below then cannot fail, so a
        // (snapshot, buffer) pair is either fully recorded or never
        // created — nothing can leak and the lists stay parallel.
        try snapshots.ensureUnusedCapacity(allocator, 1);
        try buffers.ensureUnusedCapacity(allocator, 1);

        const full_path = try std.fs.path.join(allocator, &.{ history_dir, entry.name });
        defer allocator.free(full_path);

        const bytes = std.fs.cwd().readFileAlloc(allocator, full_path, 16 * 1024 * 1024) catch |err| {
            std.log.warn("history: failed to read {s}: {s}", .{ full_path, @errorName(err) });
            continue;
        };
        // `bytes` is freed either by LoadedHistory.deinit on success or
        // by the branch below on parse failure — no defer-free here.
        const snap = parseSnapshotBytes(allocator, bytes) catch |err| {
            std.log.warn("history: failed to parse {s}: {s}", .{ full_path, @errorName(err) });
            allocator.free(bytes);
            continue;
        };
        snapshots.appendAssumeCapacity(snap);
        buffers.appendAssumeCapacity(bytes);
    }

    // Once `snapshots` has been converted, the list-level errdefer above
    // sees an empty list, so the owned slice needs its own guard in case
    // converting `buffers` fails.
    const owned_snapshots = try snapshots.toOwnedSlice(allocator);
    errdefer {
        for (owned_snapshots) |*s| s.deinit(allocator);
        allocator.free(owned_snapshots);
    }
    const owned_buffers = try buffers.toOwnedSlice(allocator);

    return .{
        .snapshots = owned_snapshots,
        .buffers = owned_buffers,
        .allocator = allocator,
    };
}

/// Build the path of the `history` directory that sits next to the
/// portfolio file: `<dirname(portfolio_path)>/history`. When
/// `portfolio_path` has no directory component, `.` is used as the
/// parent.
///
/// The returned slice is allocated with `allocator` and owned by the
/// caller. Kept as a shared helper so the CLI and the TUI resolve the
/// location identically.
pub fn deriveHistoryDir(
    allocator: std.mem.Allocator,
    portfolio_path: []const u8,
) ![]u8 {
    const parent: []const u8 = if (std.fs.path.dirname(portfolio_path)) |dir_part| dir_part else ".";
    const segments = [_][]const u8{ parent, "history" };
    return std.fs.path.join(allocator, &segments);
}

/// What `loadTimeline` hands back: the raw snapshot collection together
/// with the timeline series derived from it, so callers can use either
/// without parsing twice.
///
/// `series.points` is sorted ascending by date, whereas
/// `loaded.snapshots` stays in filesystem enumeration order. The two are
/// kept alive as a unit — `series.points` references dates that live
/// inside `loaded`'s snapshot rows, and callers may also want
/// `loaded.snapshots` directly for non-timeline uses (e.g. rollup
/// building).
pub const LoadedTimeline = struct {
    loaded: LoadedHistory,
    series: timeline.TimelineSeries,
    /// Directory the snapshots were loaded from. Owned by this struct
    /// and freed by `deinit`; carried along so callers can print
    /// diagnostics or locate sibling files (rollup.srf, etc.).
    history_dir: []u8,
    allocator: std.mem.Allocator,

    /// Tear down the series first, then the snapshots it was built
    /// from, and finally the directory path.
    pub fn deinit(self: *LoadedTimeline) void {
        const gpa = self.allocator;
        self.series.deinit();
        self.loaded.deinit();
        gpa.free(self.history_dir);
    }
};

/// One-stop timeline loader: resolves the `history/` directory next to
/// `portfolio_path`, loads every `*-portfolio.srf` file in it, and
/// builds the timeline series via `timeline.buildSeries`. Shared by the
/// CLI `zfin history` command and the TUI history tab, whose earlier
/// private copies had drifted apart (different dir-split logic,
/// slightly different empty-state ordering).
///
/// An empty or missing history directory is not an error: the result
/// then has `loaded.snapshots.len == 0`, and callers produce their own
/// "no snapshots" message. Files that fail to load are logged and
/// skipped by `loadHistoryDir`.
///
/// The caller owns the result and must call `LoadedTimeline.deinit`.
pub fn loadTimeline(
    allocator: std.mem.Allocator,
    portfolio_path: []const u8,
) !LoadedTimeline {
    const dir_path = try deriveHistoryDir(allocator, portfolio_path);
    errdefer allocator.free(dir_path);

    var history = try loadHistoryDir(allocator, dir_path);
    errdefer history.deinit();

    // Last fallible step; on failure the two errdefers above unwind.
    const built_series = try timeline.buildSeries(allocator, history.snapshots);

    return .{
        .loaded = history,
        .series = built_series,
        .history_dir = dir_path,
        .allocator = allocator,
    };
}

// ── Tests ────────────────────────────────────────────────────

const testing = std.testing;

/// Test-only pairing of a parsed snapshot with the heap copy of the
/// text it was parsed from. The snapshot borrows from `bytes`, so the
/// two are released together: snapshot first, then the bytes.
const ParsedLiteral = struct {
    snap: snapshot_mod.Snapshot,
    bytes: []u8,

    fn deinit(self: *ParsedLiteral) void {
        const gpa = testing.allocator;
        self.snap.deinit(gpa);
        gpa.free(self.bytes);
    }
};

/// Test helper: copy `input` into `testing.allocator` memory and parse
/// the copy. On success the caller owns both halves of the result and
/// releases them with `ParsedLiteral.deinit`; on a parse error the copy
/// is freed before the error is returned.
fn parseLiteral(input: []const u8) !ParsedLiteral {
    const gpa = testing.allocator;
    const owned = try gpa.dupe(u8, input);
    errdefer gpa.free(owned);

    const parsed = try parseSnapshotBytes(gpa, owned);
    return .{ .snap = parsed, .bytes = owned };
}

test "parseSnapshotBytes: minimal meta + totals round-trip" {
    const srf_text =
        \\#!srfv1
        \\#!created=1700000000
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:1700000000,zfin_version::v0.1.0,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1000
        \\kind::total,scope::liquid,value:num:800
        \\kind::total,scope::illiquid,value:num:200
        \\
    ;
    var parsed = try parseLiteral(srf_text);
    defer parsed.deinit();
    const snap = parsed.snap;
    const meta = snap.meta;

    // `meta.kind` is `""` after parsing: the `kind` discriminator is
    // consumed by the union dispatch (see `SnapshotRecord`), so the
    // union tag — not `.kind` — is what identifies the record type.
    try testing.expectEqual(@as(u32, 1), meta.snapshot_version);
    try testing.expect(meta.as_of_date.eql(Date.fromYmd(2026, 4, 17)));
    try testing.expectEqualStrings("v0.1.0", meta.zfin_version);
    try testing.expectEqual(@as(i64, 1_700_000_000), meta.captured_at);
    try testing.expectEqual(@as(usize, 0), meta.stale_count);
    try testing.expect(meta.quote_date_min == null);
    try testing.expect(meta.quote_date_max == null);

    // Totals come back in file order.
    const want_totals = [_]struct { scope: []const u8, value: f64 }{
        .{ .scope = "net_worth", .value = 1000 },
        .{ .scope = "liquid", .value = 800 },
        .{ .scope = "illiquid", .value = 200 },
    };
    try testing.expectEqual(@as(usize, want_totals.len), snap.totals.len);
    for (want_totals, snap.totals) |want, got| {
        try testing.expectEqualStrings(want.scope, got.scope);
        try testing.expectEqual(want.value, got.value);
    }

    // No other record kinds were present in the input.
    const other_lens = [_]usize{ snap.tax_types.len, snap.accounts.len, snap.lots.len };
    for (other_lens) |len| {
        try testing.expectEqual(@as(usize, 0), len);
    }
}

test "parseSnapshotBytes: with tax_type, account, and lot records" {
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1500
        \\kind::tax_type,label::Taxable,value:num:1000
        \\kind::tax_type,label::Roth (Post-Tax),value:num:500
        \\kind::account,name::Emil Roth,value:num:800
        \\kind::lot,symbol::VTI,lot_symbol::VTI,account::Emil Roth,security_type::Stock,shares:num:10,open_price:num:200,cost_basis:num:2000,value:num:2500,price:num:250,quote_date::2026-04-17
        \\
    ;
    var parsed = try parseLiteral(srf_text);
    defer parsed.deinit();
    const snap = parsed.snap;

    // Tax-type rows, in file order.
    const want_labels = [_][]const u8{ "Taxable", "Roth (Post-Tax)" };
    try testing.expectEqual(@as(usize, want_labels.len), snap.tax_types.len);
    for (want_labels, snap.tax_types) |want, row| {
        try testing.expectEqualStrings(want, row.label);
    }

    // Single account row.
    try testing.expectEqual(@as(usize, 1), snap.accounts.len);
    const account = snap.accounts[0];
    try testing.expectEqualStrings("Emil Roth", account.name);
    try testing.expectEqual(@as(f64, 800), account.value);

    // Single lot row with both optional quote fields populated.
    try testing.expectEqual(@as(usize, 1), snap.lots.len);
    const lot = snap.lots[0];
    try testing.expectEqualStrings("VTI", lot.symbol);
    const price = lot.price orelse return error.TestUnexpectedResult;
    try testing.expectEqual(@as(f64, 250), price);
    const quoted_on = lot.quote_date orelse return error.TestUnexpectedResult;
    try testing.expect(quoted_on.eql(Date.fromYmd(2026, 4, 17)));
    try testing.expect(!lot.quote_stale);
}

test "parseSnapshotBytes: lot with stale flag and optional price absent" {
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:1
        \\kind::lot,symbol::CASH,lot_symbol::CASH,account::Checking,security_type::Cash,shares:num:500,open_price:num:0,cost_basis:num:0,value:num:500
        \\kind::lot,symbol::OLDQ,lot_symbol::OLDQ,account::IRA,security_type::Stock,shares:num:1,open_price:num:100,cost_basis:num:100,value:num:95,price:num:95,quote_date::2026-04-15,quote_stale:bool:true
        \\
    ;
    var parsed = try parseLiteral(srf_text);
    defer parsed.deinit();

    const lots = parsed.snap.lots;
    try testing.expectEqual(@as(usize, 2), lots.len);

    // Cash lot: the optional quote fields were omitted on the wire, so
    // they come back as null / false.
    const cash_lot = lots[0];
    try testing.expect(cash_lot.price == null);
    try testing.expect(cash_lot.quote_date == null);
    try testing.expect(!cash_lot.quote_stale);

    // Stock lot flagged stale, carrying the date of its last quote.
    const stale_lot = lots[1];
    try testing.expect(stale_lot.quote_stale);
    const quoted_on = stale_lot.quote_date orelse return error.TestUnexpectedResult;
    try testing.expect(quoted_on.eql(Date.fromYmd(2026, 4, 15)));
}

test "parseSnapshotBytes: quote_date_min/max in meta round-trip" {
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,quote_date_min::2026-04-14,quote_date_max::2026-04-17,stale_count:num:3
        \\
    ;
    var parsed = try parseLiteral(srf_text);
    defer parsed.deinit();
    const meta = parsed.snap.meta;

    // Both optional quote-date bounds must be present and exact.
    const oldest = meta.quote_date_min orelse return error.TestUnexpectedResult;
    try testing.expect(oldest.eql(Date.fromYmd(2026, 4, 14)));
    const newest = meta.quote_date_max orelse return error.TestUnexpectedResult;
    try testing.expect(newest.eql(Date.fromYmd(2026, 4, 17)));

    try testing.expectEqual(@as(usize, 3), meta.stale_count);
}

test "parseSnapshotBytes: unknown kind is silently skipped" {
    // The middle record carries a `kind` this reader does not know. It
    // must be ignored without disturbing the records around it.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::future_extension,some_field::some_value
        \\kind::total,scope::net_worth,value:num:100
        \\
    ;
    var parsed = try parseLiteral(srf_text);
    defer parsed.deinit();

    const totals = parsed.snap.totals;
    try testing.expectEqual(@as(usize, 1), totals.len);
}

test "parseSnapshotBytes: record without kind field is a parse error" {
    // The second record has no `kind` discriminator at all. That is
    // malformed data rather than a forward-compatible extension, so the
    // whole parse must fail instead of quietly dropping the record.
    const srf_text =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\random_field::random_value
        \\kind::total,scope::net_worth,value:num:100
        \\
    ;
    const outcome = parseLiteral(srf_text);
    try testing.expectError(error.InvalidSrf, outcome);
}

test "parseSnapshotBytes: missing meta record returns error" {
    // Well-formed SRF, but nothing identifies it as a snapshot.
    const srf_text =
        \\#!srfv1
        \\kind::total,scope::net_worth,value:num:100
        \\
    ;
    const outcome = parseLiteral(srf_text);
    try testing.expectError(error.NoMetaRecord, outcome);
}

test "parseSnapshotBytes: totally malformed input returns error" {
    // Not valid srf at all.
    const input = "this is not srf data\x00\xff\x00";

    // Either InvalidSrf (iterator failed) or NoMetaRecord (iterator
    // returned nothing). Both are acceptable failure modes; the test
    // just asserts we don't panic or succeed — so any error passes.
    var parsed = parseLiteral(input) catch return;

    // Unexpected success. Release the result before failing so the
    // testing allocator's leak report does not bury the real problem.
    parsed.deinit();
    return error.TestUnexpectedResult;
}

test "loadHistoryDir: missing directory returns empty result" {
    // The directory is never created. The loader is expected to report
    // "no snapshots" rather than surface FileNotFound to the caller.
    const absent_dir = "/nonexistent/path/for/testing";

    var loaded = try loadHistoryDir(testing.allocator, absent_dir);
    defer loaded.deinit();

    try testing.expectEqual(@as(usize, 0), loaded.snapshots.len);
}

test "loadHistoryDir: loads snapshots and skips non-matching files" {
    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    const snap_bytes =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1000
        \\
    ;
    const snap2_bytes =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-18,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\kind::total,scope::net_worth,value:num:1100
        \\
    ;

    // Seed three files: two valid snapshots, plus one whose name does
    // not end in the snapshot suffix and must therefore be skipped.
    const seeds = [_]struct { name: []const u8, contents: []const u8 }{
        .{ .name = "2026-04-17-portfolio.srf", .contents = snap_bytes },
        .{ .name = "2026-04-18-portfolio.srf", .contents = snap2_bytes },
        .{ .name = "readme.txt", .contents = "not a snapshot" },
    };
    for (seeds) |seed| {
        var f = try tmp_dir.dir.createFile(seed.name, .{});
        // Deferred so the handle is closed even if the write fails.
        defer f.close();
        try f.writeAll(seed.contents);
    }

    var path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const dir_path = try tmp_dir.dir.realpath(".", &path_buf);

    var result = try loadHistoryDir(testing.allocator, dir_path);
    defer result.deinit();

    try testing.expectEqual(@as(usize, 2), result.snapshots.len);
    // Snapshots and their backing buffers are parallel slices.
    try testing.expectEqual(result.snapshots.len, result.buffers.len);

    // Each loaded snapshot has a meta and one total. Values differ so we
    // can tell them apart regardless of filesystem enumeration order.
    var saw_1000 = false;
    var saw_1100 = false;
    for (result.snapshots) |s| {
        try testing.expectEqual(@as(usize, 1), s.totals.len);
        if (s.totals[0].value == 1000) saw_1000 = true;
        if (s.totals[0].value == 1100) saw_1100 = true;
    }
    try testing.expect(saw_1000);
    try testing.expect(saw_1100);
}

test "loadHistoryDir: corrupt files are skipped, others still load" {
    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    const good_bytes =
        \\#!srfv1
        \\kind::meta,snapshot_version:num:1,as_of_date::2026-04-17,captured_at:num:0,zfin_version::x,stale_count:num:0
        \\
    ;

    // One parseable snapshot and one file that carries the snapshot
    // suffix but is not SRF at all.
    const seeds = [_]struct { name: []const u8, contents: []const u8 }{
        .{ .name = "2026-04-17-portfolio.srf", .contents = good_bytes },
        .{ .name = "2026-04-18-portfolio.srf", .contents = "totally-not-srf\n" },
    };
    for (seeds) |seed| {
        var f = try tmp_dir.dir.createFile(seed.name, .{});
        // Deferred so the handle is closed even if the write fails.
        defer f.close();
        try f.writeAll(seed.contents);
    }

    var path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const dir_path = try tmp_dir.dir.realpath(".", &path_buf);

    var result = try loadHistoryDir(testing.allocator, dir_path);
    defer result.deinit();

    // Only the good one lands, together with its backing buffer.
    try testing.expectEqual(@as(usize, 1), result.snapshots.len);
    try testing.expectEqual(result.snapshots.len, result.buffers.len);
}