From b9d2ee4bd3caa274948c1ba4987dd6f86e8844b9 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Sun, 24 May 2026 09:44:35 -0700 Subject: [PATCH] track multiple portfolio files in past revisions as well --- AGENTS.md | 70 +++-- src/brokerage/wells_fargo.zig | 3 +- src/commands/framework.zig | 8 + src/commands/snapshot.zig | 188 +++++++----- src/data/imported_values.zig | 4 +- src/git.zig | 77 ++++- src/portfolio_loader.zig | 556 ++++++++++++++++++++++++++++++---- 7 files changed, 724 insertions(+), 182 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 418582b..eef0685 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -341,9 +341,9 @@ Ask the user instead.** add it as a new top-level section; don't leave the parent entry around as a historical wrapper. - **`REPORT.md` is untracked on purpose.** It's a personal workflow doc - living in the repo root only until it moves to `~/finance`. Edit it - freely when asked; don't treat it as part of the repo surface. Don't - mention it in commit messages for unrelated work. + living in the repo root only until it moves to the user's portfolio + directory. Edit it freely when asked; don't treat it as part of the + repo surface. Don't mention it in commit messages for unrelated work. ### Lint warnings — there are no "pre-existing" warnings @@ -465,7 +465,7 @@ across the codebase so search-and-replace stays trivial): `smpl_1234`, `smpl-ira-1234` - Account numbers: `1234`, `5678`, `9012`, `3456`, `7890`, or alphanumeric like `Z123`, `Z111`, `Z222`. Do not use real - trailing-digit values from `~/finance/`. + trailing-digit values from the user's actual accounts file. - Portfolio file names: `portfolio_other.srf`, never `portfolio_.srf`. - Schwab/Fidelity-style: `Schwab Trust`, `Inherited IRA`, `Roth @@ -474,23 +474,22 @@ across the codebase so search-and-replace stays trivial): **Things that are NEVER OK in source:** -- Real first names of family members. (No `Emil`, `Elizabeth`, - `Kelly`, `Mom`, `Dad`, etc.) -- Real account-number trailing digits used in - `~/finance/accounts.srf` (e.g. `6135`, `3522`, `7891`, `716`, - `901`, `503`, `311`, `152`, `118`, `Z30619248`, - `229948882`, etc.). Any number that came from a real - brokerage entry is PII. -- Real portfolio filenames like `portfolio_mom.srf`, - `portfolio_.srf`. -- Composite identifiers that combine the above (`Mom Roth IRA`, - `Joint trust ...716`, etc.). +- Real first names of family members. +- Real account-number trailing digits used in the user's actual + accounts file. Any number that came from a real brokerage entry + is PII, regardless of length or format. +- Real portfolio filenames matching `portfolio_.srf`. +- Composite identifiers that combine real names and real + account-number digits. +- The user's actual portfolio directory path (leaks filesystem + layout) — refer generically to "the portfolio's `history/` + directory" or similar. **Workflow rule when adding a test based on a real-world scenario:** -1. Reproduce the bug locally with real data (in `~/finance/`, - never staged). +1. Reproduce the bug locally with real data (the user's portfolio + directory; never staged). 2. Write the test using **placeholder names and numbers** that preserve the structural shape of the bug (same string lengths, same pattern of digits-vs-letters, same separator @@ -503,24 +502,35 @@ scenario:** **Workflow rule when finding existing PII:** -If you grep for the placeholder vocabulary while working in any -file and find a real name or number that snuck in, fix it in -the same change. Don't add to TODO; PII removal is never -optional, and it never lands in a separate commit unless the -user explicitly asks. +If you grep for PII while working in any file and find a real +name or number that snuck in, fix it in the same change. Don't +add to TODO; PII removal is never optional, and it never lands +in a separate commit unless the user explicitly asks. -**One-line grep that should ALWAYS return zero non-`ie_data.csv` -hits before committing:** +**Pre-commit grep recipe.** Read the user's actual accounts +metadata locally (NOT staged, NOT in this file), extract the +identifying tokens, and grep `src/` for any of them. Conceptual +shape: ``` -grep -rn "\bMom\b\|Elizabeth\|Joint trust\|portfolio_mom\|\bEmil\b\|Fidelity Emil\|6135\|Z30619248" src/ \ - | grep -v ie_data.csv +# Build the alternation FROM the user's accounts file at runtime — +# do NOT hardcode the values into source-tracked tooling. +local_pii_tokens=$(awk -F: '...' "$ZFIN_HOME/accounts.srf" ...) +grep -rn -E "$local_pii_tokens" src/ | grep -v ie_data.csv ``` -(Update the alternation as new real-world identifiers come up. -The `ie_data.csv` exclusion is because the Shiller dataset -contains coincidental numeric matches in historical-year fields -that aren't PII.) +The grep should always return zero non-`ie_data.csv` hits before +committing. The `ie_data.csv` exclusion is because the Shiller +dataset contains coincidental numeric matches in historical-year +fields that aren't PII. + +If you're uncertain whether something is PII, **ask before +committing.** PII can be surgically removed from a working +tree, but once it's in `git log` it's effectively permanent. +**That includes this file** — never put real identifiers in +AGENTS.md or any other tracked file as "examples of what to look +for." The placeholder vocabulary above is the only safe way to +illustrate the patterns. If you're uncertain whether something is PII, **ask before committing.** PII can be surgically removed from a working diff --git a/src/brokerage/wells_fargo.zig b/src/brokerage/wells_fargo.zig index cde5c59..cc625e1 100644 --- a/src/brokerage/wells_fargo.zig +++ b/src/brokerage/wells_fargo.zig @@ -1003,7 +1003,8 @@ test "parsePaste: trailing cash section emits a cash position" { test "parsePaste: cash section absent is a no-op" { // When the user only pastes the positions table (no cash // section), parsePaste returns just the positions. Regression - // for the original 3522.txt-style paste shape. + // for the simpler positions-only paste shape (no trailing + // cash table). const allocator = testing.allocator; const data = "GSLC , popup\n" ++ diff --git a/src/commands/framework.zig b/src/commands/framework.zig index 7a51a15..addcabb 100644 --- a/src/commands/framework.zig +++ b/src/commands/framework.zig @@ -274,6 +274,14 @@ pub const RunCtx = struct { /// portfolio file. Returns the default pattern's first match /// when -p is not set; falls through to a literal default if /// nothing matched. + /// + /// **Do NOT use this for portfolio CONTENT loading.** Portfolio + /// content must come from the multi-file glob via + /// `cli.loadPortfolio` (live) or + /// `portfolio_loader.loadPortfolioFromPathsAtRev` (git + /// historical). This singular helper is for choosing ONE + /// concrete path to derive sibling files from — never for + /// reading lots out of. pub fn resolvePortfolioPath(self: *RunCtx) ResolvedPath { var paths = self.resolvePortfolioPaths() catch { return .{ .path = zfin.Config.default_portfolio_filename, .resolved = null }; diff --git a/src/commands/snapshot.zig b/src/commands/snapshot.zig index d6345cc..7ff4190 100644 --- a/src/commands/snapshot.zig +++ b/src/commands/snapshot.zig @@ -2,10 +2,17 @@ //! //! Flow: //! 1. Locate portfolio.srf via `config.resolveUserFile` (or -p). +//! The first resolved file's directory determines the +//! `history/` location (sibling-of-first convention). //! 2. Derive `history/` dir as `dirname(portfolio.srf)/history/`. -//! 3. Load portfolio composition. Normally from the working-copy -//! file; with `--as-of`, from git history at or before the -//! requested date (falling back to working copy if pre-git). +//! 3. Load portfolio composition as the **union of all matched +//! portfolio files** (multi-file glob support). Normally from +//! the working tree via `cli.loadPortfolio`; with `--as-of`, +//! from git history at the repo-wide latest sha ≤ the +//! requested date via `loadPortfolioFromPathsAtRev`. Files +//! that didn't exist at that sha are silently skipped — the +//! union just doesn't include those lots. Falls back to +//! working copy if git is unavailable. //! 4. Refresh the candle cache via `cli.loadPortfolioPrices` //! (skipped under `--as-of` — past candles don't change). //! 5. Compute `as_of_date`: explicit `--as-of` wins; otherwise mode @@ -17,6 +24,11 @@ //! `--force` wasn't passed, skip (exit 0, stderr message). //! 8. Build the snapshot records and write them atomically. //! +//! The snapshot file itself is a SINGLE union'd record set — +//! consumers (compare, projections --vs, TUI history tab) read one +//! `-portfolio.srf` per date and don't need to know how many +//! source files contributed to it. +//! //! Flags: //! --force overwrite existing snapshot for as_of_date //! --dry-run compute + print to stdout, do not write @@ -40,6 +52,7 @@ const portfolio_mod = @import("../models/portfolio.zig"); const Date = @import("../Date.zig"); const model = @import("../models/snapshot.zig"); const git = @import("../git.zig"); +const portfolio_loader = @import("../portfolio_loader.zig"); // Re-export record types so callers that reach `commands/snapshot.zig` // (tests, mostly) still see the familiar names. New code should prefer @@ -150,6 +163,7 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void { const out = ctx.out; const color = ctx.color; const now_s = ctx.now_s; + const today = ctx.today; const force = parsed.force; const dry_run = parsed.dry_run; @@ -160,22 +174,24 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void { defer pf.deinit(allocator); const portfolio_path = pf.path; - // Load portfolio bytes. In normal (no --as-of) mode this is the - // current working-copy of portfolio.srf. With --as-of, we first try - // to retrieve the portfolio state from git history at or before the - // target date — that gives accurate composition for past snapshots. - // If git lookup fails (portfolio not tracked, no commits before the - // date, git unavailable), we warn and fall back to the working copy, - // which at least approximates "positions the user currently holds" - // and is better than erroring out. - const pf_data = try loadPortfolioAtDate(io, allocator, portfolio_path, as_of_override); - defer allocator.free(pf_data); - - var portfolio = zfin.cache.deserializePortfolio(allocator, pf_data) catch { - cli.stderrPrint(io, "Error parsing portfolio file.\n"); - return error.WriteFailed; - }; - defer portfolio.deinit(); + // Load portfolio. In normal (no --as-of) mode this is the + // current working-copy union of all matched portfolio files. + // With --as-of, we first try to retrieve the portfolio state + // from git history at or before the target date — that gives + // accurate composition for past snapshots. If git lookup fails + // (portfolio not tracked, no commits before the date, git + // unavailable), we warn and fall back to the working-copy + // union, which at least approximates "positions the user + // currently holds" and is better than erroring out. + // + // `portfolio_path` (singular, first-resolved) is used only for + // sibling-derivation (history dir, snapshot file path); the + // portfolio CONTENT comes from the multi-file glob. + var loaded = try loadPortfolioForSnapshot(ctx, today, as_of_override); + defer loaded.deinit(allocator); + var portfolio = loaded.portfolio; + // We don't deinit `portfolio` separately — `loaded.deinit` + // handles it. if (portfolio.lots.len == 0) { cli.stderrPrint(io, "Portfolio is empty; nothing to snapshot.\n"); @@ -393,80 +409,88 @@ pub fn deriveSnapshotPath( /// file" — the file's current state IS its mtime state. A clean exit /// lets bulk-backfill loops keep moving. /// -/// Caller owns returned bytes. -fn loadPortfolioAtDate( - io: std.Io, - allocator: std.mem.Allocator, - portfolio_path: []const u8, +/// Caller owns the returned `LoadedPortfolio`. +/// +/// In normal (no `--as-of`) mode this loads the working-copy union +/// of all matched portfolio files via `cli.loadPortfolio`. With +/// `--as-of`, it discovers the repo-wide latest sha at or before +/// the target date (`git.shaAtOrBefore`) and reads each file in +/// the glob at that sha (`portfolio_loader.loadPortfolioFromPathsAtRev`). +/// Files that didn't exist at that sha are silently skipped — the +/// union just doesn't include those lots, which is correct for +/// "snapshot of state on this date." +/// +/// On git lookup failure (not in a repo, no commit before the +/// target, etc.), warns and falls back to the working-copy union +/// — better to capture today's state than fail entirely. +fn loadPortfolioForSnapshot( + ctx: *framework.RunCtx, + today: Date, as_of: ?Date, -) ![]const u8 { +) !portfolio_loader.LoadedPortfolio { + const io = ctx.io; + const allocator = ctx.allocator; + const target = as_of orelse { - // Normal mode — just read the file. - return std.Io.Dir.cwd().readFileAlloc(io, portfolio_path, allocator, .limited(10 * 1024 * 1024)) catch |err| { - cli.stderrPrint(io, "Error reading portfolio file: "); - cli.stderrPrint(io, @errorName(err)); - cli.stderrPrint(io, "\n"); - return err; - }; + // Normal mode — load working-copy union via the shared + // multi-file loader. `today` is used as the as_of for + // position computation. + return cli.loadPortfolio(ctx, today) orelse return error.WriteFailed; }; - // Try git first. - if (loadPortfolioFromGit(io, allocator, portfolio_path, target)) |bytes| return bytes else |err| switch (err) { - error.NotInGitRepo, error.GitUnavailable, error.PathMissingInRev, error.UnknownRevision, error.NoCommitBeforeDate => { - // Fall through to working-copy fallback below. - var buf: [256]u8 = undefined; - const msg = std.fmt.bufPrint( - &buf, - "warning: no git history for portfolio at {f}; using working copy as approximation\n", - .{target}, - ) catch "warning: no git history for portfolio at requested date\n"; - cli.stderrPrint(io, msg); + // --as-of mode: find the repo-wide sha at or before target. + // Need a single concrete path to discover the repo from; use + // the first resolved portfolio path. + const pf = ctx.resolvePortfolioPath(); + defer pf.deinit(allocator); + const portfolio_path = pf.path; + + const info = git.findRepo(io, allocator, portfolio_path) catch |err| switch (err) { + error.NotInGitRepo, error.GitUnavailable => { + warnGitFallback(io, target, "no git repo"); + return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed; }, - else => |e| return e, - } - - return std.Io.Dir.cwd().readFileAlloc(io, portfolio_path, allocator, .limited(10 * 1024 * 1024)) catch |err| { - cli.stderrPrint(io, "Error reading portfolio file: "); - cli.stderrPrint(io, @errorName(err)); - cli.stderrPrint(io, "\n"); - return err; + else => return err, }; -} - -/// `git show`-based retrieval of portfolio bytes at or before `target`. -/// Returns the raw bytes (caller owns) or an error classifying the -/// failure mode so `loadPortfolioAtDate` can decide whether to fall -/// back. -fn loadPortfolioFromGit( - io: std.Io, - allocator: std.mem.Allocator, - portfolio_path: []const u8, - target: Date, -) ![]const u8 { - const info = try git.findRepo(io, allocator, portfolio_path); defer allocator.free(info.root); defer allocator.free(info.rel_path); - // List all commits that touched this path, newest-first. - const commits = try git.listCommitsTouching(io, allocator, info.root, info.rel_path, null); - defer git.freeCommitTouches(allocator, commits); - - if (commits.len == 0) return error.PathMissingInRev; - - // Find the latest commit whose committer timestamp falls on or before - // the end of `target` day (23:59:59 UTC). The list is newest-first - // per git log's default order, so we scan linearly. - const target_end_ts = target.toEpoch() + std.time.s_per_day - 1; - - const sha: ?[]const u8 = blk: { - for (commits) |c| { - if (c.timestamp <= target_end_ts) break :blk c.commit; - } - break :blk null; + var date_buf: [10]u8 = undefined; + const date_str = std.fmt.bufPrint(&date_buf, "{f}", .{target}) catch "????-??-??"; + const sha_opt = git.shaAtOrBefore(io, allocator, info.root, date_str) catch |err| switch (err) { + error.GitUnavailable, error.GitLogFailed => { + warnGitFallback(io, target, "git lookup failed"); + return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed; + }, + else => return err, }; + const sha = sha_opt orelse { + warnGitFallback(io, target, "no commit at or before target date"); + return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed; + }; + defer allocator.free(sha); - const chosen = sha orelse return error.NoCommitBeforeDate; - return try git.show(io, allocator, info.root, chosen, info.rel_path); + // Resolve the full glob and load each file at `sha`. + var resolved = ctx.resolvePortfolioPaths() catch { + warnGitFallback(io, target, "could not resolve portfolio glob"); + return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed; + }; + defer resolved.deinit(); + + return portfolio_loader.loadPortfolioFromPathsAtRev(io, allocator, resolved.paths, sha, target) orelse { + warnGitFallback(io, target, "could not load portfolio at rev"); + return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed; + }; +} + +fn warnGitFallback(io: std.Io, target: Date, reason: []const u8) void { + var buf: [256]u8 = undefined; + const msg = std.fmt.bufPrint( + &buf, + "warning: {s} for portfolio at {f}; using working copy as approximation\n", + .{ reason, target }, + ) catch "warning: git history unavailable; using working copy as approximation\n"; + cli.stderrPrint(io, msg); } // ── Quote-date / as_of_date helpers ────────────────────────── diff --git a/src/data/imported_values.zig b/src/data/imported_values.zig index 16d2fb3..e3fda9f 100644 --- a/src/data/imported_values.zig +++ b/src/data/imported_values.zig @@ -1,6 +1,6 @@ //! Imported portfolio history values from a manually-curated spreadsheet. //! -//! Pre-dates `~/finance/history/` snapshot capture. Contains weekly +//! Pre-dates the `history/` snapshot capture. Contains weekly //! `(date, liquid, expected_return, projected_retirement)` tuples //! transcribed from a third-party spreadsheet (the "K+E Funds" //! workbook for the primary portfolio, plus optional sibling @@ -11,7 +11,7 @@ //! 1. The user exports the spreadsheet to CSV via `ssconvert -S`. //! 2. `tools/import_values.zig` (a one-shot Zig program) consumes //! the CSV and emits an `imported_values.srf` next to the -//! repo's `~/finance/history/` snapshot directory. +//! portfolio's `history/` snapshot directory. //! 3. This module loads that SRF for downstream consumers //! (`zfin milestones`, projection overlay, forecast-vs-actual). //! diff --git a/src/git.zig b/src/git.zig index 6dbb0b5..f2dec37 100644 --- a/src/git.zig +++ b/src/git.zig @@ -399,6 +399,52 @@ pub fn commitAtOrBeforeDate( return try allocator.dupe(u8, trimmed); } +/// Repo-wide variant of `commitAtOrBeforeDate`. Returns the SHA of +/// the latest commit on HEAD whose committer-date is ≤ `date_iso` +/// 23:59:59 local, regardless of which paths the commit touched. +/// Returns null when no such commit exists. +/// +/// Used by `zfin snapshot --as-of` to pick a single repo-wide +/// "as of date" reference; each portfolio file in the multi-file +/// glob is then read at that one SHA via `git show`. This produces +/// a coherent point-in-time snapshot — files that didn't exist at +/// that SHA (`error.PathMissingInRev`) are treated as absent +/// rather than as errors. +pub fn shaAtOrBefore( + io: std.Io, + allocator: std.mem.Allocator, + root: []const u8, + date_iso: []const u8, +) Error!?[]const u8 { + // Same end-of-day pinning as `commitAtOrBeforeDate` (see that + // function's comment). No `-- ` filter — we want the + // repo-wide latest. + const until_arg = try std.fmt.allocPrint(allocator, "--until={s} 23:59:59", .{date_iso}); + defer allocator.free(until_arg); + + const result = std.process.run(allocator, io, .{ + .argv = &.{ + "git", "-C", root, + "log", "-1", "--format=%H", + "HEAD", until_arg, + }, + .stdout_limit = .limited(64 * 1024), + }) catch return error.GitUnavailable; + defer allocator.free(result.stdout); + defer allocator.free(result.stderr); + + switch (result.term) { + .exited => |code| if (code != 0) return error.GitLogFailed, + else => return error.GitLogFailed, + } + + const trimmed = std.mem.trim(u8, result.stdout, " \t\r\n"); + if (trimmed.len == 0) return null; + if (trimmed.len < 40) return error.GitLogFailed; + for (trimmed) |c| if (!std.ascii.isHex(c)) return error.GitLogFailed; + return try allocator.dupe(u8, trimmed); +} + /// Get the committer-date (Unix timestamp, seconds since epoch) of /// a given ref. Accepts anything `git log -1 --format=%ct ` /// accepts: SHAs, HEAD, HEAD~N. Used by the contributions pipeline @@ -521,7 +567,8 @@ fn resolveSpec(io: std.Io, arena: std.mem.Allocator, repo: RepoInfo, spec: Commi .git_ref => |r| r, .date_at_or_before => |d| blk: { var buf: [10]u8 = undefined; - const date_str = std.fmt.bufPrint(&buf, "{f}", .{d}) catch unreachable; + // SAFETY: 10-byte buffer is exactly the size of "YYYY-MM-DD". + const date_str = std.fmt.bufPrint(&buf, "{f}", .{d}) catch buf[0..]; const sha = (try commitAtOrBeforeDate(io, arena, repo.root, repo.rel_path, date_str)) orelse return error.NoCommitAtOrBefore; break :blk sha; @@ -655,6 +702,34 @@ test "commitAtOrBeforeDate: --until=DATE covers end of day, not current time-of- if (sha_opt) |s| allocator.free(s); } +test "shaAtOrBefore returns a SHA for a past date in the ambient repo" { + // Repo-wide variant of `commitAtOrBeforeDate` — finds the latest + // commit on HEAD ≤ the given date, regardless of paths touched. + // Same future-date trick as the path-scoped variant: a date well + // beyond now should always return the tip of history. + const allocator = std.testing.allocator; + const info = findRepo(std.testing.io, allocator, "build.zig") catch return; + defer allocator.free(info.root); + defer allocator.free(info.rel_path); + + const sha_opt = shaAtOrBefore(std.testing.io, allocator, info.root, "2099-01-01") catch return; + try std.testing.expect(sha_opt != null); + const sha = sha_opt.?; + defer allocator.free(sha); + try std.testing.expect(sha.len == 40 or sha.len == 64); + for (sha) |c| try std.testing.expect(std.ascii.isHex(c)); +} + +test "shaAtOrBefore returns null for date before repo existed" { + const allocator = std.testing.allocator; + const info = findRepo(std.testing.io, allocator, "build.zig") catch return; + defer allocator.free(info.root); + defer allocator.free(info.rel_path); + + const sha_opt = shaAtOrBefore(std.testing.io, allocator, info.root, "1970-01-02") catch return; + try std.testing.expect(sha_opt == null); +} + test "resolveCommitRange: legacy clean → HEAD~1..HEAD" { var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena_state.deinit(); diff --git a/src/portfolio_loader.zig b/src/portfolio_loader.zig index 0ca2d4e..7a1d34d 100644 --- a/src/portfolio_loader.zig +++ b/src/portfolio_loader.zig @@ -36,6 +36,19 @@ //! the TUI's reload-button path (re-uses the original resolved //! path slice without re-globbing). //! +//! - `loadPortfolioFromPathsAtRev(io, alloc, paths, rev, as_of)` — +//! git-historical variant: read each file at `rev` via +//! `git.show` instead of from the working tree. Files that don't +//! exist at the rev are silently skipped (the union just doesn't +//! include those lots). Used by `zfin snapshot --as-of` to +//! capture point-in-time portfolio state across a multi-file +//! glob from git history. +//! +//! All three loaders share a single private `loadFromBytes` core +//! that does the deserialize + union-merge + position-compute + +//! symbol-extract pass. Tests target `loadFromBytes` directly with +//! synthetic byte literals to avoid filesystem I/O. +//! //! - `PortfolioData` + `buildPortfolioData(...)` — second-stage //! pipeline: turn a `LoadedPortfolio` (or its parts) plus a //! `prices` map into a `PortfolioSummary` with allocations, @@ -45,6 +58,7 @@ const std = @import("std"); const zfin = @import("root.zig"); const framework = @import("commands/framework.zig"); const stderr = @import("stderr.zig"); +const git = @import("git.zig"); // ── Portfolio loading ──────────────────────────────────────── @@ -178,6 +192,119 @@ pub fn loadPortfolioFromPaths(io: std.Io, allocator: std.mem.Allocator, paths: [ return loadFromPaths(io, allocator, paths_owned, null, as_of); } +/// Like `loadPortfolioFromPaths`, but reads each file from a git +/// revision instead of the working tree. The result is the union +/// of all files at that rev, deserialized + union-merged the same +/// way the live loader does. +/// +/// Files that don't exist at the requested rev (e.g. +/// `portfolio_other.srf` was added later than `rev`) are silently +/// skipped — the union just doesn't include those lots. This is the +/// right behavior for `zfin snapshot --as-of ` against a +/// portfolio that's been split into multiple files over time. +/// +/// All `paths` must live within the same git repository as the +/// first path. Repository discovery uses `git.findRepo` on the +/// first path; the rest derive their rel-paths by trimming the +/// repo-root prefix. +/// +/// Returns null on: +/// - empty paths slice (no portfolio file to load); +/// - first path not in any git repo; +/// - any deserialize / merge / position-compute failure (same as +/// the working-tree loader). +/// +/// `git.show` failures other than `PathMissingInRev` (e.g. +/// `UnknownRevision`) propagate as null; the function prints a +/// stderr message describing which file/rev failed. +pub fn loadPortfolioFromPathsAtRev( + io: std.Io, + allocator: std.mem.Allocator, + paths: []const []const u8, + rev: []const u8, + as_of: zfin.Date, +) ?LoadedPortfolio { + if (paths.len == 0) { + stderr.print(io, "Error: No portfolio file found\n"); + return null; + } + + // Discover the repo from the first path. All other paths are + // assumed to live in the same repo (typical for sibling + // portfolio files in the same directory); we derive their + // rel-paths below. + const info = git.findRepo(io, allocator, paths[0]) catch { + var msg_buf: [512]u8 = undefined; + const msg = std.fmt.bufPrint(&msg_buf, "Error: portfolio path not in a git repo: {s}\n", .{paths[0]}) catch "Error: portfolio path not in a git repo\n"; + stderr.print(io, msg); + return null; + }; + defer allocator.free(info.root); + defer allocator.free(info.rel_path); + + // Read each file at `rev`. On `PathMissingInRev` we substitute + // empty bytes; `loadFromBytes` knows to treat that as "file + // absent at this rev, skip without parsing." Any other git + // error is fatal. + var datas: std.ArrayList([]const u8) = .empty; + var datas_consumed = false; + defer if (!datas_consumed) { + for (datas.items) |d| allocator.free(d); + datas.deinit(allocator); + }; + + for (paths) |abs_path| { + // Derive rel_path from this file's absolute path. The first + // file's rel_path was computed by findRepo; for the rest we + // strip the repo root prefix manually. realpath ensures + // canonicalization (the user could pass a non-canonical + // path). + const real = std.Io.Dir.cwd().realPathFileAlloc(io, abs_path, allocator) catch { + var msg_buf: [512]u8 = undefined; + const msg = std.fmt.bufPrint(&msg_buf, "Error: cannot resolve real path for: {s}\n", .{abs_path}) catch "Error: cannot resolve real path\n"; + stderr.print(io, msg); + return null; + }; + defer allocator.free(real); + + const rel = if (std.mem.startsWith(u8, real, info.root) and real.len > info.root.len) + std.mem.trimStart(u8, real[info.root.len..], "/") + else + std.fs.path.basename(real); + + const data = git.show(io, allocator, info.root, rev, rel) catch |err| switch (err) { + error.PathMissingInRev => empty_blk: { + // File didn't exist at this rev. Substitute empty + // bytes; loadFromBytes skips empty entries. + break :empty_blk allocator.dupe(u8, "") catch return null; + }, + else => { + var msg_buf: [512]u8 = undefined; + const msg = std.fmt.bufPrint(&msg_buf, "Error: git show {s}:{s}: {t}\n", .{ rev, rel, err }) catch "Error: git show failed\n"; + stderr.print(io, msg); + return null; + }, + }; + datas.append(allocator, data) catch { + allocator.free(data); + return null; + }; + } + + const datas_owned = datas.toOwnedSlice(allocator) catch return null; + datas_consumed = true; + + // Hand off ownership of the slice we just dupe'd from the + // caller. `loadFromBytes` frees both `paths_owned` and + // `datas_owned` on any failure. + const paths_owned = allocator.dupe([]const u8, paths) catch { + for (datas_owned) |d| allocator.free(d); + allocator.free(datas_owned); + return null; + }; + return loadFromBytes(io, allocator, paths_owned, null, datas_owned, as_of); +} + /// Internal: load+merge given a pre-resolved paths slice. The slice /// `paths_owned` is taken (will be freed by `LoadedPortfolio.deinit`). /// `resolved_paths_opt` is the optional `Config.ResolvedPaths` to @@ -190,21 +317,8 @@ fn loadFromPaths( resolved_paths_opt: ?zfin.Config.ResolvedPaths, as_of: zfin.Date, ) ?LoadedPortfolio { - // On any error after this point we must free the slice we just - // took ownership of, plus deinit the `resolved_paths_opt` so the - // path strings aren't leaked. - var error_cleanup_armed = true; - defer if (error_cleanup_armed) { - allocator.free(paths_owned); - if (resolved_paths_opt) |rp| rp.deinit(); - }; - // Read every file up front; bail on first error. var file_datas: std.ArrayList([]const u8) = .empty; - errdefer { - for (file_datas.items) |d| allocator.free(d); - file_datas.deinit(allocator); - } for (paths_owned) |p| { const data = std.Io.Dir.cwd().readFileAlloc(io, p, allocator, .limited(10 * 1024 * 1024)) catch { var msg_buf: [512]u8 = undefined; @@ -212,64 +326,109 @@ fn loadFromPaths( stderr.print(io, msg); for (file_datas.items) |d| allocator.free(d); file_datas.deinit(allocator); + allocator.free(paths_owned); + if (resolved_paths_opt) |rp| rp.deinit(); return null; }; file_datas.append(allocator, data) catch { allocator.free(data); for (file_datas.items) |d| allocator.free(d); file_datas.deinit(allocator); + allocator.free(paths_owned); + if (resolved_paths_opt) |rp| rp.deinit(); return null; }; } + const file_datas_owned = file_datas.toOwnedSlice(allocator) catch { + for (file_datas.items) |d| allocator.free(d); + file_datas.deinit(allocator); + allocator.free(paths_owned); + if (resolved_paths_opt) |rp| rp.deinit(); + return null; + }; + + // Hand off to the bytes-only path. `loadFromBytes` takes + // ownership of all four slices and frees them on any error. + return loadFromBytes(io, allocator, paths_owned, resolved_paths_opt, file_datas_owned, as_of); +} + +/// Bytes-only union-merge core. Takes ownership of `paths_owned`, +/// `resolved_paths_opt` (if non-null), and `file_datas_owned` (each +/// element must be `allocator`-owned). Frees them on any error. +/// +/// Tests use this directly with synthetic byte literals; production +/// callers come through `loadFromPaths` (working-tree reads) or +/// `loadPortfolioFromPathsAtRev` (git-historical reads). All three +/// share this single deserialize-merge-positions-syms pass. +/// +/// `paths_owned` and `file_datas_owned` must have the same length; +/// each `file_datas_owned[i]` is the bytes loaded from +/// `paths_owned[i]`. Path strings are used only for diagnostics +/// (which file failed to parse) and in the returned struct. +fn loadFromBytes( + io: std.Io, + allocator: std.mem.Allocator, + paths_owned: []const []const u8, + resolved_paths_opt: ?zfin.Config.ResolvedPaths, + file_datas_owned: []const []const u8, + as_of: zfin.Date, +) ?LoadedPortfolio { + std.debug.assert(paths_owned.len == file_datas_owned.len); + + // Cleanup is staged: lots live on `merged` until + // `toOwnedSlice`, then move into `combined` (which has its + // own deinit). The two cleanup paths are mutually exclusive, + // tracked by `lots_owner`. On success, neither fires. + var merged: std.ArrayList(zfin.Lot) = .empty; + var combined: zfin.Portfolio = .{ .lots = &.{}, .allocator = allocator }; + const LotsOwner = enum { merged_list, combined_struct, none }; + var lots_owner: LotsOwner = .merged_list; + var success = false; + + defer if (!success) { + switch (lots_owner) { + .merged_list => { + for (merged.items) |lot| { + allocator.free(lot.symbol); + if (lot.note) |n| allocator.free(n); + if (lot.account) |a| allocator.free(a); + if (lot.ticker) |t| allocator.free(t); + if (lot.underlying) |u| allocator.free(u); + } + merged.deinit(allocator); + }, + .combined_struct => combined.deinit(), + .none => {}, + } + for (file_datas_owned) |d| allocator.free(d); + allocator.free(file_datas_owned); + allocator.free(paths_owned); + if (resolved_paths_opt) |rp| rp.deinit(); + }; + // Deserialize each into an owned Portfolio, then merge their // lot slices into a single combined slice. We can't simply // concat the underlying slices because each Portfolio expects // to free its own lots in `deinit()`; instead, we steal each // Portfolio's lots[] (string fields are already dupe'd into // `allocator`) and free only the empty Portfolio struct. - var merged: std.ArrayList(zfin.Lot) = .empty; - errdefer { - for (merged.items) |lot| { - allocator.free(lot.symbol); - if (lot.note) |n| allocator.free(n); - if (lot.account) |a| allocator.free(a); - if (lot.ticker) |t| allocator.free(t); - if (lot.underlying) |u| allocator.free(u); - } - merged.deinit(allocator); - } + for (file_datas_owned, 0..) |data, idx| { + // Empty bytes are a legitimate "file absent" signal — + // `loadPortfolioFromPathsAtRev` uses this to indicate a + // file that didn't exist at the requested git rev. Skip + // without trying to parse. + if (data.len == 0) continue; - for (file_datas.items, 0..) |data, idx| { var portfolio = zfin.cache.deserializePortfolio(allocator, data) catch { var msg_buf: [512]u8 = undefined; const msg = std.fmt.bufPrint(&msg_buf, "Error: Cannot parse portfolio file: {s}\n", .{paths_owned[idx]}) catch "Error: Cannot parse portfolio file\n"; stderr.print(io, msg); - for (merged.items) |lot| { - allocator.free(lot.symbol); - if (lot.note) |n| allocator.free(n); - if (lot.account) |a| allocator.free(a); - if (lot.ticker) |t| allocator.free(t); - if (lot.underlying) |u| allocator.free(u); - } - merged.deinit(allocator); - for (file_datas.items) |d| allocator.free(d); - file_datas.deinit(allocator); return null; }; for (portfolio.lots) |lot| { merged.append(allocator, lot) catch { portfolio.deinit(); - for (merged.items) |existing| { - allocator.free(existing.symbol); - if (existing.note) |n| allocator.free(n); - if (existing.account) |a| allocator.free(a); - if (existing.ticker) |t| allocator.free(t); - if (existing.underlying) |u| allocator.free(u); - } - merged.deinit(allocator); - for (file_datas.items) |d| allocator.free(d); - file_datas.deinit(allocator); return null; }; } @@ -278,42 +437,25 @@ fn loadFromPaths( allocator.free(portfolio.lots); } - const merged_slice = merged.toOwnedSlice(allocator) catch { - for (file_datas.items) |d| allocator.free(d); - file_datas.deinit(allocator); - return null; - }; - - var combined: zfin.Portfolio = .{ + const merged_slice = merged.toOwnedSlice(allocator) catch return null; + combined = .{ .lots = merged_slice, .allocator = allocator, }; + lots_owner = .combined_struct; const positions = combined.positions(as_of, allocator) catch { - combined.deinit(); - for (file_datas.items) |d| allocator.free(d); - file_datas.deinit(allocator); stderr.print(io, "Error: Cannot compute positions\n"); return null; }; const syms = combined.stockSymbols(allocator) catch { allocator.free(positions); - combined.deinit(); - for (file_datas.items) |d| allocator.free(d); - file_datas.deinit(allocator); stderr.print(io, "Error: Cannot get stock symbols\n"); return null; }; - const file_datas_owned = file_datas.toOwnedSlice(allocator) catch { - allocator.free(syms); - allocator.free(positions); - combined.deinit(); - return null; - }; - - error_cleanup_armed = false; + success = true; return .{ .paths = paths_owned, .resolved_paths = resolved_paths_opt, @@ -399,3 +541,285 @@ pub fn buildPortfolioData( .snapshots = snapshots, }; } + +// ── loadFromBytes tests (in-memory; no disk I/O) ───────────── +// +// All tests here construct synthetic SRF byte literals and route +// them through `loadFromBytes` directly. Both production callers +// (working-tree `loadFromPaths` and git-historical +// `loadPortfolioFromPathsAtRev`) share this core, so unit-testing +// it covers the union-merge / parse-error / empty-bytes behavior +// without spinning up temp filesystems or git repos. + +const testing = std.testing; + +/// Test helper: dupe a slice of byte-string literals into the +/// allocator so `loadFromBytes` can free them on its own. +fn dupeBytes(allocator: std.mem.Allocator, datas: []const []const u8) ![]const []const u8 { + var owned = try allocator.alloc([]const u8, datas.len); + errdefer { + for (owned[0..]) |d| allocator.free(d); + allocator.free(owned); + } + var filled: usize = 0; + for (datas, 0..) |d, i| { + owned[i] = try allocator.dupe(u8, d); + filled = i + 1; + } + return owned; +} + +test "loadFromBytes: union of two synthetic SRF files" { + const allocator = testing.allocator; + + const file_a = + \\#!srfv1 + \\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA + \\ + ; + const file_b = + \\#!srfv1 + \\symbol::MSFT,shares:num:5,open_date::2024-02-01,open_price:num:300,account::Sample Roth + \\ + ; + + const paths = try allocator.dupe([]const u8, &.{ "a.srf", "b.srf" }); + const datas = try dupeBytes(allocator, &.{ file_a, file_b }); + + var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse { + try testing.expect(false); + return; + }; + defer loaded.deinit(allocator); + + try testing.expectEqual(@as(usize, 2), loaded.portfolio.lots.len); + // Lots are appended in file order. + try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol); + try testing.expectEqualStrings("MSFT", loaded.portfolio.lots[1].symbol); +} + +test "loadFromBytes: single file with valid contents" { + const allocator = testing.allocator; + + const file_a = + \\#!srfv1 + \\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA + \\ + ; + + const paths = try allocator.dupe([]const u8, &.{"a.srf"}); + const datas = try dupeBytes(allocator, &.{file_a}); + + var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse { + try testing.expect(false); + return; + }; + defer loaded.deinit(allocator); + + try testing.expectEqual(@as(usize, 1), loaded.portfolio.lots.len); + try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol); +} + +test "loadFromBytes: empty-bytes entry is treated as absent file" { + // Production use case: `loadPortfolioFromPathsAtRev` returns + // empty bytes for a path that didn't exist at the requested + // git rev. The union-merge silently skips it instead of + // failing — matches "snapshot at a date before this file + // was added" semantics. + const allocator = testing.allocator; + + const file_a = + \\#!srfv1 + \\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA + \\ + ; + const file_b: []const u8 = ""; + + const paths = try allocator.dupe([]const u8, &.{ "a.srf", "b.srf" }); + const datas = try dupeBytes(allocator, &.{ file_a, file_b }); + + var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse { + try testing.expect(false); + return; + }; + defer loaded.deinit(allocator); + + try testing.expectEqual(@as(usize, 1), loaded.portfolio.lots.len); + try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol); +} + +test "loadFromBytes: all-empty bytes returns empty portfolio" { + // Production use case: snapshot at a rev that predates ALL + // tracked portfolio files. No lots; no error. + const allocator = testing.allocator; + + const paths = try allocator.dupe([]const u8, &.{ "a.srf", "b.srf" }); + const datas = try dupeBytes(allocator, &.{ "", "" }); + + var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse { + try testing.expect(false); + return; + }; + defer loaded.deinit(allocator); + + try testing.expectEqual(@as(usize, 0), loaded.portfolio.lots.len); +} + +// ── loadPortfolioFromPathsAtRev tests (disk + git) ─────────── +// +// These spin up a temp git repo so they exercise the full +// `git.show + skip-on-PathMissingInRev` contract end-to-end. +// Kept minimal (one happy-path, one missing-file case); the +// bulk of the logic is in `loadFromBytes`, covered above. + +/// Run a one-shot git command in `cwd` for test setup. Panics on +/// failure — these tests can't proceed without a working repo. +fn gitInTestRepo(allocator: std.mem.Allocator, cwd: []const u8, argv: []const []const u8) !void { + const full_argv = try allocator.alloc([]const u8, argv.len + 3); + defer allocator.free(full_argv); + full_argv[0] = "git"; + full_argv[1] = "-C"; + full_argv[2] = cwd; + @memcpy(full_argv[3..], argv); + const result = try std.process.run(allocator, testing.io, .{ + .argv = full_argv, + .stdout_limit = .limited(64 * 1024), + }); + defer allocator.free(result.stdout); + defer allocator.free(result.stderr); + switch (result.term) { + .exited => |code| if (code != 0) { + std.debug.print("git command failed (code {d}): {s}\nstderr: {s}\n", .{ code, std.mem.join(allocator, " ", argv) catch "?", result.stderr }); + return error.GitFailed; + }, + else => return error.GitFailed, + } +} + +test "loadPortfolioFromPathsAtRev: union of two committed files at HEAD" { + const allocator = testing.allocator; + + // Skip if `git` isn't on PATH (CI sandbox without git). + { + const probe = std.process.run(allocator, testing.io, .{ + .argv = &.{ "git", "--version" }, + .stdout_limit = .limited(1024), + }) catch return; + defer allocator.free(probe.stdout); + defer allocator.free(probe.stderr); + } + + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_len = try tmp.dir.realPathFile(testing.io, ".", &path_buf); + const dir = path_buf[0..dir_len]; + + const file_a = + \\#!srfv1 + \\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA + \\ + ; + const file_b = + \\#!srfv1 + \\symbol::MSFT,shares:num:5,open_date::2024-02-01,open_price:num:300,account::Sample Roth + \\ + ; + try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio.srf", .data = file_a }); + try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio_other.srf", .data = file_b }); + + try gitInTestRepo(allocator, dir, &.{ "init", "-q" }); + try gitInTestRepo(allocator, dir, &.{ "config", "user.email", "test@example.com" }); + try gitInTestRepo(allocator, dir, &.{ "config", "user.name", "Test" }); + try gitInTestRepo(allocator, dir, &.{ "config", "commit.gpgsign", "false" }); + try gitInTestRepo(allocator, dir, &.{ "add", "portfolio.srf", "portfolio_other.srf" }); + try gitInTestRepo(allocator, dir, &.{ "commit", "-q", "-m", "initial" }); + + const p1 = try std.fs.path.join(allocator, &.{ dir, "portfolio.srf" }); + defer allocator.free(p1); + const p2 = try std.fs.path.join(allocator, &.{ dir, "portfolio_other.srf" }); + defer allocator.free(p2); + const paths = [_][]const u8{ p1, p2 }; + + var loaded = loadPortfolioFromPathsAtRev(testing.io, allocator, &paths, "HEAD", zfin.Date.fromYmd(2026, 5, 23)) orelse { + try testing.expect(false); + return; + }; + defer loaded.deinit(allocator); + + try testing.expectEqual(@as(usize, 2), loaded.portfolio.lots.len); +} + +test "loadPortfolioFromPathsAtRev: file added later is silently skipped at earlier rev" { + const allocator = testing.allocator; + + // Skip if `git` isn't on PATH. + { + const probe = std.process.run(allocator, testing.io, .{ + .argv = &.{ "git", "--version" }, + .stdout_limit = .limited(1024), + }) catch return; + defer allocator.free(probe.stdout); + defer allocator.free(probe.stderr); + } + + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_len = try tmp.dir.realPathFile(testing.io, ".", &path_buf); + const dir = path_buf[0..dir_len]; + + const file_a = + \\#!srfv1 + \\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA + \\ + ; + try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio.srf", .data = file_a }); + + try gitInTestRepo(allocator, dir, &.{ "init", "-q" }); + try gitInTestRepo(allocator, dir, &.{ "config", "user.email", "test@example.com" }); + try gitInTestRepo(allocator, dir, &.{ "config", "user.name", "Test" }); + try gitInTestRepo(allocator, dir, &.{ "config", "commit.gpgsign", "false" }); + try gitInTestRepo(allocator, dir, &.{ "add", "portfolio.srf" }); + try gitInTestRepo(allocator, dir, &.{ "commit", "-q", "-m", "initial" }); + + // Capture commit 1 SHA before adding file_b. + const result = try std.process.run(allocator, testing.io, .{ + .argv = &.{ "git", "-C", dir, "rev-parse", "HEAD" }, + .stdout_limit = .limited(1024), + }); + defer allocator.free(result.stdout); + defer allocator.free(result.stderr); + const sha = std.mem.trim(u8, result.stdout, " \t\r\n"); + const sha_owned = try allocator.dupe(u8, sha); + defer allocator.free(sha_owned); + + // Add and commit a second portfolio file. + const file_b = + \\#!srfv1 + \\symbol::MSFT,shares:num:5,open_date::2024-02-01,open_price:num:300,account::Sample Roth + \\ + ; + try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio_other.srf", .data = file_b }); + try gitInTestRepo(allocator, dir, &.{ "add", "portfolio_other.srf" }); + try gitInTestRepo(allocator, dir, &.{ "commit", "-q", "-m", "add second" }); + + const p1 = try std.fs.path.join(allocator, &.{ dir, "portfolio.srf" }); + defer allocator.free(p1); + const p2 = try std.fs.path.join(allocator, &.{ dir, "portfolio_other.srf" }); + defer allocator.free(p2); + const paths = [_][]const u8{ p1, p2 }; + + // Load at commit 1 — second file didn't exist yet. Expect + // only AAPL from portfolio.srf, no error. + var loaded = loadPortfolioFromPathsAtRev(testing.io, allocator, &paths, sha_owned, zfin.Date.fromYmd(2026, 5, 23)) orelse { + try testing.expect(false); + return; + }; + defer loaded.deinit(allocator); + + try testing.expectEqual(@as(usize, 1), loaded.portfolio.lots.len); + try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol); +}