track multiple portfolio files in past revisions as well
All checks were successful
Generic zig build / build (push) Successful in 2m14s
Generic zig build / deploy (push) Successful in 19s

This commit is contained in:
Emil Lerch 2026-05-24 09:44:35 -07:00
parent e9b61ba94c
commit b9d2ee4bd3
Signed by: lobo
GPG key ID: A7B62D657EF764F8
7 changed files with 724 additions and 182 deletions

View file

@ -341,9 +341,9 @@ Ask the user instead.**
add it as a new top-level section; don't leave the parent entry around
as a historical wrapper.
- **`REPORT.md` is untracked on purpose.** It's a personal workflow doc
living in the repo root only until it moves to `~/finance`. Edit it
freely when asked; don't treat it as part of the repo surface. Don't
mention it in commit messages for unrelated work.
living in the repo root only until it moves to the user's portfolio
directory. Edit it freely when asked; don't treat it as part of the
repo surface. Don't mention it in commit messages for unrelated work.
### Lint warnings — there are no "pre-existing" warnings
@ -465,7 +465,7 @@ across the codebase so search-and-replace stays trivial):
`smpl_1234`, `smpl-ira-1234`
- Account numbers: `1234`, `5678`, `9012`, `3456`, `7890`, or
alphanumeric like `Z123`, `Z111`, `Z222`. Do not use real
trailing-digit values from `~/finance/`.
trailing-digit values from the user's actual accounts file.
- Portfolio file names: `portfolio_other.srf`, never
`portfolio_<real-name>.srf`.
- Schwab/Fidelity-style: `Schwab Trust`, `Inherited IRA`, `Roth
@ -474,23 +474,22 @@ across the codebase so search-and-replace stays trivial):
**Things that are NEVER OK in source:**
- Real first names of family members. (No `Emil`, `Elizabeth`,
`Kelly`, `Mom`, `Dad`, etc.)
- Real account-number trailing digits used in
`~/finance/accounts.srf` (e.g. `6135`, `3522`, `7891`, `716`,
`901`, `503`, `311`, `152`, `118`, `Z30619248`,
`229948882`, etc.). Any number that came from a real
brokerage entry is PII.
- Real portfolio filenames like `portfolio_mom.srf`,
`portfolio_<name>.srf`.
- Composite identifiers that combine the above (`Mom Roth IRA`,
`Joint trust ...716`, etc.).
- Real first names of family members.
- Real account-number trailing digits used in the user's actual
accounts file. Any number that came from a real brokerage entry
is PII, regardless of length or format.
- Real portfolio filenames matching `portfolio_<real-name>.srf`.
- Composite identifiers that combine real names and real
account-number digits.
- The user's actual portfolio directory path (leaks filesystem
layout) — refer generically to "the portfolio's `history/`
directory" or similar.
**Workflow rule when adding a test based on a real-world
scenario:**
1. Reproduce the bug locally with real data (in `~/finance/`,
never staged).
1. Reproduce the bug locally with real data (the user's portfolio
directory; never staged).
2. Write the test using **placeholder names and numbers** that
preserve the structural shape of the bug (same string
lengths, same pattern of digits-vs-letters, same separator
@ -503,24 +502,35 @@ scenario:**
**Workflow rule when finding existing PII:**
If you grep for the placeholder vocabulary while working in any
file and find a real name or number that snuck in, fix it in
the same change. Don't add to TODO; PII removal is never
optional, and it never lands in a separate commit unless the
user explicitly asks.
If you grep for PII while working in any file and find a real
name or number that snuck in, fix it in the same change. Don't
add to TODO; PII removal is never optional, and it never lands
in a separate commit unless the user explicitly asks.
**One-line grep that should ALWAYS return zero non-`ie_data.csv`
hits before committing:**
**Pre-commit grep recipe.** Read the user's actual accounts
metadata locally (NOT staged, NOT in this file), extract the
identifying tokens, and grep `src/` for any of them. Conceptual
shape:
```
grep -rn "\bMom\b\|Elizabeth\|Joint trust\|portfolio_mom\|\bEmil\b\|Fidelity Emil\|6135\|Z30619248" src/ \
| grep -v ie_data.csv
# Build the alternation FROM the user's accounts file at runtime —
# do NOT hardcode the values into source-tracked tooling.
local_pii_tokens=$(awk -F: '...' "$ZFIN_HOME/accounts.srf" ...)
grep -rn -E "$local_pii_tokens" src/ | grep -v ie_data.csv
```
(Update the alternation as new real-world identifiers come up.
The `ie_data.csv` exclusion is because the Shiller dataset
contains coincidental numeric matches in historical-year fields
that aren't PII.)
The grep should always return zero non-`ie_data.csv` hits before
committing. The `ie_data.csv` exclusion is because the Shiller
dataset contains coincidental numeric matches in historical-year
fields that aren't PII.
If you're uncertain whether something is PII, **ask before
committing.** PII can be surgically removed from a working
tree, but once it's in `git log` it's effectively permanent.
**That includes this file** — never put real identifiers in
AGENTS.md or any other tracked file as "examples of what to look
for." The placeholder vocabulary above is the only safe way to
illustrate the patterns.
If you're uncertain whether something is PII, **ask before
committing.** PII can be surgically removed from a working

View file

@ -1003,7 +1003,8 @@ test "parsePaste: trailing cash section emits a cash position" {
test "parsePaste: cash section absent is a no-op" {
// When the user only pastes the positions table (no cash
// section), parsePaste returns just the positions. Regression
// for the original 3522.txt-style paste shape.
// for the simpler positions-only paste shape (no trailing
// cash table).
const allocator = testing.allocator;
const data =
"GSLC , popup\n" ++

View file

@ -274,6 +274,14 @@ pub const RunCtx = struct {
/// portfolio file. Returns the default pattern's first match
/// when -p is not set; falls through to a literal default if
/// nothing matched.
///
/// **Do NOT use this for portfolio CONTENT loading.** Portfolio
/// content must come from the multi-file glob via
/// `cli.loadPortfolio` (live) or
/// `portfolio_loader.loadPortfolioFromPathsAtRev` (git
/// historical). This singular helper is for choosing ONE
/// concrete path to derive sibling files from never for
/// reading lots out of.
pub fn resolvePortfolioPath(self: *RunCtx) ResolvedPath {
var paths = self.resolvePortfolioPaths() catch {
return .{ .path = zfin.Config.default_portfolio_filename, .resolved = null };

View file

@ -2,10 +2,17 @@
//!
//! Flow:
//! 1. Locate portfolio.srf via `config.resolveUserFile` (or -p).
//! The first resolved file's directory determines the
//! `history/` location (sibling-of-first convention).
//! 2. Derive `history/` dir as `dirname(portfolio.srf)/history/`.
//! 3. Load portfolio composition. Normally from the working-copy
//! file; with `--as-of`, from git history at or before the
//! requested date (falling back to working copy if pre-git).
//! 3. Load portfolio composition as the **union of all matched
//! portfolio files** (multi-file glob support). Normally from
//! the working tree via `cli.loadPortfolio`; with `--as-of`,
//! from git history at the repo-wide latest sha the
//! requested date via `loadPortfolioFromPathsAtRev`. Files
//! that didn't exist at that sha are silently skipped the
//! union just doesn't include those lots. Falls back to
//! working copy if git is unavailable.
//! 4. Refresh the candle cache via `cli.loadPortfolioPrices`
//! (skipped under `--as-of` past candles don't change).
//! 5. Compute `as_of_date`: explicit `--as-of` wins; otherwise mode
@ -17,6 +24,11 @@
//! `--force` wasn't passed, skip (exit 0, stderr message).
//! 8. Build the snapshot records and write them atomically.
//!
//! The snapshot file itself is a SINGLE union'd record set
//! consumers (compare, projections --vs, TUI history tab) read one
//! `<date>-portfolio.srf` per date and don't need to know how many
//! source files contributed to it.
//!
//! Flags:
//! --force overwrite existing snapshot for as_of_date
//! --dry-run compute + print to stdout, do not write
@ -40,6 +52,7 @@ const portfolio_mod = @import("../models/portfolio.zig");
const Date = @import("../Date.zig");
const model = @import("../models/snapshot.zig");
const git = @import("../git.zig");
const portfolio_loader = @import("../portfolio_loader.zig");
// Re-export record types so callers that reach `commands/snapshot.zig`
// (tests, mostly) still see the familiar names. New code should prefer
@ -150,6 +163,7 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
const out = ctx.out;
const color = ctx.color;
const now_s = ctx.now_s;
const today = ctx.today;
const force = parsed.force;
const dry_run = parsed.dry_run;
@ -160,22 +174,24 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
defer pf.deinit(allocator);
const portfolio_path = pf.path;
// Load portfolio bytes. In normal (no --as-of) mode this is the
// current working-copy of portfolio.srf. With --as-of, we first try
// to retrieve the portfolio state from git history at or before the
// target date that gives accurate composition for past snapshots.
// If git lookup fails (portfolio not tracked, no commits before the
// date, git unavailable), we warn and fall back to the working copy,
// which at least approximates "positions the user currently holds"
// and is better than erroring out.
const pf_data = try loadPortfolioAtDate(io, allocator, portfolio_path, as_of_override);
defer allocator.free(pf_data);
var portfolio = zfin.cache.deserializePortfolio(allocator, pf_data) catch {
cli.stderrPrint(io, "Error parsing portfolio file.\n");
return error.WriteFailed;
};
defer portfolio.deinit();
// Load portfolio. In normal (no --as-of) mode this is the
// current working-copy union of all matched portfolio files.
// With --as-of, we first try to retrieve the portfolio state
// from git history at or before the target date that gives
// accurate composition for past snapshots. If git lookup fails
// (portfolio not tracked, no commits before the date, git
// unavailable), we warn and fall back to the working-copy
// union, which at least approximates "positions the user
// currently holds" and is better than erroring out.
//
// `portfolio_path` (singular, first-resolved) is used only for
// sibling-derivation (history dir, snapshot file path); the
// portfolio CONTENT comes from the multi-file glob.
var loaded = try loadPortfolioForSnapshot(ctx, today, as_of_override);
defer loaded.deinit(allocator);
var portfolio = loaded.portfolio;
// We don't deinit `portfolio` separately `loaded.deinit`
// handles it.
if (portfolio.lots.len == 0) {
cli.stderrPrint(io, "Portfolio is empty; nothing to snapshot.\n");
@ -393,80 +409,88 @@ pub fn deriveSnapshotPath(
/// file" — the file's current state IS its mtime state. A clean exit
/// lets bulk-backfill loops keep moving.
///
/// Caller owns returned bytes.
fn loadPortfolioAtDate(
io: std.Io,
allocator: std.mem.Allocator,
portfolio_path: []const u8,
/// Caller owns the returned `LoadedPortfolio`.
///
/// In normal (no `--as-of`) mode this loads the working-copy union
/// of all matched portfolio files via `cli.loadPortfolio`. With
/// `--as-of`, it discovers the repo-wide latest sha at or before
/// the target date (`git.shaAtOrBefore`) and reads each file in
/// the glob at that sha (`portfolio_loader.loadPortfolioFromPathsAtRev`).
/// Files that didn't exist at that sha are silently skipped the
/// union just doesn't include those lots, which is correct for
/// "snapshot of state on this date."
///
/// On git lookup failure (not in a repo, no commit before the
/// target, etc.), warns and falls back to the working-copy union
/// better to capture today's state than fail entirely.
fn loadPortfolioForSnapshot(
ctx: *framework.RunCtx,
today: Date,
as_of: ?Date,
) ![]const u8 {
) !portfolio_loader.LoadedPortfolio {
const io = ctx.io;
const allocator = ctx.allocator;
const target = as_of orelse {
// Normal mode just read the file.
return std.Io.Dir.cwd().readFileAlloc(io, portfolio_path, allocator, .limited(10 * 1024 * 1024)) catch |err| {
cli.stderrPrint(io, "Error reading portfolio file: ");
cli.stderrPrint(io, @errorName(err));
cli.stderrPrint(io, "\n");
return err;
};
// Normal mode load working-copy union via the shared
// multi-file loader. `today` is used as the as_of for
// position computation.
return cli.loadPortfolio(ctx, today) orelse return error.WriteFailed;
};
// Try git first.
if (loadPortfolioFromGit(io, allocator, portfolio_path, target)) |bytes| return bytes else |err| switch (err) {
error.NotInGitRepo, error.GitUnavailable, error.PathMissingInRev, error.UnknownRevision, error.NoCommitBeforeDate => {
// Fall through to working-copy fallback below.
var buf: [256]u8 = undefined;
const msg = std.fmt.bufPrint(
&buf,
"warning: no git history for portfolio at {f}; using working copy as approximation\n",
.{target},
) catch "warning: no git history for portfolio at requested date\n";
cli.stderrPrint(io, msg);
// --as-of mode: find the repo-wide sha at or before target.
// Need a single concrete path to discover the repo from; use
// the first resolved portfolio path.
const pf = ctx.resolvePortfolioPath();
defer pf.deinit(allocator);
const portfolio_path = pf.path;
const info = git.findRepo(io, allocator, portfolio_path) catch |err| switch (err) {
error.NotInGitRepo, error.GitUnavailable => {
warnGitFallback(io, target, "no git repo");
return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed;
},
else => |e| return e,
}
return std.Io.Dir.cwd().readFileAlloc(io, portfolio_path, allocator, .limited(10 * 1024 * 1024)) catch |err| {
cli.stderrPrint(io, "Error reading portfolio file: ");
cli.stderrPrint(io, @errorName(err));
cli.stderrPrint(io, "\n");
return err;
else => return err,
};
}
/// `git show`-based retrieval of portfolio bytes at or before `target`.
/// Returns the raw bytes (caller owns) or an error classifying the
/// failure mode so `loadPortfolioAtDate` can decide whether to fall
/// back.
fn loadPortfolioFromGit(
io: std.Io,
allocator: std.mem.Allocator,
portfolio_path: []const u8,
target: Date,
) ![]const u8 {
const info = try git.findRepo(io, allocator, portfolio_path);
defer allocator.free(info.root);
defer allocator.free(info.rel_path);
// List all commits that touched this path, newest-first.
const commits = try git.listCommitsTouching(io, allocator, info.root, info.rel_path, null);
defer git.freeCommitTouches(allocator, commits);
if (commits.len == 0) return error.PathMissingInRev;
// Find the latest commit whose committer timestamp falls on or before
// the end of `target` day (23:59:59 UTC). The list is newest-first
// per git log's default order, so we scan linearly.
const target_end_ts = target.toEpoch() + std.time.s_per_day - 1;
const sha: ?[]const u8 = blk: {
for (commits) |c| {
if (c.timestamp <= target_end_ts) break :blk c.commit;
}
break :blk null;
var date_buf: [10]u8 = undefined;
const date_str = std.fmt.bufPrint(&date_buf, "{f}", .{target}) catch "????-??-??";
const sha_opt = git.shaAtOrBefore(io, allocator, info.root, date_str) catch |err| switch (err) {
error.GitUnavailable, error.GitLogFailed => {
warnGitFallback(io, target, "git lookup failed");
return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed;
},
else => return err,
};
const sha = sha_opt orelse {
warnGitFallback(io, target, "no commit at or before target date");
return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed;
};
defer allocator.free(sha);
const chosen = sha orelse return error.NoCommitBeforeDate;
return try git.show(io, allocator, info.root, chosen, info.rel_path);
// Resolve the full glob and load each file at `sha`.
var resolved = ctx.resolvePortfolioPaths() catch {
warnGitFallback(io, target, "could not resolve portfolio glob");
return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed;
};
defer resolved.deinit();
return portfolio_loader.loadPortfolioFromPathsAtRev(io, allocator, resolved.paths, sha, target) orelse {
warnGitFallback(io, target, "could not load portfolio at rev");
return cli.loadPortfolio(ctx, target) orelse return error.WriteFailed;
};
}
fn warnGitFallback(io: std.Io, target: Date, reason: []const u8) void {
var buf: [256]u8 = undefined;
const msg = std.fmt.bufPrint(
&buf,
"warning: {s} for portfolio at {f}; using working copy as approximation\n",
.{ reason, target },
) catch "warning: git history unavailable; using working copy as approximation\n";
cli.stderrPrint(io, msg);
}
// Quote-date / as_of_date helpers

View file

@ -1,6 +1,6 @@
//! Imported portfolio history values from a manually-curated spreadsheet.
//!
//! Pre-dates `~/finance/history/` snapshot capture. Contains weekly
//! Pre-dates the `history/` snapshot capture. Contains weekly
//! `(date, liquid, expected_return, projected_retirement)` tuples
//! transcribed from a third-party spreadsheet (the "K+E Funds"
//! workbook for the primary portfolio, plus optional sibling
@ -11,7 +11,7 @@
//! 1. The user exports the spreadsheet to CSV via `ssconvert -S`.
//! 2. `tools/import_values.zig` (a one-shot Zig program) consumes
//! the CSV and emits an `imported_values.srf` next to the
//! repo's `~/finance/history/` snapshot directory.
//! portfolio's `history/` snapshot directory.
//! 3. This module loads that SRF for downstream consumers
//! (`zfin milestones`, projection overlay, forecast-vs-actual).
//!

View file

@ -399,6 +399,52 @@ pub fn commitAtOrBeforeDate(
return try allocator.dupe(u8, trimmed);
}
/// Repo-wide variant of `commitAtOrBeforeDate`. Returns the SHA of
/// the latest commit on HEAD whose committer-date is `date_iso`
/// 23:59:59 local, regardless of which paths the commit touched.
/// Returns null when no such commit exists.
///
/// Used by `zfin snapshot --as-of` to pick a single repo-wide
/// "as of date" reference; each portfolio file in the multi-file
/// glob is then read at that one SHA via `git show`. This produces
/// a coherent point-in-time snapshot files that didn't exist at
/// that SHA (`error.PathMissingInRev`) are treated as absent
/// rather than as errors.
pub fn shaAtOrBefore(
io: std.Io,
allocator: std.mem.Allocator,
root: []const u8,
date_iso: []const u8,
) Error!?[]const u8 {
// Same end-of-day pinning as `commitAtOrBeforeDate` (see that
// function's comment). No `-- <path>` filter we want the
// repo-wide latest.
const until_arg = try std.fmt.allocPrint(allocator, "--until={s} 23:59:59", .{date_iso});
defer allocator.free(until_arg);
const result = std.process.run(allocator, io, .{
.argv = &.{
"git", "-C", root,
"log", "-1", "--format=%H",
"HEAD", until_arg,
},
.stdout_limit = .limited(64 * 1024),
}) catch return error.GitUnavailable;
defer allocator.free(result.stdout);
defer allocator.free(result.stderr);
switch (result.term) {
.exited => |code| if (code != 0) return error.GitLogFailed,
else => return error.GitLogFailed,
}
const trimmed = std.mem.trim(u8, result.stdout, " \t\r\n");
if (trimmed.len == 0) return null;
if (trimmed.len < 40) return error.GitLogFailed;
for (trimmed) |c| if (!std.ascii.isHex(c)) return error.GitLogFailed;
return try allocator.dupe(u8, trimmed);
}
/// Get the committer-date (Unix timestamp, seconds since epoch) of
/// a given ref. Accepts anything `git log -1 --format=%ct <ref>`
/// accepts: SHAs, HEAD, HEAD~N. Used by the contributions pipeline
@ -521,7 +567,8 @@ fn resolveSpec(io: std.Io, arena: std.mem.Allocator, repo: RepoInfo, spec: Commi
.git_ref => |r| r,
.date_at_or_before => |d| blk: {
var buf: [10]u8 = undefined;
const date_str = std.fmt.bufPrint(&buf, "{f}", .{d}) catch unreachable;
// SAFETY: 10-byte buffer is exactly the size of "YYYY-MM-DD".
const date_str = std.fmt.bufPrint(&buf, "{f}", .{d}) catch buf[0..];
const sha = (try commitAtOrBeforeDate(io, arena, repo.root, repo.rel_path, date_str)) orelse
return error.NoCommitAtOrBefore;
break :blk sha;
@ -655,6 +702,34 @@ test "commitAtOrBeforeDate: --until=DATE covers end of day, not current time-of-
if (sha_opt) |s| allocator.free(s);
}
test "shaAtOrBefore returns a SHA for a past date in the ambient repo" {
// Repo-wide variant of `commitAtOrBeforeDate` finds the latest
// commit on HEAD the given date, regardless of paths touched.
// Same future-date trick as the path-scoped variant: a date well
// beyond now should always return the tip of history.
const allocator = std.testing.allocator;
const info = findRepo(std.testing.io, allocator, "build.zig") catch return;
defer allocator.free(info.root);
defer allocator.free(info.rel_path);
const sha_opt = shaAtOrBefore(std.testing.io, allocator, info.root, "2099-01-01") catch return;
try std.testing.expect(sha_opt != null);
const sha = sha_opt.?;
defer allocator.free(sha);
try std.testing.expect(sha.len == 40 or sha.len == 64);
for (sha) |c| try std.testing.expect(std.ascii.isHex(c));
}
test "shaAtOrBefore returns null for date before repo existed" {
const allocator = std.testing.allocator;
const info = findRepo(std.testing.io, allocator, "build.zig") catch return;
defer allocator.free(info.root);
defer allocator.free(info.rel_path);
const sha_opt = shaAtOrBefore(std.testing.io, allocator, info.root, "1970-01-02") catch return;
try std.testing.expect(sha_opt == null);
}
test "resolveCommitRange: legacy clean → HEAD~1..HEAD" {
var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena_state.deinit();

View file

@ -36,6 +36,19 @@
//! the TUI's reload-button path (re-uses the original resolved
//! path slice without re-globbing).
//!
//! - `loadPortfolioFromPathsAtRev(io, alloc, paths, rev, as_of)`
//! git-historical variant: read each file at `rev` via
//! `git.show` instead of from the working tree. Files that don't
//! exist at the rev are silently skipped (the union just doesn't
//! include those lots). Used by `zfin snapshot --as-of` to
//! capture point-in-time portfolio state across a multi-file
//! glob from git history.
//!
//! All three loaders share a single private `loadFromBytes` core
//! that does the deserialize + union-merge + position-compute +
//! symbol-extract pass. Tests target `loadFromBytes` directly with
//! synthetic byte literals to avoid filesystem I/O.
//!
//! - `PortfolioData` + `buildPortfolioData(...)` second-stage
//! pipeline: turn a `LoadedPortfolio` (or its parts) plus a
//! `prices` map into a `PortfolioSummary` with allocations,
@ -45,6 +58,7 @@ const std = @import("std");
const zfin = @import("root.zig");
const framework = @import("commands/framework.zig");
const stderr = @import("stderr.zig");
const git = @import("git.zig");
// Portfolio loading
@ -178,6 +192,119 @@ pub fn loadPortfolioFromPaths(io: std.Io, allocator: std.mem.Allocator, paths: [
return loadFromPaths(io, allocator, paths_owned, null, as_of);
}
/// Like `loadPortfolioFromPaths`, but reads each file from a git
/// revision instead of the working tree. The result is the union
/// of all files at that rev, deserialized + union-merged the same
/// way the live loader does.
///
/// Files that don't exist at the requested rev (e.g.
/// `portfolio_other.srf` was added later than `rev`) are silently
/// skipped the union just doesn't include those lots. This is the
/// right behavior for `zfin snapshot --as-of <past-date>` against a
/// portfolio that's been split into multiple files over time.
///
/// All `paths` must live within the same git repository as the
/// first path. Repository discovery uses `git.findRepo` on the
/// first path; the rest derive their rel-paths by trimming the
/// repo-root prefix.
///
/// Returns null on:
/// - empty paths slice (no portfolio file to load);
/// - first path not in any git repo;
/// - any deserialize / merge / position-compute failure (same as
/// the working-tree loader).
///
/// `git.show` failures other than `PathMissingInRev` (e.g.
/// `UnknownRevision`) propagate as null; the function prints a
/// stderr message describing which file/rev failed.
pub fn loadPortfolioFromPathsAtRev(
io: std.Io,
allocator: std.mem.Allocator,
paths: []const []const u8,
rev: []const u8,
as_of: zfin.Date,
) ?LoadedPortfolio {
if (paths.len == 0) {
stderr.print(io, "Error: No portfolio file found\n");
return null;
}
// Discover the repo from the first path. All other paths are
// assumed to live in the same repo (typical for sibling
// portfolio files in the same directory); we derive their
// rel-paths below.
const info = git.findRepo(io, allocator, paths[0]) catch {
var msg_buf: [512]u8 = undefined;
const msg = std.fmt.bufPrint(&msg_buf, "Error: portfolio path not in a git repo: {s}\n", .{paths[0]}) catch "Error: portfolio path not in a git repo\n";
stderr.print(io, msg);
return null;
};
defer allocator.free(info.root);
defer allocator.free(info.rel_path);
// Read each file at `rev`. On `PathMissingInRev` we substitute
// empty bytes; `loadFromBytes` knows to treat that as "file
// absent at this rev, skip without parsing." Any other git
// error is fatal.
var datas: std.ArrayList([]const u8) = .empty;
var datas_consumed = false;
defer if (!datas_consumed) {
for (datas.items) |d| allocator.free(d);
datas.deinit(allocator);
};
for (paths) |abs_path| {
// Derive rel_path from this file's absolute path. The first
// file's rel_path was computed by findRepo; for the rest we
// strip the repo root prefix manually. realpath ensures
// canonicalization (the user could pass a non-canonical
// path).
const real = std.Io.Dir.cwd().realPathFileAlloc(io, abs_path, allocator) catch {
var msg_buf: [512]u8 = undefined;
const msg = std.fmt.bufPrint(&msg_buf, "Error: cannot resolve real path for: {s}\n", .{abs_path}) catch "Error: cannot resolve real path\n";
stderr.print(io, msg);
return null;
};
defer allocator.free(real);
const rel = if (std.mem.startsWith(u8, real, info.root) and real.len > info.root.len)
std.mem.trimStart(u8, real[info.root.len..], "/")
else
std.fs.path.basename(real);
const data = git.show(io, allocator, info.root, rev, rel) catch |err| switch (err) {
error.PathMissingInRev => empty_blk: {
// File didn't exist at this rev. Substitute empty
// bytes; loadFromBytes skips empty entries.
break :empty_blk allocator.dupe(u8, "") catch return null;
},
else => {
var msg_buf: [512]u8 = undefined;
const msg = std.fmt.bufPrint(&msg_buf, "Error: git show {s}:{s}: {t}\n", .{ rev, rel, err }) catch "Error: git show failed\n";
stderr.print(io, msg);
return null;
},
};
datas.append(allocator, data) catch {
allocator.free(data);
return null;
};
}
const datas_owned = datas.toOwnedSlice(allocator) catch return null;
datas_consumed = true;
// Hand off ownership of the slice we just dupe'd from the
// caller. `loadFromBytes` frees both `paths_owned` and
// `datas_owned` on any failure.
const paths_owned = allocator.dupe([]const u8, paths) catch {
for (datas_owned) |d| allocator.free(d);
allocator.free(datas_owned);
return null;
};
return loadFromBytes(io, allocator, paths_owned, null, datas_owned, as_of);
}
/// Internal: load+merge given a pre-resolved paths slice. The slice
/// `paths_owned` is taken (will be freed by `LoadedPortfolio.deinit`).
/// `resolved_paths_opt` is the optional `Config.ResolvedPaths` to
@ -190,21 +317,8 @@ fn loadFromPaths(
resolved_paths_opt: ?zfin.Config.ResolvedPaths,
as_of: zfin.Date,
) ?LoadedPortfolio {
// On any error after this point we must free the slice we just
// took ownership of, plus deinit the `resolved_paths_opt` so the
// path strings aren't leaked.
var error_cleanup_armed = true;
defer if (error_cleanup_armed) {
allocator.free(paths_owned);
if (resolved_paths_opt) |rp| rp.deinit();
};
// Read every file up front; bail on first error.
var file_datas: std.ArrayList([]const u8) = .empty;
errdefer {
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
}
for (paths_owned) |p| {
const data = std.Io.Dir.cwd().readFileAlloc(io, p, allocator, .limited(10 * 1024 * 1024)) catch {
var msg_buf: [512]u8 = undefined;
@ -212,64 +326,109 @@ fn loadFromPaths(
stderr.print(io, msg);
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
allocator.free(paths_owned);
if (resolved_paths_opt) |rp| rp.deinit();
return null;
};
file_datas.append(allocator, data) catch {
allocator.free(data);
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
allocator.free(paths_owned);
if (resolved_paths_opt) |rp| rp.deinit();
return null;
};
}
const file_datas_owned = file_datas.toOwnedSlice(allocator) catch {
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
allocator.free(paths_owned);
if (resolved_paths_opt) |rp| rp.deinit();
return null;
};
// Hand off to the bytes-only path. `loadFromBytes` takes
// ownership of all four slices and frees them on any error.
return loadFromBytes(io, allocator, paths_owned, resolved_paths_opt, file_datas_owned, as_of);
}
/// Bytes-only union-merge core. Takes ownership of `paths_owned`,
/// `resolved_paths_opt` (if non-null), and `file_datas_owned` (each
/// element must be `allocator`-owned). Frees them on any error.
///
/// Tests use this directly with synthetic byte literals; production
/// callers come through `loadFromPaths` (working-tree reads) or
/// `loadPortfolioFromPathsAtRev` (git-historical reads). All three
/// share this single deserialize-merge-positions-syms pass.
///
/// `paths_owned` and `file_datas_owned` must have the same length;
/// each `file_datas_owned[i]` is the bytes loaded from
/// `paths_owned[i]`. Path strings are used only for diagnostics
/// (which file failed to parse) and in the returned struct.
fn loadFromBytes(
io: std.Io,
allocator: std.mem.Allocator,
paths_owned: []const []const u8,
resolved_paths_opt: ?zfin.Config.ResolvedPaths,
file_datas_owned: []const []const u8,
as_of: zfin.Date,
) ?LoadedPortfolio {
std.debug.assert(paths_owned.len == file_datas_owned.len);
// Cleanup is staged: lots live on `merged` until
// `toOwnedSlice`, then move into `combined` (which has its
// own deinit). The two cleanup paths are mutually exclusive,
// tracked by `lots_owner`. On success, neither fires.
var merged: std.ArrayList(zfin.Lot) = .empty;
var combined: zfin.Portfolio = .{ .lots = &.{}, .allocator = allocator };
const LotsOwner = enum { merged_list, combined_struct, none };
var lots_owner: LotsOwner = .merged_list;
var success = false;
defer if (!success) {
switch (lots_owner) {
.merged_list => {
for (merged.items) |lot| {
allocator.free(lot.symbol);
if (lot.note) |n| allocator.free(n);
if (lot.account) |a| allocator.free(a);
if (lot.ticker) |t| allocator.free(t);
if (lot.underlying) |u| allocator.free(u);
}
merged.deinit(allocator);
},
.combined_struct => combined.deinit(),
.none => {},
}
for (file_datas_owned) |d| allocator.free(d);
allocator.free(file_datas_owned);
allocator.free(paths_owned);
if (resolved_paths_opt) |rp| rp.deinit();
};
// Deserialize each into an owned Portfolio, then merge their
// lot slices into a single combined slice. We can't simply
// concat the underlying slices because each Portfolio expects
// to free its own lots in `deinit()`; instead, we steal each
// Portfolio's lots[] (string fields are already dupe'd into
// `allocator`) and free only the empty Portfolio struct.
var merged: std.ArrayList(zfin.Lot) = .empty;
errdefer {
for (merged.items) |lot| {
allocator.free(lot.symbol);
if (lot.note) |n| allocator.free(n);
if (lot.account) |a| allocator.free(a);
if (lot.ticker) |t| allocator.free(t);
if (lot.underlying) |u| allocator.free(u);
}
merged.deinit(allocator);
}
for (file_datas_owned, 0..) |data, idx| {
// Empty bytes are a legitimate "file absent" signal
// `loadPortfolioFromPathsAtRev` uses this to indicate a
// file that didn't exist at the requested git rev. Skip
// without trying to parse.
if (data.len == 0) continue;
for (file_datas.items, 0..) |data, idx| {
var portfolio = zfin.cache.deserializePortfolio(allocator, data) catch {
var msg_buf: [512]u8 = undefined;
const msg = std.fmt.bufPrint(&msg_buf, "Error: Cannot parse portfolio file: {s}\n", .{paths_owned[idx]}) catch "Error: Cannot parse portfolio file\n";
stderr.print(io, msg);
for (merged.items) |lot| {
allocator.free(lot.symbol);
if (lot.note) |n| allocator.free(n);
if (lot.account) |a| allocator.free(a);
if (lot.ticker) |t| allocator.free(t);
if (lot.underlying) |u| allocator.free(u);
}
merged.deinit(allocator);
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
return null;
};
for (portfolio.lots) |lot| {
merged.append(allocator, lot) catch {
portfolio.deinit();
for (merged.items) |existing| {
allocator.free(existing.symbol);
if (existing.note) |n| allocator.free(n);
if (existing.account) |a| allocator.free(a);
if (existing.ticker) |t| allocator.free(t);
if (existing.underlying) |u| allocator.free(u);
}
merged.deinit(allocator);
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
return null;
};
}
@ -278,42 +437,25 @@ fn loadFromPaths(
allocator.free(portfolio.lots);
}
const merged_slice = merged.toOwnedSlice(allocator) catch {
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
return null;
};
var combined: zfin.Portfolio = .{
const merged_slice = merged.toOwnedSlice(allocator) catch return null;
combined = .{
.lots = merged_slice,
.allocator = allocator,
};
lots_owner = .combined_struct;
const positions = combined.positions(as_of, allocator) catch {
combined.deinit();
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
stderr.print(io, "Error: Cannot compute positions\n");
return null;
};
const syms = combined.stockSymbols(allocator) catch {
allocator.free(positions);
combined.deinit();
for (file_datas.items) |d| allocator.free(d);
file_datas.deinit(allocator);
stderr.print(io, "Error: Cannot get stock symbols\n");
return null;
};
const file_datas_owned = file_datas.toOwnedSlice(allocator) catch {
allocator.free(syms);
allocator.free(positions);
combined.deinit();
return null;
};
error_cleanup_armed = false;
success = true;
return .{
.paths = paths_owned,
.resolved_paths = resolved_paths_opt,
@ -399,3 +541,285 @@ pub fn buildPortfolioData(
.snapshots = snapshots,
};
}
// loadFromBytes tests (in-memory; no disk I/O)
//
// All tests here construct synthetic SRF byte literals and route
// them through `loadFromBytes` directly. Both production callers
// (working-tree `loadFromPaths` and git-historical
// `loadPortfolioFromPathsAtRev`) share this core, so unit-testing
// it covers the union-merge / parse-error / empty-bytes behavior
// without spinning up temp filesystems or git repos.
const testing = std.testing;
/// Test helper: dupe a slice of byte-string literals into the
/// allocator so `loadFromBytes` can free them on its own.
fn dupeBytes(allocator: std.mem.Allocator, datas: []const []const u8) ![]const []const u8 {
var owned = try allocator.alloc([]const u8, datas.len);
errdefer {
for (owned[0..]) |d| allocator.free(d);
allocator.free(owned);
}
var filled: usize = 0;
for (datas, 0..) |d, i| {
owned[i] = try allocator.dupe(u8, d);
filled = i + 1;
}
return owned;
}
test "loadFromBytes: union of two synthetic SRF files" {
const allocator = testing.allocator;
const file_a =
\\#!srfv1
\\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA
\\
;
const file_b =
\\#!srfv1
\\symbol::MSFT,shares:num:5,open_date::2024-02-01,open_price:num:300,account::Sample Roth
\\
;
const paths = try allocator.dupe([]const u8, &.{ "a.srf", "b.srf" });
const datas = try dupeBytes(allocator, &.{ file_a, file_b });
var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse {
try testing.expect(false);
return;
};
defer loaded.deinit(allocator);
try testing.expectEqual(@as(usize, 2), loaded.portfolio.lots.len);
// Lots are appended in file order.
try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol);
try testing.expectEqualStrings("MSFT", loaded.portfolio.lots[1].symbol);
}
test "loadFromBytes: single file with valid contents" {
const allocator = testing.allocator;
const file_a =
\\#!srfv1
\\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA
\\
;
const paths = try allocator.dupe([]const u8, &.{"a.srf"});
const datas = try dupeBytes(allocator, &.{file_a});
var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse {
try testing.expect(false);
return;
};
defer loaded.deinit(allocator);
try testing.expectEqual(@as(usize, 1), loaded.portfolio.lots.len);
try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol);
}
test "loadFromBytes: empty-bytes entry is treated as absent file" {
// Production use case: `loadPortfolioFromPathsAtRev` returns
// empty bytes for a path that didn't exist at the requested
// git rev. The union-merge silently skips it instead of
// failing matches "snapshot at a date before this file
// was added" semantics.
const allocator = testing.allocator;
const file_a =
\\#!srfv1
\\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA
\\
;
const file_b: []const u8 = "";
const paths = try allocator.dupe([]const u8, &.{ "a.srf", "b.srf" });
const datas = try dupeBytes(allocator, &.{ file_a, file_b });
var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse {
try testing.expect(false);
return;
};
defer loaded.deinit(allocator);
try testing.expectEqual(@as(usize, 1), loaded.portfolio.lots.len);
try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol);
}
test "loadFromBytes: all-empty bytes returns empty portfolio" {
// Production use case: snapshot at a rev that predates ALL
// tracked portfolio files. No lots; no error.
const allocator = testing.allocator;
const paths = try allocator.dupe([]const u8, &.{ "a.srf", "b.srf" });
const datas = try dupeBytes(allocator, &.{ "", "" });
var loaded = loadFromBytes(testing.io, allocator, paths, null, datas, zfin.Date.fromYmd(2026, 5, 23)) orelse {
try testing.expect(false);
return;
};
defer loaded.deinit(allocator);
try testing.expectEqual(@as(usize, 0), loaded.portfolio.lots.len);
}
// loadPortfolioFromPathsAtRev tests (disk + git)
//
// These spin up a temp git repo so they exercise the full
// `git.show + skip-on-PathMissingInRev` contract end-to-end.
// Kept minimal (one happy-path, one missing-file case); the
// bulk of the logic is in `loadFromBytes`, covered above.
/// Run a one-shot git command in `cwd` for test setup. Panics on
/// failure these tests can't proceed without a working repo.
fn gitInTestRepo(allocator: std.mem.Allocator, cwd: []const u8, argv: []const []const u8) !void {
const full_argv = try allocator.alloc([]const u8, argv.len + 3);
defer allocator.free(full_argv);
full_argv[0] = "git";
full_argv[1] = "-C";
full_argv[2] = cwd;
@memcpy(full_argv[3..], argv);
const result = try std.process.run(allocator, testing.io, .{
.argv = full_argv,
.stdout_limit = .limited(64 * 1024),
});
defer allocator.free(result.stdout);
defer allocator.free(result.stderr);
switch (result.term) {
.exited => |code| if (code != 0) {
std.debug.print("git command failed (code {d}): {s}\nstderr: {s}\n", .{ code, std.mem.join(allocator, " ", argv) catch "?", result.stderr });
return error.GitFailed;
},
else => return error.GitFailed,
}
}
test "loadPortfolioFromPathsAtRev: union of two committed files at HEAD" {
const allocator = testing.allocator;
// Skip if `git` isn't on PATH (CI sandbox without git).
{
const probe = std.process.run(allocator, testing.io, .{
.argv = &.{ "git", "--version" },
.stdout_limit = .limited(1024),
}) catch return;
defer allocator.free(probe.stdout);
defer allocator.free(probe.stderr);
}
var tmp = std.testing.tmpDir(.{});
defer tmp.cleanup();
var path_buf: [std.fs.max_path_bytes]u8 = undefined;
const dir_len = try tmp.dir.realPathFile(testing.io, ".", &path_buf);
const dir = path_buf[0..dir_len];
const file_a =
\\#!srfv1
\\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA
\\
;
const file_b =
\\#!srfv1
\\symbol::MSFT,shares:num:5,open_date::2024-02-01,open_price:num:300,account::Sample Roth
\\
;
try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio.srf", .data = file_a });
try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio_other.srf", .data = file_b });
try gitInTestRepo(allocator, dir, &.{ "init", "-q" });
try gitInTestRepo(allocator, dir, &.{ "config", "user.email", "test@example.com" });
try gitInTestRepo(allocator, dir, &.{ "config", "user.name", "Test" });
try gitInTestRepo(allocator, dir, &.{ "config", "commit.gpgsign", "false" });
try gitInTestRepo(allocator, dir, &.{ "add", "portfolio.srf", "portfolio_other.srf" });
try gitInTestRepo(allocator, dir, &.{ "commit", "-q", "-m", "initial" });
const p1 = try std.fs.path.join(allocator, &.{ dir, "portfolio.srf" });
defer allocator.free(p1);
const p2 = try std.fs.path.join(allocator, &.{ dir, "portfolio_other.srf" });
defer allocator.free(p2);
const paths = [_][]const u8{ p1, p2 };
var loaded = loadPortfolioFromPathsAtRev(testing.io, allocator, &paths, "HEAD", zfin.Date.fromYmd(2026, 5, 23)) orelse {
try testing.expect(false);
return;
};
defer loaded.deinit(allocator);
try testing.expectEqual(@as(usize, 2), loaded.portfolio.lots.len);
}
test "loadPortfolioFromPathsAtRev: file added later is silently skipped at earlier rev" {
const allocator = testing.allocator;
// Skip if `git` isn't on PATH.
{
const probe = std.process.run(allocator, testing.io, .{
.argv = &.{ "git", "--version" },
.stdout_limit = .limited(1024),
}) catch return;
defer allocator.free(probe.stdout);
defer allocator.free(probe.stderr);
}
var tmp = std.testing.tmpDir(.{});
defer tmp.cleanup();
var path_buf: [std.fs.max_path_bytes]u8 = undefined;
const dir_len = try tmp.dir.realPathFile(testing.io, ".", &path_buf);
const dir = path_buf[0..dir_len];
const file_a =
\\#!srfv1
\\symbol::AAPL,shares:num:10,open_date::2024-01-15,open_price:num:150,account::Sample IRA
\\
;
try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio.srf", .data = file_a });
try gitInTestRepo(allocator, dir, &.{ "init", "-q" });
try gitInTestRepo(allocator, dir, &.{ "config", "user.email", "test@example.com" });
try gitInTestRepo(allocator, dir, &.{ "config", "user.name", "Test" });
try gitInTestRepo(allocator, dir, &.{ "config", "commit.gpgsign", "false" });
try gitInTestRepo(allocator, dir, &.{ "add", "portfolio.srf" });
try gitInTestRepo(allocator, dir, &.{ "commit", "-q", "-m", "initial" });
// Capture commit 1 SHA before adding file_b.
const result = try std.process.run(allocator, testing.io, .{
.argv = &.{ "git", "-C", dir, "rev-parse", "HEAD" },
.stdout_limit = .limited(1024),
});
defer allocator.free(result.stdout);
defer allocator.free(result.stderr);
const sha = std.mem.trim(u8, result.stdout, " \t\r\n");
const sha_owned = try allocator.dupe(u8, sha);
defer allocator.free(sha_owned);
// Add and commit a second portfolio file.
const file_b =
\\#!srfv1
\\symbol::MSFT,shares:num:5,open_date::2024-02-01,open_price:num:300,account::Sample Roth
\\
;
try tmp.dir.writeFile(testing.io, .{ .sub_path = "portfolio_other.srf", .data = file_b });
try gitInTestRepo(allocator, dir, &.{ "add", "portfolio_other.srf" });
try gitInTestRepo(allocator, dir, &.{ "commit", "-q", "-m", "add second" });
const p1 = try std.fs.path.join(allocator, &.{ dir, "portfolio.srf" });
defer allocator.free(p1);
const p2 = try std.fs.path.join(allocator, &.{ dir, "portfolio_other.srf" });
defer allocator.free(p2);
const paths = [_][]const u8{ p1, p2 };
// Load at commit 1 second file didn't exist yet. Expect
// only AAPL from portfolio.srf, no error.
var loaded = loadPortfolioFromPathsAtRev(testing.io, allocator, &paths, sha_owned, zfin.Date.fromYmd(2026, 5, 23)) orelse {
try testing.expect(false);
return;
};
defer loaded.deinit(allocator);
try testing.expectEqual(@as(usize, 1), loaded.portfolio.lots.len);
try testing.expectEqualStrings("AAPL", loaded.portfolio.lots[0].symbol);
}