From ce24878e3b0d191ed009826e267081626cf019bd Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Sat, 23 May 2026 08:09:31 -0700 Subject: [PATCH] import merge capabilities --- TODO.md | 60 ---- src/commands/import.zig | 631 ++++++++++++++++++++++++++++++++++------ 2 files changed, 548 insertions(+), 143 deletions(-) diff --git a/TODO.md b/TODO.md index a804099..90b6f97 100644 --- a/TODO.md +++ b/TODO.md @@ -5,66 +5,6 @@ ordered roughly by priority within each section. Priority labels (`HIGH` / `MEDIUM` / `LOW`) mark items that deserve explicit ranking; unlabeled items are "someday, if the mood strikes." -## `zfin import`: preserve prior `open_date` on re-import — priority MEDIUM - -`zfin import` (PR 3) currently writes every synthesized lot with -`open_date::1970-01-01` — an honest "we don't know" sentinel. -Brokerage holdings CSVs don't carry per-buy dates, so a single -import has no real signal to seed `open_date` from. But once a -portfolio file exists, the *prior* import's lots ARE a signal: a -position that was in the file last week with `open_date::2026-04-15` -should keep that date on the next import, not get rewritten. - -Without this, every re-import zeroes out trailing-return and -ST/LT classifications for positions that have actually been held -since the previous run. Sentinel epoch dodges the "today resets -the clock" version of this bug, but it doesn't fix it — it just -makes the broken state inert. - -### Sketch - -When `import` runs and the target file already exists: - -1. Read and parse the existing portfolio file (cleanly, via - `cache.deserializePortfolio`). -2. Build a lookup: `(symbol, account) → existing Lot`. -3. For each synthesized lot, look up by `(symbol, account)`: - - **Match:** preserve `existing.open_date` and (probably) - `existing.open_price`. The synthesized cost-basis-per-share - is "current avg" not "what I paid for THIS lot," so keeping - the existing open_price is more honest if the prior import - captured it. - - **No match:** new position. Use today as the open_date - (we genuinely don't know but today is the best honest guess - for "first time we saw it"). -4. Closed-lot detection: for each existing lot in the file with - no matching brokerage row, decide what to do — append a - `close_date::,close_price::` pair? Drop - it silently? This is the trickier half; punt to a sub-task - if needed. - -### Match key - -`(symbol, account)` pair. Discussed during PR 3: -- Including `security_type` is overkill (we don't have stock and - cash AAPL in the same account). -- Symbol-only matching across accounts hides real errors (a - position moving accounts is a transfer, not a no-op). -- If the existing file has multiple lots for the same (symbol, - account), preserve the EARLIEST `open_date` — that's the - longest-standing buy, the right anchor for trailing-return math. - -### Driver - -Lossless re-import. Today's epoch sentinel is correct-but- -inert; this PR makes import actually useful for tracking -positions over time without forcing the user to hand-edit -dates after every refresh. - -This also gets us most of the way to "transactions import" -without needing a separate transactions CSV — closed lots fall -out of the symbol-set diff between two consecutive imports. - ## Projections: future enhancements - **Configurable return cap per position — priority MEDIUM.** diff --git a/src/commands/import.zig b/src/commands/import.zig index 2ff3293..ae2e464 100644 --- a/src/commands/import.zig +++ b/src/commands/import.zig @@ -16,37 +16,67 @@ //! //! - `symbol::` from the export //! - `shares::` from the export's quantity -//! - `open_date::` = `1970-01-01` (sentinel, see below) -//! - `open_price::` = `cost_basis / quantity` if both > 0, else +//! - `open_date::` from the prior portfolio's matching lot (see +//! "Re-import merge" below) when present, else `1970-01-01` +//! (sentinel — we don't have a real signal for new positions) +//! - `open_price::` from the prior matching lot when present, +//! else `cost_basis / quantity` if both > 0, else //! `current_value / quantity`, else 0 //! - `account::` resolved via `accounts.srf` from the export's //! account_number; refuse to import if any number is unmapped -//! - `note::` is deliberately unset for now. The natural value -//! would be `"imported "`, but that changes on -//! every re-import and would dirty `git diff` for held -//! positions that didn't actually change. The merge-aware -//! follow-up PR will reintroduce a refresh-date note at the -//! same time it preserves prior `open_date` values across -//! re-imports. +//! - `note::` from the prior matching lot when present (carries +//! the original first-seen import date), else +//! `"imported YYYY-MM-DD"` for newly-introduced +//! positions //! - `security_type::cash` for cash-classified positions //! -//! ### Why the epoch sentinel for `open_date`? +//! ## Re-import merge +//! +//! When the target portfolio file already exists, `import` reads +//! it, builds a `(symbol, account) → Lot` lookup, and uses that +//! to inherit per-position metadata that the brokerage CSV +//! doesn't carry. The merge rules: +//! +//! - **Held positions** (in both prior file and new export): +//! keep `open_date`, `open_price`, and `note` from the prior +//! lot; only `shares` and `security_type` come from the new +//! export. A re-import of an unchanged held position +//! produces byte-identical output, so `git diff` only +//! surfaces actual brokerage changes (lot-size drift, +//! real cost-basis adjustments). +//! - **New positions** (in new export, not in prior): treat +//! as a fresh lot — `open_date::1970-01-01` sentinel, +//! synthesized `open_price`, today-stamped note. The note +//! records "first-seen" rather than "every-time-seen", so +//! it doesn't churn on subsequent imports. +//! - **Closed positions** (in prior, not in new export): +//! silently dropped. `import` is a "replace, don't merge +//! transactions" tool; if you sold AAPL between imports, +//! the new file just stops including AAPL. The git history +//! is the audit trail. +//! - **Multiple lots for same `(symbol, account)`** in the +//! prior file: the EARLIEST `open_date` wins. That's the +//! longest-standing buy and the right anchor for trailing- +//! return math. +//! +//! ### What the merge does NOT preserve +//! +//! Hand-edited fields like `price::`, `price_ratio::`, +//! `ticker::`, or `drip::` on a prior lot get blown away on +//! re-import. If you've manually annotated a managed-account +//! portfolio with such fields, `import` is the wrong tool — +//! either edit by hand or rebuild the annotations after each +//! refresh. +//! +//! ### Why `1970-01-01` (Date.epoch) for new lots? //! //! Brokerage holdings CSVs don't carry per-lot buy dates, so any -//! synthesized `open_date` is a guess. Earlier versions used -//! `today`, which is actively misleading: every re-import resets -//! the clock, blowing away trailing-return and ST/LT -//! classifications for positions that have actually been held for -//! years. Using `1970-01-01` is honest — "we don't know" — and -//! makes a `git diff` between two imports trivial: the only thing -//! that changes for a held position is `shares` and possibly -//! `open_price`, never the date column. -//! -//! Preserving the prior `open_date` from an existing portfolio -//! file (so the first import seeds it and subsequent imports -//! leave it alone) is a tracked follow-up; it requires reading -//! the existing file at import time, which the current -//! "blow-away-and-rewrite" model avoids. +//! synthesized `open_date` for a brand-new position is a guess. +//! Using `today` would be actively misleading because the next +//! import would rewrite it again. Using `1970-01-01` is honest — +//! "we don't know" — and is the merge anchor for the SECOND +//! import's prior-lookup, by which point the user has had a +//! chance to hand-edit the date if they care. //! //! This loses per-buy lot history, which is acceptable for managed //! accounts where we don't track buys/sells anyway. Git serves as @@ -147,16 +177,23 @@ pub const meta: framework.Meta = .{ \\ \\Synthesize a portfolio file from a brokerage positions export. \\Each run REPLACES the target portfolio file with synthetic lots - \\drawn from the export — one lot per (account, symbol) at - \\open_date=1970-01-01 (sentinel; brokerage CSVs don't carry - \\per-buy dates) and open_price=cost-basis-average. + \\drawn from the export — one lot per (account, symbol). \\ \\Designed for managed accounts (direct-indexing baskets, accounts \\you don't track at lot granularity). Per-buy history is lost; - \\git serves as the file-level history. Re-running import does - \\NOT preserve a prior `open_date` (yet — see TODO.md for the - \\merge-aware follow-up); the sentinel makes that re-write - \\inert rather than misleading. + \\git serves as the file-level history. + \\ + \\Re-import merge: when the target file already exists, lots that + \\are still in the new export inherit their prior `open_date`, + \\`open_price`, and `note::` — so trailing-return / ST/LT + \\classifications stay stable across re-imports and `git diff` + \\only flags genuine brokerage changes. Newly-introduced + \\positions get `open_date::1970-01-01` (a "we don't know" + \\sentinel; the next import will treat it as the prior anchor). + \\Lots that disappear from the export are silently dropped — if + \\you sold a position between imports, it just stops appearing. + \\Hand-edited fields (`price::`, `ticker::`, etc.) on prior + \\lots are NOT preserved. \\ \\Required: \\ -p, --portfolio Target portfolio file (must be a single @@ -356,8 +393,42 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void { try wells_fargo.applyAccountToPositions(io, account_map, csv_path, wf_args.account_override, positions); } + // ── Read the existing target file (if any) for merge ────── + // + // When the target file already exists, we treat its lots as + // signal: a position that was in the file last week with + // `open_date::2026-04-15` should keep that date this week, + // even though brokerage CSVs don't carry per-buy dates and + // the synthesizer's default `open_date` is the + // `1970-01-01` sentinel. See TODO history and the module + // doc-block "Re-import merge" section. + // + // Parse failure aborts. A garbage existing file is a weird + // state; silently treating it as fresh would mask the + // problem. The user fixes or deletes the file and retries. + const prior_data = std.Io.Dir.cwd().readFileAlloc(io, target_path, allocator, .limited(10 * 1024 * 1024)) catch null; + defer if (prior_data) |d| allocator.free(d); + + var prior_portfolio_opt: ?zfin.Portfolio = null; + defer if (prior_portfolio_opt) |*p| p.deinit(); + if (prior_data) |data| { + prior_portfolio_opt = cache.deserializePortfolio(allocator, data) catch { + var msg_buf: [512]u8 = undefined; + const msg = std.fmt.bufPrint(&msg_buf, "Error: Cannot parse existing portfolio file: {s}\n", .{target_path}) catch "Error: Cannot parse existing portfolio file\n"; + try cli.stderrPrint(io, msg); + try cli.stderrPrint(io, " Fix or delete the file, then re-run the import.\n"); + return error.WriteFailed; + }; + } + + var prior_lookup_opt: ?PriorLotsLookup = null; + defer if (prior_lookup_opt) |*p| p.deinit(); + if (prior_portfolio_opt) |pf| { + prior_lookup_opt = try PriorLotsLookup.init(allocator, pf.lots); + } + // ── Synthesize lots ─────────────────────────────────────── - const lots = synthesizeLots(io, allocator, positions, account_map, parsed.source, ctx.today) catch |err| switch (err) { + const lots = synthesizeLots(io, allocator, positions, account_map, parsed.source, ctx.today, prior_lookup_opt) catch |err| switch (err) { error.UnmappedAccount => { // synthesizeLots already printed the offending account // numbers to stderr; just propagate as a user-level error. @@ -500,16 +571,116 @@ fn confirmOverwrite(io: std.Io, path: []const u8) !bool { return trimmed.len > 0 and (trimmed[0] == 'y' or trimmed[0] == 'Y'); } +/// Lookup table built from a previously-imported portfolio's lots, +/// keyed by `(symbol, account)`. Used by `synthesizeLots` to +/// preserve the prior `open_date` / `open_price` / `note::` for a +/// position that's still present in the new export. +/// +/// When multiple lots share the same `(symbol, account)` (the +/// merge-aware design accepts this — a hand-edited file might +/// have several lots, or a prior version of import wrote +/// multiple), the EARLIEST `open_date` wins. That's the +/// longest-standing buy and the right anchor for trailing-return +/// math. +/// +/// Backed by a `StringHashMap` keyed by `"\x00"`. +/// The composite-key string is owned by the lookup and freed in +/// `deinit`. Lot pointers are borrowed from the caller's +/// `Portfolio` and remain valid as long as the source portfolio +/// does. +const PriorLotsLookup = struct { + map: std.StringHashMap(*const portfolio_mod.Lot), + allocator: std.mem.Allocator, + + fn init(allocator: std.mem.Allocator, lots: []const portfolio_mod.Lot) !PriorLotsLookup { + var map = std.StringHashMap(*const portfolio_mod.Lot).init(allocator); + errdefer { + var it = map.keyIterator(); + while (it.next()) |k| allocator.free(k.*); + map.deinit(); + } + + for (lots) |*lot| { + // Skip closed lots: they shouldn't anchor a re-import's + // open_date for a position the brokerage shows as held. + // (Today's import doesn't write `close_date`/`close_price` + // anyway, so this is also defensive against hand-edited + // closed lots in the file.) + if (lot.close_date != null) continue; + // Cash lots have no symbol/account-meaningful identity + // for matching across imports — skip. + if (lot.security_type == .cash) continue; + const account = lot.account orelse continue; + + const key = try makeKey(allocator, lot.symbol, account); + const gop = try map.getOrPut(key); + if (gop.found_existing) { + // Duplicate (symbol, account) — keep the EARLIEST + // open_date as the merge anchor. Free the freshly + // built key (the one already in the map stays). + allocator.free(key); + if (lot.open_date.lessThan(gop.value_ptr.*.open_date)) { + gop.value_ptr.* = lot; + } + } else { + gop.value_ptr.* = lot; + } + } + return .{ .map = map, .allocator = allocator }; + } + + fn deinit(self: *PriorLotsLookup) void { + var it = self.map.keyIterator(); + while (it.next()) |k| self.allocator.free(k.*); + self.map.deinit(); + } + + /// Find the prior `Lot` for `(symbol, account)`, returning + /// null when no match exists. Caller must keep the source + /// portfolio alive while using the returned pointer. + fn find(self: PriorLotsLookup, symbol: []const u8, account: []const u8) !?*const portfolio_mod.Lot { + var key_buf: [256]u8 = undefined; + // Fast path: stack-allocate the key. Fall back to heap + // for unusually long symbols/accounts. + const total = symbol.len + 1 + account.len; + if (total <= key_buf.len) { + @memcpy(key_buf[0..symbol.len], symbol); + key_buf[symbol.len] = 0; + @memcpy(key_buf[symbol.len + 1 ..][0..account.len], account); + return self.map.get(key_buf[0..total]); + } + const key = try makeKey(self.allocator, symbol, account); + defer self.allocator.free(key); + return self.map.get(key); + } + + fn makeKey(allocator: std.mem.Allocator, symbol: []const u8, account: []const u8) ![]u8 { + return std.fmt.allocPrint(allocator, "{s}\x00{s}", .{ symbol, account }); + } +}; + /// Synthesize a `Lot` per `BrokeragePosition`. Resolves each /// brokerage account_number to a portfolio account name via /// `account_map`; refuses (with a stderr listing of unmapped /// numbers) if any can't be resolved. /// -/// `today` is currently unused at the lot level (the would-be -/// `note::` stamp is suppressed — see the in-body comment for -/// rationale). The parameter stays in the signature so the -/// merge-aware follow-up PR doesn't have to reshape the call -/// site again when the note comes back. +/// `today` stamps the `note::` field on lots that don't have a +/// matching prior entry (`note::imported YYYY-MM-DD`). +/// Lots that DO match a prior entry inherit that prior lot's +/// note (which carries the original first-seen date), so a +/// re-import of an unchanged held position produces a +/// byte-identical line — the `git diff` only shows genuine +/// brokerage changes. +/// +/// `prior_lookup` (if non-null) carries the lots from the +/// existing target file. For each synthesized lot, we look up +/// `(symbol, account)`: +/// - **Match:** preserve `open_date`, `open_price`, and `note` +/// from the prior lot. Brokerage shares/cost-basis still come +/// from the new export. +/// - **No match:** new position. Use `Date.epoch` for +/// `open_date` (no signal to do better) and stamp today's +/// date in the note. /// /// Takes `io` so it can print the unmapped-account-number /// enumeration directly to stderr — easier than threading the list @@ -526,6 +697,7 @@ fn synthesizeLots( account_map: analysis.AccountMap, source: Source, today: Date, + prior_lookup: ?PriorLotsLookup, ) ![]portfolio_mod.Lot { const institution = source.label(); @@ -576,16 +748,12 @@ fn synthesizeLots( lots.deinit(allocator); } - // The synthetic note (`"imported YYYY-MM-DD"`) is - // intentionally omitted for now. It changes on every re-import - // (because the date moves), which means held positions show - // up as modified in `git diff` even when nothing actually - // changed at the brokerage. The merge-aware follow-up PR will - // bring the note back together with prior-open_date - // preservation, at which point the note can stamp "last - // refreshed" without polluting the diff for unchanged - // positions. See TODO.md "preserve prior open_date on re-import". - _ = today; // keep the parameter wired for the follow-up PR + // Stamp for newly-introduced lots (no prior match). Held + // positions inherit their prior note instead, so the + // `git diff` between two imports of an unchanged held + // position stays empty. + var fresh_note_buf: [64]u8 = undefined; + const fresh_note = try std.fmt.bufPrint(&fresh_note_buf, "imported {s} {f}", .{ institution, today }); for (positions) |pos| { const acct_name = account_map.findByInstitutionAccount(institution, pos.account_number).?; @@ -602,7 +770,7 @@ fn synthesizeLots( break :blk 0; }; - const open_price: f64 = if (pos.is_cash) + const synthesized_open_price: f64 = if (pos.is_cash) 1.0 else if (pos.cost_basis) |cb| if (shares > 0) cb / shares else 0 @@ -613,20 +781,30 @@ fn synthesizeLots( const security_type: portfolio_mod.LotType = if (pos.is_cash) .cash else .stock; + // Merge the prior lot's open_date / open_price / note if + // we have one. Cash lots are excluded from `prior_lookup` + // (they have no account-meaningful identity) and always + // get the synthesized values. + const prior: ?*const portfolio_mod.Lot = if (prior_lookup) |pl| + (if (security_type == .cash) null else try pl.find(pos.symbol, acct_name)) + else + null; + + const open_date: Date = if (prior) |p| p.open_date else Date.epoch; + const open_price: f64 = if (prior) |p| p.open_price else synthesized_open_price; + const note_text: []const u8 = if (prior) |p| + (if (p.note) |n| n else fresh_note) + else + fresh_note; + try lots.append(allocator, .{ .symbol = try allocator.dupe(u8, pos.symbol), .shares = shares, - // `Date.epoch` (1970-01-01) is the "we don't know" - // sentinel — see the module doc-block. Once the - // merge-aware follow-up PR lands, prior `open_date` - // values get preserved across re-imports and a note - // can capture the refresh date without diff noise. - .open_date = Date.epoch, + .open_date = open_date, .open_price = open_price, .account = try allocator.dupe(u8, acct_name), .security_type = security_type, - // .note: deliberately unset — see the comment block - // above the lots ArrayList init. + .note = try allocator.dupe(u8, note_text), }); } @@ -688,7 +866,7 @@ test "synthesizeLots: stock positions get open_price = cost_basis / quantity" { }; const today = Date.fromYmd(2026, 5, 21); - const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, today); + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, today, null); defer freeLots(allocator, lots); try testing.expectEqual(@as(usize, 1), lots.len); @@ -696,11 +874,14 @@ test "synthesizeLots: stock positions get open_price = cost_basis / quantity" { try testing.expectApproxEqAbs(@as(f64, 100), lots[0].shares, 0.01); try testing.expectApproxEqAbs(@as(f64, 120.0), lots[0].open_price, 0.01); // 12000 / 100 try testing.expectEqualStrings("Mom Brokerage", lots[0].account.?); - // `open_date` is the sentinel (`Date.epoch`, 1970-01-01) — see - // module doc-block. No note for now (see synthesizeLots body). + // No prior portfolio (`prior_lookup = null`) → new-lot path: + // `open_date` is the sentinel and the note carries the + // import date so the user can tell when it was first seen. try testing.expectEqual(Date.epoch.days, lots[0].open_date.days); try testing.expectEqual(portfolio_mod.LotType.stock, lots[0].security_type); - try testing.expect(lots[0].note == null); + try testing.expect(lots[0].note != null); + try testing.expect(std.mem.indexOf(u8, lots[0].note.?, "imported fidelity") != null); + try testing.expect(std.mem.indexOf(u8, lots[0].note.?, "2026-05-21") != null); } test "synthesizeLots: missing cost_basis falls back to current_value" { @@ -724,7 +905,7 @@ test "synthesizeLots: missing cost_basis falls back to current_value" { }; const today = Date.fromYmd(2026, 5, 21); - const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .schwab = "" }, today); + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .schwab = "" }, today, null); defer freeLots(allocator, lots); try testing.expectEqual(@as(usize, 1), lots.len); @@ -752,7 +933,7 @@ test "synthesizeLots: cash positions become security_type=cash with shares=value }; const today = Date.fromYmd(2026, 5, 21); - const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, today); + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, today, null); defer freeLots(allocator, lots); try testing.expectEqual(@as(usize, 1), lots.len); @@ -761,16 +942,15 @@ test "synthesizeLots: cash positions become security_type=cash with shares=value try testing.expectApproxEqAbs(@as(f64, 1.0), lots[0].open_price, 0.01); } -test "synthesizeLots: lots are byte-identical regardless of import date" { - // Pins the diff-cleanliness property: two imports of the same - // brokerage state on different days produce lots whose - // serialized form is byte-identical. Without this, held - // positions show up as modified in `git diff` even when - // nothing changed at the brokerage. The day of the run leaks - // into a lot only via the (currently suppressed) note; - // reintroducing the note in the merge-aware follow-up will - // require an exception here that's gated on (symbol, account) - // newness. +test "synthesizeLots: lots are byte-identical across imports when prior_lookup matches" { + // Pins the diff-cleanliness property of the merge path: + // when the existing portfolio file already carries a lot for + // a `(symbol, account)` pair, a re-import on any future date + // reuses that lot's `open_date` / `open_price` / `note`, + // producing byte-identical serialized output. Without this, + // held positions would show up as modified in `git diff` on + // every import — exactly what the merge layer was added to + // prevent. const allocator = testing.allocator; var account_map = try testAccountMap(allocator, &.{ .{ .account = "Mom Brokerage", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" }, @@ -781,19 +961,43 @@ test "synthesizeLots: lots are byte-identical regardless of import date" { .{ .account_number = "Z123", .account_name = "I", .symbol = "AAPL", .description = "", .quantity = 1, .current_value = 100, .cost_basis = 100, .is_cash = false }, }; - const lots_a = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21)); + // Build a one-lot prior portfolio that matches the position + // by (symbol, account). The merge path should preserve every + // field of this lot (open_date, open_price, note) regardless + // of the import date. + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "AAPL", + .shares = 1, + .open_date = Date.fromYmd(2024, 1, 15), + .open_price = 95.0, + .account = "Mom Brokerage", + .security_type = .stock, + .note = "imported fidelity 2024-01-15", + }, + }; + var prior_lookup = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior_lookup.deinit(); + + const lots_a = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), prior_lookup); defer freeLots(allocator, lots_a); - const lots_b = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 9, 14)); + const lots_b = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 9, 14), prior_lookup); defer freeLots(allocator, lots_b); - // open_date is the sentinel on both runs. - try testing.expectEqual(Date.epoch.days, lots_a[0].open_date.days); - try testing.expectEqual(Date.epoch.days, lots_b[0].open_date.days); - // No note → date-of-run can't leak into the diff. - try testing.expect(lots_a[0].note == null); - try testing.expect(lots_b[0].note == null); - // The serialized bytes match — what `git diff` would actually - // see. This is the property we care about for re-imports. + // Prior open_date is preserved — NOT today's date and NOT + // the sentinel. + try testing.expectEqual(Date.fromYmd(2024, 1, 15).days, lots_a[0].open_date.days); + try testing.expectEqual(Date.fromYmd(2024, 1, 15).days, lots_b[0].open_date.days); + // Prior open_price preserved — even though the brokerage + // export shows cost_basis=100 → would-synthesize $100/share. + try testing.expectApproxEqAbs(@as(f64, 95.0), lots_a[0].open_price, 0.01); + try testing.expectApproxEqAbs(@as(f64, 95.0), lots_b[0].open_price, 0.01); + // Prior note preserved (carries the original 2024-01-15 + // import date, not today's). + try testing.expectEqualStrings("imported fidelity 2024-01-15", lots_a[0].note.?); + try testing.expectEqualStrings("imported fidelity 2024-01-15", lots_b[0].note.?); + // The serialized bytes match — what `git diff` would + // actually see. This is the property the merge layer adds. const bytes_a = try cache.serializePortfolio(allocator, lots_a); defer allocator.free(bytes_a); const bytes_b = try cache.serializePortfolio(allocator, lots_b); @@ -828,6 +1032,7 @@ test "synthesizeLots: unmapped account_number fails with UnmappedAccount" { account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), + null, )); } @@ -861,6 +1066,7 @@ test "synthesizeLots: institution mismatch counts as unmapped" { account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), + null, )); } @@ -877,7 +1083,7 @@ test "synthesizeLots: multi-account export fans out per-account" { .{ .account_number = "Z222", .account_name = "Brokerage", .symbol = "VTI", .description = "", .quantity = 100, .current_value = 22000, .cost_basis = 18000, .is_cash = false }, }; - const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21)); + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), null); defer freeLots(allocator, lots); try testing.expectEqual(@as(usize, 2), lots.len); @@ -887,6 +1093,265 @@ test "synthesizeLots: multi-account export fans out per-account" { try testing.expectApproxEqAbs(@as(f64, 180.0), lots[1].open_price, 0.01); // 18000 / 100 } +// ── Merge-aware re-import tests ────────────────────────────── + +test "synthesizeLots: prior lot for (symbol, account) preserves open_date and open_price" { + // Held position case: a (symbol, account) that's in both + // the prior portfolio and the new export inherits the + // prior lot's open_date and open_price. Brokerage shares + // come from the new export (positions might have grown). + const allocator = testing.allocator; + var account_map = try testAccountMap(allocator, &.{ + .{ .account = "Mom Brokerage", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" }, + }); + defer account_map.deinit(); + + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "AAPL", + .shares = 100, + .open_date = Date.fromYmd(2024, 6, 1), + .open_price = 90.0, + .account = "Mom Brokerage", + .security_type = .stock, + .note = "imported fidelity 2024-06-01", + }, + }; + var prior = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior.deinit(); + + // Export shows 120 shares now (user bought more) at avg + // cost $100 — but the merge keeps the prior open_price. + const positions = [_]BrokeragePosition{ + .{ .account_number = "Z123", .account_name = "I", .symbol = "AAPL", .description = "", .quantity = 120, .current_value = 18000, .cost_basis = 12000, .is_cash = false }, + }; + + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), prior); + defer freeLots(allocator, lots); + + try testing.expectEqual(@as(usize, 1), lots.len); + // shares from new export + try testing.expectApproxEqAbs(@as(f64, 120), lots[0].shares, 0.01); + // open_date preserved from prior + try testing.expectEqual(Date.fromYmd(2024, 6, 1).days, lots[0].open_date.days); + // open_price preserved from prior (NOT 12000/120 = 100) + try testing.expectApproxEqAbs(@as(f64, 90.0), lots[0].open_price, 0.01); + // note preserved from prior + try testing.expectEqualStrings("imported fidelity 2024-06-01", lots[0].note.?); +} + +test "synthesizeLots: new position with no prior match gets sentinel + today's note" { + // A (symbol, account) that doesn't appear in the prior + // portfolio is treated as a brand-new position. open_date + // is the sentinel (we don't know when the user bought), + // open_price is computed from the export's cost basis, + // note carries today's date. + const allocator = testing.allocator; + var account_map = try testAccountMap(allocator, &.{ + .{ .account = "Mom Brokerage", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" }, + }); + defer account_map.deinit(); + + // Prior has only AAPL; new export has both AAPL and a new + // GOOG. + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "AAPL", + .shares = 100, + .open_date = Date.fromYmd(2024, 6, 1), + .open_price = 90.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + }; + var prior = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior.deinit(); + + const positions = [_]BrokeragePosition{ + .{ .account_number = "Z123", .account_name = "I", .symbol = "AAPL", .description = "", .quantity = 100, .current_value = 15000, .cost_basis = 9000, .is_cash = false }, + .{ .account_number = "Z123", .account_name = "I", .symbol = "GOOG", .description = "", .quantity = 25, .current_value = 4000, .cost_basis = 3500, .is_cash = false }, + }; + + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), prior); + defer freeLots(allocator, lots); + + try testing.expectEqual(@as(usize, 2), lots.len); + + // AAPL: prior path + try testing.expectEqualStrings("AAPL", lots[0].symbol); + try testing.expectEqual(Date.fromYmd(2024, 6, 1).days, lots[0].open_date.days); + + // GOOG: new-lot path. Sentinel open_date, computed + // open_price ($3500 / 25 = $140), today-stamped note. + try testing.expectEqualStrings("GOOG", lots[1].symbol); + try testing.expectEqual(Date.epoch.days, lots[1].open_date.days); + try testing.expectApproxEqAbs(@as(f64, 140.0), lots[1].open_price, 0.01); + try testing.expect(std.mem.indexOf(u8, lots[1].note.?, "2026-05-21") != null); +} + +test "synthesizeLots: when prior has multiple lots for same (symbol, account), earliest open_date wins" { + // Hand-edited or legacy file might carry multiple lots + // for the same (symbol, account). The merge should anchor + // on the EARLIEST open_date — that's the longest-standing + // buy and the right basis for trailing-return math. + const allocator = testing.allocator; + var account_map = try testAccountMap(allocator, &.{ + .{ .account = "Mom Brokerage", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" }, + }); + defer account_map.deinit(); + + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "AAPL", + .shares = 50, + .open_date = Date.fromYmd(2025, 8, 15), + .open_price = 220.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + .{ + .symbol = "AAPL", + .shares = 50, + .open_date = Date.fromYmd(2022, 3, 10), // earlier — should win + .open_price = 150.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + .{ + .symbol = "AAPL", + .shares = 50, + .open_date = Date.fromYmd(2024, 1, 1), + .open_price = 180.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + }; + var prior = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior.deinit(); + + const positions = [_]BrokeragePosition{ + .{ .account_number = "Z123", .account_name = "I", .symbol = "AAPL", .description = "", .quantity = 150, .current_value = 30000, .cost_basis = 27500, .is_cash = false }, + }; + + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), prior); + defer freeLots(allocator, lots); + + try testing.expectEqual(@as(usize, 1), lots.len); + // Earliest open_date (2022-03-10) wins, and so does its + // open_price ($150). + try testing.expectEqual(Date.fromYmd(2022, 3, 10).days, lots[0].open_date.days); + try testing.expectApproxEqAbs(@as(f64, 150.0), lots[0].open_price, 0.01); +} + +test "synthesizeLots: prior closed lot does NOT anchor a held position" { + // If a prior lot has `close_date` set, treat it as gone + // — don't let it leak into the merge anchor. The new + // export shows the position is back; we treat it as a + // new lot. + const allocator = testing.allocator; + var account_map = try testAccountMap(allocator, &.{ + .{ .account = "Mom Brokerage", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" }, + }); + defer account_map.deinit(); + + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "AAPL", + .shares = 100, + .open_date = Date.fromYmd(2024, 1, 1), + .open_price = 90.0, + .close_date = Date.fromYmd(2025, 6, 1), + .close_price = 200.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + }; + var prior = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior.deinit(); + + const positions = [_]BrokeragePosition{ + .{ .account_number = "Z123", .account_name = "I", .symbol = "AAPL", .description = "", .quantity = 50, .current_value = 7500, .cost_basis = 6000, .is_cash = false }, + }; + + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), prior); + defer freeLots(allocator, lots); + + // Closed prior lot was skipped; new lot got the new-lot + // path (sentinel open_date, computed open_price). + try testing.expectEqualStrings("AAPL", lots[0].symbol); + try testing.expectEqual(Date.epoch.days, lots[0].open_date.days); + try testing.expectApproxEqAbs(@as(f64, 120.0), lots[0].open_price, 0.01); // 6000 / 50 +} + +test "synthesizeLots: positions dropped from new export are excluded (closed-lot drop)" { + // The merge isn't reciprocal: lots in the prior file + // that don't appear in the new export are silently + // dropped. This is the "import is a replace, not a + // merge of transactions" design. Documented as a + // limitation in the module doc-block. + const allocator = testing.allocator; + var account_map = try testAccountMap(allocator, &.{ + .{ .account = "Mom Brokerage", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" }, + }); + defer account_map.deinit(); + + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "AAPL", + .shares = 100, + .open_date = Date.fromYmd(2024, 1, 1), + .open_price = 90.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + .{ + .symbol = "GOOG", + .shares = 50, + .open_date = Date.fromYmd(2024, 1, 1), + .open_price = 100.0, + .account = "Mom Brokerage", + .security_type = .stock, + }, + }; + var prior = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior.deinit(); + + // Export only shows AAPL (user sold all GOOG). + const positions = [_]BrokeragePosition{ + .{ .account_number = "Z123", .account_name = "I", .symbol = "AAPL", .description = "", .quantity = 100, .current_value = 15000, .cost_basis = 9000, .is_cash = false }, + }; + + const lots = try synthesizeLots(testing.io, allocator, &positions, account_map, .{ .fidelity = "" }, Date.fromYmd(2026, 5, 21), prior); + defer freeLots(allocator, lots); + + // Only the AAPL lot survives; GOOG is silently dropped. + try testing.expectEqual(@as(usize, 1), lots.len); + try testing.expectEqualStrings("AAPL", lots[0].symbol); +} + +test "PriorLotsLookup: cash lots are excluded from the lookup" { + // Cash lots have synthetic symbols (often "CASH" or a + // money-market ticker) and aren't matched across imports + // — the brokerage's cash balance is the source of truth + // every time. Pin that they don't enter the lookup so we + // don't accidentally inherit a stale cash open_price/note. + const allocator = testing.allocator; + const prior_lots = [_]portfolio_mod.Lot{ + .{ + .symbol = "FZFXX", + .shares = 1000, + .open_date = Date.fromYmd(2024, 1, 1), + .open_price = 1.0, + .account = "Mom Brokerage", + .security_type = .cash, + }, + }; + var prior = try PriorLotsLookup.init(allocator, &prior_lots); + defer prior.deinit(); + + try testing.expect((try prior.find("FZFXX", "Mom Brokerage")) == null); +} + test "parseArgs: --fidelity captures path" { var ctx: framework.RunCtx = undefined; ctx.io = testing.io;