From 8be73c222e2e53040030cbca2ce8f84b49cb2c13 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Sat, 23 May 2026 11:37:52 -0700 Subject: [PATCH] wells fargo parser fixes --- src/brokerage/wells_fargo.zig | 311 +++++++++++++++++++++++++++++++--- 1 file changed, 283 insertions(+), 28 deletions(-) diff --git a/src/brokerage/wells_fargo.zig b/src/brokerage/wells_fargo.zig index 626b2eb..d0a2a7b 100644 --- a/src/brokerage/wells_fargo.zig +++ b/src/brokerage/wells_fargo.zig @@ -100,14 +100,25 @@ pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePo try staged.append(allocator, trimmed); } - // Locate each record by scanning for ` , popup`. WF - // emits this exact suffix as part of the symbol column's - // hover-popup affordance; it's a very stable record anchor. + // Locate each record by scanning for a popup anchor. WF + // emits a `, popup` (or `,popup`) suffix as part of the + // symbol column's hover affordance; it's a very stable + // record anchor. + // + // We do NOT stop at intermediate totals lines (`Stocks Total`, + // `ETFs Total`). The WF holdings page splits positions into + // multiple sections (Stocks, ETFs, Bonds, …), each terminated + // by its own totals line; the second/third section's records + // appear AFTER an intermediate totals line and we want to + // capture them. The only structural boundary between + // positions and the (optional) cash table is the + // `Cash, Cash Alternatives and Margin` header, which we + // detect explicitly below. + const cash_header = "Cash, Cash Alternatives and Margin"; var i: usize = 0; while (i < staged.items.len) : (i += 1) { const line = staged.items[i]; - // Stop on totals footer (`ETFs Total`, `Total`, etc). - if (isTotalLine(line)) break; + if (std.mem.eql(u8, line, cash_header)) break; if (!isPopupAnchor(line)) continue; const symbol = popupSymbol(line) orelse continue; @@ -169,24 +180,104 @@ pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePo }); } + // ── Cash section (optional) ───────────────────────────── + // + // The user may include the WF "Cash, Cash Alternatives and + // Margin" table at the end of the paste. The shape is: + // + // Cash, Cash Alternatives and Margin + // Cash alternatives and margin table has been sorted ... + // + // ← e.g. "Sample IRA *1234" + // $14,216.88 ← per-account cash balance + // + // Cash Total + // $14,216.88 ← grand total (skip) + // + // We capture the per-account balance as a synthetic cash + // position with empty `symbol`. The downstream + // `applyAccountToPositions` stamps the account fields, and + // `synthesizeLots` emits a `security_type::cash` lot. + // + // Multiple per-account rows are tolerated (one cash position + // per row); in practice the WF paste flow is single-account so + // there's typically one. Stops at "Cash Total". + { + // After the positions loop, `i` is either at the cash + // header (if the user included the cash table) or at the + // end of the staged array (no cash table). Walk forward + // looking for the header in case it appears after extra + // intermediate lines, then parse account/amount pairs. + var j: usize = i; + while (j < staged.items.len and !std.mem.eql(u8, staged.items[j], cash_header)) : (j += 1) {} + if (j < staged.items.len) { + // Skip the header + the explanatory subtitle line. + j += 1; // past "Cash, Cash Alternatives and Margin" + if (j < staged.items.len and std.mem.startsWith(u8, staged.items[j], "Cash alternatives")) { + j += 1; + } + // Walk rows. Each row is an account name (anything + // non-blank, non-Total) followed by a `$AMOUNT` line. + // Stop at "Cash Total" (the grand-total marker). + while (j < staged.items.len) : (j += 1) { + const row = staged.items[j]; + if (row.len == 0) continue; + if (std.mem.eql(u8, row, "Cash Total")) break; + // Account-name row. Look ahead for the dollar amount. + var k: usize = j + 1; + while (k < staged.items.len and staged.items[k].len == 0) : (k += 1) {} + if (k >= staged.items.len) break; + const amount_text = staged.items[k]; + // The dollar amount must start with `$` to count as + // a cash balance. Any other shape (e.g. "Cash Total" + // appearing here would mean a malformed paste) → skip. + if (amount_text.len == 0 or amount_text[0] != '$') continue; + const cash_amount = parseDollarAmount(amount_text) orelse continue; + try positions.append(allocator, .{ + .account_number = "", + .account_name = "", + .symbol = "", + .description = "Cash", + .quantity = null, + .current_value = cash_amount, + .cost_basis = null, + .is_cash = true, + }); + j = k; // resume past the consumed amount line + } + } + } + return positions.toOwnedSlice(allocator); } -/// True when `line` is a record-start anchor like `GSLC , popup`. -/// The trailing `, popup` is the stable signal — WF's hover -/// affordance. Whitespace between the symbol and the comma is -/// tolerated (WF emits exactly one space, but trim defends -/// against future-proofing). +/// True when `line` is a record-start anchor like `GSLC , popup` +/// or `XOM,popup`. The trailing `popup` is the stable signal — WF's +/// hover affordance — and the comma immediately precedes it (with +/// optional whitespace either side, which varies between paste +/// shapes for stocks vs ETFs). fn isPopupAnchor(line: []const u8) bool { - return std.mem.endsWith(u8, line, ", popup"); + if (!std.mem.endsWith(u8, line, "popup")) return false; + // The portion before `popup` must end with a comma (with + // optional whitespace). This rejects e.g. "popup" alone or + // "something popup" without a separator. + const before = line[0 .. line.len - "popup".len]; + const trimmed = std.mem.trimEnd(u8, before, &.{ ' ', '\t' }); + return std.mem.endsWith(u8, trimmed, ","); } /// Extract the symbol from a popup anchor line. Returns null if /// the line is the right shape but the symbol part is empty. +/// Accepts both `SYMBOL,popup` and `SYMBOL , popup` shapes. fn popupSymbol(line: []const u8) ?[]const u8 { if (!isPopupAnchor(line)) return null; - const before_comma = line[0 .. line.len - ", popup".len]; - const symbol = std.mem.trim(u8, before_comma, &.{ ' ', '\t' }); + // Strip trailing `popup`, surrounding whitespace, and the + // separator comma to get the symbol token. + var end = line.len - "popup".len; + while (end > 0 and (line[end - 1] == ' ' or line[end - 1] == '\t')) end -= 1; + if (end == 0 or line[end - 1] != ',') return null; + end -= 1; // drop the comma + const symbol = std.mem.trim(u8, line[0..end], &.{ ' ', '\t' }); if (symbol.len == 0) return null; return symbol; } @@ -670,13 +761,14 @@ test "parsePaste: input with only header preamble (no records) yields zero" { try testing.expectEqual(@as(usize, 0), positions.len); } -test "parsePaste: stops at ETFs Total footer (doesn't parse past it)" { +test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)" { const allocator = testing.allocator; - // Two records, then a footer, then more positions that - // SHOULD NOT be parsed (they belong to a phantom second - // table that the user shouldn't have included). The - // totals-line stop ensures we don't accidentally double- - // count. + // The WF holdings page splits positions into multiple + // sections (Stocks, ETFs, Bonds, …), each terminated by its + // own `
Total` footer. The parser must keep going + // past intermediate totals to capture records in subsequent + // sections. (Real-world example: 6135.txt-style export with + // 43 stocks then 13 ETFs separated by `Stocks Total`.) const data = "GSLC , popup\n" ++ "GOLDMAN ACTIVEBETA ETF\n" ++ @@ -695,23 +787,36 @@ test "parsePaste: stops at ETFs Total footer (doesn't parse past it)" { "\t\n" ++ "$1,203.17\n" ++ "\t\n" ++ - "ETFs Total\n" ++ + "Stocks Total\n" ++ "$127,655.40\n" ++ - "GHOST , popup\n" ++ // should NOT be parsed - "SHOULD NOT APPEAR\n" ++ + "ETFs\n" ++ + "ETF table has been sorted ...\n" ++ + "\t\n" ++ + "DBP , popup\n" ++ // SHOULD be parsed (next section) + "INVESCO PRECIOUS METALS ETF\n" ++ "\tMultiple(1) \t\n" ++ - "1\n" ++ - "@ $1.00\n" ++ + "10\n" ++ + "@ $50.00\n" ++ "\t\n" ++ - "$1.00\n" ++ - "+$0.00\n" ++ + "$55.00\n" ++ + "+$0.10\n" ++ "\t\n" ++ - "$1.00\n"; + "$550.00\n" ++ + "+$1.00 (+0.18%)\n" ++ + "\t\n" ++ + "+$50.00\n" ++ + "+10.00%\n" ++ + "\t\n" ++ + "$5.00\n" ++ + "\t\n" ++ + "ETFs Total\n" ++ + "$550.00\n"; const positions = try parsePaste(allocator, data); defer allocator.free(positions); - try testing.expectEqual(@as(usize, 1), positions.len); + try testing.expectEqual(@as(usize, 2), positions.len); try testing.expectEqualStrings("GSLC", positions[0].symbol); + try testing.expectEqualStrings("DBP", positions[1].symbol); } test "parsePaste: money-market symbol gets is_cash=true" { @@ -746,6 +851,156 @@ test "parsePaste: money-market symbol gets is_cash=true" { try testing.expect(positions[0].is_cash); } +test "parsePaste: accepts both `SYMBOL,popup` and `SYMBOL , popup` anchors" { + // Wells Fargo emits two slightly different anchor shapes + // depending on what part of the holdings table the user + // copied — stocks tend to come out as `SYMBOL,popup` (no + // spaces) while ETFs come out as `SYMBOL , popup` (with + // spaces). Single-paste files routinely mix both forms, so + // the parser must accept either. + const allocator = testing.allocator; + const data = + "XOM,popup\n" ++ // no-space form (stock) + "EXXON MOBIL CORP\n" ++ + "\tMultiple(4) \t\n" ++ + "50\n" ++ + "@ $129.66\n" ++ + "\t\n" ++ + "$154.92\n" ++ + "-$0.37\n" ++ + "\t\n" ++ + "$7,746.00\n" ++ + "-$18.50 (-0.24%)\n" ++ + "\t\n" ++ + "+$1,262.85\n" ++ + "+19.48%\n" ++ + "\t\n" ++ + "$206.00\n" ++ + "\t\n" ++ + "GSLC , popup\n" ++ // with-space form (ETF) + "GOLDMAN ACTIVEBETA ETF\n" ++ + "\tMultiple(3) \t\n" ++ + "906\n" ++ + "@ $129.97\n" ++ + "\t\n" ++ + "$140.90\n" ++ + "+$0.31\n" ++ + "\t\n" ++ + "$127,655.40\n" ++ + "+$280.86 (+0.22%)\n" ++ + "\t\n" ++ + "+$9,906.42\n" ++ + "+8.41%\n" ++ + "\t\n" ++ + "$1,203.17\n"; + + const positions = try parsePaste(allocator, data); + defer allocator.free(positions); + try testing.expectEqual(@as(usize, 2), positions.len); + try testing.expectEqualStrings("XOM", positions[0].symbol); + try testing.expectEqualStrings("GSLC", positions[1].symbol); +} + +test "parsePaste: trailing cash section emits a cash position" { + // After the positions table, WF pastes may include a + // "Cash, Cash Alternatives and Margin" section listing the + // account's cash balance. The parser captures that as a + // synthetic cash position; the downstream resolver stamps + // the account fields and `synthesizeLots` emits a + // `security_type::cash` lot. + const allocator = testing.allocator; + const data = + "XOM,popup\n" ++ + "EXXON MOBIL CORP\n" ++ + "\tMultiple(4) \t\n" ++ + "50\n" ++ + "@ $129.66\n" ++ + "\t\n" ++ + "$154.92\n" ++ + "-$0.37\n" ++ + "\t\n" ++ + "$7,746.00\n" ++ + "-$18.50 (-0.24%)\n" ++ + "\t\n" ++ + "+$1,262.85\n" ++ + "+19.48%\n" ++ + "\t\n" ++ + "$206.00\n" ++ + "\t\n" ++ + "ETFs Total\n" ++ + "$7,746.00\n" ++ + "Cash, Cash Alternatives and Margin\n" ++ + "Cash alternatives and margin table has been sorted ...\n" ++ + "\t\n" ++ + "Sample Roth IRA *1234\n" ++ + "\t$14,216.88\n" ++ + "\t\n" ++ + "Cash Total\n" ++ + "\t\n" ++ + "$14,216.88 \n"; + + const positions = try parsePaste(allocator, data); + defer allocator.free(positions); + try testing.expectEqual(@as(usize, 2), positions.len); + try testing.expectEqualStrings("XOM", positions[0].symbol); + try testing.expect(!positions[0].is_cash); + // Cash position + try testing.expectEqualStrings("", positions[1].symbol); + try testing.expect(positions[1].is_cash); + try testing.expectApproxEqAbs(@as(f64, 14216.88), positions[1].current_value.?, 0.01); + try testing.expect(positions[1].quantity == null); + try testing.expect(positions[1].cost_basis == null); +} + +test "parsePaste: cash section absent is a no-op" { + // When the user only pastes the positions table (no cash + // section), parsePaste returns just the positions. Regression + // for the original 3522.txt-style paste shape. + const allocator = testing.allocator; + const data = + "GSLC , popup\n" ++ + "GOLDMAN ACTIVEBETA ETF\n" ++ + "\tMultiple(3) \t\n" ++ + "906\n" ++ + "@ $129.97\n" ++ + "\t\n" ++ + "$140.90\n" ++ + "+$0.31\n" ++ + "\t\n" ++ + "$127,655.40\n" ++ + "+$280.86 (+0.22%)\n" ++ + "\t\n" ++ + "+$9,906.42\n" ++ + "+8.41%\n" ++ + "\t\n" ++ + "$1,203.17\n"; + + const positions = try parsePaste(allocator, data); + defer allocator.free(positions); + try testing.expectEqual(@as(usize, 1), positions.len); + try testing.expect(!positions[0].is_cash); +} + +test "isPopupAnchor: accepts both compact and spaced forms" { + try testing.expect(isPopupAnchor("XOM,popup")); + try testing.expect(isPopupAnchor("XOM ,popup")); + try testing.expect(isPopupAnchor("XOM, popup")); + try testing.expect(isPopupAnchor("XOM , popup")); + try testing.expect(isPopupAnchor("BRK'B,popup")); + // Negative cases. + try testing.expect(!isPopupAnchor("XOM popup")); + try testing.expect(!isPopupAnchor("popup")); + try testing.expect(!isPopupAnchor("")); +} + +test "popupSymbol: extracts symbol from compact form" { + try testing.expectEqualStrings("XOM", popupSymbol("XOM,popup").?); + try testing.expectEqualStrings("BRK'B", popupSymbol("BRK'B,popup").?); + try testing.expectEqualStrings("XOM", popupSymbol("XOM, popup").?); + // Empty symbol part → null. + try testing.expect(popupSymbol(",popup") == null); +} + // ── Resolver tests ─────────────────────────────────────────── /// Test helper: build an `AccountMap` from compile-time entries.