wells fargo parser fixes

This commit is contained in:
Emil Lerch 2026-05-23 11:37:52 -07:00
parent 5e50c4eb6f
commit 8be73c222e
Signed by: lobo
GPG key ID: A7B62D657EF764F8

View file

@ -100,14 +100,25 @@ pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePo
try staged.append(allocator, trimmed);
}
// Locate each record by scanning for `<SYMBOL> , popup`. WF
// emits this exact suffix as part of the symbol column's
// hover-popup affordance; it's a very stable record anchor.
// Locate each record by scanning for a popup anchor. WF
// emits a `, popup` (or `,popup`) suffix as part of the
// symbol column's hover affordance; it's a very stable
// record anchor.
//
// We do NOT stop at intermediate totals lines (`Stocks Total`,
// `ETFs Total`). The WF holdings page splits positions into
// multiple sections (Stocks, ETFs, Bonds, …), each terminated
// by its own totals line; the second/third section's records
// appear AFTER an intermediate totals line and we want to
// capture them. The only structural boundary between
// positions and the (optional) cash table is the
// `Cash, Cash Alternatives and Margin` header, which we
// detect explicitly below.
const cash_header = "Cash, Cash Alternatives and Margin";
var i: usize = 0;
while (i < staged.items.len) : (i += 1) {
const line = staged.items[i];
// Stop on totals footer (`ETFs Total`, `Total`, etc).
if (isTotalLine(line)) break;
if (std.mem.eql(u8, line, cash_header)) break;
if (!isPopupAnchor(line)) continue;
const symbol = popupSymbol(line) orelse continue;
@ -169,24 +180,104 @@ pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePo
});
}
// Cash section (optional)
//
// The user may include the WF "Cash, Cash Alternatives and
// Margin" table at the end of the paste. The shape is:
//
// Cash, Cash Alternatives and Margin
// Cash alternatives and margin table has been sorted ...
// <blank>
// <Account name> e.g. "Sample IRA *1234"
// $14,216.88 per-account cash balance
// <blank>
// Cash Total
// $14,216.88 grand total (skip)
//
// We capture the per-account balance as a synthetic cash
// position with empty `symbol`. The downstream
// `applyAccountToPositions` stamps the account fields, and
// `synthesizeLots` emits a `security_type::cash` lot.
//
// Multiple per-account rows are tolerated (one cash position
// per row); in practice the WF paste flow is single-account so
// there's typically one. Stops at "Cash Total".
{
// After the positions loop, `i` is either at the cash
// header (if the user included the cash table) or at the
// end of the staged array (no cash table). Walk forward
// looking for the header in case it appears after extra
// intermediate lines, then parse account/amount pairs.
var j: usize = i;
while (j < staged.items.len and !std.mem.eql(u8, staged.items[j], cash_header)) : (j += 1) {}
if (j < staged.items.len) {
// Skip the header + the explanatory subtitle line.
j += 1; // past "Cash, Cash Alternatives and Margin"
if (j < staged.items.len and std.mem.startsWith(u8, staged.items[j], "Cash alternatives")) {
j += 1;
}
// Walk rows. Each row is an account name (anything
// non-blank, non-Total) followed by a `$AMOUNT` line.
// Stop at "Cash Total" (the grand-total marker).
while (j < staged.items.len) : (j += 1) {
const row = staged.items[j];
if (row.len == 0) continue;
if (std.mem.eql(u8, row, "Cash Total")) break;
// Account-name row. Look ahead for the dollar amount.
var k: usize = j + 1;
while (k < staged.items.len and staged.items[k].len == 0) : (k += 1) {}
if (k >= staged.items.len) break;
const amount_text = staged.items[k];
// The dollar amount must start with `$` to count as
// a cash balance. Any other shape (e.g. "Cash Total"
// appearing here would mean a malformed paste) is skipped.
if (amount_text.len == 0 or amount_text[0] != '$') continue;
const cash_amount = parseDollarAmount(amount_text) orelse continue;
try positions.append(allocator, .{
.account_number = "",
.account_name = "",
.symbol = "",
.description = "Cash",
.quantity = null,
.current_value = cash_amount,
.cost_basis = null,
.is_cash = true,
});
j = k; // resume past the consumed amount line
}
}
}
return positions.toOwnedSlice(allocator);
}
/// True when `line` is a record-start anchor like `GSLC , popup`
/// or `XOM,popup`. The trailing `popup` is the stable signal (WF's
/// hover affordance) and a comma must precede it, with optional
/// spaces/tabs between the comma and `popup` — the spacing varies
/// between paste shapes for stocks vs ETFs.
///
/// A bare `popup`, or `popup` without a separating comma (e.g.
/// `XOM popup`), is rejected. Only the suffix shape is checked
/// here: an empty symbol part (`,popup`) still returns true.
fn isPopupAnchor(line: []const u8) bool {
    const suffix = "popup";
    if (!std.mem.endsWith(u8, line, suffix)) return false;
    // The portion before `popup` must end with a comma once any
    // spaces/tabs are stripped. Only the tail matters for this
    // check, so trimming both ends is harmless.
    const before = line[0 .. line.len - suffix.len];
    const trimmed = std.mem.trim(u8, before, " \t");
    return std.mem.endsWith(u8, trimmed, ",");
}
/// Extract the symbol from a popup anchor line. Accepts both the
/// `SYMBOL,popup` and `SYMBOL , popup` shapes (spaces/tabs around
/// the comma are optional). Returns null when `line` does not end
/// in `<comma><optional whitespace>popup`, or when it has the right
/// shape but the symbol part is empty. The returned slice is a
/// sub-slice of `line`.
fn popupSymbol(line: []const u8) ?[]const u8 {
    const suffix = "popup";
    if (!std.mem.endsWith(u8, line, suffix)) return null;
    // Walk backwards from the start of `popup` over any spaces/tabs;
    // the byte we land on must be the separator comma.
    var end = line.len - suffix.len;
    while (end > 0 and (line[end - 1] == ' ' or line[end - 1] == '\t')) end -= 1;
    if (end == 0 or line[end - 1] != ',') return null;
    end -= 1; // drop the comma
    // Whatever precedes the comma, minus surrounding whitespace, is
    // the symbol token.
    const symbol = std.mem.trim(u8, line[0..end], " \t");
    return if (symbol.len == 0) null else symbol;
}
@ -670,13 +761,14 @@ test "parsePaste: input with only header preamble (no records) yields zero" {
try testing.expectEqual(@as(usize, 0), positions.len);
}
test "parsePaste: stops at ETFs Total footer (doesn't parse past it)" {
test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)" {
const allocator = testing.allocator;
// Two records, then a footer, then more positions that
// SHOULD NOT be parsed (they belong to a phantom second
// table that the user shouldn't have included). The
// totals-line stop ensures we don't accidentally double-
// count.
// The WF holdings page splits positions into multiple
// sections (Stocks, ETFs, Bonds, …), each terminated by its
// own `<Section> Total` footer. The parser must keep going
// past intermediate totals to capture records in subsequent
// sections. (Real-world example: 6135.txt-style export with
// 43 stocks then 13 ETFs separated by `Stocks Total`.)
const data =
"GSLC , popup\n" ++
"GOLDMAN ACTIVEBETA ETF\n" ++
@ -695,23 +787,36 @@ test "parsePaste: stops at ETFs Total footer (doesn't parse past it)" {
"\t\n" ++
"$1,203.17\n" ++
"\t\n" ++
"ETFs Total\n" ++
"Stocks Total\n" ++
"$127,655.40\n" ++
"GHOST , popup\n" ++ // should NOT be parsed
"SHOULD NOT APPEAR\n" ++
"ETFs\n" ++
"ETF table has been sorted ...\n" ++
"\t\n" ++
"DBP , popup\n" ++ // SHOULD be parsed (next section)
"INVESCO PRECIOUS METALS ETF\n" ++
"\tMultiple(1) \t\n" ++
"1\n" ++
"@ $1.00\n" ++
"10\n" ++
"@ $50.00\n" ++
"\t\n" ++
"$1.00\n" ++
"+$0.00\n" ++
"$55.00\n" ++
"+$0.10\n" ++
"\t\n" ++
"$1.00\n";
"$550.00\n" ++
"+$1.00 (+0.18%)\n" ++
"\t\n" ++
"+$50.00\n" ++
"+10.00%\n" ++
"\t\n" ++
"$5.00\n" ++
"\t\n" ++
"ETFs Total\n" ++
"$550.00\n";
const positions = try parsePaste(allocator, data);
defer allocator.free(positions);
try testing.expectEqual(@as(usize, 1), positions.len);
try testing.expectEqual(@as(usize, 2), positions.len);
try testing.expectEqualStrings("GSLC", positions[0].symbol);
try testing.expectEqualStrings("DBP", positions[1].symbol);
}
test "parsePaste: money-market symbol gets is_cash=true" {
@ -746,6 +851,156 @@ test "parsePaste: money-market symbol gets is_cash=true" {
try testing.expect(positions[0].is_cash);
}
test "parsePaste: accepts both `SYMBOL,popup` and `SYMBOL , popup` anchors" {
    // Wells Fargo emits two slightly different anchor shapes
    // depending on which part of the holdings table was copied:
    // stocks tend to come out as `SYMBOL,popup` (no spaces) while
    // ETFs come out as `SYMBOL , popup` (with spaces). A single
    // paste routinely mixes both forms, so the parser must accept
    // either.
    const data =
        "XOM,popup\n" ++ // no-space form (stock)
        "EXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n" ++
        "@ $129.66\n" ++
        "\t\n" ++
        "$154.92\n" ++
        "-$0.37\n" ++
        "\t\n" ++
        "$7,746.00\n" ++
        "-$18.50 (-0.24%)\n" ++
        "\t\n" ++
        "+$1,262.85\n" ++
        "+19.48%\n" ++
        "\t\n" ++
        "$206.00\n" ++
        "\t\n" ++
        "GSLC , popup\n" ++ // with-space form (ETF)
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n" ++
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n" ++
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n" ++
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n" ++
        "$1,203.17\n";

    const parsed = try parsePaste(testing.allocator, data);
    defer testing.allocator.free(parsed);

    // One record per anchor, in paste order.
    const expected_symbols = [_][]const u8{ "XOM", "GSLC" };
    try testing.expectEqual(@as(usize, expected_symbols.len), parsed.len);
    for (expected_symbols, parsed) |want, got| {
        try testing.expectEqualStrings(want, got.symbol);
    }
}
test "parsePaste: trailing cash section emits a cash position" {
    // A WF paste may carry a "Cash, Cash Alternatives and Margin"
    // section after the positions table, listing the account's
    // cash balance. The parser is expected to surface that balance
    // as a synthetic cash position (empty symbol, `is_cash` set,
    // no quantity or cost basis); the downstream resolver stamps
    // the account fields and `synthesizeLots` emits a
    // `security_type::cash` lot.
    const data =
        "XOM,popup\n" ++
        "EXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n" ++
        "@ $129.66\n" ++
        "\t\n" ++
        "$154.92\n" ++
        "-$0.37\n" ++
        "\t\n" ++
        "$7,746.00\n" ++
        "-$18.50 (-0.24%)\n" ++
        "\t\n" ++
        "+$1,262.85\n" ++
        "+19.48%\n" ++
        "\t\n" ++
        "$206.00\n" ++
        "\t\n" ++
        "ETFs Total\n" ++
        "$7,746.00\n" ++
        "Cash, Cash Alternatives and Margin\n" ++
        "Cash alternatives and margin table has been sorted ...\n" ++
        "\t\n" ++
        "Sample Roth IRA *1234\n" ++
        "\t$14,216.88\n" ++
        "\t\n" ++
        "Cash Total\n" ++
        "\t\n" ++
        "$14,216.88 \n";

    const parsed = try parsePaste(testing.allocator, data);
    defer testing.allocator.free(parsed);
    try testing.expectEqual(@as(usize, 2), parsed.len);

    // First entry: the ordinary equity record.
    const equity = parsed[0];
    try testing.expectEqualStrings("XOM", equity.symbol);
    try testing.expect(!equity.is_cash);

    // Second entry: the synthetic cash position.
    const cash = parsed[1];
    try testing.expectEqualStrings("", cash.symbol);
    try testing.expect(cash.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 14216.88), cash.current_value.?, 0.01);
    try testing.expect(cash.quantity == null);
    try testing.expect(cash.cost_basis == null);
}
test "parsePaste: cash section absent is a no-op" {
    // A paste containing only the positions table (no cash
    // section) must yield just the positions, with no synthetic
    // cash entry appended. Regression for the original
    // 3522.txt-style paste shape.
    const data =
        "GSLC , popup\n" ++
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n" ++
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n" ++
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n" ++
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n" ++
        "$1,203.17\n";

    const parsed = try parsePaste(testing.allocator, data);
    defer testing.allocator.free(parsed);

    try testing.expectEqual(@as(usize, 1), parsed.len);
    const only = parsed[0];
    try testing.expect(!only.is_cash);
}
test "isPopupAnchor: accepts both compact and spaced forms" {
    // Every spacing variant around the comma must be recognised.
    const accepted = [_][]const u8{
        "XOM,popup",
        "XOM ,popup",
        "XOM, popup",
        "XOM , popup",
        "BRK'B,popup",
    };
    for (accepted) |line| {
        try testing.expect(isPopupAnchor(line));
    }

    // Negative cases: no separating comma, bare suffix, empty line.
    const rejected = [_][]const u8{
        "XOM popup",
        "popup",
        "",
    };
    for (rejected) |line| {
        try testing.expect(!isPopupAnchor(line));
    }
}
test "popupSymbol: extracts symbol from compact form" {
    // Each anchor line paired with the symbol it should yield.
    const Case = struct { line: []const u8, symbol: []const u8 };
    const cases = [_]Case{
        .{ .line = "XOM,popup", .symbol = "XOM" },
        .{ .line = "BRK'B,popup", .symbol = "BRK'B" },
        .{ .line = "XOM, popup", .symbol = "XOM" },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.symbol, popupSymbol(case.line).?);
    }

    // Empty symbol part → null.
    try testing.expect(popupSymbol(",popup") == null);
}
// Resolver tests
/// Test helper: build an `AccountMap` from compile-time entries.