wells fargo parser fixes
This commit is contained in:
parent
5e50c4eb6f
commit
8be73c222e
1 changed file with 283 additions and 28 deletions
|
|
@ -100,14 +100,25 @@ pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePo
|
|||
try staged.append(allocator, trimmed);
|
||||
}
|
||||
|
||||
// Locate each record by scanning for `<SYMBOL> , popup`. WF
|
||||
// emits this exact suffix as part of the symbol column's
|
||||
// hover-popup affordance; it's a very stable record anchor.
|
||||
// Locate each record by scanning for a popup anchor. WF
|
||||
// emits a `, popup` (or `,popup`) suffix as part of the
|
||||
// symbol column's hover affordance; it's a very stable
|
||||
// record anchor.
|
||||
//
|
||||
// We do NOT stop at intermediate totals lines (`Stocks Total`,
|
||||
// `ETFs Total`). The WF holdings page splits positions into
|
||||
// multiple sections (Stocks, ETFs, Bonds, …), each terminated
|
||||
// by its own totals line; the second/third section's records
|
||||
// appear AFTER an intermediate totals line and we want to
|
||||
// capture them. The only structural boundary between
|
||||
// positions and the (optional) cash table is the
|
||||
// `Cash, Cash Alternatives and Margin` header, which we
|
||||
// detect explicitly below.
|
||||
const cash_header = "Cash, Cash Alternatives and Margin";
|
||||
var i: usize = 0;
|
||||
while (i < staged.items.len) : (i += 1) {
|
||||
const line = staged.items[i];
|
||||
// Stop on totals footer (`ETFs Total`, `Total`, etc).
|
||||
if (isTotalLine(line)) break;
|
||||
if (std.mem.eql(u8, line, cash_header)) break;
|
||||
if (!isPopupAnchor(line)) continue;
|
||||
|
||||
const symbol = popupSymbol(line) orelse continue;
|
||||
|
|
@ -169,24 +180,104 @@ pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePo
|
|||
});
|
||||
}
|
||||
|
||||
// ── Cash section (optional) ─────────────────────────────
|
||||
//
|
||||
// The user may include the WF "Cash, Cash Alternatives and
|
||||
// Margin" table at the end of the paste. The shape is:
|
||||
//
|
||||
// Cash, Cash Alternatives and Margin
|
||||
// Cash alternatives and margin table has been sorted ...
|
||||
// <blank>
|
||||
// <Account name> ← e.g. "Sample IRA *1234"
|
||||
// $14,216.88 ← per-account cash balance
|
||||
// <blank>
|
||||
// Cash Total
|
||||
// $14,216.88 ← grand total (skip)
|
||||
//
|
||||
// We capture the per-account balance as a synthetic cash
|
||||
// position with empty `symbol`. The downstream
|
||||
// `applyAccountToPositions` stamps the account fields, and
|
||||
// `synthesizeLots` emits a `security_type::cash` lot.
|
||||
//
|
||||
// Multiple per-account rows are tolerated (one cash position
|
||||
// per row); in practice the WF paste flow is single-account so
|
||||
// there's typically one. Stops at "Cash Total".
|
||||
{
|
||||
// After the positions loop, `i` is either at the cash
|
||||
// header (if the user included the cash table) or at the
|
||||
// end of the staged array (no cash table). Walk forward
|
||||
// looking for the header in case it appears after extra
|
||||
// intermediate lines, then parse account/amount pairs.
|
||||
var j: usize = i;
|
||||
while (j < staged.items.len and !std.mem.eql(u8, staged.items[j], cash_header)) : (j += 1) {}
|
||||
if (j < staged.items.len) {
|
||||
// Skip the header + the explanatory subtitle line.
|
||||
j += 1; // past "Cash, Cash Alternatives and Margin"
|
||||
if (j < staged.items.len and std.mem.startsWith(u8, staged.items[j], "Cash alternatives")) {
|
||||
j += 1;
|
||||
}
|
||||
// Walk rows. Each row is an account name (anything
|
||||
// non-blank, non-Total) followed by a `$AMOUNT` line.
|
||||
// Stop at "Cash Total" (the grand-total marker).
|
||||
while (j < staged.items.len) : (j += 1) {
|
||||
const row = staged.items[j];
|
||||
if (row.len == 0) continue;
|
||||
if (std.mem.eql(u8, row, "Cash Total")) break;
|
||||
// Account-name row. Look ahead for the dollar amount.
|
||||
var k: usize = j + 1;
|
||||
while (k < staged.items.len and staged.items[k].len == 0) : (k += 1) {}
|
||||
if (k >= staged.items.len) break;
|
||||
const amount_text = staged.items[k];
|
||||
// The dollar amount must start with `$` to count as
|
||||
// a cash balance. Any other shape (e.g. "Cash Total"
|
||||
// appearing here would mean a malformed paste) → skip.
|
||||
if (amount_text.len == 0 or amount_text[0] != '$') continue;
|
||||
const cash_amount = parseDollarAmount(amount_text) orelse continue;
|
||||
try positions.append(allocator, .{
|
||||
.account_number = "",
|
||||
.account_name = "",
|
||||
.symbol = "",
|
||||
.description = "Cash",
|
||||
.quantity = null,
|
||||
.current_value = cash_amount,
|
||||
.cost_basis = null,
|
||||
.is_cash = true,
|
||||
});
|
||||
j = k; // resume past the consumed amount line
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return positions.toOwnedSlice(allocator);
|
||||
}
|
||||
|
||||
/// Reports whether `line` is a record-start anchor such as
/// `GSLC , popup` or `XOM,popup`.
///
/// A line qualifies when it ends with the literal `popup` and the last
/// character before that suffix, ignoring spaces and tabs, is a comma.
/// Whitespace on either side of the comma is accepted. The symbol part
/// is not inspected here, so `,popup` still counts as an anchor;
/// `popupSymbol` is what rejects an empty symbol.
fn isPopupAnchor(line: []const u8) bool {
    const suffix = "popup";
    if (!std.mem.endsWith(u8, line, suffix)) return false;

    // Locate the last byte before the suffix that is neither a space
    // nor a tab; it has to be the separator comma. `null` means only
    // whitespace (or nothing at all) precedes `popup`, which is not an
    // anchor.
    const before = line[0 .. line.len - suffix.len];
    const last = std.mem.lastIndexOfNone(u8, before, " \t") orelse return false;
    return before[last] == ',';
}
|
||||
|
||||
/// Extracts the symbol token from a popup anchor line.
///
/// Accepts both the compact `SYMBOL,popup` shape and the spaced
/// `SYMBOL , popup` shape. Returns null when `line` does not end in
/// `popup`, when the text before `popup` (ignoring spaces and tabs)
/// does not end with a comma, or when nothing but whitespace precedes
/// that comma. The returned slice points into `line`; nothing is
/// allocated.
fn popupSymbol(line: []const u8) ?[]const u8 {
    const suffix = "popup";
    // Checking the suffix first also guarantees the subtraction below
    // cannot underflow on short lines.
    if (!std.mem.endsWith(u8, line, suffix)) return null;
    const before = line[0 .. line.len - suffix.len];

    // The last non-blank byte before `popup` must be the separator
    // comma; this is the same shape test `isPopupAnchor` applies.
    const comma_at = std.mem.lastIndexOfNone(u8, before, " \t") orelse return null;
    if (before[comma_at] != ',') return null;

    // Everything left of the comma, minus surrounding blanks, is the
    // symbol.
    const symbol = std.mem.trim(u8, before[0..comma_at], " \t");
    if (symbol.len == 0) return null;
    return symbol;
}
|
||||
|
|
@ -670,13 +761,14 @@ test "parsePaste: input with only header preamble (no records) yields zero" {
|
|||
try testing.expectEqual(@as(usize, 0), positions.len);
|
||||
}
|
||||
|
||||
test "parsePaste: stops at ETFs Total footer (doesn't parse past it)" {
|
||||
test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)" {
|
||||
const allocator = testing.allocator;
|
||||
// Two records, then a footer, then more positions that
|
||||
// SHOULD NOT be parsed (they belong to a phantom second
|
||||
// table that the user shouldn't have included). The
|
||||
// totals-line stop ensures we don't accidentally double-
|
||||
// count.
|
||||
// The WF holdings page splits positions into multiple
|
||||
// sections (Stocks, ETFs, Bonds, …), each terminated by its
|
||||
// own `<Section> Total` footer. The parser must keep going
|
||||
// past intermediate totals to capture records in subsequent
|
||||
// sections. (Real-world example: 6135.txt-style export with
|
||||
// 43 stocks then 13 ETFs separated by `Stocks Total`.)
|
||||
const data =
|
||||
"GSLC , popup\n" ++
|
||||
"GOLDMAN ACTIVEBETA ETF\n" ++
|
||||
|
|
@ -695,23 +787,36 @@ test "parsePaste: stops at ETFs Total footer (doesn't parse past it)" {
|
|||
"\t\n" ++
|
||||
"$1,203.17\n" ++
|
||||
"\t\n" ++
|
||||
"ETFs Total\n" ++
|
||||
"Stocks Total\n" ++
|
||||
"$127,655.40\n" ++
|
||||
"GHOST , popup\n" ++ // should NOT be parsed
|
||||
"SHOULD NOT APPEAR\n" ++
|
||||
"ETFs\n" ++
|
||||
"ETF table has been sorted ...\n" ++
|
||||
"\t\n" ++
|
||||
"DBP , popup\n" ++ // SHOULD be parsed (next section)
|
||||
"INVESCO PRECIOUS METALS ETF\n" ++
|
||||
"\tMultiple(1) \t\n" ++
|
||||
"1\n" ++
|
||||
"@ $1.00\n" ++
|
||||
"10\n" ++
|
||||
"@ $50.00\n" ++
|
||||
"\t\n" ++
|
||||
"$1.00\n" ++
|
||||
"+$0.00\n" ++
|
||||
"$55.00\n" ++
|
||||
"+$0.10\n" ++
|
||||
"\t\n" ++
|
||||
"$1.00\n";
|
||||
"$550.00\n" ++
|
||||
"+$1.00 (+0.18%)\n" ++
|
||||
"\t\n" ++
|
||||
"+$50.00\n" ++
|
||||
"+10.00%\n" ++
|
||||
"\t\n" ++
|
||||
"$5.00\n" ++
|
||||
"\t\n" ++
|
||||
"ETFs Total\n" ++
|
||||
"$550.00\n";
|
||||
|
||||
const positions = try parsePaste(allocator, data);
|
||||
defer allocator.free(positions);
|
||||
try testing.expectEqual(@as(usize, 1), positions.len);
|
||||
try testing.expectEqual(@as(usize, 2), positions.len);
|
||||
try testing.expectEqualStrings("GSLC", positions[0].symbol);
|
||||
try testing.expectEqualStrings("DBP", positions[1].symbol);
|
||||
}
|
||||
|
||||
test "parsePaste: money-market symbol gets is_cash=true" {
|
||||
|
|
@ -746,6 +851,156 @@ test "parsePaste: money-market symbol gets is_cash=true" {
|
|||
try testing.expect(positions[0].is_cash);
|
||||
}
|
||||
|
||||
test "parsePaste: accepts both `SYMBOL,popup` and `SYMBOL , popup` anchors" {
    // One paste that mixes the two anchor spellings: the compact
    // `SYMBOL,popup` form on the first record and the spaced
    // `SYMBOL , popup` form on the second. Both records must come
    // back, in paste order.
    const compact_record =
        "XOM,popup\n" ++
        "EXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n" ++
        "@ $129.66\n" ++
        "\t\n" ++
        "$154.92\n" ++
        "-$0.37\n" ++
        "\t\n" ++
        "$7,746.00\n" ++
        "-$18.50 (-0.24%)\n" ++
        "\t\n" ++
        "+$1,262.85\n" ++
        "+19.48%\n" ++
        "\t\n" ++
        "$206.00\n" ++
        "\t\n";
    const spaced_record =
        "GSLC , popup\n" ++
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n" ++
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n" ++
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n" ++
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n" ++
        "$1,203.17\n";

    const gpa = testing.allocator;
    const parsed = try parsePaste(gpa, compact_record ++ spaced_record);
    defer gpa.free(parsed);

    const expected_symbols = [_][]const u8{ "XOM", "GSLC" };
    try testing.expectEqual(@as(usize, expected_symbols.len), parsed.len);
    for (expected_symbols, parsed) |want, got| {
        try testing.expectEqualStrings(want, got.symbol);
    }
}
|
||||
|
||||
test "parsePaste: trailing cash section emits a cash position" {
    // A holdings record followed by the "Cash, Cash Alternatives and
    // Margin" table. The per-account balance in that table is expected
    // back as a second position: empty symbol, `is_cash` set, the
    // balance in `current_value`, and no quantity or cost basis.
    const holdings =
        "XOM,popup\n" ++
        "EXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n" ++
        "@ $129.66\n" ++
        "\t\n" ++
        "$154.92\n" ++
        "-$0.37\n" ++
        "\t\n" ++
        "$7,746.00\n" ++
        "-$18.50 (-0.24%)\n" ++
        "\t\n" ++
        "+$1,262.85\n" ++
        "+19.48%\n" ++
        "\t\n" ++
        "$206.00\n" ++
        "\t\n" ++
        "ETFs Total\n" ++
        "$7,746.00\n";
    const cash_table =
        "Cash, Cash Alternatives and Margin\n" ++
        "Cash alternatives and margin table has been sorted ...\n" ++
        "\t\n" ++
        "Sample Roth IRA *1234\n" ++
        "\t$14,216.88\n" ++
        "\t\n" ++
        "Cash Total\n" ++
        "\t\n" ++
        "$14,216.88 \n";

    const gpa = testing.allocator;
    const parsed = try parsePaste(gpa, holdings ++ cash_table);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 2), parsed.len);

    // First entry: the ordinary holding.
    const equity = parsed[0];
    try testing.expectEqualStrings("XOM", equity.symbol);
    try testing.expect(!equity.is_cash);

    // Second entry: the synthetic cash position.
    const cash = parsed[1];
    try testing.expectEqualStrings("", cash.symbol);
    try testing.expect(cash.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 14216.88), cash.current_value.?, 0.01);
    try testing.expect(cash.quantity == null);
    try testing.expect(cash.cost_basis == null);
}
|
||||
|
||||
test "parsePaste: cash section absent is a no-op" {
    // A paste holding only a positions record and no cash table must
    // yield exactly that one position, not flagged as cash.
    const positions_only =
        "GSLC , popup\n" ++
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n" ++
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n" ++
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n" ++
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n" ++
        "$1,203.17\n";

    const gpa = testing.allocator;
    const parsed = try parsePaste(gpa, positions_only);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 1), parsed.len);
    const only = parsed[0];
    try testing.expect(!only.is_cash);
}
|
||||
|
||||
test "isPopupAnchor: accepts both compact and spaced forms" {
    // Lines that must be recognised: every combination of whitespace
    // around the comma, plus a symbol containing an apostrophe.
    const anchors = [_][]const u8{
        "XOM,popup",
        "XOM ,popup",
        "XOM, popup",
        "XOM , popup",
        "BRK'B,popup",
    };
    for (anchors) |candidate| {
        try testing.expect(isPopupAnchor(candidate));
    }

    // Lines that must be rejected: no comma, the bare word, and the
    // empty string.
    const non_anchors = [_][]const u8{
        "XOM popup",
        "popup",
        "",
    };
    for (non_anchors) |candidate| {
        try testing.expect(!isPopupAnchor(candidate));
    }
}
|
||||
|
||||
test "popupSymbol: extracts symbol from compact form" {
    // Anchor line paired with the symbol it should yield.
    const Case = struct { line: []const u8, symbol: []const u8 };
    const cases = [_]Case{
        .{ .line = "XOM,popup", .symbol = "XOM" },
        .{ .line = "BRK'B,popup", .symbol = "BRK'B" },
        .{ .line = "XOM, popup", .symbol = "XOM" },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.symbol, popupSymbol(case.line).?);
    }

    // An anchor with nothing before the comma has no symbol.
    const empty_symbol = popupSymbol(",popup");
    try testing.expect(empty_symbol == null);
}
|
||||
|
||||
// ── Resolver tests ───────────────────────────────────────────
|
||||
|
||||
/// Test helper: build an `AccountMap` from compile-time entries.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue