zfin/src/brokerage/wells_fargo.zig

1211 lines
50 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Wells Fargo paste parser.
//!
//! Wells Fargo's brokerage portal doesn't offer a clean CSV export
//! for positions, so the user copies the rendered HTML table and
//! pastes the result into a file. The paste is a tab-separated
//! multi-line layout, one record per holding plus a (sometimes
//! present) totals block at the end.
//!
//! ## Format
//!
//! Header preamble (optional — present when the user's paste
//! includes the column headers, absent when they paste only the
//! rows). When present, it spans the first ~12 lines and starts
//! with `Symbol/Description`. The parser scans for the first
//! `<SYMBOL> , popup` line as the data anchor and ignores
//! everything before it.
//!
//! Each position record looks like:
//!
//! ```
//! GSLC , popup ← record anchor: <SYMBOL> , popup
//! GOLDMAN ACTIVEBETA ETF ← description
//! Multiple(3) or MM/DD/YYYY ← lot count or single-buy date
//! 906 ← shares (may have commas)
//! @ $129.97 ← average cost basis per share
//! <blank-tab>
//! $140.90 ← last price
//! +$0.31 ← day change ($)
//! <blank-tab>
//! $127,655.40 ← market value
//! +$280.86 (+0.22%) ← day change ($, %)
//! <blank-tab>
//! +$9,906.42 ← unrealized gain/loss ($)
//! +8.41% ← unrealized gain/loss (%)
//! <blank-tab>
//! $1,203.17 ← est. annual income
//! <blank-tab> ← record separator
//! ```
//!
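//! Footer (optional — sometimes a totals block appears, sometimes
//! the paste ends after the last record's est-annual-income).
//! Section totals lines ("Stocks Total", "ETFs Total", …) that sit
//! between records are skipped so that later sections are still
//! parsed; the position scan ends at the
//! `Cash, Cash Alternatives and Margin` header or at EOF. A totals
//! line (exactly "Total", or ending in " Total") reached in the
//! middle of a record marks that record as truncated and ends the
//! position scan.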
//!
//! ## Limitations
//!
//! 1. Format is layout-fragile — if WF changes the table structure,
//! this parser breaks. We re-anchor on `<SYMBOL> , popup` per
//! record, which gives some robustness against extra blank
//! lines or stray whitespace, but column reordering would
//! require updating the field offsets below.
//!
//! 2. WF pastes don't carry an account identifier; the import
//! command resolves the account separately (filename inference
//! + `--account` override + accounts.srf lookup).
//!
//! 3. Cost basis is computed from `shares × avg_cost`. WF doesn't
//! print "total cost basis" alongside; the multiplication is
//! a re-derivation from the per-share avg.
//!
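//! 4. No in-band cash tag on position rows. The format itself
//! doesn't tag cash / money-market funds distinctly, so `is_cash`
//! on a parsed position comes from the standard
//! `isMoneyMarketSymbol` lookup. The optional
//! `Cash, Cash Alternatives and Margin` table is parsed separately
//! into synthetic cash positions.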
const std = @import("std");
const builtin = @import("builtin");
const portfolio_mod = @import("../models/portfolio.zig");
const types = @import("types.zig");
const analysis = @import("../analytics/analysis.zig");
const BrokeragePosition = types.BrokeragePosition;
const parseDollarAmount = types.parseDollarAmount;
/// Institution name used for `accounts.srf` lookups
/// (`institution::wells_fargo`). Held as a constant so the parser
/// and the resolver don't drift on the spelling.
///
/// Within this file it is the value each account entry's
/// `institution` field is compared against (byte-exact, via
/// `std.mem.eql`) when filtering for Wells Fargo entries.
pub const institution = "wells_fargo";
/// Parse a Wells Fargo paste into BrokeragePosition slices.
///
/// The input is split on `\n` and every line is trimmed of spaces,
/// tabs and `\r`. Position records are located by their
/// `<SYMBOL> , popup` anchor line; for each anchor the parser reads,
/// in order: description, trade-date column (ignored), shares,
/// `@ $avg-cost`, last price (ignored), day change (ignored) and
/// market value. Blank lines between fields are tolerated.
///
/// - A record whose shares, avg-cost or market-value line does not
///   parse is skipped and scanning continues on the next line.
/// - A record cut short by EOF or by a totals line ends the position
///   scan.
/// - The position scan also ends at the
///   `Cash, Cash Alternatives and Margin` header. If that header is
///   present, each `<account name>` / `$AMOUNT` pair up to
///   `Cash Total` becomes a synthetic cash position (empty `symbol`,
///   description "Cash", `is_cash = true`).
///
/// `symbol` and `description` of parsed records are slices into
/// `data` (caller must keep `data` alive); the remaining string
/// fields are string literals. The returned slice itself is
/// heap-allocated against `allocator` and owned by the caller.
///
/// `account_number` and `account_name` are left as empty strings
/// — WF pastes don't carry account identity. The import command
/// fills these in from filename inference / accounts.srf lookup.
///
/// Errors: propagates allocation failures from the list appends and
/// from `toOwnedSlice`.
pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePosition {
    var positions = std.ArrayList(BrokeragePosition).empty;
    errdefer positions.deinit(allocator);

    // Stage every (trimmed) line up front so the per-record parser
    // can slide an index over the layout without juggling iterator
    // state.
    var staged: std.ArrayList([]const u8) = .empty;
    defer staged.deinit(allocator);
    var lines = std.mem.splitScalar(u8, data, '\n');
    while (lines.next()) |raw| {
        const trimmed = std.mem.trim(u8, raw, &.{ ' ', '\t', '\r' });
        try staged.append(allocator, trimmed);
    }

    // Locate each record by scanning for a popup anchor. WF emits a
    // `, popup` (or `,popup`) suffix as part of the symbol column's
    // hover affordance; it's a very stable record anchor.
    //
    // We do NOT stop at intermediate totals lines (`Stocks Total`,
    // `ETFs Total`) seen between records: the holdings page splits
    // positions into multiple sections, each terminated by its own
    // totals line, and later sections must still be captured. The
    // only structural boundary between positions and the (optional)
    // cash table is the cash header, detected explicitly below.
    const cash_header = "Cash, Cash Alternatives and Margin";
    var i: usize = 0;
    while (i < staged.items.len) : (i += 1) {
        const line = staged.items[i];
        if (std.mem.eql(u8, line, cash_header)) break;
        // `popupSymbol` returns null for non-anchor lines as well as
        // for anchors with an empty symbol part.
        const symbol = popupSymbol(line) orelse continue;

        // Walk forward collecting the rest of the fields.
        // `nextNonEmpty` skips blank lines, advances `cur` past the
        // line it returns, and yields null at EOF or on a totals
        // line — i.e. when the record is truncated.
        var cur = i + 1;

        // Description: first non-empty line after the anchor.
        const description = nextNonEmpty(staged.items, &cur) orelse break;

        // Trade-date column: either `Multiple(N)` or `MM/DD/YYYY`.
        // The value is unused; consuming it advances `cur` to the
        // shares line.
        _ = nextNonEmpty(staged.items, &cur) orelse break;

        // Shares: number with optional thousands commas.
        const shares_text = nextNonEmpty(staged.items, &cur) orelse break;
        const shares = parseSharesAmount(shares_text) orelse continue;

        // Avg cost: line starts with `@ ` followed by the amount.
        const cost_text = nextNonEmpty(staged.items, &cur) orelse break;
        if (!std.mem.startsWith(u8, cost_text, "@ ")) continue;
        const avg_cost = parseDollarAmount(cost_text[2..]) orelse continue;

        // Two lines of price detail we don't currently need.
        _ = nextNonEmpty(staged.items, &cur) orelse break; // last price
        _ = nextNonEmpty(staged.items, &cur) orelse break; // day change $

        // Market value: plain dollar amount.
        const mv_text = nextNonEmpty(staged.items, &cur) orelse break;
        const market_value = parseDollarAmount(mv_text) orelse continue;

        // The remaining lines of the record (day change, unrealized
        // G/L, est. annual income) are not needed. `cur` is the
        // index just past the market-value line; the loop's continue
        // expression adds 1, so park `i` on the market-value line
        // itself. (Assigning `cur` directly would skip one line
        // unexamined, hiding an anchor or the cash header that
        // immediately follows the market value.)
        i = cur - 1;

        try positions.append(allocator, .{
            .account_number = "",
            .account_name = "",
            .symbol = symbol,
            .description = description,
            .quantity = shares,
            .current_value = market_value,
            // WF prints only the per-share average; total cost basis
            // is re-derived from it.
            .cost_basis = shares * avg_cost,
            .is_cash = portfolio_mod.isMoneyMarketSymbol(symbol),
        });
    }

    // ── Cash section (optional) ─────────────────────────────
    //
    // The user may include the WF "Cash, Cash Alternatives and
    // Margin" table at the end of the paste. The shape is:
    //
    //   Cash, Cash Alternatives and Margin
    //   Cash alternatives and margin table has been sorted ...
    //   <blank>
    //   <Account name>        ← e.g. "Sample IRA *1234"
    //   $14,216.88            ← per-account cash balance
    //   <blank>
    //   Cash Total
    //   $14,216.88            ← grand total (skip)
    //
    // Each per-account balance becomes a synthetic cash position
    // with empty `symbol`. Multiple per-account rows are tolerated
    // (one cash position per row). Stops at "Cash Total".
    {
        // After the positions loop, `i` is at the cash header, at a
        // truncated record's anchor, or at the end of the staged
        // lines. Walk forward to the header if it exists.
        var j: usize = i;
        while (j < staged.items.len and !std.mem.eql(u8, staged.items[j], cash_header)) : (j += 1) {}

        if (j < staged.items.len) {
            j += 1; // past "Cash, Cash Alternatives and Margin"
            // Skip the explanatory subtitle line when present.
            if (j < staged.items.len and std.mem.startsWith(u8, staged.items[j], "Cash alternatives")) {
                j += 1;
            }

            // Each row is an account name (any non-blank line that
            // isn't "Cash Total") followed by a `$AMOUNT` line.
            while (j < staged.items.len) : (j += 1) {
                const row = staged.items[j];
                if (row.len == 0) continue;
                if (std.mem.eql(u8, row, "Cash Total")) break;

                // Look ahead past blanks for the dollar amount.
                var k: usize = j + 1;
                while (k < staged.items.len and staged.items[k].len == 0) : (k += 1) {}
                if (k >= staged.items.len) break;
                const amount_text = staged.items[k];

                // Only a line starting with `$` counts as a balance;
                // anything else means `row` wasn't an account row,
                // so move on to the next line.
                if (amount_text.len == 0 or amount_text[0] != '$') continue;
                const cash_amount = parseDollarAmount(amount_text) orelse continue;

                try positions.append(allocator, .{
                    .account_number = "",
                    .account_name = "",
                    .symbol = "",
                    .description = "Cash",
                    .quantity = null,
                    .current_value = cash_amount,
                    .cost_basis = null,
                    .is_cash = true,
                });
                j = k; // resume past the consumed amount line
            }
        }
    }

    return positions.toOwnedSlice(allocator);
}
/// Report whether `line` is a record-start anchor such as
/// `GSLC , popup` or `XOM,popup`.
///
/// A line qualifies when it ends with the literal `popup` and the
/// last character before that suffix — ignoring any spaces or tabs
/// in between — is a comma. A bare `popup`, or `something popup`
/// with no separating comma, is rejected.
fn isPopupAnchor(line: []const u8) bool {
    const suffix = "popup";
    if (!std.mem.endsWith(u8, line, suffix)) return false;

    // Walk left from the start of the suffix over spaces/tabs.
    var end = line.len - suffix.len;
    while (end > 0 and (line[end - 1] == ' ' or line[end - 1] == '\t')) end -= 1;

    // Whatever remains must finish with the separator comma.
    return end > 0 and line[end - 1] == ',';
}
/// Return the symbol token of a popup anchor line, or null when
/// `line` is not an anchor or its symbol part is empty.
/// Handles both the `SYMBOL,popup` and `SYMBOL , popup` shapes.
fn popupSymbol(line: []const u8) ?[]const u8 {
    if (!isPopupAnchor(line)) return null;

    // For an anchor line, only spaces/tabs and the `popup` suffix
    // follow the separator comma, so the last comma in the line is
    // that separator.
    const comma = std.mem.lastIndexOfScalar(u8, line, ',') orelse return null;
    const token = std.mem.trim(u8, line[0..comma], " \t");
    return if (token.len == 0) null else token;
}
/// Report whether `line` looks like a totals sentinel: either the
/// exact text `Total`, or any line ending in ` Total` (leading
/// space included), e.g. "ETFs Total", "Stocks Total",
/// "Bonds Total". The match is case-sensitive.
fn isTotalLine(line: []const u8) bool {
    return std.mem.eql(u8, line, "Total") or std.mem.endsWith(u8, line, " Total");
}
/// Consume lines from `lines` starting at `cur_idx.*` until a
/// non-blank one is found, and return it. `cur_idx.*` is left just
/// past the last line examined.
///
/// Returns null when no non-blank line remains, and also when the
/// first non-blank line is a totals sentinel (see `isTotalLine`);
/// in the latter case the sentinel line is still consumed.
fn nextNonEmpty(lines: []const []const u8, cur_idx: *usize) ?[]const u8 {
    var pos = cur_idx.*;
    // Publish the final position on every exit path.
    defer cur_idx.* = pos;

    while (pos < lines.len) {
        const candidate = lines[pos];
        pos += 1;
        if (candidate.len == 0) continue;
        return if (isTotalLine(candidate)) null else candidate;
    }
    return null;
}
/// Parse a shares value like "906" or "1,020" — integers with
/// optional thousands commas, no $ prefix. Returns null when the
/// text does not parse (which lets the parent loop skip the record
/// without aborting the whole paste).
///
/// NOTE(review): this is a plain delegate to `parseDollarAmount`,
/// so the set of accepted shapes is whatever that helper accepts;
/// per the comment below it also tolerates `$`, `+` and `-`, which
/// means this function is presumably more permissive than
/// "integers only" — confirm against `types.parseDollarAmount`.
fn parseSharesAmount(raw: []const u8) ?f64 {
// Reuse parseDollarAmount: it strips $/+/-/comma and parses
// the rest as a float. WF shares lines have no $ but the
// function is happy without one.
return parseDollarAmount(raw);
}
// ── Account resolution ───────────────────────────────────────
//
// Wells Fargo pastes carry no in-band account identifier (no
// header, no per-row column, no embedded account number — see
// the module doc-block). So after parsing we have to resolve the
// account name from outside the paste: `accounts.srf` plus any
// hints from the file path or an explicit `--account` override.
//
// This block is the WF-specific piece of import. Fidelity and
// Schwab don't need it because their exports stamp the account
// number per-row (Fidelity) or in the title line (Schwab).
/// Resolution result: borrowed slices into the matching
/// `accounts.srf` entry. Both fields live as long as the
/// `AccountMap` does.
pub const Resolved = struct {
// Copied from the entry's `account_number` field; `resolutionFor`
// refuses to build a `Resolved` when that field is absent.
account_number: []const u8,
// Copied from the entry's `account` field.
account_name: []const u8,
};
/// Pick the `accounts.srf` entry that a Wells Fargo paste belongs
/// to. Only entries whose `institution` equals `institution`
/// ("wells_fargo") are ever considered. Signals are tried in this
/// order:
///
/// 1. **Explicit `--account NAME`** (`explicit` non-null). The name
///    must equal an entry's `account` field exactly; otherwise the
///    call fails without trying the other steps.
/// 2. **Filename inference.** The basename of `source_path`, minus
///    its extension, is tested against every WF entry with
///    `filenameMatchesAccount`. The step succeeds only when exactly
///    one entry matches. A `source_path` of `-` (stdin) skips this
///    step.
/// 3. **Single-WF-entry fallback.** If there is exactly one WF
///    entry, it is used.
///
/// When nothing can be picked, a listing of the available WF
/// entries (or a hint to add one) is written to stderr. No stderr
/// output is produced in test builds.
///
/// Errors:
/// - `error.UnknownAccount`: `--account NAME` didn't match a WF
///   entry, or the chosen entry has no `account_number` (see
///   `resolutionFor`).
/// - `error.AmbiguousWellsFargoAccount`: zero or 2+ WF entries and
///   no other signal to disambiguate.
/// - Any error from writing the diagnostics to stderr.
pub fn resolveAccount(
    io: std.Io,
    account_map: analysis.AccountMap,
    source_path: []const u8,
    explicit: ?[]const u8,
) !Resolved {
    // Step 1: an explicit override either matches or fails outright.
    if (explicit) |wanted| {
        for (account_map.entries) |entry| {
            const inst = entry.institution orelse continue;
            if (std.mem.eql(u8, inst, institution) and std.mem.eql(u8, entry.account, wanted)) {
                return resolutionFor(io, entry);
            }
        }
        if (!builtin.is_test) {
            var stderr_buf: [4096]u8 = undefined;
            var sw = std.Io.File.stderr().writer(io, &stderr_buf);
            try sw.interface.print(
                "Error: --account '{s}' did not match any `institution::wells_fargo` entry in accounts.srf.\n",
                .{wanted},
            );
            try printEntries(&sw.interface, account_map);
            try sw.interface.flush();
        }
        return error.UnknownAccount;
    }

    // Filename stem for step 2: basename without its extension.
    // Stdin's `-` carries no useful name, so inference is disabled.
    const stem: ?[]const u8 = blk: {
        if (std.mem.eql(u8, source_path, "-")) break :blk null;
        const file_name = std.fs.path.basename(source_path);
        const dot = std.mem.lastIndexOfScalar(u8, file_name, '.') orelse break :blk file_name;
        break :blk file_name[0..dot];
    };

    // One pass over the WF entries gathers what steps 2 and 3 need:
    // how many entries the filename matches (and the first such
    // entry), and how many WF entries exist (and the last one seen).
    var name_hits: usize = 0;
    var name_hit: ?analysis.AccountTaxEntry = null;
    var wf_total: usize = 0;
    var last_wf: ?analysis.AccountTaxEntry = null;
    for (account_map.entries) |entry| {
        const inst = entry.institution orelse continue;
        if (!std.mem.eql(u8, inst, institution)) continue;
        wf_total += 1;
        last_wf = entry;
        if (stem) |base| {
            if (filenameMatchesAccount(base, entry.account, entry.account_number)) {
                name_hits += 1;
                if (name_hit == null) name_hit = entry;
            }
        }
    }

    // Step 2: the filename must single out exactly one entry; two or
    // more matches are treated the same as none.
    if (name_hits == 1) return resolutionFor(io, name_hit.?);

    // Step 3: a lone WF entry is unambiguous.
    if (wf_total == 1) return resolutionFor(io, last_wf.?);

    // Couldn't pick. Print enumerated guidance.
    if (!builtin.is_test) {
        var stderr_buf: [4096]u8 = undefined;
        var sw = std.Io.File.stderr().writer(io, &stderr_buf);
        if (wf_total == 0) {
            try sw.interface.print(
                "Error: no `institution::wells_fargo` entries found in accounts.srf.\n" ++
                    " Add one (e.g. `account::Sample IRA *1234,tax_type::roth,institution::wells_fargo,account_number::1234`)\n" ++
                    " and rerun the import.\n",
                .{},
            );
        } else {
            try sw.interface.print(
                "Error: {d} Wells Fargo accounts in accounts.srf; cannot pick one automatically.\n" ++
                    " Pass --account NAME to disambiguate. Candidates:\n",
                .{wf_total},
            );
            try printEntries(&sw.interface, account_map);
        }
        try sw.interface.flush();
    }
    return error.AmbiguousWellsFargoAccount;
}
/// Resolve the account once via `resolveAccount`, then stamp the
/// resolved `account_number` / `account_name` onto every element of
/// `positions` in place. Used by `import` after `parsePaste`.
///
/// The stamped slices borrow from `account_map`, which the caller
/// must keep alive for the lifetime of the positions. Any error
/// from `resolveAccount` is propagated and leaves `positions`
/// untouched.
pub fn applyAccountToPositions(
    io: std.Io,
    account_map: analysis.AccountMap,
    source_path: []const u8,
    explicit: ?[]const u8,
    positions: []BrokeragePosition,
) !void {
    const target = try resolveAccount(io, account_map, source_path, explicit);
    for (positions) |*position| {
        position.account_number = target.account_number;
        position.account_name = target.account_name;
    }
}
/// Turn an `accounts.srf` entry into a `Resolved`.
///
/// Fails with `error.UnknownAccount` when the entry has no
/// `account_number::` field, because the downstream
/// `findByInstitutionAccount` lookup keys on it. Outside test
/// builds a hint is written to stderr first; an error from that
/// write is propagated instead.
fn resolutionFor(io: std.Io, entry: analysis.AccountTaxEntry) !Resolved {
    if (entry.account_number) |number| {
        return .{ .account_number = number, .account_name = entry.account };
    }

    if (!builtin.is_test) {
        var stderr_buf: [512]u8 = undefined;
        var sw = std.Io.File.stderr().writer(io, &stderr_buf);
        try sw.interface.print(
            "Error: WF account '{s}' has no `account_number::` field in accounts.srf.\n" ++
                " Add one (the trailing digits after `*` work well, e.g. `account_number::1234`).\n",
            .{entry.account},
        );
        try sw.interface.flush();
    }
    return error.UnknownAccount;
}
/// Decide whether a source file's basename (extension already
/// removed) looks like it refers to the given account. Three
/// checks are tried in order; the first hit wins:
///
/// 1. **Trailing digits of the account name.** If `account_name`
///    ends in a run of ASCII digits ("Sample IRA *1234" → "1234"),
///    the filename matches when it contains that run anywhere.
/// 2. **`account_number` anchor.** If `account_number` is non-null
///    and non-empty, the filename matches when it contains that
///    value as an exact, case-sensitive substring. This lets a
///    file named after the number match an entry whose
///    human-readable name carries no digits.
/// 3. **Fuzzy fallback.** `alphaRunsContained(filename,
///    account_name)`: a case-insensitive, in-order match of the
///    account name's alphanumeric runs.
///
/// Examples:
/// filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234", null) → true
/// filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234", null) → true (digits match)
/// filenameMatchesAccount("portfolio_other", "Sample IRA *1234", null) → false
/// filenameMatchesAccount("1234.txt", "Sample Roth IRA", "1234") → true (account_number anchor)
fn filenameMatchesAccount(filename: []const u8, account_name: []const u8, account_number: ?[]const u8) bool {
    // Peel the trailing run of ASCII digits off the account name.
    var tail_start: usize = account_name.len;
    while (tail_start > 0) : (tail_start -= 1) {
        switch (account_name[tail_start - 1]) {
            '0'...'9' => {},
            else => break,
        }
    }
    const suffix = account_name[tail_start..];

    // Check 1: the digit suffix appears somewhere in the filename.
    if (suffix.len != 0 and std.mem.indexOf(u8, filename, suffix) != null) return true;

    // Check 2: the recorded account number appears in the filename.
    if (account_number) |number| {
        if (number.len != 0 and std.mem.indexOf(u8, filename, number) != null) return true;
    }

    // Check 3: fuzzy, case-insensitive overlap of alphanumeric runs.
    return alphaRunsContained(filename, account_name);
}
/// True when `account_name` contains at least one alphanumeric run
/// and every maximal ASCII-alphanumeric run in it appears
/// (case-insensitive, in order, non-overlapping) inside `filename`.
/// Used as the fuzzy fallback in `filenameMatchesAccount`.
///
/// Returns false when:
/// - `account_name` has no alphanumeric run at all (empty, or only
///   punctuation/whitespace) — such a name carries no signal and
///   must not match every filename;
/// - `filename` is longer than 256 bytes or a single run is longer
///   than 128 bytes (the fixed stack buffers used for lowercasing);
/// - some run is not found in `filename` after the previous match.
fn alphaRunsContained(filename: []const u8, account_name: []const u8) bool {
    var haystack_buf: [256]u8 = undefined;
    if (filename.len > haystack_buf.len) return false;
    const haystack = std.ascii.lowerString(&haystack_buf, filename);

    var matched_runs: usize = 0;
    var search_from: usize = 0;
    var pos: usize = 0;
    while (pos < account_name.len) {
        // Skip separators, then take the next maximal alphanumeric run.
        while (pos < account_name.len and !std.ascii.isAlphanumeric(account_name[pos])) : (pos += 1) {}
        const run_start = pos;
        while (pos < account_name.len and std.ascii.isAlphanumeric(account_name[pos])) : (pos += 1) {}
        if (pos == run_start) break; // only separators were left

        const run = account_name[run_start..pos];
        var run_buf: [128]u8 = undefined;
        if (run.len > run_buf.len) return false;
        const needle = std.ascii.lowerString(&run_buf, run);

        // Each run must occur after the end of the previous match,
        // which enforces ordering.
        const hit = std.mem.indexOfPos(u8, haystack, search_from, needle) orelse return false;
        search_from = hit + needle.len;
        matched_runs += 1;
    }

    // Require at least one run so that an empty or punctuation-only
    // account name does not vacuously match.
    return matched_runs > 0;
}
/// Write the `account` name of every entry whose `institution`
/// equals `institution` ("wells_fargo") to `w`, one per line with a
/// leading dash. Entries without an institution are skipped. Write
/// errors are propagated; the writer is not flushed here.
fn printEntries(w: *std.Io.Writer, account_map: analysis.AccountMap) !void {
    for (account_map.entries) |entry| {
        if (entry.institution) |inst| {
            if (std.mem.eql(u8, inst, institution)) {
                try w.print(" - {s}\n", .{entry.account});
            }
        }
    }
}
// ── Tests ────────────────────────────────────────────────────
const testing = std.testing;
test "isPopupAnchor: recognizes WF record anchors" {
    // Both documented anchor shapes must be accepted: the spaced
    // `SYMBOL , popup` form and the compact `SYMBOL,popup` form.
    const anchors = [_][]const u8{ "GSLC , popup", "VTV , popup", "XOM,popup" };
    for (anchors) |line| try testing.expect(isPopupAnchor(line));

    // Ordinary record lines, totals lines and empty lines are not
    // anchors; neither is `popup` without a separating comma.
    const non_anchors = [_][]const u8{
        "GOLDMAN ACTIVEBETA ETF",
        "ETFs Total",
        "",
        "popup",
        "something popup",
    };
    for (non_anchors) |line| try testing.expect(!isPopupAnchor(line));
}
test "popupSymbol: extracts symbol token before ', popup'" {
    // Spaced form.
    try testing.expectEqualStrings("GSLC", popupSymbol("GSLC , popup").?);
    try testing.expectEqualStrings("VO", popupSymbol("VO , popup").?);
    // Compact form (no whitespace around the comma).
    try testing.expectEqualStrings("XOM", popupSymbol("XOM,popup").?);
    // Empty symbol part → null.
    try testing.expect(popupSymbol(", popup") == null);
    // Wrong shape (no separator comma) → null.
    try testing.expect(popupSymbol("GSLC popup") == null);
}
test "isTotalLine: matches WF footer sentinels" {
    // Table of (input line, expected verdict).
    const cases = [_]struct { line: []const u8, is_total: bool }{
        .{ .line = "ETFs Total", .is_total = true },
        .{ .line = "Stocks Total", .is_total = true },
        .{ .line = "Total", .is_total = true },
        // No space before "total" and wrong case → not a sentinel.
        .{ .line = "Subtotal", .is_total = false },
        .{ .line = "GSLC , popup", .is_total = false },
    };
    for (cases) |case| {
        try testing.expect(isTotalLine(case.line) == case.is_total);
    }
}
test "parseSharesAmount: accepts integers with thousands commas" {
    // Plain and comma-grouped share counts.
    const cases = [_]struct { text: []const u8, want: f64 }{
        .{ .text = "906", .want = 906 },
        .{ .text = "1,020", .want = 1020 },
        .{ .text = "2,597", .want = 2597 },
    };
    for (cases) |case| {
        try testing.expectApproxEqAbs(case.want, parseSharesAmount(case.text).?, 0.01);
    }
}
// End-to-end check of `parsePaste` on a paste that has the column
// header preamble, three position records and a trailing totals
// block. Verifies symbol, description, quantity, derived cost
// basis and market value.
test "parsePaste: header preamble plus three records" {
const allocator = testing.allocator;
// Mirrors the wf.txt structure — header preamble, then a
// few records, then the totals footer. Tabs and blank
// lines are intentional; the trim+nextNonEmpty pipeline
// should handle them.
const data =
"Symbol/Description,click to sort \tTrade Date,click to sort \tShares\n" ++
"@ Cost\n" ++
",click to sort \tLast Price/\n" ++
"Change\n" ++
",click to sort \tMarket Value/\n" ++
"Today's Change\n" ++
"\tUnreal.\n" ++
"Gain/Loss\n" ++
",click to sort \tEstimated\n" ++
"Annual Income\n" ++
",click to sort\n" ++
"\t\n" ++
"GSLC , popup\n" ++
"GOLDMAN ACTIVEBETA ETF\n" ++
"\tMultiple(3) \t\n" ++
"906\n" ++
"@ $129.97\n" ++
"\t\n" ++
"$140.90\n" ++
"+$0.31\n" ++
"\t\n" ++
"$127,655.40\n" ++
"+$280.86 (+0.22%)\n" ++
"\t\n" ++
"+$9,906.42\n" ++
"+8.41%\n" ++
"\t\n" ++
"$1,203.17\n" ++
"\t\n" ++
"VO , popup\n" ++
"VANGUARD MID CAP ETF\n" ++
"\tMultiple(2) \t\n" ++
"1,020\n" ++
"@ $74.30\n" ++
"\t\n" ++
"$77.41\n" ++
"+$0.35\n" ++
"\t\n" ++
"$78,958.20\n" ++
"+$357.00 (+0.45%)\n" ++
"\t\n" ++
"+$3,174.66\n" ++
"+4.19%\n" ++
"\t\n" ++
"$1,104.66\n" ++
"\t\n" ++
"EEM , popup\n" ++
"ISHARES MSCI EMRG MK ETF\n" ++
"\t\n" ++
"02/24/2026\n" ++
"\t\n" ++
"875\n" ++
"@ $62.71\n" ++
"\t\n" ++
"$66.03\n" ++
"+$0.57\n" ++
"\t\n" ++
"$57,776.25\n" ++
"+$498.75 (+0.87%)\n" ++
"\t\n" ++
"+$2,906.67\n" ++
"+5.30%\n" ++
"\t\n" ++
"$1,063.12\n" ++
"\t\n" ++
"ETFs Total\n" ++
"\t\t\t\t\n" ++
"$264,389.85\n";
const positions = try parsePaste(allocator, data);
// Only the outer slice is heap-allocated; its string fields
// borrow from `data`.
defer allocator.free(positions);
// The header preamble and the totals footer contribute nothing.
try testing.expectEqual(@as(usize, 3), positions.len);
// GSLC: 906 shares × $129.97 avg = $117,752.82 cost basis;
// market value $127,655.40.
try testing.expectEqualStrings("GSLC", positions[0].symbol);
try testing.expectEqualStrings("GOLDMAN ACTIVEBETA ETF", positions[0].description);
try testing.expectApproxEqAbs(@as(f64, 906), positions[0].quantity.?, 0.01);
try testing.expectApproxEqAbs(@as(f64, 117_752.82), positions[0].cost_basis.?, 0.01);
try testing.expectApproxEqAbs(@as(f64, 127_655.40), positions[0].current_value.?, 0.01);
try testing.expect(!positions[0].is_cash);
// VO: 1,020 × $74.30 = $75,786 cost; market $78,958.20.
try testing.expectEqualStrings("VO", positions[1].symbol);
try testing.expectApproxEqAbs(@as(f64, 1020), positions[1].quantity.?, 0.01);
try testing.expectApproxEqAbs(@as(f64, 75_786.00), positions[1].cost_basis.?, 0.01);
try testing.expectApproxEqAbs(@as(f64, 78_958.20), positions[1].current_value.?, 0.01);
// EEM: single-date format (`02/24/2026` instead of `Multiple(N)`),
// so the parser handles both shapes by treating the trade-date
// column as a generic skip.
try testing.expectEqualStrings("EEM", positions[2].symbol);
try testing.expectApproxEqAbs(@as(f64, 875), positions[2].quantity.?, 0.01);
try testing.expectApproxEqAbs(@as(f64, 875.0 * 62.71), positions[2].cost_basis.?, 0.01);
try testing.expectApproxEqAbs(@as(f64, 57_776.25), positions[2].current_value.?, 0.01);
}
// A paste containing nothing but a single record: the first line
// is blank, there is no column-header preamble, and the input ends
// right after the record's last field.
test "parsePaste: no header preamble, no footer totals" {
// Mirrors wf2.txt — same record format, no preamble at
// top, no totals at bottom. Parser must reach EOF cleanly.
const allocator = testing.allocator;
const data =
"\n" ++
"GSLC , popup\n" ++
"GOLDMAN ACTIVEBETA ETF\n" ++
"\tMultiple(3) \t\n" ++
"906\n" ++
"@ $129.97\n" ++
"\t\n" ++
"$140.90\n" ++
"+$0.31\n" ++
"\t\n" ++
"$127,655.40\n" ++
"+$280.86 (+0.22%)\n" ++
"\t\n" ++
"+$9,906.42\n" ++
"+8.41%\n" ++
"\t\n" ++
"$1,203.17\n";
const positions = try parsePaste(allocator, data);
defer allocator.free(positions);
// Exactly the one record, with its symbol and share count intact.
try testing.expectEqual(@as(usize, 1), positions.len);
try testing.expectEqualStrings("GSLC", positions[0].symbol);
try testing.expectApproxEqAbs(@as(f64, 906), positions[0].quantity.?, 0.01);
}
test "parsePaste: empty input yields zero positions" {
    // An empty paste is not an error; it simply produces no rows.
    const gpa = testing.allocator;
    const parsed = try parsePaste(gpa, "");
    defer gpa.free(parsed);
    try testing.expectEqual(@as(usize, 0), parsed.len);
}
test "parsePaste: input with only header preamble (no records) yields zero" {
    // Column-header lines alone contain no popup anchor, so nothing
    // should be emitted.
    const gpa = testing.allocator;
    const header_only =
        "Symbol/Description,click to sort \tTrade Date,click to sort \tShares\n" ++
        "@ Cost\n" ++
        ",click to sort \tLast Price/\n";
    const parsed = try parsePaste(gpa, header_only);
    defer gpa.free(parsed);
    try testing.expectEqual(@as(usize, 0), parsed.len);
}
// Two sections separated by a `Stocks Total` line: the record
// after the intermediate totals line must still be returned.
test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)" {
const allocator = testing.allocator;
// The WF holdings page splits positions into multiple
// sections (Stocks, ETFs, Bonds, …), each terminated by its
// own `<Section> Total` footer. The parser must keep going
// past intermediate totals to capture records in subsequent
// sections. (Real-world example: a multi-section export with
// 43 stocks then 13 ETFs separated by `Stocks Total`.)
const data =
"GSLC , popup\n" ++
"GOLDMAN ACTIVEBETA ETF\n" ++
"\tMultiple(3) \t\n" ++
"906\n" ++
"@ $129.97\n" ++
"\t\n" ++
"$140.90\n" ++
"+$0.31\n" ++
"\t\n" ++
"$127,655.40\n" ++
"+$280.86 (+0.22%)\n" ++
"\t\n" ++
"+$9,906.42\n" ++
"+8.41%\n" ++
"\t\n" ++
"$1,203.17\n" ++
"\t\n" ++
"Stocks Total\n" ++
"$127,655.40\n" ++
"ETFs\n" ++
"ETF table has been sorted ...\n" ++
"\t\n" ++
"DBP , popup\n" ++ // SHOULD be parsed (next section)
"INVESCO PRECIOUS METALS ETF\n" ++
"\tMultiple(1) \t\n" ++
"10\n" ++
"@ $50.00\n" ++
"\t\n" ++
"$55.00\n" ++
"+$0.10\n" ++
"\t\n" ++
"$550.00\n" ++
"+$1.00 (+0.18%)\n" ++
"\t\n" ++
"+$50.00\n" ++
"+10.00%\n" ++
"\t\n" ++
"$5.00\n" ++
"\t\n" ++
"ETFs Total\n" ++
"$550.00\n";
const positions = try parsePaste(allocator, data);
defer allocator.free(positions);
// One record from each section, in paste order.
try testing.expectEqual(@as(usize, 2), positions.len);
try testing.expectEqualStrings("GSLC", positions[0].symbol);
try testing.expectEqualStrings("DBP", positions[1].symbol);
}
// A regular position row whose symbol is a money-market fund must
// come back flagged `is_cash`; `parsePaste` takes that flag from
// `portfolio_mod.isMoneyMarketSymbol(symbol)`.
test "parsePaste: money-market symbol gets is_cash=true" {
const allocator = testing.allocator;
// WMPXX is the Allspring (née Wells Fargo) money-market
// fund; it's in the canonical money-market list, so even
// without a `**` suffix or unit-price hint, the parser
// tags it as cash. Using a WF-house ticker here keeps the
// fixture credible — SWVXX would never show up on a Wells
// Fargo holdings page.
const data =
"WMPXX , popup\n" ++
"ALLSPRING MONEY MARKET FUND\n" ++
"\tMultiple(1) \t\n" ++
"5000\n" ++
"@ $1.00\n" ++
"\t\n" ++
"$1.00\n" ++
"$0.00\n" ++
"\t\n" ++
"$5,000.00\n" ++
"+$0.00 (0.00%)\n" ++
"\t\n" ++
"+$0.00\n" ++
"0.00%\n" ++
"\t\n" ++
"$200.00\n";
const positions = try parsePaste(allocator, data);
defer allocator.free(positions);
try testing.expectEqual(@as(usize, 1), positions.len);
try testing.expect(positions[0].is_cash);
}
// One paste mixing the compact (`XOM,popup`) and spaced
// (`GSLC , popup`) anchor forms; both records must be returned
// with the bare symbol extracted.
test "parsePaste: accepts both `SYMBOL,popup` and `SYMBOL , popup` anchors" {
// Wells Fargo emits two slightly different anchor shapes
// depending on what part of the holdings table the user
// copied — stocks tend to come out as `SYMBOL,popup` (no
// spaces) while ETFs come out as `SYMBOL , popup` (with
// spaces). Single-paste files routinely mix both forms, so
// the parser must accept either.
const allocator = testing.allocator;
const data =
"XOM,popup\n" ++ // no-space form (stock)
"EXXON MOBIL CORP\n" ++
"\tMultiple(4) \t\n" ++
"50\n" ++
"@ $129.66\n" ++
"\t\n" ++
"$154.92\n" ++
"-$0.37\n" ++
"\t\n" ++
"$7,746.00\n" ++
"-$18.50 (-0.24%)\n" ++
"\t\n" ++
"+$1,262.85\n" ++
"+19.48%\n" ++
"\t\n" ++
"$206.00\n" ++
"\t\n" ++
"GSLC , popup\n" ++ // with-space form (ETF)
"GOLDMAN ACTIVEBETA ETF\n" ++
"\tMultiple(3) \t\n" ++
"906\n" ++
"@ $129.97\n" ++
"\t\n" ++
"$140.90\n" ++
"+$0.31\n" ++
"\t\n" ++
"$127,655.40\n" ++
"+$280.86 (+0.22%)\n" ++
"\t\n" ++
"+$9,906.42\n" ++
"+8.41%\n" ++
"\t\n" ++
"$1,203.17\n";
const positions = try parsePaste(allocator, data);
defer allocator.free(positions);
// Both anchor shapes yield a record, in paste order.
try testing.expectEqual(@as(usize, 2), positions.len);
try testing.expectEqualStrings("XOM", positions[0].symbol);
try testing.expectEqualStrings("GSLC", positions[1].symbol);
}
test "parsePaste: trailing cash section emits a cash position" {
    // A WF paste may continue past the positions table into a
    // "Cash, Cash Alternatives and Margin" section holding the
    // account's cash balance. The parser turns that balance into
    // a synthetic cash position; the downstream resolver stamps
    // the account fields and `synthesizeLots` emits a
    // `security_type::cash` lot.
    const gpa = testing.allocator;

    const position_rows =
        "XOM,popup\n" ++
        "EXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n" ++
        "@ $129.66\n" ++
        "\t\n" ++
        "$154.92\n" ++
        "-$0.37\n" ++
        "\t\n" ++
        "$7,746.00\n" ++
        "-$18.50 (-0.24%)\n" ++
        "\t\n" ++
        "+$1,262.85\n" ++
        "+19.48%\n" ++
        "\t\n" ++
        "$206.00\n";
    const totals_rows =
        "\t\n" ++
        "ETFs Total\n" ++
        "$7,746.00\n";
    const cash_rows =
        "Cash, Cash Alternatives and Margin\n" ++
        "Cash alternatives and margin table has been sorted ...\n" ++
        "\t\n" ++
        "Sample Roth IRA *1234\n" ++
        "\t$14,216.88\n" ++
        "\t\n" ++
        "Cash Total\n" ++
        "\t\n" ++
        "$14,216.88 \n";
    const paste = position_rows ++ totals_rows ++ cash_rows;

    const parsed = try parsePaste(gpa, paste);
    defer gpa.free(parsed);

    const expected_count: usize = 2;
    try testing.expectEqual(expected_count, parsed.len);

    // First entry: the ordinary equity holding.
    const equity = parsed[0];
    try testing.expectEqualStrings("XOM", equity.symbol);
    try testing.expect(!equity.is_cash);

    // Second entry: the synthetic cash position. It has an empty
    // symbol and a value, but no quantity and no cost basis.
    const cash = parsed[1];
    try testing.expectEqualStrings("", cash.symbol);
    try testing.expect(cash.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 14216.88), cash.current_value.?, 0.01);
    try testing.expect(cash.quantity == null);
    try testing.expect(cash.cost_basis == null);
}
test "parsePaste: cash section absent is a no-op" {
    // Regression for the original 3522.txt-style paste shape: the
    // user copied only the positions table, so the result must
    // hold the positions and nothing else.
    const gpa = testing.allocator;

    const identity_rows =
        "GSLC , popup\n" ++
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n";
    const holding_rows =
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n";
    const price_rows =
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n";
    const value_rows =
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n";
    const gain_rows =
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n";
    const income_row = "$1,203.17\n";
    const paste = identity_rows ++ holding_rows ++ price_rows ++
        value_rows ++ gain_rows ++ income_row;

    const parsed = try parsePaste(gpa, paste);
    defer gpa.free(parsed);

    const expected_count: usize = 1;
    try testing.expectEqual(expected_count, parsed.len);
    try testing.expect(!parsed[0].is_cash);
}
test "isPopupAnchor: accepts both compact and spaced forms" {
    // Every spacing variant around the comma is a valid anchor,
    // including a symbol that contains an apostrophe.
    const accepted = [_][]const u8{
        "XOM,popup",
        "XOM ,popup",
        "XOM, popup",
        "XOM , popup",
        "BRK'B,popup",
    };
    for (accepted) |line| try testing.expect(isPopupAnchor(line));

    // Lines without the comma, or the empty line, are not anchors.
    const rejected = [_][]const u8{
        "XOM popup",
        "popup",
        "",
    };
    for (rejected) |line| try testing.expect(!isPopupAnchor(line));
}
test "popupSymbol: extracts symbol from compact form" {
    const Case = struct {
        line: []const u8,
        symbol: []const u8,
    };
    const cases = [_]Case{
        .{ .line = "XOM,popup", .symbol = "XOM" },
        .{ .line = "BRK'B,popup", .symbol = "BRK'B" },
        .{ .line = "XOM, popup", .symbol = "XOM" },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.symbol, popupSymbol(case.line).?);
    }

    // Nothing before the comma means there is no symbol to return.
    try testing.expect(popupSymbol(",popup") == null);
}
// ── Resolver tests ───────────────────────────────────────────
/// Test helper: build an `AccountMap` from compile-time entries.
/// Mirrors the helper in `commands/import.zig`'s test block;
/// duplicated here so resolver tests don't depend on import's
/// test-only infrastructure.
///
/// Every string in `entries` is duplicated with `allocator`, and
/// the returned map is handed that same allocator. Callers in
/// this file release the map with `deinit()`.
///
/// Returns an allocator error if any allocation fails. In that
/// case everything allocated so far is freed before returning,
/// so a failing allocator cannot leak.
fn testAccountMap(allocator: std.mem.Allocator, entries: []const analysis.AccountTaxEntry) !analysis.AccountMap {
    const owned = try allocator.alloc(analysis.AccountTaxEntry, entries.len);

    // Number of leading slots in `owned` that are fully populated.
    // Only those are released by the cleanup below.
    var filled: usize = 0;
    errdefer {
        for (owned[0..filled]) |entry| {
            allocator.free(entry.account);
            if (entry.institution) |s| allocator.free(s);
            if (entry.account_number) |s| allocator.free(s);
        }
        allocator.free(owned);
    }

    for (entries, 0..) |e, i| {
        // Strings of the entry being built are guarded separately
        // until the slot is committed; these guards go out of
        // scope at the end of each iteration.
        const account = try allocator.dupe(u8, e.account);
        errdefer allocator.free(account);

        const institution: ?[]const u8 = if (e.institution) |s| try allocator.dupe(u8, s) else null;
        errdefer if (institution) |s| allocator.free(s);

        const account_number: ?[]const u8 = if (e.account_number) |s| try allocator.dupe(u8, s) else null;

        owned[i] = .{
            .account = account,
            .tax_type = e.tax_type,
            .institution = institution,
            .account_number = account_number,
        };
        filled = i + 1;
    }
    return .{ .entries = owned, .allocator = allocator };
}
test "filenameMatchesAccount: trailing-digit anchor wins" {
    // WF account suffixes are unique within a household, which
    // makes a shared digit run the strongest, unambiguous signal.
    const account = "Sample IRA *1234";

    const matching = [_][]const u8{
        "Sample_IRA_1234",
        "1234.txt",
        "smpl-ira-1234",
    };
    for (matching) |filename| {
        try testing.expect(filenameMatchesAccount(filename, account, null));
    }

    // A different digit suffix, or a filename with no digits,
    // must not match.
    const non_matching = [_][]const u8{
        "Sample_IRA_5678",
        "portfolio_other",
    };
    for (non_matching) |filename| {
        try testing.expect(!filenameMatchesAccount(filename, account, null));
    }
}
test "filenameMatchesAccount: account_number anchor when name lacks digits" {
    // The digits live only in `account_number::` and not in the
    // human-readable account name, so the number alone has to
    // anchor the filename match.
    const account = "Sample Roth IRA";

    const Case = struct {
        filename: []const u8,
        account_number: ?[]const u8,
        matches: bool,
    };
    const cases = [_]Case{
        .{ .filename = "1234.txt", .account_number = "1234", .matches = true },
        .{ .filename = "smpl_1234", .account_number = "1234", .matches = true },
        // Wrong digits: no match.
        .{ .filename = "9999.txt", .account_number = "1234", .matches = false },
        // No account_number and no digits in the name: no match
        // (alphaRunsContained can't help a digit-only filename).
        .{ .filename = "1234.txt", .account_number = null, .matches = false },
    };
    for (cases) |case| {
        const matched = filenameMatchesAccount(case.filename, account, case.account_number);
        try testing.expect(matched == case.matches);
    }
}
test "filenameMatchesAccount: name digits take precedence over account_number" {
    // With both signals present the semantics are OR: the name's
    // digits are checked first, but a filename that carries only
    // the account_number digits must match as well.
    const account = "Sample *1234";
    const account_number = "9999";

    const matching = [_][]const u8{ "Sample_1234", "Sample_9999" };
    for (matching) |filename| {
        try testing.expect(filenameMatchesAccount(filename, account, account_number));
    }

    // Digits that belong to neither signal are rejected.
    try testing.expect(!filenameMatchesAccount("Sample_5555", account, account_number));
}
test "filenameMatchesAccount: alpha-only fallback when account has no digit suffix" {
    // With no trailing digits to anchor on, matching falls
    // through to the alpha-runs-contained check.
    const account = "Emils Brokerage";
    try testing.expect(filenameMatchesAccount("emils_brokerage", account, null));

    const non_matching = [_][]const u8{
        // Tokens out of order: every account-name run has to
        // appear in the filename in the same order.
        "Brokerage_Emils",
        // Partial overlap: every run has to be present.
        "emils_only",
    };
    for (non_matching) |filename| {
        try testing.expect(!filenameMatchesAccount(filename, account, null));
    }
}
test "filenameMatchesAccount: case-insensitive fallback" {
    // Letter case in the filename must not affect the match.
    const mixed_case_filenames = [_][]const u8{
        "EMILS_brokerage",
        "emils_BROKERAGE",
    };
    for (mixed_case_filenames) |filename| {
        try testing.expect(filenameMatchesAccount(filename, "Emils Brokerage", null));
    }
}
test "alphaRunsContained: every alphanumeric run from account appears in order" {
    const account = "Emils Brokerage";

    const contained = [_][]const u8{
        "emils_brokerage",
        "--emils-brokerage--",
    };
    for (contained) |filename| {
        try testing.expect(alphaRunsContained(filename, account));
    }

    const not_contained = [_][]const u8{
        "brokerage_emils", // order matters
        "emils_only", // missing run
    };
    for (not_contained) |filename| {
        try testing.expect(!alphaRunsContained(filename, account));
    }

    // An empty account name has no runs, so the check holds trivially.
    try testing.expect(alphaRunsContained("anything", ""));
}
test "resolveAccount: explicit override matches a WF entry" {
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA *1234",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = "1234",
        },
        .{
            .account = "Sample Brokerage *5678",
            .tax_type = .taxable,
            .institution = "wells_fargo",
            .account_number = "5678",
        },
    });
    defer accounts.deinit();

    // The override names the first entry exactly.
    const override_name = "Sample IRA *1234";
    const resolved = try resolveAccount(testing.io, accounts, "anything.txt", override_name);
    try testing.expectEqualStrings("1234", resolved.account_number);
    try testing.expectEqualStrings(override_name, resolved.account_name);
}
test "resolveAccount: explicit override that doesn't match → UnknownAccount" {
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA *1234",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = "1234",
        },
    });
    defer accounts.deinit();

    // The override names an account that is not in the map.
    const outcome = resolveAccount(testing.io, accounts, "anything.txt", "Wrong Account");
    try testing.expectError(error.UnknownAccount, outcome);
}
test "resolveAccount: filename inference picks the right entry from multiple WF accounts" {
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA *1234",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = "1234",
        },
        .{
            .account = "Sample Brokerage *5678",
            .tax_type = .taxable,
            .institution = "wells_fargo",
            .account_number = "5678",
        },
    });
    defer accounts.deinit();

    // No override is given; the path is the only input that
    // distinguishes the two WF entries.
    const paste_path = "/path/to/Sample_IRA_1234.txt";
    const resolved = try resolveAccount(testing.io, accounts, paste_path, null);
    try testing.expectEqualStrings("1234", resolved.account_number);
}
test "resolveAccount: single-WF-entry fallback when filename has no signal" {
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA *1234",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = "1234",
        },
        // An entry for another institution must not interfere.
        .{
            .account = "Sample Fid",
            .tax_type = .taxable,
            .institution = "fidelity",
            .account_number = "Z123",
        },
    });
    defer accounts.deinit();

    const paste_path = "unrelated_filename.txt";
    const resolved = try resolveAccount(testing.io, accounts, paste_path, null);
    try testing.expectEqualStrings("1234", resolved.account_number);
}
test "resolveAccount: ambiguous when 2+ WF entries and no signal" {
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA *1234",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = "1234",
        },
        .{
            .account = "Sample Brokerage *5678",
            .tax_type = .taxable,
            .institution = "wells_fargo",
            .account_number = "5678",
        },
    });
    defer accounts.deinit();

    // Two WF candidates, no override, and a filename matching neither.
    const outcome = resolveAccount(testing.io, accounts, "unrelated_filename.txt", null);
    try testing.expectError(error.AmbiguousWellsFargoAccount, outcome);
}
test "resolveAccount: zero WF entries → AmbiguousWellsFargoAccount with helpful message" {
    const gpa = testing.allocator;
    // The only entry in the map belongs to another institution.
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample Fid",
            .tax_type = .taxable,
            .institution = "fidelity",
            .account_number = "Z123",
        },
    });
    defer accounts.deinit();

    const outcome = resolveAccount(testing.io, accounts, "anything.txt", null);
    try testing.expectError(error.AmbiguousWellsFargoAccount, outcome);
}
test "resolveAccount: WF entry without account_number → UnknownAccount" {
    // WF entries in accounts.srf MUST carry an `account_number::`
    // field because the downstream `findByInstitutionAccount`
    // lookup keys on it. This test pins the guard: without it the
    // import would silently produce "unmapped account" errors at
    // synthesizeLots time, with no useful hint about the cause.
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = null,
        },
    });
    defer accounts.deinit();

    const outcome = resolveAccount(testing.io, accounts, "Sample_IRA.txt", null);
    try testing.expectError(error.UnknownAccount, outcome);
}
test "applyAccountToPositions: patches every position's account fields" {
    const gpa = testing.allocator;
    var accounts = try testAccountMap(gpa, &.{
        .{
            .account = "Sample IRA *1234",
            .tax_type = .roth,
            .institution = "wells_fargo",
            .account_number = "1234",
        },
    });
    defer accounts.deinit();

    // Both holdings start out with blank account fields.
    var holdings = [_]BrokeragePosition{
        .{
            .account_number = "",
            .account_name = "",
            .symbol = "VTI",
            .description = "",
            .quantity = 10,
            .current_value = 1000,
            .cost_basis = 800,
            .is_cash = false,
        },
        .{
            .account_number = "",
            .account_name = "",
            .symbol = "AAPL",
            .description = "",
            .quantity = 5,
            .current_value = 1000,
            .cost_basis = 750,
            .is_cash = false,
        },
    };

    try applyAccountToPositions(testing.io, accounts, "Sample_IRA_1234.txt", null, &holdings);

    // Every holding must now carry the resolved account.
    for (holdings) |holding| {
        try testing.expectEqualStrings("1234", holding.account_number);
        try testing.expectEqualStrings("Sample IRA *1234", holding.account_name);
    }
}