1211 lines
50 KiB
Zig
1211 lines
50 KiB
Zig
//! Wells Fargo paste parser.
|
||
//!
|
||
//! Wells Fargo's brokerage portal doesn't offer a clean CSV export
|
||
//! for positions, so the user copies the rendered HTML table and
|
||
//! pastes the result into a file. The paste is a tab-separated
|
||
//! multi-line layout, one record per holding plus a (sometimes
|
||
//! present) totals block at the end.
|
||
//!
|
||
//! ## Format
|
||
//!
|
||
//! Header preamble (optional — present when the user's paste
|
||
//! includes the column headers, absent when they paste only the
|
||
//! rows). When present, it spans the first ~12 lines and starts
|
||
//! with `Symbol/Description`. The parser scans for the first
|
||
//! `<SYMBOL> , popup` line as the data anchor and ignores
|
||
//! everything before it.
|
||
//!
|
||
//! Each position record looks like:
|
||
//!
|
||
//! ```
|
||
//! GSLC , popup ← record anchor: <SYMBOL> , popup
|
||
//! GOLDMAN ACTIVEBETA ETF ← description
|
||
//! Multiple(3) or MM/DD/YYYY ← lot count or single-buy date
|
||
//! 906 ← shares (may have commas)
|
||
//! @ $129.97 ← average cost basis per share
|
||
//! <blank-tab>
|
||
//! $140.90 ← last price
|
||
//! +$0.31 ← day change ($)
|
||
//! <blank-tab>
|
||
//! $127,655.40 ← market value
|
||
//! +$280.86 (+0.22%) ← day change ($, %)
|
||
//! <blank-tab>
|
||
//! +$9,906.42 ← unrealized gain/loss ($)
|
||
//! +8.41% ← unrealized gain/loss (%)
|
||
//! <blank-tab>
|
||
//! $1,203.17 ← est. annual income
|
||
//! <blank-tab> ← record separator
|
||
//! ```
|
||
//!
|
||
//! Footer (optional — sometimes a totals block appears, sometimes
|
||
//! the paste ends after the last record's est-annual-income).
|
||
//! Section totals lines that sit between records ("Stocks Total",
//! "ETFs Total", etc.) do not end the parse; the positions scan stops
//! at the `Cash, Cash Alternatives and Margin` header or on EOF.
|
||
//!
|
||
//! ## Limitations
|
||
//!
|
||
//! 1. Format is layout-fragile — if WF changes the table structure,
|
||
//! this parser breaks. We re-anchor on `<SYMBOL> , popup` per
|
||
//! record, which gives some robustness against extra blank
|
||
//! lines or stray whitespace, but column reordering would
|
||
//! require updating the field offsets below.
|
||
//!
|
||
//! 2. WF pastes don't carry an account identifier; the import
|
||
//! command resolves the account separately (filename inference
|
||
//! + `--account` override + accounts.srf lookup).
|
||
//!
|
||
//! 3. Cost basis is computed from `shares × avg_cost`. WF doesn't
|
||
//! print "total cost basis" alongside; the multiplication is
|
||
//! a re-derivation from the per-share avg.
|
||
//!
|
||
//! 4. No cash classification. Wells Fargo positions of cash /
|
||
//! money-market funds may need the standard `isMoneyMarketSymbol`
|
||
//! fallback; the format itself doesn't tag cash distinctly.
|
||
|
||
const std = @import("std");
|
||
const builtin = @import("builtin");
|
||
const portfolio_mod = @import("../models/portfolio.zig");
|
||
const types = @import("types.zig");
|
||
const analysis = @import("../analytics/analysis.zig");
|
||
|
||
const BrokeragePosition = types.BrokeragePosition;
|
||
const parseDollarAmount = types.parseDollarAmount;
|
||
|
||
/// Institution key that identifies Wells Fargo entries in `accounts.srf`
|
||
/// (`institution::wells_fargo`). The account-resolution code in this file
|
||
/// compares each entry's `institution` field against this one constant.
|
||
pub const institution = "wells_fargo";
|
||
|
||
/// Parse a Wells Fargo paste into a slice of `BrokeragePosition`.
///
/// `symbol` and `description` of every security position are slices
/// into `data`, so the caller must keep `data` alive for as long as the
/// positions are used. The returned slice itself is allocated with
/// `allocator` and is owned by the caller.
///
/// `account_number` and `account_name` are left as empty strings: the
/// paste carries no account identity, and `applyAccountToPositions`
/// fills them in afterwards.
///
/// Records that are malformed or cut short are skipped; they never
/// abort the scan for later records. The only error this function
/// propagates is an allocation failure.
pub fn parsePaste(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePosition {
    var positions: std.ArrayList(BrokeragePosition) = .empty;
    errdefer positions.deinit(allocator);

    // Stage every line of the paste, trimmed of spaces, tabs and CRs,
    // so the record walk below can index forward freely instead of
    // juggling iterator state. Blank-tab separator lines become "".
    var staged: std.ArrayList([]const u8) = .empty;
    defer staged.deinit(allocator);
    var lines = std.mem.splitScalar(u8, data, '\n');
    while (lines.next()) |raw| {
        try staged.append(allocator, std.mem.trim(u8, raw, &.{ ' ', '\t', '\r' }));
    }

    // ── Positions ───────────────────────────────────────────
    //
    // Each record starts at a `<SYMBOL> , popup` anchor line. Section
    // totals lines (`Stocks Total`, `ETFs Total`, …) between records
    // are NOT a stop condition: later sections follow them and must be
    // captured too. The only structural boundary is the cash-table
    // header, detected explicitly.
    const cash_header = "Cash, Cash Alternatives and Margin";
    var i: usize = 0;
    while (i < staged.items.len) : (i += 1) {
        const line = staged.items[i];
        if (std.mem.eql(u8, line, cash_header)) break;

        // `popupSymbol` returns null for non-anchor lines as well as
        // for anchors with an empty symbol part.
        const symbol = popupSymbol(line) orelse continue;

        // Walk forward over the record's fields. `nextNonEmpty` steps
        // past blank lines and returns null at EOF or on a totals
        // line. In either case this record is incomplete, so skip it
        // and keep scanning: a record interrupted by a section total
        // must not hide the sections that follow.
        var cur = i + 1;

        const description = nextNonEmpty(staged.items, &cur) orelse continue;

        // Trade-date column (`Multiple(N)` or `MM/DD/YYYY`): unused,
        // consumed only to reach the shares line.
        _ = nextNonEmpty(staged.items, &cur) orelse continue;

        const shares_text = nextNonEmpty(staged.items, &cur) orelse continue;
        const shares = parseSharesAmount(shares_text) orelse continue;

        // Average cost per share: the line starts with `@ `.
        const cost_text = nextNonEmpty(staged.items, &cur) orelse continue;
        if (!std.mem.startsWith(u8, cost_text, "@ ")) continue;
        const avg_cost = parseDollarAmount(cost_text[2..]) orelse continue;

        // Two lines of price detail that are not needed: last price
        // and day change ($).
        _ = nextNonEmpty(staged.items, &cur) orelse continue;
        _ = nextNonEmpty(staged.items, &cur) orelse continue;

        const mv_text = nextNonEmpty(staged.items, &cur) orelse continue;
        const market_value = parseDollarAmount(mv_text) orelse continue;

        try positions.append(allocator, .{
            .account_number = "",
            .account_name = "",
            .symbol = symbol,
            .description = description,
            .quantity = shares,
            .current_value = market_value,
            .cost_basis = shares * avg_cost,
            .is_cash = portfolio_mod.isMoneyMarketSymbol(symbol),
        });

        // `cur` is the first line not yet consumed. The loop's
        // continue expression adds one, so step back by one to resume
        // exactly at `cur`; otherwise that line would never be checked
        // for an anchor or the cash header. The remaining lines of
        // this record are simply non-anchor lines the scan steps over.
        i = cur - 1;
    }

    // ── Cash section (optional) ─────────────────────────────
    //
    // Shape of the table when the user includes it:
    //
    //   Cash, Cash Alternatives and Margin
    //   Cash alternatives and margin table has been sorted ...
    //   <blank>
    //   <Account name>
    //   $14,216.88          ← per-account cash balance
    //   <blank>
    //   Cash Total
    //   $14,216.88          ← grand total (not captured)
    //
    // Each account-name / `$amount` pair becomes a cash position with
    // an empty `symbol`, null quantity and null cost basis.
    var j: usize = i;
    while (j < staged.items.len and !std.mem.eql(u8, staged.items[j], cash_header)) : (j += 1) {}
    if (j < staged.items.len) {
        j += 1; // past the header itself
        if (j < staged.items.len and std.mem.startsWith(u8, staged.items[j], "Cash alternatives")) {
            j += 1; // past the explanatory subtitle line
        }

        while (j < staged.items.len) : (j += 1) {
            const row = staged.items[j];
            if (row.len == 0) continue;
            if (std.mem.eql(u8, row, "Cash Total")) break;

            // `row` is taken as an account name; the balance is the
            // next non-blank line.
            var k: usize = j + 1;
            while (k < staged.items.len and staged.items[k].len == 0) : (k += 1) {}
            if (k >= staged.items.len) break;

            // Only a `$`-prefixed line counts as a balance. Anything
            // else means `row` was not an account name; move on one
            // line and try again.
            const amount_text = staged.items[k];
            if (!std.mem.startsWith(u8, amount_text, "$")) continue;
            const cash_amount = parseDollarAmount(amount_text) orelse continue;

            try positions.append(allocator, .{
                .account_number = "",
                .account_name = "",
                .symbol = "",
                .description = "Cash",
                .quantity = null,
                .current_value = cash_amount,
                .cost_basis = null,
                .is_cash = true,
            });
            j = k; // the continue expression then steps past the amount line
        }
    }

    return positions.toOwnedSlice(allocator);
}
|
||
|
||
/// Reports whether `line` is a record-start anchor such as
/// `GSLC , popup` or `XOM,popup`.
///
/// The line must end with the literal `popup`, and the last character
/// before it, ignoring spaces and tabs, must be a comma. A bare
/// `popup`, or `something popup` with no comma separator, is rejected.
fn isPopupAnchor(line: []const u8) bool {
    const marker = "popup";
    if (!std.mem.endsWith(u8, line, marker)) return false;

    // Walk left from the start of `popup` over any spaces/tabs.
    var end = line.len - marker.len;
    while (end > 0 and (line[end - 1] == ' ' or line[end - 1] == '\t')) end -= 1;

    return end > 0 and line[end - 1] == ',';
}
|
||
|
||
/// Returns the symbol token of a popup anchor line, or null when
/// `line` is not an anchor or its symbol part is empty.
///
/// Handles both the `SYMBOL,popup` and `SYMBOL , popup` shapes; the
/// returned slice points into `line`.
fn popupSymbol(line: []const u8) ?[]const u8 {
    if (!isPopupAnchor(line)) return null;

    // For an anchor, only spaces/tabs and `popup` follow the separator
    // comma, so the separator is the last comma on the line.
    const comma_at = std.mem.lastIndexOfScalar(u8, line, ',') orelse return null;

    const token = std.mem.trim(u8, line[0..comma_at], &.{ ' ', '\t' });
    return if (token.len == 0) null else token;
}
|
||
|
||
/// Reports whether `line` is a totals sentinel: either exactly
/// `Total`, or any line ending in ` Total` (`ETFs Total`,
/// `Stocks Total`, `Bonds Total`, …). The match is case-sensitive.
fn isTotalLine(line: []const u8) bool {
    const word = "Total";
    if (!std.mem.endsWith(u8, line, word)) return false;

    // Either the whole line is `Total`, or a space separates it from
    // whatever comes before.
    const lead = line.len - word.len;
    return lead == 0 or line[lead - 1] == ' ';
}
|
||
|
||
/// Consumes lines from `lines` starting at `cur_idx.*` until a
/// non-blank line is found, and returns that line.
///
/// `cur_idx.*` is left pointing just past the last line examined.
/// Returns null when the input runs out, and also when the first
/// non-blank line is a totals sentinel (see `isTotalLine`); the
/// sentinel line is consumed in that case.
fn nextNonEmpty(lines: []const []const u8, cur_idx: *usize) ?[]const u8 {
    while (cur_idx.* < lines.len) {
        const candidate = lines[cur_idx.*];
        cur_idx.* += 1;
        if (candidate.len != 0) {
            return if (isTotalLine(candidate)) null else candidate;
        }
    }
    return null;
}
|
||
|
||
/// Parses a shares value such as "906" or "1,020".
///
/// This is a thin wrapper over `parseDollarAmount`, so the accepted
/// shapes are exactly whatever that helper accepts (NOTE(review): it
/// is described as stripping `$`, sign and commas — confirm in
/// types.zig). A null result lets the caller skip the record without
/// aborting the whole paste.
fn parseSharesAmount(raw: []const u8) ?f64 {
    const shares = parseDollarAmount(raw) orelse return null;
    return shares;
}
|
||
|
||
// ── Account resolution ───────────────────────────────────────
|
||
//
|
||
// Wells Fargo pastes carry no in-band account identifier (no
|
||
// header, no per-row column, no embedded account number — see
|
||
// the module doc-block). So after parsing we have to resolve the
|
||
// account name from outside the paste: `accounts.srf` plus any
|
||
// hints from the file path or an explicit `--account` override.
|
||
//
|
||
// This block is the WF-specific piece of import. Fidelity and
|
||
// Schwab don't need it because their exports stamp the account
|
||
// number per-row (Fidelity) or in the title line (Schwab).
|
||
|
||
/// Outcome of `resolveAccount`: the account number and account name of
|
||
/// the chosen `accounts.srf` entry. Both slices are borrowed from that
|
||
/// entry, so they stay valid only while the `AccountMap` is alive.
|
||
pub const Resolved = struct {
|
||
account_number: []const u8, // the entry's `account_number` (resolution fails if absent)
|
||
account_name: []const u8, // the entry's `account` field
|
||
};
|
||
|
||
/// Picks the `accounts.srf` entry a Wells Fargo paste belongs to.
///
/// Only entries whose `institution` equals `institution` are
/// considered. Signals are tried in this order:
///
///   1. **Explicit `--account NAME`** (`explicit` non-null): the name
///      must equal an entry's `account` exactly, otherwise the call
///      fails with `error.UnknownAccount`; no inference is attempted.
///   2. **Filename inference**: the basename of `source_path` with its
///      extension removed is tested against every WF entry via
///      `filenameMatchesAccount`. Used only when exactly one entry
///      matches. Skipped when `source_path` is `-` (stdin).
///   3. **Single-entry fallback**: when there is exactly one WF entry,
///      use it.
///
/// Outside of test builds a diagnostic listing the candidate entries
/// is written to stderr before an error is returned.
///
/// Errors:
///   - `error.UnknownAccount`: the explicit name matched nothing, or
///     the chosen entry has no `account_number` (see `resolutionFor`).
///   - `error.AmbiguousWellsFargoAccount`: zero or several WF entries
///     and nothing to disambiguate them.
///   - any error raised while writing the diagnostic.
pub fn resolveAccount(
    io: std.Io,
    account_map: analysis.AccountMap,
    source_path: []const u8,
    explicit: ?[]const u8,
) !Resolved {
    // 1. Explicit override: exact name match or bust.
    if (explicit) |wanted| {
        for (account_map.entries) |entry| {
            const inst = entry.institution orelse continue;
            if (std.mem.eql(u8, inst, institution) and std.mem.eql(u8, entry.account, wanted)) {
                return resolutionFor(io, entry);
            }
        }
        if (!builtin.is_test) {
            var stderr_buf: [4096]u8 = undefined;
            var sw = std.Io.File.stderr().writer(io, &stderr_buf);
            try sw.interface.print(
                "Error: --account '{s}' did not match any `institution::wells_fargo` entry in accounts.srf.\n",
                .{wanted},
            );
            try printEntries(&sw.interface, account_map);
            try sw.interface.flush();
        }
        return error.UnknownAccount;
    }

    // Filename stem used for inference: everything after the last path
    // separator and before the last `.`. Stdin (`-`) has no stem.
    const stem: ?[]const u8 = if (std.mem.eql(u8, source_path, "-")) null else stem_blk: {
        const file_name = std.fs.path.basename(source_path);
        const ext_at = std.mem.lastIndexOfScalar(u8, file_name, '.') orelse file_name.len;
        break :stem_blk file_name[0..ext_at];
    };

    // One pass over the WF entries gathers what steps 2 and 3 need:
    // how many entries the filename matched (and the first of them),
    // and how many WF entries exist at all (and the last of them).
    var name_hits: usize = 0;
    var name_match: ?analysis.AccountTaxEntry = null;
    var wf_count: usize = 0;
    var last_wf: ?analysis.AccountTaxEntry = null;
    for (account_map.entries) |entry| {
        const inst = entry.institution orelse continue;
        if (!std.mem.eql(u8, inst, institution)) continue;

        wf_count += 1;
        last_wf = entry;

        if (stem) |base| {
            if (filenameMatchesAccount(base, entry.account, entry.account_number)) {
                name_hits += 1;
                if (name_match == null) name_match = entry;
            }
        }
    }

    // 2. Filename inference: only an unambiguous match counts.
    if (name_hits == 1) return resolutionFor(io, name_match.?);

    // 3. Single-WF-entry fallback.
    if (wf_count == 1) return resolutionFor(io, last_wf.?);

    // Nothing to go on: tell the user how to disambiguate.
    if (!builtin.is_test) {
        var stderr_buf: [4096]u8 = undefined;
        var sw = std.Io.File.stderr().writer(io, &stderr_buf);
        if (wf_count == 0) {
            try sw.interface.print(
                "Error: no `institution::wells_fargo` entries found in accounts.srf.\n" ++
                    " Add one (e.g. `account::Sample IRA *1234,tax_type::roth,institution::wells_fargo,account_number::1234`)\n" ++
                    " and rerun the import.\n",
                .{},
            );
        } else {
            try sw.interface.print(
                "Error: {d} Wells Fargo accounts in accounts.srf; cannot pick one automatically.\n" ++
                    " Pass --account NAME to disambiguate. Candidates:\n",
                .{wf_count},
            );
            try printEntries(&sw.interface, account_map);
        }
        try sw.interface.flush();
    }
    return error.AmbiguousWellsFargoAccount;
}
|
||
|
||
/// Resolves the account once via `resolveAccount` and stamps the
/// result onto every element of `positions`, overwriting their
/// `account_number` and `account_name` fields.
///
/// The stamped slices are borrowed from `account_map`; the caller must
/// keep it alive for as long as the positions are used. Any error from
/// `resolveAccount` is propagated and leaves `positions` untouched.
pub fn applyAccountToPositions(
    io: std.Io,
    account_map: analysis.AccountMap,
    source_path: []const u8,
    explicit: ?[]const u8,
    positions: []BrokeragePosition,
) !void {
    const resolved = try resolveAccount(io, account_map, source_path, explicit);
    for (positions) |*position| {
        position.account_number = resolved.account_number;
        position.account_name = resolved.account_name;
    }
}
|
||
|
||
/// Turns an `accounts.srf` entry into a `Resolved`.
///
/// Fails with `error.UnknownAccount` when the entry has no
/// `account_number`; outside of test builds an explanatory message is
/// written to stderr first (a failure to write it is propagated).
fn resolutionFor(io: std.Io, entry: analysis.AccountTaxEntry) !Resolved {
    if (entry.account_number) |num| {
        return .{ .account_number = num, .account_name = entry.account };
    }

    if (!builtin.is_test) {
        var stderr_buf: [512]u8 = undefined;
        var sw = std.Io.File.stderr().writer(io, &stderr_buf);
        try sw.interface.print(
            "Error: WF account '{s}' has no `account_number::` field in accounts.srf.\n" ++
                " Add one (the trailing digits after `*` work well, e.g. `account_number::1234`).\n",
            .{entry.account},
        );
        try sw.interface.flush();
    }
    return error.UnknownAccount;
}
|
||
|
||
/// Reports whether `filename` (a basename with the extension already
/// removed) looks like it refers to the given account.
///
/// Three checks are tried in order; the first hit wins:
///
///   1. The run of ASCII digits at the very end of `account_name`
///      (e.g. `1234` in `Sample IRA *1234`), when non-empty, occurs
///      somewhere in `filename`.
///   2. `account_number`, when non-null and non-empty, occurs somewhere
///      in `filename`. This is a plain case-sensitive substring test,
///      whatever characters the number contains. It lets a file named
///      after the number match an account whose name has no digits.
///   3. `alphaRunsContained(filename, account_name)`: every
///      alphanumeric run of the name appears in the filename, in
///      order, ignoring case.
///
/// Examples:
///   filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234", null) → true
///   filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234", null)   → true (digits match)
///   filenameMatchesAccount("portfolio_other", "Sample IRA *1234", null) → false
///   filenameMatchesAccount("1234", "Sample Roth IRA", "1234")           → true (account_number anchor)
fn filenameMatchesAccount(filename: []const u8, account_name: []const u8, account_number: ?[]const u8) bool {
    // Measure the trailing digit run of the account name.
    var tail_len: usize = 0;
    while (tail_len < account_name.len and
        std.ascii.isDigit(account_name[account_name.len - 1 - tail_len])) : (tail_len += 1)
    {}
    const suffix_digits = account_name[account_name.len - tail_len ..];

    // 1. Digit suffix of the account name.
    if (suffix_digits.len != 0 and std.mem.indexOf(u8, filename, suffix_digits) != null) {
        return true;
    }

    // 2. The `account_number::` value as an alternate anchor.
    if (account_number) |num| {
        if (num.len != 0 and std.mem.indexOf(u8, filename, num) != null) return true;
    }

    // 3. Fuzzy fallback on the alphanumeric runs of the name.
    return alphaRunsContained(filename, account_name);
}
|
||
|
||
/// Reports whether every maximal ASCII-alphanumeric run of
/// `account_name` occurs in `filename`, in the same order and without
/// overlapping, compared case-insensitively.
///
/// Characters of `account_name` that are not ASCII alphanumerics
/// (spaces, `*`, `_`, `-`, …) only separate runs and are otherwise
/// ignored.
///
/// Returns false when `account_name` contains no alphanumeric run at
/// all: a name with nothing to compare must not match every filename.
/// Inputs of any length are handled; no scratch buffers are used.
fn alphaRunsContained(filename: []const u8, account_name: []const u8) bool {
    var matched_any = false;
    var search_from: usize = 0; // first filename index not used by an earlier run
    var pos: usize = 0;

    while (pos < account_name.len) {
        // Step over separator characters.
        if (!std.ascii.isAlphanumeric(account_name[pos])) {
            pos += 1;
            continue;
        }

        // Collect one maximal alphanumeric run.
        const start = pos;
        while (pos < account_name.len and std.ascii.isAlphanumeric(account_name[pos])) : (pos += 1) {}
        const run = account_name[start..pos];

        // The run must appear at or after the end of the previous
        // match, which is what enforces the ordering.
        const found = std.ascii.indexOfIgnoreCasePos(filename, search_from, run) orelse return false;
        search_from = found + run.len;
        matched_any = true;
    }

    return matched_any;
}
|
||
|
||
/// Writes the `account` name of every Wells Fargo entry in
/// `account_map` to `w`, one per line, as a ` - NAME` bullet.
/// Entries with no institution, or a different one, are skipped.
/// Write errors are propagated.
fn printEntries(w: *std.Io.Writer, account_map: analysis.AccountMap) !void {
    for (account_map.entries) |entry| {
        if (entry.institution) |inst| {
            if (std.mem.eql(u8, inst, institution)) {
                try w.print(" - {s}\n", .{entry.account});
            }
        }
    }
}
|
||
|
||
// ── Tests ────────────────────────────────────────────────────
|
||
|
||
const testing = std.testing;
|
||
|
||
test "isPopupAnchor: recognizes WF record anchors" {
    // Lines that must be treated as record anchors.
    const anchors = [_][]const u8{ "GSLC , popup", "VTV , popup" };
    for (anchors) |line| try testing.expect(isPopupAnchor(line));

    // Description, totals and blank lines must not be.
    const others = [_][]const u8{ "GOLDMAN ACTIVEBETA ETF", "ETFs Total", "" };
    for (others) |line| try testing.expect(!isPopupAnchor(line));
}
|
||
|
||
test "popupSymbol: extracts symbol token before ', popup'" {
    // Well-formed anchors yield the bare symbol.
    const good = [_]struct { line: []const u8, symbol: []const u8 }{
        .{ .line = "GSLC , popup", .symbol = "GSLC" },
        .{ .line = "VO , popup", .symbol = "VO" },
    };
    for (good) |case| try testing.expectEqualStrings(case.symbol, popupSymbol(case.line).?);

    // An empty symbol part, or a line missing the comma, yields null.
    const bad = [_][]const u8{ ", popup", "GSLC popup" };
    for (bad) |line| try testing.expect(popupSymbol(line) == null);
}
|
||
|
||
test "isTotalLine: matches WF footer sentinels" {
    // Section totals and the bare word are sentinels.
    const sentinels = [_][]const u8{ "ETFs Total", "Stocks Total", "Total" };
    for (sentinels) |line| try testing.expect(isTotalLine(line));

    // A word merely containing "total", or a record anchor, is not.
    const others = [_][]const u8{ "Subtotal", "GSLC , popup" };
    for (others) |line| try testing.expect(!isTotalLine(line));
}
|
||
|
||
test "parseSharesAmount: accepts integers with thousands commas" {
    const cases = [_]struct { text: []const u8, want: f64 }{
        .{ .text = "906", .want = 906 },
        .{ .text = "1,020", .want = 1020 },
        .{ .text = "2,597", .want = 2597 },
    };
    for (cases) |case| {
        try testing.expectApproxEqAbs(case.want, parseSharesAmount(case.text).?, 0.01);
    }
}
|
||
|
||
test "parsePaste: header preamble plus three records" {
    // Fixture shaped like a full paste: the column-header preamble,
    // three records (two `Multiple(N)` and one single-date), then the
    // totals footer. The tabs and blank-tab lines are deliberate; the
    // trim + nextNonEmpty pipeline has to cope with them.
    const data =
        "Symbol/Description,click to sort \tTrade Date,click to sort \tShares\n" ++
        "@ Cost\n" ++
        ",click to sort \tLast Price/\n" ++
        "Change\n" ++
        ",click to sort \tMarket Value/\n" ++
        "Today's Change\n" ++
        "\tUnreal.\n" ++
        "Gain/Loss\n" ++
        ",click to sort \tEstimated\n" ++
        "Annual Income\n" ++
        ",click to sort\n" ++
        "\t\n" ++
        "GSLC , popup\n" ++
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n" ++
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n" ++
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n" ++
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n" ++
        "$1,203.17\n" ++
        "\t\n" ++
        "VO , popup\n" ++
        "VANGUARD MID CAP ETF\n" ++
        "\tMultiple(2) \t\n" ++
        "1,020\n" ++
        "@ $74.30\n" ++
        "\t\n" ++
        "$77.41\n" ++
        "+$0.35\n" ++
        "\t\n" ++
        "$78,958.20\n" ++
        "+$357.00 (+0.45%)\n" ++
        "\t\n" ++
        "+$3,174.66\n" ++
        "+4.19%\n" ++
        "\t\n" ++
        "$1,104.66\n" ++
        "\t\n" ++
        "EEM , popup\n" ++
        "ISHARES MSCI EMRG MK ETF\n" ++
        "\t\n" ++
        "02/24/2026\n" ++
        "\t\n" ++
        "875\n" ++
        "@ $62.71\n" ++
        "\t\n" ++
        "$66.03\n" ++
        "+$0.57\n" ++
        "\t\n" ++
        "$57,776.25\n" ++
        "+$498.75 (+0.87%)\n" ++
        "\t\n" ++
        "+$2,906.67\n" ++
        "+5.30%\n" ++
        "\t\n" ++
        "$1,063.12\n" ++
        "\t\n" ++
        "ETFs Total\n" ++
        "\t\t\t\t\n" ++
        "$264,389.85\n";

    const positions = try parsePaste(testing.allocator, data);
    defer testing.allocator.free(positions);

    // Cost basis is shares × average cost:
    //   GSLC 906 × $129.97 = $117,752.82
    //   VO 1,020 × $74.30  = $75,786.00
    //   EEM 875 × $62.71   (single-date record; the trade-date column
    //                       is skipped whatever its shape)
    const Want = struct {
        symbol: []const u8,
        quantity: f64,
        cost_basis: f64,
        current_value: f64,
    };
    const wanted = [_]Want{
        .{ .symbol = "GSLC", .quantity = 906, .cost_basis = 117_752.82, .current_value = 127_655.40 },
        .{ .symbol = "VO", .quantity = 1020, .cost_basis = 75_786.00, .current_value = 78_958.20 },
        .{ .symbol = "EEM", .quantity = 875, .cost_basis = 875.0 * 62.71, .current_value = 57_776.25 },
    };

    try testing.expectEqual(@as(usize, 3), positions.len);
    for (&wanted, positions) |want, got| {
        try testing.expectEqualStrings(want.symbol, got.symbol);
        try testing.expectApproxEqAbs(want.quantity, got.quantity.?, 0.01);
        try testing.expectApproxEqAbs(want.cost_basis, got.cost_basis.?, 0.01);
        try testing.expectApproxEqAbs(want.current_value, got.current_value.?, 0.01);
    }

    // Checked on the first record only.
    try testing.expectEqualStrings("GOLDMAN ACTIVEBETA ETF", positions[0].description);
    try testing.expect(!positions[0].is_cash);
}
|
||
|
||
test "parsePaste: no header preamble, no footer totals" {
    // Rows-only paste: nothing before the first anchor and nothing
    // after the last record's est-annual-income line. The parser has
    // to run off the end of the input cleanly.
    const data =
        "\n" ++
        "GSLC , popup\n" ++
        "GOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n" ++
        "@ $129.97\n" ++
        "\t\n" ++
        "$140.90\n" ++
        "+$0.31\n" ++
        "\t\n" ++
        "$127,655.40\n" ++
        "+$280.86 (+0.22%)\n" ++
        "\t\n" ++
        "+$9,906.42\n" ++
        "+8.41%\n" ++
        "\t\n" ++
        "$1,203.17\n";

    const positions = try parsePaste(testing.allocator, data);
    defer testing.allocator.free(positions);

    try testing.expectEqual(@as(usize, 1), positions.len);
    const only = positions[0];
    try testing.expectEqualStrings("GSLC", only.symbol);
    try testing.expectApproxEqAbs(@as(f64, 906), only.quantity.?, 0.01);
}
|
||
|
||
test "parsePaste: empty input yields zero positions" {
    // Nothing pasted at all: the call must succeed and hand back an
    // empty slice.
    const gpa = testing.allocator;
    const parsed = try parsePaste(gpa, "");
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 0), parsed.len);
}
|
||
|
||
test "parsePaste: input with only header preamble (no records) yields zero" {
    // Column-header text on its own, with no record following it, must
    // produce no positions.
    const gpa = testing.allocator;
    const header_only =
        "Symbol/Description,click to sort \tTrade Date,click to sort \tShares\n" ++
        "@ Cost\n,click to sort \tLast Price/\n";

    const parsed = try parsePaste(gpa, header_only);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 0), parsed.len);
}
|
||
|
||
test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)" {
    // WF's holdings page is divided into sections (Stocks, ETFs,
    // Bonds, …) and each one ends with its own `<Section> Total`
    // footer. A footer in the middle of the paste must not stop the
    // parser: records in the following section still have to be picked
    // up. (Seen in practice: 43 stocks, `Stocks Total`, then 13 ETFs.)
    const gpa = testing.allocator;

    // First section: one record followed by its section footer.
    const first_section =
        "GSLC , popup\nGOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n@ $129.97\n\t\n" ++
        "$140.90\n+$0.31\n\t\n" ++
        "$127,655.40\n+$280.86 (+0.22%)\n\t\n" ++
        "+$9,906.42\n+8.41%\n\t\n" ++
        "$1,203.17\n\t\n" ++
        "Stocks Total\n$127,655.40\n";

    // Second section: heading noise, then a record that sits after the
    // first footer and must still be parsed, then the closing footer.
    const second_section =
        "ETFs\nETF table has been sorted ...\n\t\n" ++
        "DBP , popup\nINVESCO PRECIOUS METALS ETF\n" ++
        "\tMultiple(1) \t\n" ++
        "10\n@ $50.00\n\t\n" ++
        "$55.00\n+$0.10\n\t\n" ++
        "$550.00\n+$1.00 (+0.18%)\n\t\n" ++
        "+$50.00\n+10.00%\n\t\n" ++
        "$5.00\n\t\n" ++
        "ETFs Total\n$550.00\n";

    const parsed = try parsePaste(gpa, first_section ++ second_section);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 2), parsed.len);
    try testing.expectEqualStrings("GSLC", parsed[0].symbol);
    try testing.expectEqualStrings("DBP", parsed[1].symbol);
}
|
||
|
||
test "parsePaste: money-market symbol gets is_cash=true" {
    // WMPXX is the Allspring (formerly Wells Fargo) money-market fund.
    // It is on the canonical money-market list, so the parser marks it
    // as cash even though the record has no `**` suffix and no
    // unit-price hint. A WF-house ticker keeps the fixture realistic:
    // SWVXX would never appear on a Wells Fargo holdings page.
    const gpa = testing.allocator;
    const paste =
        "WMPXX , popup\nALLSPRING MONEY MARKET FUND\n" ++ // anchor, description
        "\tMultiple(1) \t\n" ++ // lot count
        "5000\n@ $1.00\n\t\n" ++ // shares @ average cost
        "$1.00\n$0.00\n\t\n" ++ // last price, day change
        "$5,000.00\n+$0.00 (0.00%)\n\t\n" ++ // market value, day change
        "+$0.00\n0.00%\n\t\n" ++ // unrealized gain/loss
        "$200.00\n"; // est. annual income

    const parsed = try parsePaste(gpa, paste);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 1), parsed.len);
    try testing.expect(parsed[0].is_cash);
}
|
||
|
||
test "parsePaste: accepts both `SYMBOL,popup` and `SYMBOL , popup` anchors" {
    // The anchor line comes in two spellings depending on which part of
    // the holdings table was copied: stocks tend to paste as
    // `SYMBOL,popup` (no spaces), ETFs as `SYMBOL , popup` (spaces on
    // both sides of the comma). One paste routinely contains both, so
    // both have to be recognised.
    const gpa = testing.allocator;

    // Stock record using the compact anchor, plus its record separator.
    const compact_record =
        "XOM,popup\nEXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n@ $129.66\n\t\n" ++
        "$154.92\n-$0.37\n\t\n" ++
        "$7,746.00\n-$18.50 (-0.24%)\n\t\n" ++
        "+$1,262.85\n+19.48%\n\t\n" ++
        "$206.00\n\t\n";

    // ETF record using the spaced anchor; the paste ends right after it.
    const spaced_record =
        "GSLC , popup\nGOLDMAN ACTIVEBETA ETF\n" ++
        "\tMultiple(3) \t\n" ++
        "906\n@ $129.97\n\t\n" ++
        "$140.90\n+$0.31\n\t\n" ++
        "$127,655.40\n+$280.86 (+0.22%)\n\t\n" ++
        "+$9,906.42\n+8.41%\n\t\n" ++
        "$1,203.17\n";

    const parsed = try parsePaste(gpa, compact_record ++ spaced_record);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 2), parsed.len);
    try testing.expectEqualStrings("XOM", parsed[0].symbol);
    try testing.expectEqualStrings("GSLC", parsed[1].symbol);
}
|
||
|
||
test "parsePaste: trailing cash section emits a cash position" {
    // A WF paste may continue past the positions table into a
    // "Cash, Cash Alternatives and Margin" section that lists the
    // account's cash balance. The parser turns that balance into a
    // synthetic cash position; the downstream resolver stamps the
    // account fields and `synthesizeLots` emits a
    // `security_type::cash` lot.
    const gpa = testing.allocator;

    // One ordinary holding, its separator, and the section footer.
    const holdings =
        "XOM,popup\nEXXON MOBIL CORP\n" ++
        "\tMultiple(4) \t\n" ++
        "50\n@ $129.66\n\t\n" ++
        "$154.92\n-$0.37\n\t\n" ++
        "$7,746.00\n-$18.50 (-0.24%)\n\t\n" ++
        "+$1,262.85\n+19.48%\n\t\n" ++
        "$206.00\n\t\n" ++
        "ETFs Total\n$7,746.00\n";

    // The cash block that follows the positions table.
    const cash_section =
        "Cash, Cash Alternatives and Margin\n" ++
        "Cash alternatives and margin table has been sorted ...\n\t\n" ++
        "Sample Roth IRA *1234\n\t$14,216.88\n\t\n" ++
        "Cash Total\n\t\n$14,216.88 \n";

    const parsed = try parsePaste(gpa, holdings ++ cash_section);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 2), parsed.len);

    const stock = parsed[0];
    try testing.expectEqualStrings("XOM", stock.symbol);
    try testing.expect(!stock.is_cash);

    // The synthetic cash entry: empty symbol, value only.
    const cash = parsed[1];
    try testing.expectEqualStrings("", cash.symbol);
    try testing.expect(cash.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 14216.88), cash.current_value.?, 0.01);
    try testing.expect(cash.quantity == null);
    try testing.expect(cash.cost_basis == null);
}
|
||
|
||
test "parsePaste: cash section absent is a no-op" {
    // Regression for the original 3522.txt-style paste: only the
    // positions table was copied, so there is no cash section and the
    // result must contain the positions and nothing else.
    const gpa = testing.allocator;
    const paste =
        "GSLC , popup\nGOLDMAN ACTIVEBETA ETF\n" ++ // anchor, description
        "\tMultiple(3) \t\n" ++ // lot count
        "906\n@ $129.97\n\t\n" ++ // shares @ average cost
        "$140.90\n+$0.31\n\t\n" ++ // last price, day change
        "$127,655.40\n+$280.86 (+0.22%)\n\t\n" ++ // market value, day change
        "+$9,906.42\n+8.41%\n\t\n" ++ // unrealized gain/loss
        "$1,203.17\n"; // est. annual income

    const parsed = try parsePaste(gpa, paste);
    defer gpa.free(parsed);

    try testing.expectEqual(@as(usize, 1), parsed.len);
    try testing.expect(!parsed[0].is_cash);
}
|
||
|
||
test "isPopupAnchor: accepts both compact and spaced forms" {
    // Every spacing variant around the comma counts as an anchor, as
    // does a symbol containing an apostrophe.
    const accepted = [_][]const u8{
        "XOM,popup",
        "XOM ,popup",
        "XOM, popup",
        "XOM , popup",
        "BRK'B,popup",
    };
    for (accepted) |line| try testing.expect(isPopupAnchor(line));

    // Negative cases.
    const rejected = [_][]const u8{ "XOM popup", "popup", "" };
    for (rejected) |line| try testing.expect(!isPopupAnchor(line));
}
|
||
|
||
test "popupSymbol: extracts symbol from compact form" {
    const Case = struct { line: []const u8, want: []const u8 };
    const cases = [_]Case{
        .{ .line = "XOM,popup", .want = "XOM" },
        .{ .line = "BRK'B,popup", .want = "BRK'B" },
        .{ .line = "XOM, popup", .want = "XOM" },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.want, popupSymbol(case.line).?);
    }

    // With nothing in front of the comma there is no symbol to return.
    try testing.expect(popupSymbol(",popup") == null);
}
|
||
|
||
// ── Resolver tests ───────────────────────────────────────────
|
||
|
||
/// Test helper: build an `AccountMap` from compile-time entries.
/// Mirrors the helper in `commands/import.zig`'s test block;
/// duplicated here so resolver tests don't depend on import's
/// test-only infrastructure.
///
/// Every string in `entries` is duplicated with `allocator`, so the
/// returned map does not alias the caller's literals. Callers release
/// it with `deinit()`. If any allocation fails partway through,
/// everything allocated so far is freed before the error is returned,
/// so a failing run does not trip the testing allocator's leak check.
fn testAccountMap(allocator: std.mem.Allocator, entries: []const analysis.AccountTaxEntry) !analysis.AccountMap {
    const owned = try allocator.alloc(analysis.AccountTaxEntry, entries.len);

    // Number of leading slots in `owned` that are fully initialised;
    // only those are safe to walk during error cleanup.
    var built: usize = 0;
    errdefer {
        for (owned[0..built]) |done| {
            allocator.free(done.account);
            if (done.institution) |s| allocator.free(s);
            if (done.account_number) |s| allocator.free(s);
        }
        allocator.free(owned);
    }

    for (entries, owned) |src, *dst| {
        // The per-iteration errdefers cover the entry currently being
        // built; the outer errdefer covers the completed ones.
        const account = try allocator.dupe(u8, src.account);
        errdefer allocator.free(account);

        const institution = if (src.institution) |s| try allocator.dupe(u8, s) else null;
        errdefer if (institution) |s| allocator.free(s);

        const account_number = if (src.account_number) |s| try allocator.dupe(u8, s) else null;

        dst.* = .{
            .account = account,
            .tax_type = src.tax_type,
            .institution = institution,
            .account_number = account_number,
        };
        built += 1;
    }

    return .{ .entries = owned, .allocator = allocator };
}
|
||
|
||
test "filenameMatchesAccount: trailing-digit anchor wins" {
    const account = "Sample IRA *1234";

    // The digit run is the strongest signal: WF account suffixes are
    // unique within a household, so finding it in the filename settles
    // the match.
    const hits = [_][]const u8{ "Sample_IRA_1234", "1234.txt", "smpl-ira-1234" };
    for (hits) |filename| {
        try testing.expect(filenameMatchesAccount(filename, account, null));
    }

    // A different digit suffix does not match.
    const misses = [_][]const u8{ "Sample_IRA_5678", "portfolio_other" };
    for (misses) |filename| {
        try testing.expect(!filenameMatchesAccount(filename, account, null));
    }
}
|
||
|
||
test "filenameMatchesAccount: account_number anchor when name lacks digits" {
    // The digits live only in `account_number::`; the human-readable
    // account name has none. The number alone can anchor the match.
    const name = "Sample Roth IRA";
    const number = "1234";

    const hits = [_][]const u8{ "1234.txt", "smpl_1234" };
    for (hits) |filename| {
        try testing.expect(filenameMatchesAccount(filename, name, number));
    }

    // Wrong digits in the filename: no match.
    try testing.expect(!filenameMatchesAccount("9999.txt", name, number));

    // Without an account_number and without digits in the name there is
    // nothing to anchor on (alphaRunsContained doesn't help against a
    // digit-only file).
    try testing.expect(!filenameMatchesAccount("1234.txt", name, null));
}
|
||
|
||
test "filenameMatchesAccount: name digits take precedence over account_number" {
    // Two signals are available and either one is sufficient (OR
    // semantics). The name's digits are checked first; account_number
    // is exercised on its own by a filename that lacks the name digits.
    const name = "Sample *1234";
    const number = "9999";

    // Matches via the digits in the account name.
    try testing.expect(filenameMatchesAccount("Sample_1234", name, number));
    // Matches via account_number only.
    try testing.expect(filenameMatchesAccount("Sample_9999", name, number));
    // Matches neither signal.
    try testing.expect(!filenameMatchesAccount("Sample_5555", name, number));
}
|
||
|
||
test "filenameMatchesAccount: alpha-only fallback when account has no digit suffix" {
    const account = "Emils Brokerage";

    // With no trailing digits to anchor on, matching falls through to
    // the alpha-runs-contained check.
    try testing.expect(filenameMatchesAccount("emils_brokerage", account, null));

    // alphaRunsContained wants every account-name run to show up in the
    // filename, in order. Swapped tokens fail.
    try testing.expect(!filenameMatchesAccount("Brokerage_Emils", account, null));

    // A filename that contains only some of the runs fails too.
    try testing.expect(!filenameMatchesAccount("emils_only", account, null));
}
|
||
|
||
test "filenameMatchesAccount: case-insensitive fallback" {
    // Letter case in the filename must not affect the fallback match.
    const account = "Emils Brokerage";
    const filenames = [_][]const u8{ "EMILS_brokerage", "emils_BROKERAGE" };
    for (filenames) |filename| {
        try testing.expect(filenameMatchesAccount(filename, account, null));
    }
}
|
||
|
||
test "alphaRunsContained: every alphanumeric run from account appears in order" {
    const account = "Emils Brokerage";

    const contained = [_][]const u8{ "emils_brokerage", "--emils-brokerage--" };
    for (contained) |filename| {
        try testing.expect(alphaRunsContained(filename, account));
    }

    // Order matters.
    try testing.expect(!alphaRunsContained("brokerage_emils", account));
    // A missing run fails the check.
    try testing.expect(!alphaRunsContained("emils_only", account));

    // An empty account name has no runs, so the check is trivially true.
    try testing.expect(alphaRunsContained("anything", ""));
}
|
||
|
||
test "resolveAccount: explicit override matches a WF entry" {
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA *1234", .tax_type = .roth, .institution = "wells_fargo", .account_number = "1234" },
        .{ .account = "Sample Brokerage *5678", .tax_type = .taxable, .institution = "wells_fargo", .account_number = "5678" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    // The filename does not name either account; the override picks
    // the entry.
    const resolved = try resolveAccount(testing.io, accounts, "anything.txt", "Sample IRA *1234");
    try testing.expectEqualStrings("1234", resolved.account_number);
    try testing.expectEqualStrings("Sample IRA *1234", resolved.account_name);
}
|
||
|
||
test "resolveAccount: explicit override that doesn't match → UnknownAccount" {
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA *1234", .tax_type = .roth, .institution = "wells_fargo", .account_number = "1234" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    // An override naming an account that is not in the map is an error.
    const outcome = resolveAccount(testing.io, accounts, "anything.txt", "Wrong Account");
    try testing.expectError(error.UnknownAccount, outcome);
}
|
||
|
||
test "resolveAccount: filename inference picks the right entry from multiple WF accounts" {
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA *1234", .tax_type = .roth, .institution = "wells_fargo", .account_number = "1234" },
        .{ .account = "Sample Brokerage *5678", .tax_type = .taxable, .institution = "wells_fargo", .account_number = "5678" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    // No override is given; the path's filename is the only hint.
    const resolved = try resolveAccount(testing.io, accounts, "/path/to/Sample_IRA_1234.txt", null);
    try testing.expectEqualStrings("1234", resolved.account_number);
}
|
||
|
||
test "resolveAccount: single-WF-entry fallback when filename has no signal" {
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA *1234", .tax_type = .roth, .institution = "wells_fargo", .account_number = "1234" },
        // An entry for another institution must not get in the way.
        .{ .account = "Sample Fid", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    const resolved = try resolveAccount(testing.io, accounts, "unrelated_filename.txt", null);
    try testing.expectEqualStrings("1234", resolved.account_number);
}
|
||
|
||
test "resolveAccount: ambiguous when 2+ WF entries and no signal" {
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA *1234", .tax_type = .roth, .institution = "wells_fargo", .account_number = "1234" },
        .{ .account = "Sample Brokerage *5678", .tax_type = .taxable, .institution = "wells_fargo", .account_number = "5678" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    // Two WF candidates with no override and an unrelated filename.
    const outcome = resolveAccount(testing.io, accounts, "unrelated_filename.txt", null);
    try testing.expectError(error.AmbiguousWellsFargoAccount, outcome);
}
|
||
|
||
test "resolveAccount: zero WF entries → AmbiguousWellsFargoAccount with helpful message" {
    const gpa = testing.allocator;
    // The map holds only an account from a different institution.
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample Fid", .tax_type = .taxable, .institution = "fidelity", .account_number = "Z123" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    const outcome = resolveAccount(testing.io, accounts, "anything.txt", null);
    try testing.expectError(error.AmbiguousWellsFargoAccount, outcome);
}
|
||
|
||
test "resolveAccount: WF entry without account_number → UnknownAccount" {
    // WF entries in accounts.srf are required to carry an
    // `account_number::` field because the downstream
    // `findByInstitutionAccount` lookup keys on it. This test pins that
    // guard: without it the import would fail later, at synthesizeLots
    // time, with an "unmapped account" error and no hint about the
    // real cause.
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA", .tax_type = .roth, .institution = "wells_fargo", .account_number = null },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    const outcome = resolveAccount(testing.io, accounts, "Sample_IRA.txt", null);
    try testing.expectError(error.UnknownAccount, outcome);
}
|
||
|
||
test "applyAccountToPositions: patches every position's account fields" {
    const gpa = testing.allocator;
    const entries = [_]analysis.AccountTaxEntry{
        .{ .account = "Sample IRA *1234", .tax_type = .roth, .institution = "wells_fargo", .account_number = "1234" },
    };
    var accounts = try testAccountMap(gpa, &entries);
    defer accounts.deinit();

    // Both positions start with blank account fields.
    var holdings = [_]BrokeragePosition{
        .{
            .account_number = "",
            .account_name = "",
            .symbol = "VTI",
            .description = "",
            .quantity = 10,
            .current_value = 1000,
            .cost_basis = 800,
            .is_cash = false,
        },
        .{
            .account_number = "",
            .account_name = "",
            .symbol = "AAPL",
            .description = "",
            .quantity = 5,
            .current_value = 1000,
            .cost_basis = 750,
            .is_cash = false,
        },
    };

    try applyAccountToPositions(testing.io, accounts, "Sample_IRA_1234.txt", null, &holdings);

    // Every position must now carry the resolved account's identity.
    for (holdings) |holding| {
        try testing.expectEqualStrings("1234", holding.account_number);
        try testing.expectEqualStrings("Sample IRA *1234", holding.account_name);
    }
}
|