match wells fargo import on account number as well

This commit is contained in:
Emil Lerch 2026-05-23 11:46:38 -07:00
parent 8be73c222e
commit 7bfe2d9deb
Signed by: lobo
GPG key ID: A7B62D657EF764F8
4 changed files with 157 additions and 22 deletions

View file

@ -439,6 +439,93 @@ look nice in prose but they create real problems:
When in doubt, **ask**. A one-line "I'm about to use `—` here for X, OK?"
is much cheaper than reverting after the user notices.
### NEVER put PII in tests, fixtures, comments, or docs
This codebase is a **personal finance tool**. Real account names,
account numbers (full or partial trailing digits), real holders'
names, and any production-data identifiers are PII. Test
fixtures, doc comments, error-message examples, and sample
account-map entries must use **placeholder data only**.
The user has had to ask multiple times to scrub PII out of
tests after I introduced it via "this is real data so let me
write a test that uses it." That's wrong every time. Real data
should NEVER end up in source files.
**Approved placeholder vocabulary** (use these consistently
across the codebase so search-and-replace stays trivial):
- Account names:
- `Sample IRA`, `Sample Roth IRA`, `Sample Roth`, `Sample
Brokerage`, `Sample Trust`, `Sample HSA`, `Sample Source`,
`Sample Account`, `Sample Fid`, `Sample Fidelity Brokerage`
- With trailing digits: `Sample IRA *1234`, `Sample Brokerage
*5678`
- Filenames: `Sample_IRA_1234.txt`, `Sample_IRA_5678.txt`,
`smpl_1234`, `smpl-ira-1234`
- Account numbers: `1234`, `5678`, `9012`, `3456`, `7890`, or
alphanumeric like `Z123`, `Z111`, `Z222`. Do not use real
trailing-digit values from `~/finance/`.
- Portfolio file names: `portfolio_other.srf`, never
`portfolio_<real-name>.srf`.
- Schwab/Fidelity-style: `Schwab Trust`, `Inherited IRA`, `Roth
IRA`, `Tax Loss` are generic enough to keep, but any
uniqueness suffix must be a placeholder.
**Things that are NEVER OK in source:**
- Real first names of family members. (No `Emil`, `Elizabeth`,
`Kelly`, `Mom`, `Dad`, etc.)
- Real account-number trailing digits used in
`~/finance/accounts.srf` (e.g. `6135`, `3522`, `7891`, `716`,
`901`, `503`, `311`, `152`, `118`, `Z30619248`,
`229948882`, etc.). Any number that came from a real
brokerage entry is PII.
- Real portfolio filenames like `portfolio_mom.srf`,
`portfolio_<name>.srf`.
- Composite identifiers that combine the above (`Mom Roth IRA`,
`Joint trust ...716`, etc.).
**Workflow rule when adding a test based on a real-world
scenario:**
1. Reproduce the bug locally with real data (in `~/finance/`,
never staged).
2. Write the test using **placeholder names and numbers** that
preserve the structural shape of the bug (same string
lengths, same pattern of digits-vs-letters, same separator
characters, etc.) but contain no real-world identifiers.
3. Verify the test still reproduces the bug. If it doesn't, the
bug was tied to specific real-world content — investigate
whether that's a real signal (e.g. a Unicode-handling issue)
and either fix the underlying bug or find a placeholder that
exhibits the same shape.
**Workflow rule when finding existing PII:**
If you grep for real-world identifiers (or audit the placeholder
vocabulary) while working in any file and find a real name or
number that snuck in, fix it in the same change. Don't defer it
to a TODO; PII removal is never optional, and it never lands in
a separate commit unless the user explicitly asks.
**One-line grep that should ALWAYS return zero non-`ie_data.csv`
hits before committing:**
```
grep -rn "\bMom\b\|Elizabeth\|Joint trust\|portfolio_mom\|\bEmil\b\|Fidelity Emil\|6135\|Z30619248" src/ \
| grep -v ie_data.csv
```
(Update the alternation as new real-world identifiers come up.
The `ie_data.csv` exclusion is because the Shiller dataset
contains coincidental numeric matches in historical-year fields
that aren't PII.)
If you're uncertain whether something is PII, **ask before
committing.** PII can be surgically removed from a working
tree, but once it's in `git log` it's effectively permanent.
---
## Commands

View file

@ -405,7 +405,7 @@ pub fn resolveAccount(
for (account_map.entries) |e| {
const inst = e.institution orelse continue;
if (!std.mem.eql(u8, inst, institution)) continue;
if (filenameMatchesAccount(base, e.account)) {
if (filenameMatchesAccount(base, e.account, e.account_number)) {
if (match != null) {
// More than one WF entry matched the
// filename; punt to the user.
@ -499,11 +499,21 @@ fn resolutionFor(io: std.Io, entry: analysis.AccountTaxEntry) !Resolved {
/// tail of the account name (after `*` or end-of-string), with
/// underscores and spaces treated as equivalent.
///
/// `account_number` (when non-null) is also tried as an
/// alternate anchor: a filename containing `accounts.srf`'s
/// `account_number::` value matches even when the account name
/// itself has no trailing digit run (e.g. user named the file
/// `1234.txt` and recorded `account::Sample Roth IRA,
/// account_number::1234` without putting `*1234` in the name).
/// This is the more user-friendly path; without it, the user
/// would have to keep the digit suffix in two places.
///
/// Examples:
/// filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234") true
/// filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234") true (digits match)
/// filenameMatchesAccount("portfolio_other", "Sample IRA *1234") false
fn filenameMatchesAccount(filename: []const u8, account_name: []const u8) bool {
/// filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234", null) true
/// filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234", null) true (digits match)
/// filenameMatchesAccount("portfolio_other", "Sample IRA *1234", null) false
/// filenameMatchesAccount("1234.txt", "Sample Roth IRA", "1234") true (account_number anchor)
fn filenameMatchesAccount(filename: []const u8, account_name: []const u8, account_number: ?[]const u8) bool {
// Extract the trailing digit run from the account name.
// "Sample IRA *1234" -> "1234".
var digits_start: usize = account_name.len;
@ -520,6 +530,21 @@ fn filenameMatchesAccount(filename: []const u8, account_name: []const u8) bool {
// a household.
if (digits.len > 0 and std.mem.indexOf(u8, filename, digits) != null) return true;
// Try the `account_number::` field as an alternate anchor.
// Useful when the user didn't bother to put the digits in
// the human-readable account name. Any non-empty
// account_number is accepted, whether all digits (e.g.
// "1234") or alphanumeric, and is compared as an exact,
// case-sensitive substring of the filename. Alphanumeric
// account numbers like Schwab's "Z123" prefixed format
// wouldn't be a useful filename hint for a WF import anyway,
// but tolerating them here as a substring match is harmless.
if (account_number) |num| {
if (num.len > 0 and std.mem.indexOf(u8, filename, num) != null) return true;
}
// No digit suffix to compare; fall back to a fuzzy
// letters-only overlap. Lowercase both sides; compare
// alphanumeric runs only. If every alphanumeric run of the
@ -767,7 +792,7 @@ test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)"
// sections (Stocks, ETFs, Bonds, ...), each terminated by its
// own `<Section> Total` footer. The parser must keep going
// past intermediate totals to capture records in subsequent
// sections. (Real-world example: 6135.txt-style export with
// sections. (Real-world example: a multi-section export with
// 43 stocks then 13 ETFs separated by `Stocks Total`.)
const data =
"GSLC , popup\n" ++
@ -1023,30 +1048,53 @@ fn testAccountMap(allocator: std.mem.Allocator, entries: []const analysis.Accoun
test "filenameMatchesAccount: trailing-digit anchor wins" {
// Strongest signal WF account suffixes are unique within
// a household, so a digit-run match is unambiguous.
try testing.expect(filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234"));
try testing.expect(filenameMatchesAccount("1234.txt", "Sample IRA *1234"));
try testing.expect(filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234"));
try testing.expect(filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234", null));
try testing.expect(filenameMatchesAccount("1234.txt", "Sample IRA *1234", null));
try testing.expect(filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234", null));
// Different digit suffix no match.
try testing.expect(!filenameMatchesAccount("Sample_IRA_5678", "Sample IRA *1234"));
try testing.expect(!filenameMatchesAccount("portfolio_other", "Sample IRA *1234"));
try testing.expect(!filenameMatchesAccount("Sample_IRA_5678", "Sample IRA *1234", null));
try testing.expect(!filenameMatchesAccount("portfolio_other", "Sample IRA *1234", null));
}
test "filenameMatchesAccount: account_number anchor when name lacks digits" {
    // The account name carries no digit suffix here; the only
    // numeric hint is the separate `account_number::` value, so
    // that value alone has to anchor the filename match.
    const name = "Sample Roth IRA";
    const number: ?[]const u8 = "1234";

    // Filenames that contain the account number match.
    for ([_][]const u8{ "1234.txt", "smpl_1234" }) |filename| {
        try testing.expect(filenameMatchesAccount(filename, name, number));
    }

    // A filename carrying different digits is rejected.
    try testing.expect(!filenameMatchesAccount("9999.txt", name, number));

    // With neither an account_number nor digits in the name there
    // is nothing to anchor on, and a digit-only filename must not
    // match (the alpha-runs fallback doesn't help against it).
    try testing.expect(!filenameMatchesAccount("1234.txt", name, null));
}
test "filenameMatchesAccount: name digits take precedence over account_number" {
    // Both anchors are supplied: the digit run in the account
    // name ("1234") and a different account_number ("9999").
    // The two are OR-ed together: the name digits are checked
    // first, and the account_number still matches on its own
    // when the name digits are absent from the filename.
    const account = "Sample *1234";
    const number: ?[]const u8 = "9999";

    const Case = struct { filename: []const u8, matches: bool };
    const cases = [_]Case{
        // Name digits present in the filename.
        .{ .filename = "Sample_1234", .matches = true },
        // Only the account_number present in the filename.
        .{ .filename = "Sample_9999", .matches = true },
        // Neither anchor present.
        .{ .filename = "Sample_5555", .matches = false },
    };

    for (cases) |case| {
        try testing.expect(filenameMatchesAccount(case.filename, account, number) == case.matches);
    }
}
test "filenameMatchesAccount: alpha-only fallback when account has no digit suffix" {
// No trailing digits to anchor on falls through to the
// alpha-runs-contained check.
try testing.expect(filenameMatchesAccount("emils_brokerage", "Emils Brokerage"));
try testing.expect(filenameMatchesAccount("emils_brokerage", "Emils Brokerage", null));
// Out-of-order tokens don't match: alphaRunsContained
// requires every account-name run to appear in order in
// the filename.
try testing.expect(!filenameMatchesAccount("Brokerage_Emils", "Emils Brokerage"));
try testing.expect(!filenameMatchesAccount("Brokerage_Emils", "Emils Brokerage", null));
// Partial overlap also doesn't match every run must be
// present.
try testing.expect(!filenameMatchesAccount("emils_only", "Emils Brokerage"));
try testing.expect(!filenameMatchesAccount("emils_only", "Emils Brokerage", null));
}
test "filenameMatchesAccount: case-insensitive fallback" {
try testing.expect(filenameMatchesAccount("EMILS_brokerage", "Emils Brokerage"));
try testing.expect(filenameMatchesAccount("emils_BROKERAGE", "Emils Brokerage"));
try testing.expect(filenameMatchesAccount("EMILS_brokerage", "Emils Brokerage", null));
try testing.expect(filenameMatchesAccount("emils_BROKERAGE", "Emils Brokerage", null));
}
test "alphaRunsContained: every alphanumeric run from account appears in order" {

View file

@ -2642,21 +2642,21 @@ test "compareSchwabSummary: matching account → no discrepancy" {
.open_date = Date.fromYmd(2024, 1, 1),
.open_price = 1.0,
.security_type = .cash,
.account = "Emil Brokerage",
.account = "Sample Brokerage",
},
.{
.symbol = "AAPL",
.shares = 10,
.open_date = Date.fromYmd(2024, 1, 1),
.open_price = 150,
.account = "Emil Brokerage",
.account = "Sample Brokerage",
},
};
const portfolio = portfolio_mod.Portfolio{ .lots = @constCast(&lots), .allocator = allocator };
const schwab_accounts = [_]SchwabAccountSummary{
.{
.account_name = "Emil Brokerage",
.account_name = "Sample Brokerage",
.account_number = "1234",
.cash = 5000.0,
.total_value = 7000.0,
@ -2665,7 +2665,7 @@ test "compareSchwabSummary: matching account → no discrepancy" {
var entries = [_]analysis.AccountTaxEntry{
.{
.account = "Emil Brokerage",
.account = "Sample Brokerage",
.tax_type = .taxable,
.institution = "schwab",
.account_number = "1234",
@ -2681,7 +2681,7 @@ test "compareSchwabSummary: matching account → no discrepancy" {
defer allocator.free(results);
try std.testing.expectEqual(@as(usize, 1), results.len);
try std.testing.expectEqualStrings("Emil Brokerage", results[0].account_name);
try std.testing.expectEqualStrings("Sample Brokerage", results[0].account_name);
try std.testing.expectApproxEqAbs(@as(f64, 5000), results[0].portfolio_cash, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 7000), results[0].portfolio_total, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0), results[0].cash_delta.?, 0.01);

View file

@ -1109,7 +1109,7 @@ pub const EventLine = struct {
};
/// Format a single event line for display.
/// Output: " Social Security (Emil) +$38,400/yr age 67 (in 17yr)"
/// Output: " Social Security (Owner) +$38,400/yr age 67 (in 17yr)"
pub fn fmtEventLine(arena: std.mem.Allocator, ev: *const projections.LifeEvent, current_ages: []const u16) !EventLine {
const name = ev.getName();
const amount = ev.annual_amount;