match wells fargo import on account number as well
This commit is contained in:
parent
8be73c222e
commit
7bfe2d9deb
4 changed files with 157 additions and 22 deletions
87
AGENTS.md
87
AGENTS.md
|
|
@ -439,6 +439,93 @@ look nice in prose but they create real problems:
|
|||
When in doubt, **ask**. A one-line "I'm about to use `—` here for X, OK?"
|
||||
is much cheaper than reverting after the user notices.
|
||||
|
||||
### NEVER put PII in tests, fixtures, comments, or docs
|
||||
|
||||
This codebase is a **personal finance tool**. Real account names,
|
||||
account numbers (full or partial trailing digits), real holders'
|
||||
names, and any production-data identifiers are PII. Test
|
||||
fixtures, doc comments, error-message examples, and sample
|
||||
account-map entries must use **placeholder data only**.
|
||||
|
||||
The user has had to ask multiple times to scrub PII out of
|
||||
tests after I introduced it via "this is real data so let me
|
||||
write a test that uses it." That's wrong every time. Real data
|
||||
should NEVER end up in source files.
|
||||
|
||||
**Approved placeholder vocabulary** (use these consistently
|
||||
across the codebase so search-and-replace stays trivial):
|
||||
|
||||
- Account names:
|
||||
- `Sample IRA`, `Sample Roth IRA`, `Sample Roth`, `Sample
|
||||
Brokerage`, `Sample Trust`, `Sample HSA`, `Sample Source`,
|
||||
`Sample Account`, `Sample Fid`, `Sample Fidelity Brokerage`
|
||||
- With trailing digits: `Sample IRA *1234`, `Sample Brokerage
|
||||
*5678`
|
||||
- Filenames: `Sample_IRA_1234.txt`, `Sample_IRA_5678.txt`,
|
||||
`smpl_1234`, `smpl-ira-1234`
|
||||
- Account numbers: `1234`, `5678`, `9012`, `3456`, `7890`, or
|
||||
alphanumeric like `Z123`, `Z111`, `Z222`. Do not use real
|
||||
trailing-digit values from `~/finance/`.
|
||||
- Portfolio file names: `portfolio_other.srf`, never
|
||||
`portfolio_<real-name>.srf`.
|
||||
- Schwab/Fidelity-style: `Schwab Trust`, `Inherited IRA`, `Roth
|
||||
IRA`, `Tax Loss` are generic enough to keep, but any
|
||||
uniqueness suffix must be a placeholder.
|
||||
|
||||
**Things that are NEVER OK in source:**
|
||||
|
||||
- Real first names of family members. (No `Emil`, `Elizabeth`,
|
||||
`Kelly`, `Mom`, `Dad`, etc.)
|
||||
- Real account-number trailing digits used in
|
||||
`~/finance/accounts.srf` (e.g. `6135`, `3522`, `7891`, `716`,
|
||||
`901`, `503`, `311`, `152`, `118`, `Z30619248`,
|
||||
`229948882`, etc.). Any number that came from a real
|
||||
brokerage entry is PII.
|
||||
- Real portfolio filenames like `portfolio_mom.srf`,
|
||||
`portfolio_<name>.srf`.
|
||||
- Composite identifiers that combine the above (`Mom Roth IRA`,
|
||||
`Joint trust ...716`, etc.).
|
||||
|
||||
**Workflow rule when adding a test based on a real-world
|
||||
scenario:**
|
||||
|
||||
1. Reproduce the bug locally with real data (in `~/finance/`,
|
||||
never staged).
|
||||
2. Write the test using **placeholder names and numbers** that
|
||||
preserve the structural shape of the bug (same string
|
||||
lengths, same pattern of digits-vs-letters, same separator
|
||||
characters, etc.) but contain no real-world identifiers.
|
||||
3. Verify the test still reproduces the bug. If it doesn't, the
|
||||
bug was tied to specific real-world content — investigate
|
||||
whether that's a real signal (e.g. a Unicode-handling issue)
|
||||
and either fix the underlying bug or find a placeholder that
|
||||
exhibits the same shape.
|
||||
|
||||
**Workflow rule when finding existing PII:**
|
||||
|
||||
If you grep for the placeholder vocabulary while working in any
|
||||
file and find a real name or number that snuck in, fix it in
|
||||
the same change. Don't add to TODO; PII removal is never
|
||||
optional, and it never lands in a separate commit unless the
|
||||
user explicitly asks.
|
||||
|
||||
**One-line grep that should ALWAYS return zero non-`ie_data.csv`
|
||||
hits before committing:**
|
||||
|
||||
```
|
||||
grep -rn "\bMom\b\|Elizabeth\|Joint trust\|portfolio_mom\|\bEmil\b\|Fidelity Emil\|6135\|Z30619248" src/ \
|
||||
| grep -v ie_data.csv
|
||||
```
|
||||
|
||||
(Update the alternation as new real-world identifiers come up.
|
||||
The `ie_data.csv` exclusion is because the Shiller dataset
|
||||
contains coincidental numeric matches in historical-year fields
|
||||
that aren't PII.)
|
||||
|
||||
If you're uncertain whether something is PII, **ask before
|
||||
committing.** PII can be surgically removed from a working
|
||||
tree, but once it's in `git log` it's effectively permanent.
|
||||
|
||||
---
|
||||
|
||||
## Commands
|
||||
|
|
|
|||
|
|
@ -405,7 +405,7 @@ pub fn resolveAccount(
|
|||
for (account_map.entries) |e| {
|
||||
const inst = e.institution orelse continue;
|
||||
if (!std.mem.eql(u8, inst, institution)) continue;
|
||||
if (filenameMatchesAccount(base, e.account)) {
|
||||
if (filenameMatchesAccount(base, e.account, e.account_number)) {
|
||||
if (match != null) {
|
||||
// More than one WF entry matched the
|
||||
// filename — punt to the user.
|
||||
|
|
@ -499,11 +499,21 @@ fn resolutionFor(io: std.Io, entry: analysis.AccountTaxEntry) !Resolved {
|
|||
/// tail of the account name (after `*` or end-of-string), with
|
||||
/// underscores and spaces treated as equivalent.
|
||||
///
|
||||
/// `account_number` (when non-null) is also tried as an
|
||||
/// alternate anchor: a filename containing `accounts.srf`'s
|
||||
/// `account_number::` value matches even when the account name
|
||||
/// itself has no trailing digit run (e.g. user named the file
|
||||
/// `1234.txt` and recorded `account::Sample Roth IRA,
|
||||
/// account_number::1234` without putting `*1234` in the name).
|
||||
/// This is the more user-friendly path; without it, the user
|
||||
/// would have to keep the digit suffix in two places.
|
||||
///
|
||||
/// Examples:
|
||||
/// filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234") → true
|
||||
/// filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234") → true (digits match)
|
||||
/// filenameMatchesAccount("portfolio_other", "Sample IRA *1234") → false
|
||||
fn filenameMatchesAccount(filename: []const u8, account_name: []const u8) bool {
|
||||
/// filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234", null) → true
|
||||
/// filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234", null) → true (digits match)
|
||||
/// filenameMatchesAccount("portfolio_other", "Sample IRA *1234", null) → false
|
||||
/// filenameMatchesAccount("1234.txt", "Sample Roth IRA", "1234") → true (account_number anchor)
|
||||
fn filenameMatchesAccount(filename: []const u8, account_name: []const u8, account_number: ?[]const u8) bool {
|
||||
// Extract the trailing digit run from the account name.
|
||||
// "Sample IRA *1234" → "1234".
|
||||
var digits_start: usize = account_name.len;
|
||||
|
|
@ -520,6 +530,21 @@ fn filenameMatchesAccount(filename: []const u8, account_name: []const u8) bool {
|
|||
// a household.
|
||||
if (digits.len > 0 and std.mem.indexOf(u8, filename, digits) != null) return true;
|
||||
|
||||
// Try the `account_number::` field as an alternate anchor.
|
||||
// Useful when the user didn't bother to put the digits in
|
||||
// the human-readable account name. Any non-empty
|
||||
// account_number is accepted, not only all-digit ones:
|
||||
// alphanumeric account numbers like Schwab's "Z123"
|
||||
// prefixed format wouldn't be a useful filename hint for
|
||||
// a WF import anyway, but tolerating them here is
|
||||
// harmless. The check is a plain, case-sensitive
|
||||
// substring match against the filename, the same test
|
||||
// applied to the account name's trailing digits above.
|
||||
if (account_number) |num| {
|
||||
if (num.len > 0 and std.mem.indexOf(u8, filename, num) != null) return true;
|
||||
}
|
||||
|
||||
// No digit suffix to compare; fall back to a fuzzy
|
||||
// letters-only overlap. Lowercase both sides; compare
|
||||
// alphanumeric runs only. If every alphanumeric run of the
|
||||
|
|
@ -767,7 +792,7 @@ test "parsePaste: parses across intermediate totals (Stocks Total + ETFs Total)"
|
|||
// sections (Stocks, ETFs, Bonds, …), each terminated by its
|
||||
// own `<Section> Total` footer. The parser must keep going
|
||||
// past intermediate totals to capture records in subsequent
|
||||
// sections. (Real-world example: 6135.txt-style export with
|
||||
// sections. (Real-world example: a multi-section export with
|
||||
// 43 stocks then 13 ETFs separated by `Stocks Total`.)
|
||||
const data =
|
||||
"GSLC , popup\n" ++
|
||||
|
|
@ -1023,30 +1048,53 @@ fn testAccountMap(allocator: std.mem.Allocator, entries: []const analysis.Accoun
|
|||
test "filenameMatchesAccount: trailing-digit anchor wins" {
|
||||
// Strongest signal — WF account suffixes are unique within
|
||||
// a household, so a digit-run match is unambiguous.
|
||||
try testing.expect(filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234"));
|
||||
try testing.expect(filenameMatchesAccount("1234.txt", "Sample IRA *1234"));
|
||||
try testing.expect(filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234"));
|
||||
try testing.expect(filenameMatchesAccount("Sample_IRA_1234", "Sample IRA *1234", null));
|
||||
try testing.expect(filenameMatchesAccount("1234.txt", "Sample IRA *1234", null));
|
||||
try testing.expect(filenameMatchesAccount("smpl-ira-1234", "Sample IRA *1234", null));
|
||||
// Different digit suffix → no match.
|
||||
try testing.expect(!filenameMatchesAccount("Sample_IRA_5678", "Sample IRA *1234"));
|
||||
try testing.expect(!filenameMatchesAccount("portfolio_other", "Sample IRA *1234"));
|
||||
try testing.expect(!filenameMatchesAccount("Sample_IRA_5678", "Sample IRA *1234", null));
|
||||
try testing.expect(!filenameMatchesAccount("portfolio_other", "Sample IRA *1234", null));
|
||||
}
|
||||
|
||||
test "filenameMatchesAccount: account_number anchor when name lacks digits" {
|
||||
// User stored the digits in `account_number::` but didn't
|
||||
// bother to put them in the human-readable account name.
|
||||
// The number itself can anchor the filename match.
|
||||
try testing.expect(filenameMatchesAccount("1234.txt", "Sample Roth IRA", "1234"));
|
||||
try testing.expect(filenameMatchesAccount("smpl_1234", "Sample Roth IRA", "1234"));
|
||||
// Wrong digits → no match.
|
||||
try testing.expect(!filenameMatchesAccount("9999.txt", "Sample Roth IRA", "1234"));
|
||||
// No account_number and no digits in name → no match
|
||||
// (alphaRunsContained doesn't help against a digit-only file).
|
||||
try testing.expect(!filenameMatchesAccount("1234.txt", "Sample Roth IRA", null));
|
||||
}
|
||||
|
||||
test "filenameMatchesAccount: name digits take precedence over account_number" {
|
||||
// Both signals available; either one matching is enough.
|
||||
// (Tests the OR semantics — name digits win first because
|
||||
// they're checked first; we also verify account_number-only
|
||||
// matches when name digits don't appear.)
|
||||
try testing.expect(filenameMatchesAccount("Sample_1234", "Sample *1234", "9999"));
|
||||
try testing.expect(filenameMatchesAccount("Sample_9999", "Sample *1234", "9999"));
|
||||
try testing.expect(!filenameMatchesAccount("Sample_5555", "Sample *1234", "9999"));
|
||||
}
|
||||
|
||||
test "filenameMatchesAccount: alpha-only fallback when account has no digit suffix" {
|
||||
// No trailing digits to anchor on — falls through to the
|
||||
// alpha-runs-contained check.
|
||||
try testing.expect(filenameMatchesAccount("emils_brokerage", "Emils Brokerage"));
|
||||
try testing.expect(filenameMatchesAccount("emils_brokerage", "Emils Brokerage", null));
|
||||
// Out-of-order tokens don't match: alphaRunsContained
|
||||
// requires every account-name run to appear in order in
|
||||
// the filename.
|
||||
try testing.expect(!filenameMatchesAccount("Brokerage_Emils", "Emils Brokerage"));
|
||||
try testing.expect(!filenameMatchesAccount("Brokerage_Emils", "Emils Brokerage", null));
|
||||
// Partial overlap also doesn't match — every run must be
|
||||
// present.
|
||||
try testing.expect(!filenameMatchesAccount("emils_only", "Emils Brokerage"));
|
||||
try testing.expect(!filenameMatchesAccount("emils_only", "Emils Brokerage", null));
|
||||
}
|
||||
|
||||
test "filenameMatchesAccount: case-insensitive fallback" {
|
||||
try testing.expect(filenameMatchesAccount("EMILS_brokerage", "Emils Brokerage"));
|
||||
try testing.expect(filenameMatchesAccount("emils_BROKERAGE", "Emils Brokerage"));
|
||||
try testing.expect(filenameMatchesAccount("EMILS_brokerage", "Emils Brokerage", null));
|
||||
try testing.expect(filenameMatchesAccount("emils_BROKERAGE", "Emils Brokerage", null));
|
||||
}
|
||||
|
||||
test "alphaRunsContained: every alphanumeric run from account appears in order" {
|
||||
|
|
|
|||
|
|
@ -2642,21 +2642,21 @@ test "compareSchwabSummary: matching account → no discrepancy" {
|
|||
.open_date = Date.fromYmd(2024, 1, 1),
|
||||
.open_price = 1.0,
|
||||
.security_type = .cash,
|
||||
.account = "Emil Brokerage",
|
||||
.account = "Sample Brokerage",
|
||||
},
|
||||
.{
|
||||
.symbol = "AAPL",
|
||||
.shares = 10,
|
||||
.open_date = Date.fromYmd(2024, 1, 1),
|
||||
.open_price = 150,
|
||||
.account = "Emil Brokerage",
|
||||
.account = "Sample Brokerage",
|
||||
},
|
||||
};
|
||||
const portfolio = portfolio_mod.Portfolio{ .lots = @constCast(&lots), .allocator = allocator };
|
||||
|
||||
const schwab_accounts = [_]SchwabAccountSummary{
|
||||
.{
|
||||
.account_name = "Emil Brokerage",
|
||||
.account_name = "Sample Brokerage",
|
||||
.account_number = "1234",
|
||||
.cash = 5000.0,
|
||||
.total_value = 7000.0,
|
||||
|
|
@ -2665,7 +2665,7 @@ test "compareSchwabSummary: matching account → no discrepancy" {
|
|||
|
||||
var entries = [_]analysis.AccountTaxEntry{
|
||||
.{
|
||||
.account = "Emil Brokerage",
|
||||
.account = "Sample Brokerage",
|
||||
.tax_type = .taxable,
|
||||
.institution = "schwab",
|
||||
.account_number = "1234",
|
||||
|
|
@ -2681,7 +2681,7 @@ test "compareSchwabSummary: matching account → no discrepancy" {
|
|||
defer allocator.free(results);
|
||||
|
||||
try std.testing.expectEqual(@as(usize, 1), results.len);
|
||||
try std.testing.expectEqualStrings("Emil Brokerage", results[0].account_name);
|
||||
try std.testing.expectEqualStrings("Sample Brokerage", results[0].account_name);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 5000), results[0].portfolio_cash, 0.01);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 7000), results[0].portfolio_total, 0.01);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 0), results[0].cash_delta.?, 0.01);
|
||||
|
|
|
|||
|
|
@ -1109,7 +1109,7 @@ pub const EventLine = struct {
|
|||
};
|
||||
|
||||
/// Format a single event line for display.
|
||||
/// Output: " Social Security (Emil) +$38,400/yr age 67 (in 17yr)"
|
||||
/// Output: " Social Security (Owner) +$38,400/yr age 67 (in 17yr)"
|
||||
pub fn fmtEventLine(arena: std.mem.Allocator, ev: *const projections.LifeEvent, current_ages: []const u16) !EventLine {
|
||||
const name = ev.getName();
|
||||
const amount = ev.annual_amount;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue