diff --git a/TODO.md b/TODO.md index d5659e8..90b6f97 100644 --- a/TODO.md +++ b/TODO.md @@ -199,13 +199,23 @@ to avoid duplicated checks. ## Split `audit.zig` into per-broker reconcilers — priority LOW -`src/commands/audit.zig` is 3438 lines — the largest command file -by ~2x. It bundles four logically distinct responsibilities: +`src/commands/audit.zig` is now 2856 lines (was 3438) after the +brokerage parsers moved to per-broker files under `src/brokerage/`. +It still bundles three logically distinct responsibilities: - Portfolio hygiene check (no-flag mode) - Fidelity positions CSV reconciler (`--fidelity`) -- Schwab per-account positions CSV reconciler (`--schwab`) -- Schwab account-summary stdin parser (`--schwab-summary`) +- Schwab per-account positions CSV reconciler (`--schwab`) and + Schwab account-summary stdin reconciler (`--schwab-summary`) + +The brokerage parsers themselves are split per broker: +`src/brokerage/types.zig` (shared `BrokeragePosition` + +`parseDollarAmount`), `src/brokerage/fidelity.zig` (Fidelity CSV + +option-symbol matcher), `src/brokerage/schwab.zig` (per-account +CSV + summary paste). Adding a new broker is a one-file add next +to those. What's left is splitting the *reconciler* +(compare-portfolio-vs-brokerage) and *display* code in audit.zig +into per-broker files that consume those parsers. ### Sketch @@ -213,23 +223,20 @@ by ~2x. It bundles four logically distinct responsibilities: src/commands/audit/ mod.zig ← thin dispatcher; current public `run()` lives here hygiene.zig ← portfolio hygiene check (no-flag mode) - fidelity.zig ← --fidelity CSV reconciler - schwab.zig ← --schwab CSV + --schwab-summary stdin reconciler + fidelity.zig ← --fidelity reconciler (uses brokerage/fidelity.zig) + schwab.zig ← --schwab + --schwab-summary reconcilers common.zig ← shared types (Discrepancy, ReconcileResult), formatters ``` -Adding a new broker (Vanguard, Robinhood, etc.) becomes a one-file -add against a documented contract. 
The hygiene check can be -referenced from `zfin doctor` (above) without pulling in CSV-parser -baggage. +The hygiene check can be referenced from `zfin doctor` (above) +without pulling in reconciler baggage. ### Driver -Maintenance friction. The next person adding a broker reconciler -— likely future-you — has to navigate 3438 lines to find the -pattern. The split also makes the audit-bug investigations already -in this TODO file (phantom discrepancy on freshly-added lots) easier -to localize. +Maintenance friction. The split makes the audit-bug investigations +already in this TODO file (phantom discrepancy on freshly-added +lots) easier to localize, and lets a `zfin doctor` command reuse +hygiene without inheriting the reconciliation surface. Pure internal refactor; no user-visible change. diff --git a/src/brokerage/fidelity.zig b/src/brokerage/fidelity.zig new file mode 100644 index 0000000..f458673 --- /dev/null +++ b/src/brokerage/fidelity.zig @@ -0,0 +1,378 @@ +//! Fidelity export parsers. +//! +//! Parses the CSV produced by Fidelity's "Download Positions" feature, +//! plus the structured option-symbol matching used to tie a Fidelity +//! option row back to a portfolio lot. +//! +//! ## Limitations of this CSV parser +//! +//! 1. NOT a general-purpose CSV parser. It handles the specific format +//! exported by Fidelity's "Download Positions" feature. +//! +//! 2. Does NOT handle quoted fields containing commas. Fidelity's export +//! does not quote fields with commas in practice (description fields +//! use spaces, not commas), but a truly compliant RFC 4180 parser would +//! need to handle "field,with,commas" as a single value. +//! +//! 3. Does NOT handle escaped quotes ("" inside quoted fields). +//! +//! 4. Does NOT handle multi-line values (newlines inside quoted fields). +//! +//! 5. Assumes UTF-8 with optional BOM (which Fidelity includes). +//! +//! 6. Stops parsing at the first blank line, which separates position data +//! 
from the Fidelity legal disclaimer footer. +//! +//! 7. Hardcodes the expected column layout. If Fidelity changes the CSV +//! format (adds/removes/reorders columns), this parser will break. +//! The header row is validated to catch this. +//! +//! 8. Dollar values like "$1,234.56" and "+$1,234.56" are stripped of +//! $, commas, and leading +/- signs. Negative values wrapped in +//! parentheses are NOT handled (Fidelity uses -$X.XX format). +//! +//! 9. Money market rows (symbol ending in **) are treated as cash. +//! +//! For a production-grade CSV parser, consider a library that handles +//! RFC 4180 fully (quoted fields, escaping, multi-line values). + +const std = @import("std"); +const Date = @import("../Date.zig"); +const portfolio_mod = @import("../models/portfolio.zig"); +const types = @import("types.zig"); + +const BrokeragePosition = types.BrokeragePosition; +const parseDollarAmount = types.parseDollarAmount; +const isUnitPriceCash = types.isUnitPriceCash; + +const expected_columns = 16; + +/// Column indices in the Fidelity CSV export. +/// Based on: Account Number, Account Name, Symbol, Description, Quantity, +/// Last Price, Last Price Change, Current Value, Today's Gain/Loss Dollar, +/// Today's Gain/Loss Percent, Total Gain/Loss Dollar, Total Gain/Loss Percent, +/// Percent Of Account, Cost Basis Total, Average Cost Basis, Type +const Col = struct { + const account_number = 0; + const account_name = 1; + const symbol = 2; + const description = 3; + const quantity = 4; + const last_price = 5; + const current_value = 7; + const cost_basis_total = 13; + const avg_cost_basis = 14; + const type_col = 15; +}; + +/// Parse a Fidelity CSV positions export into BrokeragePosition slices. +/// All string fields in the returned positions are slices into `data`, +/// so the caller must keep `data` alive for as long as the positions are used. +/// Only the returned slice itself is heap-allocated (caller must free it). 
/// Parsing rules (specific to Fidelity's "Download Positions" export):
/// - Returns `error.EmptyFile` when the header line is missing or blank and
///   `error.UnexpectedHeader` when it does not start with "Account Number".
///   Allocation failures from growing the result list are propagated.
/// - Stops at the first blank line, or at the first line that begins with a
///   double quote (disclaimer footer text).
/// - Skips rows with fewer than `expected_columns` comma-separated fields and
///   rows whose symbol is empty. Fields beyond `expected_columns` are ignored.
/// - Rows classified as cash carry `null` for `quantity` and `cost_basis`.
pub fn parseCsv(allocator: std.mem.Allocator, data: []const u8) ![]BrokeragePosition {
    var positions = std.ArrayList(BrokeragePosition).empty;
    errdefer positions.deinit(allocator);

    // Skip UTF-8 BOM if present
    var content = data;
    if (std.mem.startsWith(u8, content, "\xEF\xBB\xBF")) {
        content = content[3..];
    }

    var lines = std.mem.splitScalar(u8, content, '\n');

    // Validate header row. Only the leading column name is checked here; the
    // remaining column names are not compared against `Col`.
    const header_line = lines.next() orelse return error.EmptyFile;
    const header_trimmed = std.mem.trimEnd(u8, header_line, &.{ '\r', ' ' });
    if (header_trimmed.len == 0) return error.EmptyFile;
    if (!std.mem.startsWith(u8, header_trimmed, "Account Number")) {
        return error.UnexpectedHeader;
    }

    // Parse data rows
    while (lines.next()) |line| {
        const trimmed = std.mem.trimEnd(u8, line, &.{ '\r', ' ' });

        // A blank line ends the position data.
        if (trimmed.len == 0) break;

        // A line starting with " is disclaimer text: stop parsing here too.
        if (trimmed[0] == '"') break;

        // Collect the first `expected_columns` fields; anything after that
        // (e.g. the empty field produced by a trailing comma) is not needed.
        var cols: [expected_columns][]const u8 = undefined;
        var col_count: usize = 0;
        var col_iter = std.mem.splitScalar(u8, trimmed, ',');
        while (col_count < expected_columns) : (col_count += 1) {
            cols[col_count] = col_iter.next() orelse break;
        }
        if (col_count < expected_columns) continue;

        const symbol_raw = std.mem.trim(u8, cols[Col.symbol], &.{ ' ', '"' });
        if (symbol_raw.len == 0) continue;

        // Strip ** suffix from money market symbols for display
        const has_mm_suffix = std.mem.endsWith(u8, symbol_raw, "**");
        const symbol_clean = if (has_mm_suffix)
            symbol_raw[0 .. symbol_raw.len - 2]
        else
            symbol_raw;

        // Classify as cash if any of:
        //   - Fidelity's ** suffix marks a money-market position
        //   - The symbol appears in zfin's canonical money-market list
        //     (e.g. FDRXX, SPAXX — Fidelity omits ** for some of these)
        //   - price and cost both equal exactly $1.00, the catch-all for
        //     fixed-NAV instruments that we don't have in the list yet.
        const is_cash = has_mm_suffix or
            portfolio_mod.isMoneyMarketSymbol(symbol_clean) or
            isUnitPriceCash(cols[Col.last_price], cols[Col.avg_cost_basis]);

        try positions.append(allocator, .{
            .account_number = std.mem.trim(u8, cols[Col.account_number], &.{ ' ', '"' }),
            .account_name = std.mem.trim(u8, cols[Col.account_name], &.{ ' ', '"' }),
            .symbol = symbol_clean,
            .description = std.mem.trim(u8, cols[Col.description], &.{ ' ', '"' }),
            .quantity = if (is_cash) null else parseDollarAmount(cols[Col.quantity]),
            .current_value = parseDollarAmount(cols[Col.current_value]),
            .cost_basis = if (is_cash) null else parseDollarAmount(cols[Col.cost_basis_total]),
            .is_cash = is_cash,
        });
    }

    return positions.toOwnedSlice(allocator);
}

/// Check if a Fidelity option symbol (e.g. "-AMZN260515C220") matches a
/// portfolio lot by comparing parsed components against the lot's structured
/// fields (underlying, maturity_date, option_type, strike).
///
/// Fidelity format: [-]{UNDERLYING}{YYMMDD}{C|P}{STRIKE}
/// The underlying length is variable, so we scan for the first position
/// where 6 consecutive digits encode a plausible date (month 1-12, day 1-31),
/// are followed by `C` or `P`, and are followed by a finite numeric strike.
///
/// Returns false for non-option lots, for symbols that do not parse, and for
/// lots missing `underlying`, `maturity_date` or `strike`. Strikes are
/// compared with a 0.01 absolute tolerance.
pub fn optionMatchesLot(symbol: []const u8, lot: portfolio_mod.Lot) bool {
    if (lot.security_type != .option) return false;

    // Strip leading dash (short indicator)
    const sym = if (symbol.len > 0 and symbol[0] == '-') symbol[1..] else symbol;

    // Need at least: 1 char underlying + 6 date + 1 type + 1 strike = 9
    if (sym.len < 9) return false;

    // Scan for the date boundary: first position where 6 consecutive digits
    // form a plausible YYMMDD. The character before the digits is not
    // inspected, so an underlying may itself end in a digit.
    var i: usize = 1; // underlying is at least 1 char
    while (i + 7 < sym.len) : (i += 1) {
        // All 6 chars must be digits
        const date_digits = sym[i..][0..6];
        var all_digits = true;
        for (date_digits) |c| {
            if (!std.ascii.isDigit(c)) {
                all_digits = false;
                break;
            }
        }
        if (!all_digits) continue;

        // Character after the 6 digits must be C or P
        const type_char = sym[i + 6];
        if (type_char != 'C' and type_char != 'P') continue;

        // Parse date components
        const yy = std.fmt.parseInt(i16, date_digits[0..2], 10) catch continue;
        const mm = std.fmt.parseInt(u8, date_digits[2..4], 10) catch continue;
        const dd = std.fmt.parseInt(u8, date_digits[4..6], 10) catch continue;
        if (mm < 1 or mm > 12 or dd < 1 or dd > 31) continue;
        const year = 2000 + yy;

        // Parse components
        const underlying = sym[0..i];
        const option_type: portfolio_mod.OptionType = if (type_char == 'P') .put else .call;
        const strike_str = sym[i + 7 ..];
        const strike = std.fmt.parseFloat(f64, strike_str) catch continue;
        // parseFloat accepts "nan"/"inf". A NaN strike would slip through the
        // tolerance comparison below (every comparison with NaN is false) and
        // match any lot, so treat non-finite values as unparsable.
        if (!std.math.isFinite(strike)) continue;
        const date = Date.fromYmd(year, mm, dd);

        // Match against lot fields
        const lot_underlying = lot.underlying orelse return false;
        const lot_maturity = lot.maturity_date orelse return false;

        if (!std.mem.eql(u8, underlying, lot_underlying)) return false;
        if (!lot_maturity.eql(date)) return false;
        if (option_type != lot.option_type) return false;
        if (lot.strike) |ls| {
            if (@abs(ls - strike) > 0.01) return false;
        } else return false;

        return true;
    }
    return false;
}

// ── Tests ────────────────────────────────────────────────────

/// Header row of the Fidelity positions export, without a line terminator.
const fidelity_header =
    "Account Number,Account Name,Symbol,Description,Quantity,Last Price,Last Price Change,Current Value,Today's Gain/Loss Dollar,Today's Gain/Loss Percent,Total Gain/Loss Dollar,Total Gain/Loss Percent,Percent Of Account,Cost Basis Total,Average Cost Basis,Type";

test "parseCsv basic" {
    const csv =
        "\xEF\xBB\xBF" ++ // BOM
        fidelity_header ++ "\r\n" ++
        "Z123,Individual,FZFXX**,HELD IN MONEY MARKET,,,,$5000.00,,,,,50%,,,Cash,\r\n" ++
        "Z123,Individual,AAPL,APPLE INC,100,$150.00,+$2.00,$15000.00,+$200.00,+1.35%,+$5000.00,+50.00%,50%,$10000.00,$100.00,Margin,\r\n" ++
        "\r\n" ++
        "\"Disclaimer text\"\r\n";

    const allocator = std.testing.allocator;
    const positions = try parseCsv(allocator, csv);
    defer allocator.free(positions);

    try std.testing.expectEqual(@as(usize, 2), positions.len);

    // Cash position
    try std.testing.expectEqualStrings("FZFXX", positions[0].symbol);
    try std.testing.expect(positions[0].is_cash);
    try std.testing.expectApproxEqAbs(@as(f64, 5000.00), positions[0].current_value.?, 0.01);
    try std.testing.expect(positions[0].quantity == null);

    // Stock position
    try std.testing.expectEqualStrings("AAPL", positions[1].symbol);
    try std.testing.expect(!positions[1].is_cash);
    try std.testing.expectApproxEqAbs(@as(f64, 100), positions[1].quantity.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 15000.00), positions[1].current_value.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 10000.00), positions[1].cost_basis.?, 0.01);
}

test "parseCsv treats $1.00 price+cost as cash" {
    const csv =
        fidelity_header ++ "\n" ++
        "Z123,Individual,FDRXX,FID GOV CASH RESERVE,8500,$1.00,,$8500.00,,,,,10%,$8500.00,$1.00,Cash,\n";

    const allocator = std.testing.allocator;
    const positions = try parseCsv(allocator, csv);
    defer allocator.free(positions);

    try std.testing.expectEqual(@as(usize, 1), positions.len);
    try std.testing.expectEqualStrings("FDRXX", positions[0].symbol);
    try std.testing.expect(positions[0].is_cash);
    try std.testing.expect(positions[0].quantity == null);
    try std.testing.expectApproxEqAbs(@as(f64, 8500.00), positions[0].current_value.?, 0.01);
}

test "parseCsv stops at blank line" {
    const csv =
        fidelity_header ++ "\n" ++
        "Z123,Individual,AAPL,APPLE INC,50,$150.00,+$2.00,$7500.00,+$100.00,+1.35%,+$2500.00,+50.00%,100%,$5000.00,$100.00,Margin,\n" ++
        "\n" ++
        "Z123,Individual,MSFT,SHOULD NOT APPEAR,10,$300.00,,,$3000.00,,,,,,,,\n";

    const allocator = std.testing.allocator;
    const positions = try parseCsv(allocator, csv);
    defer allocator.free(positions);

    try std.testing.expectEqual(@as(usize, 1), positions.len);
    try std.testing.expectEqualStrings("AAPL", positions[0].symbol);
}

test "parseCsv option row" {
    const csv =
        fidelity_header ++ "\n" ++
        "Z123,Individual, -AMZN260515C220,AMZN MAY 15 2026 $220 CALL,-3,$21.00,+$8.60,-$6300.00,-$2580.00,-69.36%,-$3674.02,-139.92%,-8.85%,$2625.98,$8.75,Margin,\n";

    const allocator = std.testing.allocator;
    const positions = try parseCsv(allocator, csv);
    defer allocator.free(positions);

    try std.testing.expectEqual(@as(usize, 1), positions.len);
    try std.testing.expectEqualStrings("-AMZN260515C220", positions[0].symbol);
    try std.testing.expectApproxEqAbs(@as(f64, -3), positions[0].quantity.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, -6300.00), positions[0].current_value.?, 0.01);
}

test "parseCsv empty file" {
    const allocator = std.testing.allocator;
    const result = parseCsv(allocator, "");
    try std.testing.expectError(error.EmptyFile, result);
}

test "parseCsv wrong header" {
    const allocator = std.testing.allocator;
    const result = parseCsv(allocator, "Wrong,Header,Format\n");
    try std.testing.expectError(error.UnexpectedHeader, result);
}

test "parseCsv cash account type is not cash position" {
    // Fidelity's Type column says "Cash" for cash-account positions (vs "Margin").
    // This does NOT mean the security is a cash holding: parseCsv never reads
    // the Type column when classifying. Cash status comes from the ** suffix,
    // the money-market symbol list, or a $1.00 price and average cost.
    const csv =
        fidelity_header ++ "\n" ++
        "X99,HSA,QTUM,DEFIANCE QUANTUM ETF,190,$116.14,+$0.31,$22066.60,+$58.90,+0.26%,+$1185.60,+5.67%,99.64%,$20881.00,$109.90,Cash,\n";

    const allocator = std.testing.allocator;
    const positions = try parseCsv(allocator, csv);
    defer allocator.free(positions);

    try std.testing.expectEqual(@as(usize, 1), positions.len);
    try std.testing.expectEqualStrings("QTUM", positions[0].symbol);
    try std.testing.expect(!positions[0].is_cash);
    try std.testing.expectApproxEqAbs(@as(f64, 190), positions[0].quantity.?, 0.01);
}

test "optionMatchesLot basic call" {
    const lot = portfolio_mod.Lot{
        .symbol = "AMZN 05/15/2026 220.00 C",
        .security_type = .option,
        .underlying = "AMZN",
        .strike = 220.0,
        .option_type = .call,
        .maturity_date = Date.fromYmd(2026, 5, 15),
        .shares = -3,
        .open_date = Date.fromYmd(2025, 1, 1),
        .open_price = 8.75,
    };

    // Fidelity format with leading dash (short)
    try std.testing.expect(optionMatchesLot("-AMZN260515C220", lot));
    // Without dash
    try std.testing.expect(optionMatchesLot("AMZN260515C220", lot));
    // Wrong underlying
    try std.testing.expect(!optionMatchesLot("-MSFT260515C220", lot));
    // Wrong date
    try std.testing.expect(!optionMatchesLot("-AMZN260615C220", lot));
    // Wrong type
    try std.testing.expect(!optionMatchesLot("-AMZN260515P220", lot));
    // Wrong strike
    try std.testing.expect(!optionMatchesLot("-AMZN260515C230", lot));
    // Non-option lot
    const stock_lot = portfolio_mod.Lot{ .symbol = "AMZN", .security_type = .stock, .shares = 100, .open_date = Date.fromYmd(2025, 1, 1), .open_price = 100 };
    try std.testing.expect(!optionMatchesLot("-AMZN260515C220", stock_lot));
}

test "optionMatchesLot rejects non-finite strike" {
    const lot = portfolio_mod.Lot{
        .symbol = "AMZN 05/15/2026 220.00 C",
        .security_type = .option,
        .underlying = "AMZN",
        .strike = 220.0,
        .option_type = .call,
        .maturity_date = Date.fromYmd(2026, 5, 15),
        .shares = -3,
        .open_date = Date.fromYmd(2025, 1, 1),
        .open_price = 8.75,
    };

    // "nan" parses as a float; it must not be treated as matching any strike.
    try std.testing.expect(!optionMatchesLot("-AMZN260515Cnan", lot));
    try std.testing.expect(!optionMatchesLot("-AMZN260515Cinf", lot));
}

test "optionMatchesLot put option and decimal strike" {
    const lot = portfolio_mod.Lot{
        .symbol = "AAPL 06/20/2026 220.50 P",
        .security_type = .option,
        .underlying = "AAPL",
        .strike = 220.50,
        .option_type = .put,
        .maturity_date = Date.fromYmd(2026, 6, 20),
        .shares = -1,
        .open_date = Date.fromYmd(2025, 1, 1),
        .open_price = 5.0,
    };

    try std.testing.expect(optionMatchesLot("-AAPL260620P220.50", lot));
    try std.testing.expect(optionMatchesLot("AAPL260620P220.50", lot));
    // Call doesn't match put
    try std.testing.expect(!optionMatchesLot("-AAPL260620C220.50", lot));
}

test "optionMatchesLot single-char underlying" {
    const lot = portfolio_mod.Lot{
        .symbol = "A 03/20/2026 150.00 C",
        .security_type = .option,
        .underlying = "A",
        .strike = 150.0,
        .option_type = .call,
        .maturity_date = Date.fromYmd(2026, 3, 20),
        .shares = -2,
        .open_date = Date.fromYmd(2025, 1, 1),
        .open_price = 3.0,
    };

    try std.testing.expect(optionMatchesLot("-A260320C150", lot));
    try std.testing.expect(!optionMatchesLot("-A260320P150", lot));
}
aggregates, no per-position detail. +//! +//! ## Schwab CSV — limitations +//! +//! 1. NOT a general-purpose CSV parser. Handles Schwab's specific export +//! format where every field is double-quoted. +//! +//! 2. Handles simple quoted fields ("value") but does NOT handle escaped +//! quotes ("value with ""quotes"" inside") or multi-line quoted fields. +//! Schwab's export does not use these in practice. +//! +//! 3. The account number and name are extracted from the title line: +//! "Positions for account ... as of ..." +//! +//! 4. Rows with symbol "Cash & Cash Investments" are treated as cash. +//! The row with symbol "Positions Total" is skipped. +//! +//! 5. Hardcodes the expected column layout. If Schwab changes the CSV +//! format, this parser will break. The header row is not validated +//! beyond being skipped. +//! +//! ## Schwab summary — limitations +//! +//! The expected paste format is repeating blocks of 2-3 lines per +//! account: +//! +//! Account Name +//! Account number ending in NNN ...NNN +//! Type IRA $46.44 $227,058.15 +$1,072.88 +0.47% +//! +//! 1. NOT a CSV parser — parses freeform text pasted from the Schwab UI. +//! +//! 2. Identifies account blocks by the "Account number ending in" line. +//! The account name is the non-empty line immediately before it. +//! +//! 3. The values line (cash, total, change, pct) is identified by finding +//! dollar amounts. It tolerates missing or extra fields — it looks for +//! the first two dollar amounts as cash and total value. +//! +//! 4. Skips summary lines like "Investment Total", "Day Change Total", +//! and "Day Change Percent Total" which appear at the end of the paste. +//! +//! 5. Tolerant of partial pastes: if the user copies headers once but +//! not on subsequent pastes, or includes extra blank lines, the parser +//! still finds account blocks by the "Account number ending in" anchor. +//! +//! 6. The account number is extracted from "...NNN" at the end of the +//! 
/// Upper bound on the number of fields `splitCsvLine` stores for one line.
const expected_columns = 17;

/// Zero-based column positions within a split Schwab CSV row.
/// NOTE(review): the code that reads these indices is outside this view;
/// presumably they index the array filled by `splitCsvLine` — confirm.
const Col = struct {
    const symbol = 0;
    const price = 4;
    const quantity = 5;
    const market_value = 8;
    const cost_basis = 9;
    const asset_type = 16;
};

/// Tokenise one Schwab CSV line into `cols`, returning how many fields were
/// stored (never more than `expected_columns`).
///
/// Each field is either quoted or bare:
/// - A field starting with `"` runs to the next `"` (or to the end of the line
///   when the quote is never closed); the quotes are not part of the field and
///   commas inside it do not split it. Escaped quotes (`""`) are not supported.
/// - A bare field runs to the next comma.
/// - A comma where a field should start yields an empty field.
///
/// A comma at the very end of the line does not produce a trailing empty
/// field. Non-empty fields are slices into `line`; entries of `cols` at or
/// beyond the returned count are left untouched.
fn splitCsvLine(line: []const u8, cols: *[expected_columns][]const u8) usize {
    var count: usize = 0;
    var cursor: usize = 0;

    // Every pass through the loop body stores exactly one field.
    while (cursor < line.len and count < expected_columns) : (count += 1) {
        switch (line[cursor]) {
            '"' => {
                const begin = cursor + 1;
                const close = std.mem.indexOfScalarPos(u8, line, begin, '"') orelse line.len;
                cols[count] = line[begin..close];

                // Step over the closing quote (when present) and then over
                // the separator that directly follows it (when present).
                cursor = close;
                if (cursor < line.len) cursor += 1;
                if (cursor < line.len and line[cursor] == ',') cursor += 1;
            },
            ',' => {
                cols[count] = "";
                cursor += 1;
            },
            else => {
                const stop = std.mem.indexOfScalarPos(u8, line, cursor, ',') orelse line.len;
                cols[count] = line[cursor..stop];
                // Step over the separator unless the field ended the line.
                cursor = if (stop < line.len) stop + 1 else stop;
            },
        }
    }

    return count;
}