zfin/src/brokerage/schwab.zig
2026-05-23 11:25:39 -07:00

423 lines
17 KiB
Zig

//! Schwab export parsers.
//!
//! Parses two distinct Schwab inputs:
//!
//! 1. The per-account positions CSV exported from Schwab's website
//! (Accounts → Positions → Export). One file per account.
//!
//! 2. The freeform account-summary text the user pastes from
//! Schwab's Accounts overview page. One paste covers all
//! accounts at once but only carries cash + total-value
//! aggregates, no per-position detail.
//!
//! ## Schwab CSV — limitations
//!
//! 1. NOT a general-purpose CSV parser. Handles Schwab's specific export
//! format where every field is double-quoted.
//!
//! 2. Handles simple quoted fields ("value") but does NOT handle escaped
//! quotes ("value with ""quotes"" inside") or multi-line quoted fields.
//! Schwab's export does not use these in practice.
//!
//! 3. The account number and name are extracted from the title line:
//! "Positions for account <NAME> ...<NUM> as of ..."
//!
//! 4. Rows with symbol "Cash & Cash Investments" are treated as cash, as
//! are rows whose symbol `isMoneyMarketSymbol` recognizes (e.g. SWVXX).
//! The row with symbol "Positions Total" is skipped.
//!
//! 5. Hardcodes the expected column layout. If Schwab changes the CSV
//! format, this parser will break. The header row is not validated
//! beyond being skipped.
//!
//! ## Schwab summary — limitations
//!
//! The expected paste format is repeating blocks of 2-3 lines per
//! account:
//!
//! Account Name
//! Account number ending in NNN ...NNN
//! Type IRA $46.44 $227,058.15 +$1,072.88 +0.47%
//!
//! 1. NOT a CSV parser — parses freeform text pasted from the Schwab UI.
//!
//! 2. Identifies account blocks by the "Account number ending in" line.
//! The account name is the non-empty line immediately before it.
//!
//! 3. The values line (cash, total, change, pct) is the line immediately
//! after the "Account number ending in" line. It tolerates missing or
//! extra fields — the first two tokens that parse as dollar amounts are
//! taken as cash and total value; a lone amount is taken as total value.
//!
//! 4. Skips summary lines like "Investment Total", "Day Change Total",
//! and "Day Change Percent Total" which appear at the end of the paste.
//!
//! 5. Tolerant of partial pastes: if the user copies headers once but
//! not on subsequent pastes, or includes extra blank lines, the parser
//! still finds account blocks by the "Account number ending in" anchor.
//!
//! 6. The account number is extracted from "...NNN" at the end of the
//! account number line (the last whitespace-separated token).
const std = @import("std");
const portfolio_mod = @import("../models/portfolio.zig");
const types = @import("types.zig");
const BrokeragePosition = types.BrokeragePosition;
const parseDollarAmount = types.parseDollarAmount;
/// Number of fields in a full Schwab positions row. `splitCsvLine` never
/// writes more fields than this, and `parseCsv` ignores rows with fewer.
const expected_columns: usize = 17;

/// Zero-based field positions within a Schwab positions row.
const Col = struct {
    const symbol: usize = 0;
    const price: usize = 4;
    const quantity: usize = 5;
    const market_value: usize = 8;
    const cost_basis: usize = 9;
    const asset_type: usize = 16;
};
/// Break one Schwab CSV row into fields, writing at most `expected_columns`
/// of them into `cols` and returning how many were written.
///
/// A field that begins with `"` runs to the next `"` (or the end of the line)
/// and is stored without its quotes; any other field runs to the next comma.
/// Escaped quotes (`""`) are not understood. Stored fields are slices of
/// `line` (empty unquoted fields are the empty string), so nothing is
/// allocated. Entries of `cols` at or past the returned count are untouched.
fn splitCsvLine(line: []const u8, cols: *[expected_columns][]const u8) usize {
    var filled: usize = 0;
    var cursor: usize = 0;

    // Every pass through the loop produces exactly one field.
    while (cursor < line.len and filled < expected_columns) : (filled += 1) {
        switch (line[cursor]) {
            '"' => {
                const begin = cursor + 1;
                // An unterminated quote swallows the rest of the line.
                const close = std.mem.indexOfScalarPos(u8, line, begin, '"') orelse line.len;
                cols[filled] = line[begin..close];
                // Step over the closing quote, then over the separator if one follows.
                cursor = @min(close + 1, line.len);
                if (cursor < line.len and line[cursor] == ',') cursor += 1;
            },
            ',' => {
                // Separator with nothing before it: empty field.
                cols[filled] = "";
                cursor += 1;
            },
            else => {
                const end = std.mem.indexOfScalarPos(u8, line, cursor, ',') orelse line.len;
                cols[filled] = line[cursor..end];
                // Step over the separator when there is one.
                cursor = @min(end + 1, line.len);
            },
        }
    }
    return filled;
}
/// Pull the account name and number out of a Schwab positions title line.
///
/// Expected shape: `"Positions for account <NAME> ...<NUM> as of <TIME>, <DATE>"`.
/// Surrounding quotes, spaces and `\r` are ignored. The name is the text
/// before the first "..." and the number is the text between that "..." and
/// " as of" (or the end of the line when " as of" is absent).
/// Returns null when the prefix or the "..." separator is missing. Both
/// returned strings are slices of `line`.
fn parseTitle(line: []const u8) ?struct { name: []const u8, number: []const u8 } {
    const lead_in = "Positions for account ";
    const separator = "...";
    const terminator = " as of";

    const unquoted = std.mem.trim(u8, line, "\" \r");
    if (!std.mem.startsWith(u8, unquoted, lead_in)) return null;
    const body = unquoted[lead_in.len..];

    const sep_at = std.mem.indexOf(u8, body, separator) orelse return null;
    const tail = body[sep_at + separator.len ..];
    const number_end = if (std.mem.indexOf(u8, tail, terminator)) |at| at else tail.len;

    return .{
        .name = std.mem.trimEnd(u8, body[0..sep_at], " "),
        .number = std.mem.trim(u8, tail[0..number_end], " "),
    };
}
/// Parsed Schwab per-account positions CSV, as returned by `parseCsv`.
///
/// `positions` is heap-allocated with the allocator given to `parseCsv`
/// and must be freed by the caller. The string fields here and inside each
/// position borrow from the `data` buffer passed to `parseCsv` (or are
/// empty), so that buffer must stay alive for as long as the result is used.
pub const CsvResult = struct {
    /// One entry per kept data row, in file order.
    positions: []BrokeragePosition,
    /// Account name taken from the title line.
    account_name: []const u8,
    /// Account number taken from the title line (the text after "...").
    account_number: []const u8,
};
/// Parse a Schwab per-account positions CSV export.
///
/// Layout expected in `data`: a title line, then two lines that are skipped
/// without being inspected (a blank line and the column headers), then one
/// row per position. Rows with fewer than `expected_columns` fields, an
/// empty symbol, or the symbol "Positions Total" are ignored.
///
/// The returned `positions` slice is allocated with `allocator` and owned by
/// the caller; every string in the result borrows from `data`.
///
/// Errors:
/// - `error.EmptyFile` when `data` is empty or contains only whitespace.
/// - `error.UnexpectedHeader` when the first line is not a Schwab title line.
/// - `error.OutOfMemory` when `allocator` cannot grow the position list.
pub fn parseCsv(allocator: std.mem.Allocator, data: []const u8) !CsvResult {
    // `splitScalar` yields one (possibly empty) line even for empty input, so
    // the `orelse` on the first `next()` below never fires. Detect "nothing
    // to parse" here so callers see EmptyFile instead of UnexpectedHeader.
    if (std.mem.trim(u8, data, " \t\r\n").len == 0) return error.EmptyFile;

    var positions = std.ArrayList(BrokeragePosition).empty;
    errdefer positions.deinit(allocator);

    var lines = std.mem.splitScalar(u8, data, '\n');

    // Line 1: title with account name and number.
    const title_line = lines.next() orelse return error.EmptyFile;
    const title = parseTitle(title_line) orelse return error.UnexpectedHeader;

    // Line 2 (blank) and line 3 (column headers) are skipped unread.
    _ = lines.next();
    _ = lines.next();

    // Data rows.
    while (lines.next()) |line| {
        const trimmed = std.mem.trimEnd(u8, line, &.{ '\r', ' ' });
        if (trimmed.len == 0) continue;

        var cols: [expected_columns][]const u8 = undefined;
        const col_count = splitCsvLine(trimmed, &cols);
        // Short rows are skipped; past this point every index below
        // `expected_columns` in `cols` has been written.
        if (col_count < expected_columns) continue;

        const symbol = cols[Col.symbol];
        if (symbol.len == 0) continue;
        if (std.mem.eql(u8, symbol, "Positions Total")) continue;

        // "Cash & Cash Investments" is Schwab's aggregate cash line.
        // Actual money-market holdings (SWVXX, etc.) appear as normal rows
        // with their real ticker and price — treat those as cash too so
        // the reconciliation matches what brokerage users think of as
        // "cash" in the account.
        const is_cash = std.mem.eql(u8, symbol, "Cash & Cash Investments") or
            portfolio_mod.isMoneyMarketSymbol(symbol);

        try positions.append(allocator, .{
            .account_number = title.number,
            .account_name = title.name,
            .symbol = symbol,
            // Field 1 is the description column.
            .description = cols[1],
            // Cash rows carry only a value, no share count or basis.
            .quantity = if (is_cash) null else parseDollarAmount(cols[Col.quantity]),
            .current_value = parseDollarAmount(cols[Col.market_value]),
            .cost_basis = if (is_cash) null else parseDollarAmount(cols[Col.cost_basis]),
            .is_cash = is_cash,
        });
    }

    return .{
        .positions = try positions.toOwnedSlice(allocator),
        .account_name = title.name,
        .account_number = title.number,
    };
}
/// Account-level summary from a Schwab paste (no per-position detail).
/// Produced by `parseSummary`; the string fields borrow from the text
/// passed to it (or are empty).
pub const AccountSummary = struct {
    /// Nearest non-empty, non-footer line above the "Account number ending
    /// in" line; empty when there is none.
    account_name: []const u8,
    /// Last token of the "Account number ending in" line with leading dots
    /// removed.
    account_number: []const u8,
    /// First dollar amount on the values line; null unless at least two
    /// amounts were found.
    cash: ?f64,
    /// Second dollar amount on the values line, or the only one when just
    /// one was found; null when none was found.
    total_value: ?f64,
};
/// Parse Schwab account summary from pasted text.
///
/// Every line that starts with "Account number ending in" (the anchor)
/// produces one `AccountSummary`:
/// - `account_number`: the last whitespace-separated token on the anchor
///   line, with leading dots removed.
/// - `account_name`: the nearest non-empty line above the anchor that is not
///   an "Account number", "Investment Total" or "Day Change" line; empty if
///   there is none.
/// - `cash` / `total_value`: the first two tokens on the line directly after
///   the anchor that `parseDollarAmount` accepts. With a single amount only
///   `total_value` is set. If that line is itself an anchor (the values line
///   is missing from the paste), both stay null.
///
/// All string fields in the returned summaries are slices into `data`.
/// Only the returned slice itself is heap-allocated (caller must free it).
///
/// Errors: `error.NoAccountsFound` when no anchor line is present;
/// `error.OutOfMemory` when `allocator` fails.
pub fn parseSummary(allocator: std.mem.Allocator, data: []const u8) ![]AccountSummary {
    const anchor_prefix = "Account number ending in";

    var accounts = std.ArrayList(AccountSummary).empty;
    errdefer accounts.deinit(allocator);

    // Collect all lines, trimmed, so each anchor can look both backwards
    // (account name) and forwards (values line).
    var all_lines = std.ArrayList([]const u8).empty;
    defer all_lines.deinit(allocator);
    var line_iter = std.mem.splitScalar(u8, data, '\n');
    while (line_iter.next()) |line| {
        try all_lines.append(allocator, std.mem.trim(u8, line, &.{ '\r', ' ', '\t' }));
    }
    const lines = all_lines.items;

    for (lines, 0..) |line, i| {
        if (!std.mem.startsWith(u8, line, anchor_prefix)) continue;

        // Account number: last token on the line (e.g. "...901" -> "901").
        var acct_num: []const u8 = "";
        var tok_iter = std.mem.tokenizeAny(u8, line, &.{ ' ', '\t' });
        while (tok_iter.next()) |tok| acct_num = tok;
        acct_num = std.mem.trimStart(u8, acct_num, ".");

        // Account name: walk upwards to the nearest non-empty line that is
        // neither another account-number line nor part of the footer.
        var acct_name: []const u8 = "";
        var j = i;
        while (j > 0) {
            j -= 1;
            const candidate = lines[j];
            if (candidate.len == 0) continue;
            if (std.mem.startsWith(u8, candidate, "Account number") or
                std.mem.startsWith(u8, candidate, "Investment Total") or
                std.mem.startsWith(u8, candidate, "Day Change")) continue;
            acct_name = candidate;
            break;
        }

        // Values line: "Type XXX $CASH $TOTAL +$CHANGE +PCT%" directly after
        // the anchor. Only the first two dollar amounts matter, so they are
        // kept in two locals rather than a heap-allocated list.
        var cash: ?f64 = null;
        var total: ?f64 = null;
        // A following anchor means this account's values line is missing;
        // never read that line's tokens (it ends in account-number digits)
        // as amounts.
        if (i + 1 < lines.len and !std.mem.startsWith(u8, lines[i + 1], anchor_prefix)) {
            var first: ?f64 = null;
            var second: ?f64 = null;
            var val_iter = std.mem.tokenizeAny(u8, lines[i + 1], &.{ ' ', '\t' });
            while (val_iter.next()) |tok| {
                const amount = parseDollarAmount(tok) orelse continue;
                if (first == null) {
                    first = amount;
                } else {
                    second = amount;
                    break;
                }
            }
            if (second) |second_amount| {
                cash = first;
                total = second_amount;
            } else {
                // Zero or one amount: a lone amount is the total value.
                total = first;
            }
        }

        try accounts.append(allocator, .{
            .account_name = acct_name,
            .account_number = acct_num,
            .cash = cash,
            .total_value = total,
        });
    }

    if (accounts.items.len == 0) return error.NoAccountsFound;
    return accounts.toOwnedSlice(allocator);
}
// ── Tests ────────────────────────────────────────────────────
test "parseTitle" {
    // Well-formed title lines and the name/number each should yield.
    const Case = struct { line: []const u8, name: []const u8, number: []const u8 };
    const cases = [_]Case{
        .{
            .line = "\"Positions for account Sample Trust ...1234 as of 10:47 AM ET, 2026/04/10\"",
            .name = "Sample Trust",
            .number = "1234",
        },
        .{
            .line = "\"Positions for account Sample IRA ...5678 as of 3:00 PM ET, 2026/04/10\"",
            .name = "Sample IRA",
            .number = "5678",
        },
    };
    for (cases) |case| {
        const title = parseTitle(case.line) orelse return error.TestUnexpectedResult;
        try std.testing.expectEqualStrings(case.name, title.name);
        try std.testing.expectEqualStrings(case.number, title.number);
    }

    // Anything without the expected prefix is rejected.
    try std.testing.expect(parseTitle("some random text") == null);
}
test "splitCsvLine" {
    // A full-width row as Schwab exports it, trailing comma included.
    const row = "\"AMZN\",\"AMAZON.COM INC\",\"5.558\",\"2.38%\",\"239.208\",\"1,488\",\"$8,270.30\",\"2.38%\",\"$355,941.50\",\"$110,243.38\",\"$245,698.12\",\"222.87%\",\"C\",\"No\",\"N/A\",\"41.54%\",\"Equity\",";

    var fields: [expected_columns][]const u8 = undefined;
    try std.testing.expectEqual(@as(usize, 17), splitCsvLine(row, &fields));

    // Spot-check fields, including ones with commas inside the quotes.
    const Expect = struct { col: usize, text: []const u8 };
    const spot_checks = [_]Expect{
        .{ .col = 0, .text = "AMZN" },
        .{ .col = 1, .text = "AMAZON.COM INC" },
        .{ .col = 5, .text = "1,488" },
        .{ .col = 8, .text = "$355,941.50" },
        .{ .col = 16, .text = "Equity" },
    };
    for (spot_checks) |check| {
        try std.testing.expectEqualStrings(check.text, fields[check.col]);
    }
}
test "parseCsv basic" {
    // Title, blank line, header, one equity row, the cash row, the total row.
    const csv =
        "\"Positions for account Sample Trust ...1234 as of 10:47 AM ET, 2026/04/10\"\n" ++
        "\n" ++
        "\"Symbol\",\"Description\",\"Price Chng $\",\"Price Chng %\",\"Price\",\"Qty\",\"Day Chng $\",\"Day Chng %\",\"Mkt Val\",\"Cost Basis\",\"Gain $\",\"Gain %\",\"Ratings\",\"Reinvest?\",\"Reinvest Capital Gains?\",\"% of Acct\",\"Asset Type\",\n" ++
        "\"AMZN\",\"AMAZON.COM INC\",\"5.558\",\"2.38%\",\"239.208\",\"1,488\",\"$8,270.30\",\"2.38%\",\"$355,941.50\",\"$110,243.38\",\"$245,698.12\",\"222.87%\",\"C\",\"No\",\"N/A\",\"41.54%\",\"Equity\",\n" ++
        "\"Cash & Cash Investments\",\"--\",\"--\",\"--\",\"--\",\"--\",\"$0.00\",\"0%\",\"$8,271.12\",\"--\",\"--\",\"--\",\"--\",\"--\",\"--\",\"0.97%\",\"Cash and Money Market\",\n" ++
        "\"Positions Total\",\"\",\"--\",\"--\",\"--\",\"--\",\"$7,718.87\",\"0.9%\",\"$856,805.99\",\"$348,440.61\",\"$500,094.26\",\"143.52%\",\"--\",\"--\",\"--\",\"--\",\"--\",\n";

    const gpa = std.testing.allocator;
    const result = try parseCsv(gpa, csv);
    defer gpa.free(result.positions);

    // Account identity comes from the title line.
    try std.testing.expectEqualStrings("Sample Trust", result.account_name);
    try std.testing.expectEqualStrings("1234", result.account_number);

    // "Positions Total" is dropped, leaving the equity and the cash row.
    try std.testing.expectEqual(@as(usize, 2), result.positions.len);

    // Stock position
    const stock = result.positions[0];
    try std.testing.expectEqualStrings("AMZN", stock.symbol);
    try std.testing.expect(!stock.is_cash);
    try std.testing.expectApproxEqAbs(@as(f64, 1488), stock.quantity.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 355941.50), stock.current_value.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 110243.38), stock.cost_basis.?, 0.01);

    // Cash
    const cash_row = result.positions[1];
    try std.testing.expectEqualStrings("Cash & Cash Investments", cash_row.symbol);
    try std.testing.expect(cash_row.is_cash);
    try std.testing.expectApproxEqAbs(@as(f64, 8271.12), cash_row.current_value.?, 0.01);
    try std.testing.expect(cash_row.quantity == null);
}
test "parseSummary basic" {
    // Two complete three-line account blocks, back to back.
    const pasted =
        \\Sample Roth
        \\Account number ending in 1234 ...1234
        \\Type IRA $46.44 $227,058.15 +$1,072.88 +0.47%
        \\Inherited IRA
        \\Account number ending in 5678 ...5678
        \\Type IRA $2,461.82 $167,544.08 +$1,208.34 +0.73%
    ;
    const gpa = std.testing.allocator;
    const summaries = try parseSummary(gpa, pasted);
    defer gpa.free(summaries);

    try std.testing.expectEqual(@as(usize, 2), summaries.len);

    const roth = summaries[0];
    try std.testing.expectEqualStrings("Sample Roth", roth.account_name);
    try std.testing.expectEqualStrings("1234", roth.account_number);
    try std.testing.expectApproxEqAbs(@as(f64, 46.44), roth.cash.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 227058.15), roth.total_value.?, 0.01);

    const inherited = summaries[1];
    try std.testing.expectEqualStrings("Inherited IRA", inherited.account_name);
    try std.testing.expectEqualStrings("5678", inherited.account_number);
    try std.testing.expectApproxEqAbs(@as(f64, 2461.82), inherited.cash.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 167544.08), inherited.total_value.?, 0.01);
}
test "parseSummary tolerates missing headers and extra blank lines" {
    // Blank lines around the blocks, and a second values line that lacks
    // the leading "Type ..." words.
    const pasted =
        \\
        \\Sample Trust
        \\Account number ending in 1234 ...1234
        \\Type Brokerage $8,271.12 $849,087.12 +$20,488.80 +2.47%
        \\
        \\Tax Loss
        \\Account number ending in 5678 ...5678
        \\$4,654.15 $488,481.18 +$1,686.91 +0.35%
    ;
    const gpa = std.testing.allocator;
    const summaries = try parseSummary(gpa, pasted);
    defer gpa.free(summaries);

    try std.testing.expectEqual(@as(usize, 2), summaries.len);

    const trust = summaries[0];
    try std.testing.expectEqualStrings("Sample Trust", trust.account_name);
    try std.testing.expectEqualStrings("1234", trust.account_number);

    // Second account has no "Type" prefix — parser still finds dollar amounts
    const tax_loss = summaries[1];
    try std.testing.expectEqualStrings("Tax Loss", tax_loss.account_name);
    try std.testing.expectApproxEqAbs(@as(f64, 4654.15), tax_loss.cash.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 488481.18), tax_loss.total_value.?, 0.01);
}
test "parseSummary skips summary footer" {
    // One account followed by the totals footer of the overview page.
    const pasted =
        \\Sample Account
        \\Account number ending in 1234 ...1234
        \\Type Brokerage $3,492.85 $161,676.14 +$749.40 +0.47%
        \\Investment Total
        \\$22,070.35
        \\$4,338,116.38
        \\Day Change Total
        \\+$31,633.86
    ;
    const gpa = std.testing.allocator;
    const summaries = try parseSummary(gpa, pasted);
    defer gpa.free(summaries);

    // The footer contributes no account of its own.
    try std.testing.expectEqual(@as(usize, 1), summaries.len);
    try std.testing.expectEqualStrings("Sample Account", summaries[0].account_name);
}
test "parseSummary no accounts" {
    // Text without any "Account number ending in" line is an error.
    try std.testing.expectError(
        error.NoAccountsFound,
        parseSummary(std.testing.allocator, "some random text\nno accounts here\n"),
    );
}