//! Schwab export parsers.
//!
//! Parses two distinct Schwab inputs:
//!
//!   1. The per-account positions CSV exported from Schwab's website
//!      (Accounts → Positions → Export). One file per account.
//!
//!   2. The freeform account-summary text the user pastes from
//!      Schwab's Accounts overview page. One paste covers all
//!      accounts at once but only carries cash + total-value
//!      aggregates, no per-position detail.
//!
//! ## Schwab CSV — limitations
//!
//! 1. NOT a general-purpose CSV parser. Handles Schwab's specific export
//!    format where every field is double-quoted.
//!
//! 2. Handles simple quoted fields ("value") but does NOT handle escaped
//!    quotes ("value with ""quotes"" inside") or multi-line quoted fields.
//!    Schwab's export does not use these in practice.
//!
//! 3. The account number and name are extracted from the title line:
//!    "Positions for account <NAME> ...<NUM> as of ..."
//!
//! 4. Rows with symbol "Cash & Cash Investments" are treated as cash, as
//!    are rows whose symbol is a recognised money-market ticker.
//!    The row with symbol "Positions Total" is skipped.
//!
//! 5. Hardcodes the expected column layout. If Schwab changes the CSV
//!    format, this parser will break. The header row is not validated
//!    beyond being skipped.
//!
//! ## Schwab summary — limitations
//!
//! The expected paste format is repeating blocks of 2-3 lines per
//! account:
//!
//!     Account Name
//!     Account number ending in NNN ...NNN
//!     Type IRA $46.44 $227,058.15 +$1,072.88 +0.47%
//!
//! 1. NOT a CSV parser — parses freeform text pasted from the Schwab UI.
//!
//! 2. Identifies account blocks by the "Account number ending in" line.
//!    The account name is the nearest non-empty line before it.
//!
//! 3. The values line (cash, total, change, pct) is the line immediately
//!    after the anchor and is read by finding dollar amounts. It tolerates
//!    missing or extra fields — it looks for the first two dollar amounts
//!    as cash and total value; a single amount is taken as the total value.
//!
//! 4. Skips summary lines like "Investment Total", "Day Change Total",
//!    and "Day Change Percent Total" which appear at the end of the paste.
//!
//! 5. Tolerant of partial pastes: if the user copies headers once but
//!    not on subsequent pastes, or includes extra blank lines, the parser
//!    still finds account blocks by the "Account number ending in" anchor.
//!
//! 6. The account number is extracted from "...NNN" at the end of the
//!    account number line (the last whitespace-separated token).
|
|
|
|
const std = @import("std");
|
|
const portfolio_mod = @import("../models/portfolio.zig");
|
|
const types = @import("types.zig");
|
|
|
|
const BrokeragePosition = types.BrokeragePosition;
|
|
const parseDollarAmount = types.parseDollarAmount;
|
|
|
|
/// Number of fields in every data row of the Schwab positions CSV.
/// Rows that split into fewer fields are ignored by the CSV parser.
const expected_columns = 17;
|
|
|
|
/// Zero-based column indices into a split Schwab positions CSV row.
/// The indices follow the export's header row:
/// Symbol, Description, Price Chng $, Price Chng %, Price, Qty, Day Chng $,
/// Day Chng %, Mkt Val, Cost Basis, Gain $, Gain %, Ratings, Reinvest?,
/// Reinvest Capital Gains?, % of Acct, Asset Type.
const Col = struct {
    /// "Symbol" column.
    const symbol = 0;
    /// "Price" column.
    const price = 4;
    /// "Qty" column.
    const quantity = 5;
    /// "Mkt Val" column.
    const market_value = 8;
    /// "Cost Basis" column.
    const cost_basis = 9;
    /// "Asset Type" column (the last one).
    const asset_type = 16;
};
|
|
|
|
/// Break one Schwab CSV row into fields, writing them into `cols`.
///
/// A field that starts with `"` runs to the next `"` (or to end of line when
/// the quote is never closed) and is stored without its quotes. Any other
/// field runs to the next comma. A comma where a field would start produces
/// an empty field. Escaped quotes are not understood.
///
/// At most `expected_columns` fields are stored; scanning stops once that
/// many have been written. Non-empty fields are slices into `line`.
///
/// Returns the number of entries of `cols` that were filled in.
fn splitCsvLine(line: []const u8, cols: *[expected_columns][]const u8) usize {
    var filled: usize = 0;
    var cursor: usize = 0;

    // Every switch arm stores exactly one field, so the count advances in
    // the loop's continue expression.
    while (cursor < line.len and filled < expected_columns) : (filled += 1) {
        switch (line[cursor]) {
            '"' => {
                // Quoted field: content sits between this quote and the next.
                const begin = cursor + 1;
                const end = std.mem.indexOfScalarPos(u8, line, begin, '"') orelse line.len;
                cols[filled] = line[begin..end];

                // Step past the closing quote (if any), then past a comma
                // that directly follows it.
                cursor = @min(end + 1, line.len);
                if (cursor < line.len and line[cursor] == ',') cursor += 1;
            },
            ',' => {
                // Nothing between two separators: empty field.
                cols[filled] = "";
                cursor += 1;
            },
            else => {
                // Bare field: runs up to the next comma or end of line.
                const end = std.mem.indexOfScalarPos(u8, line, cursor, ',') orelse line.len;
                cols[filled] = line[cursor..end];
                cursor = @min(end + 1, line.len);
            },
        }
    }

    return filled;
}
|
|
|
|
/// Pull the account name and number out of the first line of a Schwab
/// positions export.
///
/// Expected shape: `"Positions for account <NAME> ...<NUM> as of <TIME>, <DATE>"`.
/// Surrounding double quotes, spaces and carriage returns are ignored.
/// The name is the text before the first `...` (trailing spaces removed);
/// the number is the text after it, up to ` as of` or the end of the line.
///
/// Returns null when the line lacks the `Positions for account ` prefix or
/// contains no `...`. Both returned slices point into `line`.
fn parseTitle(line: []const u8) ?struct { name: []const u8, number: []const u8 } {
    const marker = "Positions for account ";
    const separator = "...";

    const body = std.mem.trim(u8, line, "\" \r");
    if (!std.mem.startsWith(u8, body, marker)) return null;

    const tail = body[marker.len..];
    const sep_at = std.mem.indexOf(u8, tail, separator) orelse return null;

    // Everything after the dots: "<NUM> as of <TIME>, <DATE>" (the timestamp
    // part may be absent).
    const number_and_stamp = tail[sep_at + separator.len ..];
    const number_end = std.mem.indexOf(u8, number_and_stamp, " as of") orelse number_and_stamp.len;

    return .{
        .name = std.mem.trimEnd(u8, tail[0..sep_at], " "),
        .number = std.mem.trim(u8, number_and_stamp[0..number_end], " "),
    };
}
|
|
|
|
/// Result of parsing one Schwab per-account positions CSV.
///
/// Only `positions` is heap-allocated (the caller frees it with the
/// allocator passed to `parseCsv`); the string fields are slices into the
/// input buffer handed to `parseCsv`.
pub const CsvResult = struct {
    /// One entry per accepted data row, in file order.
    positions: []BrokeragePosition,
    /// Account name taken from the title line.
    account_name: []const u8,
    /// Account number taken from the title line.
    account_number: []const u8,
};
|
|
|
|
/// Parse one Schwab per-account positions CSV export.
///
/// Layout assumed: line 1 is the title (see `parseTitle`), lines 2 and 3
/// (blank line and column header) are skipped without inspection, and every
/// following line is a data row.
///
/// A data row is ignored when it is blank, splits into fewer than
/// `expected_columns` fields, has an empty symbol, or is the
/// "Positions Total" row. Rows whose symbol is "Cash & Cash Investments" or
/// a money-market symbol are marked as cash and carry no quantity or cost
/// basis.
///
/// Errors:
///   - `error.EmptyFile` when `data` is empty or whitespace only.
///   - `error.UnexpectedHeader` when the first line is not a Schwab title.
///   - allocator errors from growing the result list.
///
/// The returned `positions` slice is owned by the caller and must be freed
/// with `allocator`; all strings in the result are slices into `data`.
pub fn parseCsv(allocator: std.mem.Allocator, data: []const u8) !CsvResult {
    // A split iterator always yields a first field, even for an empty
    // buffer, so "no input" cannot be detected from the iterator and has to
    // be checked here.
    if (std.mem.trim(u8, data, " \t\r\n").len == 0) return error.EmptyFile;

    var positions: std.ArrayList(BrokeragePosition) = .empty;
    errdefer positions.deinit(allocator);

    var lines = std.mem.splitScalar(u8, data, '\n');

    // Line 1: title with account name and number.
    const title = parseTitle(lines.first()) orelse return error.UnexpectedHeader;

    // Line 2: blank (skip).
    _ = lines.next();
    // Line 3: header row (skip).
    _ = lines.next();

    // Data rows.
    while (lines.next()) |line| {
        const row = std.mem.trimEnd(u8, line, &.{ '\r', ' ' });
        if (row.len == 0) continue;

        var cols: [expected_columns][]const u8 = undefined;
        // Short rows are skipped, so every entry of `cols` is populated
        // below this point.
        if (splitCsvLine(row, &cols) < expected_columns) continue;

        const symbol = cols[Col.symbol];
        if (symbol.len == 0) continue;
        if (std.mem.eql(u8, symbol, "Positions Total")) continue;

        // "Cash & Cash Investments" is Schwab's aggregate cash line.
        // Actual money-market holdings (SWVXX, etc.) appear as normal rows
        // with their real ticker and price — treat those as cash too so
        // the reconciliation matches what brokerage users think of as
        // "cash" in the account.
        const is_cash = std.mem.eql(u8, symbol, "Cash & Cash Investments") or
            portfolio_mod.isMoneyMarketSymbol(symbol);

        try positions.append(allocator, .{
            .account_number = title.number,
            .account_name = title.name,
            .symbol = symbol,
            .description = cols[1],
            .quantity = if (is_cash) null else parseDollarAmount(cols[Col.quantity]),
            .current_value = parseDollarAmount(cols[Col.market_value]),
            .cost_basis = if (is_cash) null else parseDollarAmount(cols[Col.cost_basis]),
            .is_cash = is_cash,
        });
    }

    return .{
        .positions = try positions.toOwnedSlice(allocator),
        .account_name = title.name,
        .account_number = title.number,
    };
}
|
|
|
|
/// One account's totals as read from a pasted Schwab accounts overview.
/// Carries no per-position detail.
pub const AccountSummary = struct {
    /// Display name: the line found above the "Account number ending in" line.
    account_name: []const u8,
    /// Last token of the "Account number ending in" line, leading dots removed.
    account_number: []const u8,
    /// First dollar amount on the values line; null unless at least two
    /// amounts were found.
    cash: ?f64,
    /// Second dollar amount on the values line, or the only one when just a
    /// single amount was found; null when none was found.
    total_value: ?f64,
};
|
|
|
|
/// Parse the account summary text pasted from Schwab's Accounts overview.
///
/// Every line that starts with "Account number ending in" anchors one
/// account:
///   - the account number is the last whitespace-separated token of that
///     line with leading dots removed;
///   - the account name is the nearest earlier line that is non-empty and
///     does not start with "Account number", "Investment Total" or
///     "Day Change" (empty when there is none);
///   - cash and total value are the first two dollar amounts found on the
///     line immediately after the anchor. With a single amount only the
///     total is set; with none both stay null.
///
/// Errors: `error.NoAccountsFound` when no anchor line exists, plus
/// allocator errors.
///
/// All string fields in the returned summaries are slices into `data`.
/// Only the returned slice itself is heap-allocated (caller must free it
/// with `allocator`).
pub fn parseSummary(allocator: std.mem.Allocator, data: []const u8) ![]AccountSummary {
    var accounts: std.ArrayList(AccountSummary) = .empty;
    errdefer accounts.deinit(allocator);

    // Collect all lines, trimmed, so the name lookup can walk backwards.
    var all_lines: std.ArrayList([]const u8) = .empty;
    defer all_lines.deinit(allocator);

    var line_iter = std.mem.splitScalar(u8, data, '\n');
    while (line_iter.next()) |line| {
        try all_lines.append(allocator, std.mem.trim(u8, line, &.{ '\r', ' ', '\t' }));
    }
    const lines = all_lines.items;

    // Scan for "Account number ending in" anchors.
    for (lines, 0..) |line, i| {
        if (!std.mem.startsWith(u8, line, "Account number ending in")) continue;

        // Account number: last token on the line (e.g. "...901" -> "901").
        var acct_num: []const u8 = "";
        var tok_iter = std.mem.tokenizeAny(u8, line, " \t");
        while (tok_iter.next()) |tok| acct_num = tok;
        acct_num = std.mem.trimStart(u8, acct_num, ".");

        // Account name: nearest usable line before the anchor.
        var acct_name: []const u8 = "";
        var j = i;
        while (j > 0) {
            j -= 1;
            const candidate = lines[j];
            if (candidate.len == 0) continue;
            if (std.mem.startsWith(u8, candidate, "Account number")) continue;
            if (std.mem.startsWith(u8, candidate, "Investment Total")) continue;
            if (std.mem.startsWith(u8, candidate, "Day Change")) continue;
            acct_name = candidate;
            break;
        }

        // Values line: "Type XXX $CASH $TOTAL +$CHANGE +PCT%".
        // Only the first two dollar amounts matter, so they are kept in two
        // locals instead of a heap-allocated list; scanning stops as soon as
        // both are known.
        var cash: ?f64 = null;
        var total: ?f64 = null;
        if (i + 1 < lines.len) {
            var first: ?f64 = null;
            var second: ?f64 = null;

            var val_iter = std.mem.tokenizeAny(u8, lines[i + 1], " \t");
            while (val_iter.next()) |tok| {
                const amount = parseDollarAmount(tok) orelse continue;
                if (first == null) {
                    first = amount;
                } else {
                    second = amount;
                    break;
                }
            }

            if (second) |second_amount| {
                cash = first;
                total = second_amount;
            } else {
                // Zero or one amount: a lone amount is the total value.
                total = first;
            }
        }

        try accounts.append(allocator, .{
            .account_name = acct_name,
            .account_number = acct_num,
            .cash = cash,
            .total_value = total,
        });
    }

    if (accounts.items.len == 0) return error.NoAccountsFound;

    return accounts.toOwnedSlice(allocator);
}
|
|
|
|
// ── Tests ────────────────────────────────────────────────────
|
|
|
|
test "parseTitle" {
    const testing = std.testing;

    // Well-formed title lines and the name/number each must yield.
    const Case = struct { input: []const u8, name: []const u8, number: []const u8 };
    const cases = [_]Case{
        .{
            .input = "\"Positions for account Sample Trust ...1234 as of 10:47 AM ET, 2026/04/10\"",
            .name = "Sample Trust",
            .number = "1234",
        },
        .{
            .input = "\"Positions for account Sample IRA ...5678 as of 3:00 PM ET, 2026/04/10\"",
            .name = "Sample IRA",
            .number = "5678",
        },
    };

    for (cases) |case| {
        const title = parseTitle(case.input);
        try testing.expect(title != null);
        try testing.expectEqualStrings(case.name, title.?.name);
        try testing.expectEqualStrings(case.number, title.?.number);
    }

    // Anything without the expected prefix is rejected.
    try testing.expect(parseTitle("some random text") == null);
}
|
|
|
|
test "splitCsvLine" {
    const testing = std.testing;

    // A full 17-field equity row with a trailing comma, as Schwab exports it.
    const row = "\"AMZN\",\"AMAZON.COM INC\",\"5.558\",\"2.38%\",\"239.208\",\"1,488\",\"$8,270.30\",\"2.38%\",\"$355,941.50\",\"$110,243.38\",\"$245,698.12\",\"222.87%\",\"C\",\"No\",\"N/A\",\"41.54%\",\"Equity\",";

    var fields: [expected_columns][]const u8 = undefined;
    const count = splitCsvLine(row, &fields);
    try testing.expectEqual(@as(usize, 17), count);

    // Spot-check selected columns, including ones with embedded commas.
    const Expected = struct { index: usize, text: []const u8 };
    const checks = [_]Expected{
        .{ .index = 0, .text = "AMZN" },
        .{ .index = 1, .text = "AMAZON.COM INC" },
        .{ .index = 5, .text = "1,488" },
        .{ .index = 8, .text = "$355,941.50" },
        .{ .index = 16, .text = "Equity" },
    };
    for (checks) |check| {
        try testing.expectEqualStrings(check.text, fields[check.index]);
    }
}
|
|
|
|
test "parseCsv basic" {
    const testing = std.testing;

    // Title, blank line, header, one equity row, the aggregate cash row and
    // the totals row.
    const fixture =
        "\"Positions for account Sample Trust ...1234 as of 10:47 AM ET, 2026/04/10\"\n" ++
        "\n" ++
        "\"Symbol\",\"Description\",\"Price Chng $\",\"Price Chng %\",\"Price\",\"Qty\",\"Day Chng $\",\"Day Chng %\",\"Mkt Val\",\"Cost Basis\",\"Gain $\",\"Gain %\",\"Ratings\",\"Reinvest?\",\"Reinvest Capital Gains?\",\"% of Acct\",\"Asset Type\",\n" ++
        "\"AMZN\",\"AMAZON.COM INC\",\"5.558\",\"2.38%\",\"239.208\",\"1,488\",\"$8,270.30\",\"2.38%\",\"$355,941.50\",\"$110,243.38\",\"$245,698.12\",\"222.87%\",\"C\",\"No\",\"N/A\",\"41.54%\",\"Equity\",\n" ++
        "\"Cash & Cash Investments\",\"--\",\"--\",\"--\",\"--\",\"--\",\"$0.00\",\"0%\",\"$8,271.12\",\"--\",\"--\",\"--\",\"--\",\"--\",\"--\",\"0.97%\",\"Cash and Money Market\",\n" ++
        "\"Positions Total\",\"\",\"--\",\"--\",\"--\",\"--\",\"$7,718.87\",\"0.9%\",\"$856,805.99\",\"$348,440.61\",\"$500,094.26\",\"143.52%\",\"--\",\"--\",\"--\",\"--\",\"--\",\n";

    const gpa = testing.allocator;
    const result = try parseCsv(gpa, fixture);
    defer gpa.free(result.positions);

    try testing.expectEqualStrings("Sample Trust", result.account_name);
    try testing.expectEqualStrings("1234", result.account_number);

    // The totals row is dropped, leaving the stock and the cash line.
    try testing.expectEqual(@as(usize, 2), result.positions.len);

    // Stock position
    const stock = result.positions[0];
    try testing.expectEqualStrings("AMZN", stock.symbol);
    try testing.expect(!stock.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 1488), stock.quantity.?, 0.01);
    try testing.expectApproxEqAbs(@as(f64, 355941.50), stock.current_value.?, 0.01);
    try testing.expectApproxEqAbs(@as(f64, 110243.38), stock.cost_basis.?, 0.01);

    // Cash
    const cash_row = result.positions[1];
    try testing.expectEqualStrings("Cash & Cash Investments", cash_row.symbol);
    try testing.expect(cash_row.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 8271.12), cash_row.current_value.?, 0.01);
    try testing.expect(cash_row.quantity == null);
}
|
|
|
|
test "parseSummary basic" {
    const testing = std.testing;

    // Two complete three-line account blocks, back to back.
    const pasted =
        \\Sample Roth
        \\Account number ending in 1234 ...1234
        \\Type IRA $46.44 $227,058.15 +$1,072.88 +0.47%
        \\Inherited IRA
        \\Account number ending in 5678 ...5678
        \\Type IRA $2,461.82 $167,544.08 +$1,208.34 +0.73%
    ;

    const gpa = testing.allocator;
    const summaries = try parseSummary(gpa, pasted);
    defer gpa.free(summaries);

    try testing.expectEqual(@as(usize, 2), summaries.len);

    const roth = summaries[0];
    try testing.expectEqualStrings("Sample Roth", roth.account_name);
    try testing.expectEqualStrings("1234", roth.account_number);
    try testing.expectApproxEqAbs(@as(f64, 46.44), roth.cash.?, 0.01);
    try testing.expectApproxEqAbs(@as(f64, 227058.15), roth.total_value.?, 0.01);

    const inherited = summaries[1];
    try testing.expectEqualStrings("Inherited IRA", inherited.account_name);
    try testing.expectEqualStrings("5678", inherited.account_number);
    try testing.expectApproxEqAbs(@as(f64, 2461.82), inherited.cash.?, 0.01);
    try testing.expectApproxEqAbs(@as(f64, 167544.08), inherited.total_value.?, 0.01);
}
|
|
|
|
test "parseSummary tolerates missing headers and extra blank lines" {
    const testing = std.testing;

    // Leading blank line, a blank line between blocks, and a second values
    // line that lacks the "Type ..." prefix.
    const pasted =
        \\
        \\Sample Trust
        \\Account number ending in 1234 ...1234
        \\Type Brokerage $8,271.12 $849,087.12 +$20,488.80 +2.47%
        \\
        \\Tax Loss
        \\Account number ending in 5678 ...5678
        \\$4,654.15 $488,481.18 +$1,686.91 +0.35%
    ;

    const gpa = testing.allocator;
    const summaries = try parseSummary(gpa, pasted);
    defer gpa.free(summaries);

    try testing.expectEqual(@as(usize, 2), summaries.len);

    const trust = summaries[0];
    try testing.expectEqualStrings("Sample Trust", trust.account_name);
    try testing.expectEqualStrings("1234", trust.account_number);

    // Second account has no "Type" prefix — parser still finds dollar amounts
    const tax_loss = summaries[1];
    try testing.expectEqualStrings("Tax Loss", tax_loss.account_name);
    try testing.expectApproxEqAbs(@as(f64, 4654.15), tax_loss.cash.?, 0.01);
    try testing.expectApproxEqAbs(@as(f64, 488481.18), tax_loss.total_value.?, 0.01);
}
|
|
|
|
test "parseSummary skips summary footer" {
    const testing = std.testing;

    // One account followed by the overview page's totals footer, which must
    // not be mistaken for further accounts.
    const pasted =
        \\Sample Account
        \\Account number ending in 1234 ...1234
        \\Type Brokerage $3,492.85 $161,676.14 +$749.40 +0.47%
        \\Investment Total
        \\$22,070.35
        \\$4,338,116.38
        \\Day Change Total
        \\+$31,633.86
    ;

    const gpa = testing.allocator;
    const summaries = try parseSummary(gpa, pasted);
    defer gpa.free(summaries);

    try testing.expectEqual(@as(usize, 1), summaries.len);
    try testing.expectEqualStrings("Sample Account", summaries[0].account_name);
}
|
|
|
|
test "parseSummary no accounts" {
    // Text without any "Account number ending in" line yields an error
    // rather than an empty slice.
    try std.testing.expectError(
        error.NoAccountsFound,
        parseSummary(std.testing.allocator, "some random text\nno accounts here\n"),
    );
}
|