zfin/src/brokerage/schwab.zig

//! Schwab export parsers.
//!
//! Parses two distinct Schwab inputs:
//!
//! 1. The per-account positions CSV exported from Schwab's website
//!    (Accounts → Positions → Export). One file per account.
//!
//! 2. The freeform account-summary text the user pastes from
//!    Schwab's Accounts overview page. One paste covers all
//!    accounts at once but only carries cash + total-value
//!    aggregates, no per-position detail.
//!
//! ## Schwab CSV — limitations
//!
//! 1. NOT a general-purpose CSV parser. Handles Schwab's specific export
//!    format where every field is double-quoted.
//!
//! 2. Handles simple quoted fields ("value") but does NOT handle escaped
//!    quotes ("value with ""quotes"" inside") or multi-line quoted fields.
//!    Schwab's export does not use these in practice.
//!
//! 3. The account number and name are extracted from the title line:
//!    "Positions for account <NAME> ...<NUM> as of ..."
//!
//! 4. Rows with symbol "Cash & Cash Investments" are treated as cash.
//!    The row with symbol "Positions Total" is skipped.
//!
//! 5. Hardcodes the expected column layout. If Schwab changes the CSV
//!    format, this parser will break. The header row is not validated
//!    beyond being skipped.
//!
//! ## Schwab summary — limitations
//!
//! The expected paste format is repeating blocks of 2-3 lines per
//! account:
//!
//!   Account Name
//!   Account number ending in NNN ...NNN
//!   Type IRA $46.44 $227,058.15 +$1,072.88 +0.47%
//!
//! 1. NOT a CSV parser — parses freeform text pasted from the Schwab UI.
//!
//! 2. Identifies account blocks by the "Account number ending in" line.
//!    The account name is the nearest non-empty line before it, ignoring
//!    lines that start with "Account number", "Investment Total", or
//!    "Day Change".
//!
//! 3. The values line (cash, total, change, pct) is the line directly
//!    after the anchor. It tolerates missing or extra fields — the first
//!    two dollar amounts on it are taken as cash and total value; if only
//!    one amount is present it is taken as total value.
//!
//! 4. Skips summary lines like "Investment Total", "Day Change Total",
//!    and "Day Change Percent Total" which appear at the end of the paste.
//!
//! 5. Tolerant of partial pastes: if the user copies headers once but
//!    not on subsequent pastes, or includes extra blank lines, the parser
//!    still finds account blocks by the "Account number ending in" anchor.
//!
//! 6. The account number is extracted from "...NNN" at the end of the
//!    account number line (the last whitespace-separated token).

const std = @import("std");
const portfolio_mod = @import("../models/portfolio.zig");
const types = @import("types.zig");

const BrokeragePosition = types.BrokeragePosition;
const parseDollarAmount = types.parseDollarAmount;

/// Number of fields `splitCsvLine` extracts per row. `parseCsv` skips any
/// row that yields fewer fields than this.
const expected_columns = 17;

/// Zero-based column indices into a split Schwab positions row.
/// The names in the trailing comments are the header labels Schwab uses
/// for those columns (see the header row in the "parseCsv basic" test).
const Col = struct {
    const symbol = 0; // "Symbol"
    const price = 4; // "Price" (not read by the parser in this file)
    const quantity = 5; // "Qty"
    const market_value = 8; // "Mkt Val"
    const cost_basis = 9; // "Cost Basis"
    const asset_type = 16; // "Asset Type" (not read by the parser in this file)
};

/// Break one Schwab CSV row into fields, writing them to `cols`.
///
/// A field that starts with `"` runs to the next `"` (or end of line) and is
/// stored without the quotes; any other field runs to the next comma. Escaped
/// quotes (`""`) are not understood. At most `expected_columns` fields are
/// stored; the rest of the line is ignored.
///
/// Returns how many entries of `cols` were written. Every stored field is a
/// slice of `line` (or an empty literal), so nothing is allocated.
fn splitCsvLine(line: []const u8, cols: *[expected_columns][]const u8) usize {
    var filled: usize = 0;
    var cursor: usize = 0;
    // Each pass through the loop stores exactly one field.
    while (cursor < line.len and filled < expected_columns) : (filled += 1) {
        switch (line[cursor]) {
            '"' => {
                const begin = cursor + 1;
                const end = std.mem.indexOfScalarPos(u8, line, begin, '"') orelse line.len;
                cols[filled] = line[begin..end];
                cursor = end;
                if (cursor < line.len) cursor += 1; // step over the closing quote
                if (cursor < line.len and line[cursor] == ',') cursor += 1; // and the separator
            },
            ',' => {
                // Separator with nothing before it: empty field.
                cols[filled] = "";
                cursor += 1;
            },
            else => {
                const end = std.mem.indexOfScalarPos(u8, line, cursor, ',') orelse line.len;
                cols[filled] = line[cursor..end];
                cursor = end;
                if (cursor < line.len) cursor += 1; // step over the separator
            },
        }
    }
    return filled;
}

/// Pull the account name and number out of the title line of a Schwab
/// positions export.
///
/// Expected shape: `"Positions for account <NAME> ...<NUM> as of <TIME>, <DATE>"`.
/// Surrounding quotes, spaces and carriage returns are ignored. The name is
/// everything between the fixed prefix and the first "..."; the number is
/// whatever follows the "..." up to " as of" (or the end of the line when
/// that marker is absent).
///
/// Returns null when the prefix or the "..." separator is missing. Both
/// returned strings are slices of `line`.
fn parseTitle(line: []const u8) ?struct { name: []const u8, number: []const u8 } {
    const lead_in = "Positions for account ";
    const separator = "...";
    const timestamp_marker = " as of";

    const cleaned = std.mem.trim(u8, line, "\" \r");
    if (!std.mem.startsWith(u8, cleaned, lead_in)) return null;
    const body = cleaned[lead_in.len..];

    const sep_at = std.mem.indexOf(u8, body, separator) orelse return null;
    const tail = body[sep_at + separator.len ..];
    const number_end = std.mem.indexOf(u8, tail, timestamp_marker) orelse tail.len;

    return .{
        .name = std.mem.trimEnd(u8, body[0..sep_at], " "),
        .number = std.mem.trim(u8, tail[0..number_end], " "),
    };
}

/// Parsed Schwab per-account positions CSV. Returned `positions`
/// slice is heap-allocated; string fields slice into `data`.
pub const CsvResult = struct {
    /// One entry per retained data row; the caller frees this slice with
    /// the allocator that was passed to `parseCsv`.
    positions: []BrokeragePosition,
    /// Account name taken from the title line of the export.
    account_name: []const u8,
    /// Account number taken from the title line (the text after "...").
    account_number: []const u8,
};

/// Parse one Schwab per-account positions CSV export.
///
/// Layout expected in `data`:
///   line 1 — title: "Positions for account <NAME> ...<NUM> as of ..."
///   line 2 — blank (skipped without inspection)
///   line 3 — column header row (skipped without inspection)
///   rest   — one row per position
///
/// A data row is dropped when it is blank, yields fewer than
/// `expected_columns` fields, has an empty symbol, or is the
/// "Positions Total" footer row.
///
/// Ownership: the returned `positions` slice is allocated with `allocator`
/// and must be freed by the caller. Every string in the result is a slice
/// of `data`, so `data` must outlive the result.
///
/// Errors:
///   - `error.EmptyFile` when `data` is empty or contains only whitespace.
///   - `error.UnexpectedHeader` when the first line is not a Schwab title line.
///   - Any allocation error from `allocator`.
pub fn parseCsv(allocator: std.mem.Allocator, data: []const u8) !CsvResult {
    // `splitScalar` always yields at least one (possibly empty) slice, so an
    // empty input cannot be detected from the iterator; check it up front.
    if (std.mem.trim(u8, data, " \t\r\n").len == 0) return error.EmptyFile;

    var lines = std.mem.splitScalar(u8, data, '\n');

    // Line 1: title with account name and number
    const title = parseTitle(lines.first()) orelse return error.UnexpectedHeader;

    // Line 2: blank (skip)
    _ = lines.next();
    // Line 3: header row (skip)
    _ = lines.next();

    var positions = std.ArrayList(BrokeragePosition).empty;
    errdefer positions.deinit(allocator);

    // Data rows
    while (lines.next()) |line| {
        const trimmed = std.mem.trimEnd(u8, line, &.{ '\r', ' ' });
        if (trimmed.len == 0) continue;

        var cols: [expected_columns][]const u8 = undefined;
        // Short rows are skipped, so every index below `expected_columns`
        // is initialised from here on.
        if (splitCsvLine(trimmed, &cols) < expected_columns) continue;

        const symbol = cols[Col.symbol];
        if (symbol.len == 0) continue;
        if (std.mem.eql(u8, symbol, "Positions Total")) continue;

        // "Cash & Cash Investments" is Schwab's aggregate cash line.
        // Actual money-market holdings (SWVXX, etc.) appear as normal rows
        // with their real ticker and price — treat those as cash too so
        // the reconciliation matches what brokerage users think of as
        // "cash" in the account.
        const is_cash = std.mem.eql(u8, symbol, "Cash & Cash Investments") or
            portfolio_mod.isMoneyMarketSymbol(symbol);

        try positions.append(allocator, .{
            .account_number = title.number,
            .account_name = title.name,
            .symbol = symbol,
            .description = cols[1], // second column: "Description"
            // Cash rows carry no meaningful share count or cost basis.
            .quantity = if (is_cash) null else parseDollarAmount(cols[Col.quantity]),
            .current_value = parseDollarAmount(cols[Col.market_value]),
            .cost_basis = if (is_cash) null else parseDollarAmount(cols[Col.cost_basis]),
            .is_cash = is_cash,
        });
    }

    return .{
        .positions = try positions.toOwnedSlice(allocator),
        .account_name = title.name,
        .account_number = title.number,
    };
}

/// Account-level summary from a Schwab paste (no per-position detail).
pub const AccountSummary = struct {
    /// Nearest usable non-empty line before the "Account number ending in"
    /// anchor; empty when there is none.
    account_name: []const u8,
    /// Last whitespace-separated token of the anchor line with leading
    /// dots removed (e.g. "...901" becomes "901").
    account_number: []const u8,
    /// First dollar amount on the line after the anchor; null unless at
    /// least two amounts were found there.
    cash: ?f64,
    /// Second dollar amount on the line after the anchor, or the only one
    /// when just a single amount was found; null when none were found.
    total_value: ?f64,
};

/// Parse Schwab account summaries from pasted text.
///
/// Each line starting with "Account number ending in" marks one account:
///   - the account number is the last whitespace-separated token of that
///     line, with leading dots removed;
///   - the account name is the nearest preceding non-empty line that does
///     not start with "Account number", "Investment Total" or "Day Change"
///     (empty when there is none);
///   - cash and total value come from the line directly after the anchor:
///     the first two dollar amounts are cash and total, a single amount is
///     taken as total, and both stay null when no amount is found.
///
/// All string fields in the returned summaries are slices into `data`.
/// The returned slice is the only heap allocation; the caller must free it
/// with `allocator`.
///
/// Errors: `error.NoAccountsFound` when no anchor line is present, plus any
/// allocation error from `allocator`.
pub fn parseSummary(allocator: std.mem.Allocator, data: []const u8) ![]AccountSummary {
    const anchor = "Account number ending in";
    const line_padding = " \t\r";
    const token_separators = " \t";

    var accounts = std.ArrayList(AccountSummary).empty;
    errdefer accounts.deinit(allocator);

    // Most recent line that qualifies as an account name. Tracking it while
    // scanning forward gives the same answer as searching backwards from
    // each anchor, without keeping a copy of every line.
    var name_candidate: []const u8 = "";

    var line_iter = std.mem.splitScalar(u8, data, '\n');
    while (line_iter.next()) |raw_line| {
        const line = std.mem.trim(u8, raw_line, line_padding);

        if (!std.mem.startsWith(u8, line, anchor)) {
            const usable_as_name = line.len > 0 and
                !std.mem.startsWith(u8, line, "Account number") and
                !std.mem.startsWith(u8, line, "Investment Total") and
                !std.mem.startsWith(u8, line, "Day Change");
            if (usable_as_name) name_candidate = line;
            continue;
        }

        // Account number: last token on the anchor line (e.g. "...901" -> "901").
        var acct_num: []const u8 = "";
        var tok_iter = std.mem.tokenizeAny(u8, line, token_separators);
        while (tok_iter.next()) |tok| acct_num = tok;
        acct_num = std.mem.trimStart(u8, acct_num, ".");

        // Values line: only the line directly after the anchor is inspected.
        // The format is "Type XXX $CASH $TOTAL +$CHANGE +PCT%"; only the
        // first two dollar amounts matter, so a fixed two-slot buffer is
        // enough and scanning stops once it is full.
        var cash: ?f64 = null;
        var total: ?f64 = null;
        if (line_iter.peek()) |next_raw| {
            const values_line = std.mem.trim(u8, next_raw, line_padding);

            var amounts: [2]f64 = undefined;
            var found: usize = 0;
            var val_iter = std.mem.tokenizeAny(u8, values_line, token_separators);
            while (found < amounts.len) {
                const tok = val_iter.next() orelse break;
                if (parseDollarAmount(tok)) |amount| {
                    amounts[found] = amount;
                    found += 1;
                }
            }

            switch (found) {
                0 => {},
                // A lone amount is interpreted as the total value.
                1 => total = amounts[0],
                else => {
                    cash = amounts[0];
                    total = amounts[1];
                },
            }
        }

        try accounts.append(allocator, .{
            .account_name = name_candidate,
            .account_number = acct_num,
            .cash = cash,
            .total_value = total,
        });
    }

    if (accounts.items.len == 0) return error.NoAccountsFound;

    return accounts.toOwnedSlice(allocator);
}

// ── Tests ────────────────────────────────────────────────────

test "parseTitle" {
    const testing = std.testing;

    // Well-formed title lines and the name/number each one should yield.
    const Case = struct { line: []const u8, name: []const u8, number: []const u8 };
    const cases = [_]Case{
        .{
            .line = "\"Positions for account Sample Trust ...1234 as of 10:47 AM ET, 2026/04/10\"",
            .name = "Sample Trust",
            .number = "1234",
        },
        .{
            .line = "\"Positions for account Sample IRA ...5678 as of 3:00 PM ET, 2026/04/10\"",
            .name = "Sample IRA",
            .number = "5678",
        },
    };
    for (cases) |case| {
        const title = parseTitle(case.line) orelse return error.TestUnexpectedResult;
        try testing.expectEqualStrings(case.name, title.name);
        try testing.expectEqualStrings(case.number, title.number);
    }

    // Anything without the fixed prefix is rejected.
    try testing.expect(parseTitle("some random text") == null);
}

test "splitCsvLine" {
    const testing = std.testing;

    // A real-looking equity row, including Schwab's trailing comma.
    const row = "\"AMZN\",\"AMAZON.COM INC\",\"5.558\",\"2.38%\",\"239.208\",\"1,488\",\"$8,270.30\",\"2.38%\",\"$355,941.50\",\"$110,243.38\",\"$245,698.12\",\"222.87%\",\"C\",\"No\",\"N/A\",\"41.54%\",\"Equity\",";

    var fields: [expected_columns][]const u8 = undefined;
    const count = splitCsvLine(row, &fields);
    try testing.expectEqual(@as(usize, 17), count);

    // Spot-check fields, including ones with embedded commas.
    const Spot = struct { index: usize, text: []const u8 };
    const spots = [_]Spot{
        .{ .index = 0, .text = "AMZN" },
        .{ .index = 1, .text = "AMAZON.COM INC" },
        .{ .index = 5, .text = "1,488" },
        .{ .index = 8, .text = "$355,941.50" },
        .{ .index = 16, .text = "Equity" },
    };
    for (spots) |spot| {
        try testing.expectEqualStrings(spot.text, fields[spot.index]);
    }
}

test "parseCsv basic" {
    const testing = std.testing;
    const tolerance = 0.01;

    // Title, blank line, header, one equity row, the cash row and the
    // totals footer; every line (including the last) ends with '\n'.
    const csv =
        \\"Positions for account Sample Trust ...1234 as of 10:47 AM ET, 2026/04/10"
        \\
        \\"Symbol","Description","Price Chng $","Price Chng %","Price","Qty","Day Chng $","Day Chng %","Mkt Val","Cost Basis","Gain $","Gain %","Ratings","Reinvest?","Reinvest Capital Gains?","% of Acct","Asset Type",
        \\"AMZN","AMAZON.COM INC","5.558","2.38%","239.208","1,488","$8,270.30","2.38%","$355,941.50","$110,243.38","$245,698.12","222.87%","C","No","N/A","41.54%","Equity",
        \\"Cash & Cash Investments","--","--","--","--","--","$0.00","0%","$8,271.12","--","--","--","--","--","--","0.97%","Cash and Money Market",
        \\"Positions Total","","--","--","--","--","$7,718.87","0.9%","$856,805.99","$348,440.61","$500,094.26","143.52%","--","--","--","--","--",
        \\
    ;

    const result = try parseCsv(testing.allocator, csv);
    defer testing.allocator.free(result.positions);

    try testing.expectEqualStrings("Sample Trust", result.account_name);
    try testing.expectEqualStrings("1234", result.account_number);

    // The "Positions Total" row must not show up as a position.
    try testing.expectEqual(@as(usize, 2), result.positions.len);

    // Stock position
    const stock = result.positions[0];
    try testing.expectEqualStrings("AMZN", stock.symbol);
    try testing.expect(!stock.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 1488), stock.quantity.?, tolerance);
    try testing.expectApproxEqAbs(@as(f64, 355941.50), stock.current_value.?, tolerance);
    try testing.expectApproxEqAbs(@as(f64, 110243.38), stock.cost_basis.?, tolerance);

    // Cash
    const cash_row = result.positions[1];
    try testing.expectEqualStrings("Cash & Cash Investments", cash_row.symbol);
    try testing.expect(cash_row.is_cash);
    try testing.expectApproxEqAbs(@as(f64, 8271.12), cash_row.current_value.?, tolerance);
    try testing.expect(cash_row.quantity == null);
}

test "parseSummary basic" {
    const testing = std.testing;
    const tolerance = 0.01;

    const pasted =
        \\Sample Roth
        \\Account number ending in 1234 ...1234
        \\Type IRA $46.44 $227,058.15 +$1,072.88 +0.47%
        \\Inherited IRA
        \\Account number ending in 5678 ...5678
        \\Type IRA $2,461.82 $167,544.08 +$1,208.34 +0.73%
    ;

    const summaries = try parseSummary(testing.allocator, pasted);
    defer testing.allocator.free(summaries);

    // Expected accounts, in paste order.
    const Expected = struct { name: []const u8, number: []const u8, cash: f64, total: f64 };
    const expected = [_]Expected{
        .{ .name = "Sample Roth", .number = "1234", .cash = 46.44, .total = 227058.15 },
        .{ .name = "Inherited IRA", .number = "5678", .cash = 2461.82, .total = 167544.08 },
    };

    try testing.expectEqual(@as(usize, expected.len), summaries.len);
    for (expected, summaries) |want, got| {
        try testing.expectEqualStrings(want.name, got.account_name);
        try testing.expectEqualStrings(want.number, got.account_number);
        try testing.expectApproxEqAbs(want.cash, got.cash.?, tolerance);
        try testing.expectApproxEqAbs(want.total, got.total_value.?, tolerance);
    }
}

test "parseSummary tolerates missing headers and extra blank lines" {
    const testing = std.testing;

    const pasted =
        \\
        \\Sample Trust
        \\Account number ending in 1234 ...1234
        \\Type Brokerage $8,271.12 $849,087.12 +$20,488.80 +2.47%
        \\
        \\Tax Loss
        \\Account number ending in 5678 ...5678
        \\$4,654.15 $488,481.18 +$1,686.91 +0.35%
    ;

    const summaries = try parseSummary(testing.allocator, pasted);
    defer testing.allocator.free(summaries);

    try testing.expectEqual(@as(usize, 2), summaries.len);

    const first = summaries[0];
    try testing.expectEqualStrings("Sample Trust", first.account_name);
    try testing.expectEqualStrings("1234", first.account_number);

    // The second block's values line lacks the "Type ..." prefix; the
    // dollar amounts must still be picked up.
    const second = summaries[1];
    try testing.expectEqualStrings("Tax Loss", second.account_name);
    try testing.expectApproxEqAbs(@as(f64, 4654.15), second.cash.?, 0.01);
    try testing.expectApproxEqAbs(@as(f64, 488481.18), second.total_value.?, 0.01);
}

test "parseSummary skips summary footer" {
    const testing = std.testing;

    // One account followed by the totals footer Schwab appends to the page.
    const pasted =
        \\Sample Account
        \\Account number ending in 1234 ...1234
        \\Type Brokerage $3,492.85 $161,676.14 +$749.40 +0.47%
        \\Investment Total
        \\$22,070.35
        \\$4,338,116.38
        \\Day Change Total
        \\+$31,633.86
    ;

    const summaries = try parseSummary(testing.allocator, pasted);
    defer testing.allocator.free(summaries);

    // The footer must not produce an extra account.
    try testing.expectEqual(@as(usize, 1), summaries.len);
    try testing.expectEqualStrings("Sample Account", summaries[0].account_name);
}

test "parseSummary no accounts" {
    // Text without any "Account number ending in" line is an error, not an
    // empty result.
    try std.testing.expectError(
        error.NoAccountsFound,
        parseSummary(std.testing.allocator, "some random text\nno accounts here\n"),
    );
}