// zfin/src/analytics/analysis.zig
//
// 635 lines
// 24 KiB
// Zig

/// Portfolio analysis engine.
///
/// Takes portfolio allocations (with market values) and classification metadata,
/// produces breakdowns by asset class, sector, geographic region, account, and tax type.
const std = @import("std");
const srf = @import("srf");
const Allocation = @import("valuation.zig").Allocation;
const ClassificationEntry = @import("../models/classification.zig").ClassificationEntry;
const ClassificationMap = @import("../models/classification.zig").ClassificationMap;
const LotType = @import("../models/portfolio.zig").LotType;
const Portfolio = @import("../models/portfolio.zig").Portfolio;
const Date = @import("../models/date.zig").Date;
/// One row of a breakdown: a labelled bucket together with its size,
/// e.g. "Technology" -> 25.3%.
pub const BreakdownItem = struct {
    /// Bucket name (sector, asset class, account, ...).
    label: []const u8,
    /// Dollar amount held in this bucket.
    value: f64,
    /// Share of the total, expressed as a fraction (0.0 - 1.0).
    weight: f64,
};
/// How an account is treated for tax purposes.
pub const TaxType = enum {
    taxable,
    roth,
    traditional,
    hsa,

    /// Human-readable name for this tax type, suitable for display.
    /// The switch has no `else` arm, so adding a variant forces this
    /// function to be updated.
    pub fn label(self: TaxType) []const u8 {
        switch (self) {
            .taxable => return "Taxable",
            .roth => return "Roth (Post-Tax)",
            .traditional => return "Traditional (Pre-Tax)",
            .hsa => return "HSA (Triple Tax-Free)",
        }
    }
};
/// Account tax type classification entry, parsed from accounts.srf.
///
/// `parseAccountsFile` converts each srf record straight into this
/// struct (`fields.to(AccountTaxEntry)`), so the field names below are
/// also the record keys, and a field's default applies when its key is
/// absent from the record.
pub const AccountTaxEntry = struct {
/// Account name. `AccountMap` lookups compare it byte-for-byte
/// against the name they are given.
account: []const u8,
/// Tax treatment of the account.
tax_type: TaxType,
/// Institution holding the account, if recorded. Matched together
/// with `account_number` by `AccountMap.findByInstitutionAccount`.
institution: ?[]const u8 = null,
/// Account number at `institution`, if recorded.
account_number: ?[]const u8 = null,
/// How often the account is expected to be updated by hand.
/// Defaults to `.weekly`.
update_cadence: UpdateCadence = .weekly,
/// When true, raw cash-balance changes (`cash_delta` in the
/// contributions diff) on this account roll up into the
/// attribution total as real contributions.
///
/// Defaults to false because most cash accounts generate
/// `cash_delta` entries from internal movement — interest posting,
/// dividend credit, CD coupon, settlement sweeps — that would
/// inflate the attribution number if counted. Set to true only
/// for accounts whose cash movement is dominated by external
/// contributions (payroll ESPP accrual, direct 401k cash
/// deposits). See TODO.md for the design history.
cash_is_contribution: bool = false,
/// When true, marks the account as a direct-indexing proxy
/// (lots track a benchmark with tracking-error drift rather
/// than holding the benchmark directly). Two behaviors:
///
/// 1. Contributions (`zfin contributions` / `zfin compare`
/// attribution): the edit-detection residual tolerance is
/// loosened from 0.01% (noise floor) to 1% — tracking-
/// error share reconciliation no longer lands in
/// `rollup_delta` / `drip_negative` and the attribution
/// total stays clean.
///
/// 2. Audit (`zfin audit` ratio-suggestions section): lots
/// with `price_ratio == 1.0` in this account get a
/// suggested ratio to bridge the brokerage vs. portfolio
/// value gap. Default audit behavior skips ratio == 1.0
/// lots since there's nothing to adjust; direct-indexing
/// accounts opt out of that skip.
///
/// Not a general "ignore drift" flag — use only for accounts
/// whose underlying lots explicitly track a benchmark (e.g. a
/// basket of 500 individual stocks tracked as SPY via `ticker::`
/// alias).
direct_indexing: bool = false,
};
/// How often an account is expected to be refreshed by hand. Parsed from
/// accounts.srf. The default used by `AccountTaxEntry` is `weekly`
/// (fail-open: every account nags until explicitly silenced).
pub const UpdateCadence = enum {
    weekly,
    monthly,
    quarterly,
    none,

    /// Calendar days after which an account counts as overdue, or
    /// `null` for `.none` (no threshold).
    pub fn thresholdDays(self: UpdateCadence) ?u32 {
        switch (self) {
            .weekly => return 7,
            .monthly => return 30,
            .quarterly => return 90,
            .none => return null,
        }
    }

    /// Lower-case name of the cadence. The label of every variant is
    /// exactly its tag name, so it is derived from the tag directly.
    pub fn label(self: UpdateCadence) []const u8 {
        return @tagName(self);
    }
};
/// Parsed account metadata.
///
/// `deinit` frees every entry's strings and the `entries` slice through
/// `allocator`, so all of them must have been allocated with it.
pub const AccountMap = struct {
    entries: []AccountTaxEntry,
    allocator: std.mem.Allocator,

    /// Free each entry's owned strings, then the entries slice itself.
    pub fn deinit(self: *AccountMap) void {
        const gpa = self.allocator;
        for (self.entries) |entry| {
            gpa.free(entry.account);
            if (entry.institution) |inst| gpa.free(inst);
            if (entry.account_number) |num| gpa.free(num);
        }
        gpa.free(self.entries);
    }

    /// First entry whose account name equals `account` exactly, if any.
    fn entryFor(self: AccountMap, account: []const u8) ?AccountTaxEntry {
        for (self.entries) |entry| {
            if (std.mem.eql(u8, entry.account, account)) return entry;
        }
        return null;
    }

    /// Display label of the tax type recorded for `account`, or
    /// "Unknown" when the account is not in the map.
    pub fn taxTypeFor(self: AccountMap, account: []const u8) []const u8 {
        const entry = self.entryFor(account) orelse return "Unknown";
        return entry.tax_type.label();
    }

    /// Portfolio account name of the first entry that records both
    /// `institution` and `account_number` and matches both exactly.
    /// Entries missing either field never match. Returns null when
    /// nothing matches.
    pub fn findByInstitutionAccount(self: AccountMap, institution: []const u8, account_number: []const u8) ?[]const u8 {
        for (self.entries) |entry| {
            const inst = entry.institution orelse continue;
            const num = entry.account_number orelse continue;
            const same_institution = std.mem.eql(u8, inst, institution);
            if (same_institution and std.mem.eql(u8, num, account_number)) {
                return entry.account;
            }
        }
        return null;
    }

    /// Entries relevant to `institution`.
    ///
    /// Returns an empty slice when no entry belongs to `institution`.
    /// Otherwise it returns the *entire* `entries` slice, not a
    /// filtered view, so callers still have to check each entry's
    /// `institution` themselves.
    /// NOTE(review): the name suggests a filtered result; producing one
    /// would require an allocation and therefore a signature change.
    pub fn entriesForInstitution(self: AccountMap, institution: []const u8) []const AccountTaxEntry {
        for (self.entries) |entry| {
            const inst = entry.institution orelse continue;
            if (std.mem.eql(u8, inst, institution)) return self.entries;
        }
        return &.{};
    }

    /// Whether cash-balance movement on `account` counts as a real
    /// contribution (rather than internal noise) for the attribution
    /// total. False when the account is not in the map.
    pub fn cashIsContribution(self: AccountMap, account: []const u8) bool {
        const entry = self.entryFor(account) orelse return false;
        return entry.cash_is_contribution;
    }

    /// Whether `account` is flagged as a direct-indexing proxy; see
    /// `AccountTaxEntry.direct_indexing` for what that flag drives.
    /// False when the account is not in the map.
    pub fn isDirectIndexing(self: AccountMap, account: []const u8) bool {
        const entry = self.entryFor(account) orelse return false;
        return entry.direct_indexing;
    }
};
/// Parse an accounts.srf file into an AccountMap.
///
/// Each record has:
/// account::<NAME>,tax_type::<TYPE>[,institution::<INST>][,account_number::<NUM>]
/// plus any of the other optional `AccountTaxEntry` fields (for example
/// `cash_is_contribution:bool:true` or `direct_indexing:bool:true`).
///
/// Records that cannot be converted to an `AccountTaxEntry` (e.g. a
/// missing `tax_type`) are skipped rather than treated as errors.
///
/// The parser is run with `alloc_strings = false`, so every string kept
/// in the result is duplicated with `allocator`. The caller owns the
/// returned map and must call `deinit` on it.
///
/// Errors: `error.InvalidData` when the srf iterator cannot be created;
/// errors from reading subsequent records and allocation failures are
/// propagated. Nothing is leaked on any error path.
pub fn parseAccountsFile(allocator: std.mem.Allocator, data: []const u8) !AccountMap {
    var entries = std.ArrayList(AccountTaxEntry).empty;
    // Covers entries that were already appended to the list.
    errdefer {
        for (entries.items) |e| {
            allocator.free(e.account);
            if (e.institution) |s| allocator.free(s);
            if (e.account_number) |s| allocator.free(s);
        }
        entries.deinit(allocator);
    }

    var reader = std.Io.Reader.fixed(data);
    var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
    defer it.deinit();

    while (try it.next()) |fields| {
        const entry = fields.to(AccountTaxEntry) catch continue;

        // Until the record has been appended, its duplicated strings are
        // owned by this iteration only; release them if a later step fails.
        const account = try allocator.dupe(u8, entry.account);
        errdefer allocator.free(account);

        const institution: ?[]const u8 = if (entry.institution) |s| try allocator.dupe(u8, s) else null;
        errdefer if (institution) |s| allocator.free(s);

        const account_number: ?[]const u8 = if (entry.account_number) |s| try allocator.dupe(u8, s) else null;
        errdefer if (account_number) |s| allocator.free(s);

        try entries.append(allocator, .{
            .account = account,
            .tax_type = entry.tax_type,
            .institution = institution,
            .account_number = account_number,
            .update_cadence = entry.update_cadence,
            .cash_is_contribution = entry.cash_is_contribution,
            .direct_indexing = entry.direct_indexing,
        });
    }

    return .{
        .entries = try entries.toOwnedSlice(allocator),
        .allocator = allocator,
    };
}
/// Everything `analyzePortfolio` produces: one breakdown per dimension
/// plus the symbols it could not classify.
pub const AnalysisResult = struct {
    /// Breakdown by asset class (US Large Cap, Bonds, Cash & CDs, etc.)
    asset_class: []BreakdownItem,
    /// Breakdown by sector (Technology, Healthcare, etc.) -- equities only
    sector: []BreakdownItem,
    /// Breakdown by geographic region (US, International, etc.)
    geo: []BreakdownItem,
    /// Breakdown by account name
    account: []BreakdownItem,
    /// Breakdown by tax type (Taxable, Roth, Traditional, HSA)
    tax_type: []BreakdownItem,
    /// Positions not covered by classification metadata
    unclassified: []const []const u8,
    /// Total portfolio value used as denominator
    total_value: f64,

    /// Free the six slices held by the result. Only the slices are
    /// freed; the label and symbol strings they reference are left
    /// untouched. `allocator` must be the one the slices were allocated
    /// with.
    pub fn deinit(self: *AnalysisResult, allocator: std.mem.Allocator) void {
        const breakdowns = [_][]BreakdownItem{
            self.asset_class,
            self.sector,
            self.geo,
            self.account,
            self.tax_type,
        };
        for (breakdowns) |items| allocator.free(items);
        allocator.free(self.unclassified);
    }
};
/// Compute portfolio analysis from allocations and classification metadata.
///
/// `allocations` are the stock/ETF positions with market values; those
/// with a non-positive market value are ignored for classification.
/// `classifications` is the metadata file data.
/// `portfolio` is the full portfolio (for cash/CD/option totals and the
/// per-lot account breakdown).
/// `total_portfolio_value` is the denominator for every weight; when it
/// is not positive, 1.0 is used instead so weights stay finite.
/// `account_map` is optional account tax type metadata; without it the
/// tax-type breakdown is empty.
/// `as_of` is the date against which lot open/closed status is
/// evaluated. Pass `null` to use wall-clock today (the default for
/// interactive commands); historical snapshot backfill passes the
/// target date so lots opened/closed/matured between `as_of` and today
/// are counted correctly.
///
/// The returned slices are allocated with `allocator` and released by
/// `AnalysisResult.deinit`. Labels and symbols inside them are not
/// copied: they point into `allocations`, `classifications`,
/// `portfolio`, or static strings, which must outlive the result.
///
/// Errors: allocation failures are propagated (no partial totals are
/// produced), and nothing is leaked on the error path.
pub fn analyzePortfolio(
    allocator: std.mem.Allocator,
    allocations: []const Allocation,
    classifications: ClassificationMap,
    portfolio: Portfolio,
    total_portfolio_value: f64,
    account_map: ?AccountMap,
    as_of: ?Date,
) !AnalysisResult {
    // Adds `amount` to the total stored under `key`, creating the bucket
    // (starting from zero) on first use.
    const addToBucket = struct {
        fn add(map: *std.StringHashMap(f64), key: []const u8, amount: f64) std.mem.Allocator.Error!void {
            const slot = try map.getOrPut(key);
            if (!slot.found_existing) slot.value_ptr.* = 0;
            slot.value_ptr.* += amount;
        }
    }.add;

    // Accumulators: label -> dollar amount
    var ac_map = std.StringHashMap(f64).init(allocator);
    defer ac_map.deinit();
    var sector_map = std.StringHashMap(f64).init(allocator);
    defer sector_map.deinit();
    var geo_map = std.StringHashMap(f64).init(allocator);
    defer geo_map.deinit();
    var acct_map = std.StringHashMap(f64).init(allocator);
    defer acct_map.deinit();
    var tax_map = std.StringHashMap(f64).init(allocator);
    defer tax_map.deinit();

    var unclassified_list = std.ArrayList([]const u8).empty;
    errdefer unclassified_list.deinit(allocator);

    // Process each equity allocation (for asset class, sector, geo, unclassified)
    for (allocations) |alloc| {
        const mv = alloc.market_value;
        if (mv <= 0) continue;

        // A classification entry applies when it names either the raw
        // symbol or the display symbol. One symbol may have several
        // entries, each taking `pct` percent of the market value.
        var found = false;
        for (classifications.entries) |entry| {
            const matches = std.mem.eql(u8, entry.symbol, alloc.symbol) or
                std.mem.eql(u8, entry.symbol, alloc.display_symbol);
            if (!matches) continue;
            found = true;

            const frac = entry.pct / 100.0;
            const portion = mv * frac;
            if (entry.asset_class) |ac| try addToBucket(&ac_map, ac, portion);
            if (entry.sector) |s| try addToBucket(&sector_map, s, portion);
            if (entry.geo) |g| try addToBucket(&geo_map, g, portion);
        }
        if (!found) {
            try unclassified_list.append(allocator, alloc.display_symbol);
        }
    }

    // Build symbol -> (current_price, price_ratio) lookup from allocations.
    // For unmerged allocations, current_price already includes price_ratio (preadjusted).
    // For merged allocations, current_price is the base-ticker price (not preadjusted).
    const PriceEntry = struct { price: f64, is_preadjusted: bool };
    var price_lookup = std.StringHashMap(PriceEntry).init(allocator);
    defer price_lookup.deinit();
    for (allocations) |alloc| {
        // A dropped entry would silently value the lot at its open price
        // below, so an allocation failure must abort the analysis.
        try price_lookup.put(alloc.symbol, .{
            .price = alloc.current_price,
            .is_preadjusted = alloc.price_ratio != 1.0,
        });
    }

    // Account breakdown from individual lots (avoids "Multiple" aggregation issue).
    // Use `lotIsOpenAsOf(as_of)` when provided so backfilled snapshots
    // correctly include/exclude lots based on the target date rather
    // than wall-clock today. `isOpen()` = `lotIsOpenAsOf(today)`.
    const reference_date = as_of orelse Date.fromEpoch(std.time.timestamp());
    for (portfolio.lots) |lot| {
        if (!lot.lotIsOpenAsOf(reference_date)) continue;
        const acct = lot.account orelse continue;
        const value: f64 = switch (lot.security_type) {
            .stock => blk: {
                if (price_lookup.get(lot.priceSymbol())) |entry| {
                    break :blk lot.marketValue(entry.price, entry.is_preadjusted);
                } else {
                    // Fallback to open_price (already in lot-specific terms)
                    break :blk lot.shares * lot.open_price;
                }
            },
            .cash => lot.shares,
            .cd => lot.shares, // face value
            .option => @abs(lot.shares) * lot.open_price,
            .illiquid, .watch => continue,
        };
        try addToBucket(&acct_map, acct, value);
    }

    // Add non-stock asset classes (combine Cash + CDs); the combined
    // amount is also counted under the "US" geography.
    const cash_cd_total = portfolio.totalCash() + portfolio.totalCdFaceValue();
    if (cash_cd_total > 0) {
        try addToBucket(&ac_map, "Cash & CDs", cash_cd_total);
        try addToBucket(&geo_map, "US", cash_cd_total);
    }
    const opt_total = portfolio.totalOptionCost();
    if (opt_total > 0) {
        try addToBucket(&ac_map, "Options", opt_total);
    }

    // Tax type breakdown: map each account's total to its tax type
    if (account_map) |am| {
        var acct_iter = acct_map.iterator();
        while (acct_iter.next()) |kv| {
            try addToBucket(&tax_map, am.taxTypeFor(kv.key_ptr.*), kv.value_ptr.*);
        }
    }

    // Convert maps to sorted slices. Each slice gets its own errdefer so
    // a failure while building a later one does not leak the earlier ones.
    const total = if (total_portfolio_value > 0) total_portfolio_value else 1.0;

    const asset_class_items = try mapToSortedBreakdown(allocator, ac_map, total);
    errdefer allocator.free(asset_class_items);
    const sector_items = try mapToSortedBreakdown(allocator, sector_map, total);
    errdefer allocator.free(sector_items);
    const geo_items = try mapToSortedBreakdown(allocator, geo_map, total);
    errdefer allocator.free(geo_items);
    const account_items = try mapToSortedBreakdown(allocator, acct_map, total);
    errdefer allocator.free(account_items);
    const tax_type_items = try mapToSortedBreakdown(allocator, tax_map, total);
    errdefer allocator.free(tax_type_items);
    const unclassified_items = try unclassified_list.toOwnedSlice(allocator);

    return .{
        .asset_class = asset_class_items,
        .sector = sector_items,
        .geo = geo_items,
        .account = account_items,
        .tax_type = tax_type_items,
        .unclassified = unclassified_items,
        .total_value = total_portfolio_value,
    };
}
/// Turn a label -> value map into a slice of `BreakdownItem`s ordered
/// from largest to smallest value.
///
/// Each item's weight is its value divided by `total`. Labels are the
/// map's key slices, not copies. The caller owns the returned slice and
/// frees it with `allocator`.
fn mapToSortedBreakdown(
    allocator: std.mem.Allocator,
    map: std.StringHashMap(f64),
    total: f64,
) ![]BreakdownItem {
    // The final length is known up front, so allocate exactly once.
    const out = try allocator.alloc(BreakdownItem, map.count());

    var idx: usize = 0;
    var entries = map.iterator();
    while (entries.next()) |entry| : (idx += 1) {
        const amount = entry.value_ptr.*;
        out[idx] = .{
            .label = entry.key_ptr.*,
            .value = amount,
            .weight = amount / total,
        };
    }

    // Largest value first.
    const byValueDescending = struct {
        fn lessThan(_: void, lhs: BreakdownItem, rhs: BreakdownItem) bool {
            return rhs.value < lhs.value;
        }
    }.lessThan;
    std.mem.sort(BreakdownItem, out, {}, byValueDescending);

    return out;
}
// Three well-formed records parse into three entries, and each account
// resolves to the label of its tax type; unknown accounts fall back to
// "Unknown".
test "parseAccountsFile" {
    const gpa = std.testing.allocator;
    const srf_text =
        \\#!srfv1
        \\account::Emil Roth,tax_type::roth
        \\account::Joint trust,tax_type::taxable
        \\account::Fidelity Emil HSA,tax_type::hsa
    ;

    var accounts = try parseAccountsFile(gpa, srf_text);
    defer accounts.deinit();

    try std.testing.expectEqual(@as(usize, 3), accounts.entries.len);

    const cases = [_]struct { account: []const u8, want: []const u8 }{
        .{ .account = "Emil Roth", .want = "Roth (Post-Tax)" },
        .{ .account = "Joint trust", .want = "Taxable" },
        .{ .account = "Fidelity Emil HSA", .want = "HSA (Triple Tax-Free)" },
        .{ .account = "Nonexistent", .want = "Unknown" },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.want, accounts.taxTypeFor(case.account));
    }
}
// `cash_is_contribution` is only true for accounts that opt in; accounts
// that omit the key, and accounts missing from the map, report false.
test "parseAccountsFile: cash_is_contribution default false, opt-in true" {
    const gpa = std.testing.allocator;
    const srf_text =
        \\#!srfv1
        \\account::Kelly ESPP,tax_type::taxable,cash_is_contribution:bool:true
        \\account::Joint cash,tax_type::taxable
    ;

    var accounts = try parseAccountsFile(gpa, srf_text);
    defer accounts.deinit();

    try std.testing.expectEqual(@as(usize, 2), accounts.entries.len);

    const cases = [_]struct { account: []const u8, want: bool }{
        .{ .account = "Kelly ESPP", .want = true }, // opted in
        .{ .account = "Joint cash", .want = false }, // default
        .{ .account = "Nonexistent", .want = false }, // not in the map
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.want, accounts.cashIsContribution(case.account));
    }
}
// `direct_indexing` is only true for accounts that opt in; accounts that
// omit the key, and accounts missing from the map, report false.
test "parseAccountsFile: direct_indexing default false, opt-in true" {
    const gpa = std.testing.allocator;
    const srf_text =
        \\#!srfv1
        \\account::Tax Loss,tax_type::taxable,direct_indexing:bool:true
        \\account::Regular Brokerage,tax_type::taxable
    ;

    var accounts = try parseAccountsFile(gpa, srf_text);
    defer accounts.deinit();

    try std.testing.expectEqual(@as(usize, 2), accounts.entries.len);

    const cases = [_]struct { account: []const u8, want: bool }{
        .{ .account = "Tax Loss", .want = true },
        .{ .account = "Regular Brokerage", .want = false },
        .{ .account = "Nonexistent", .want = false },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.want, accounts.isDirectIndexing(case.account));
    }
}
// Every tax type maps to its display label.
test "TaxType.label" {
    const cases = [_]struct { tag: TaxType, want: []const u8 }{
        .{ .tag = .taxable, .want = "Taxable" },
        .{ .tag = .roth, .want = "Roth (Post-Tax)" },
        .{ .tag = .traditional, .want = "Traditional (Pre-Tax)" },
        .{ .tag = .hsa, .want = "HSA (Triple Tax-Free)" },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.want, case.tag.label());
    }
}
// Items come back ordered by descending value, with value and weight
// filled in from the map and the supplied total.
test "mapToSortedBreakdown" {
    const gpa = std.testing.allocator;

    var map = std.StringHashMap(f64).init(gpa);
    defer map.deinit();
    try map.put("Technology", 50_000);
    try map.put("Healthcare", 30_000);
    try map.put("Energy", 20_000);

    const rows = try mapToSortedBreakdown(gpa, map, 100_000.0);
    defer gpa.free(rows);

    // Largest bucket first.
    const expected_order = [_][]const u8{ "Technology", "Healthcare", "Energy" };
    try std.testing.expectEqual(expected_order.len, rows.len);
    for (expected_order, rows) |want, row| {
        try std.testing.expectEqualStrings(want, row.label);
    }

    try std.testing.expectApproxEqAbs(@as(f64, 50_000), rows[0].value, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 0.5), rows[0].weight, 0.001);
}
// An empty map produces an empty (but still freeable) slice.
test "mapToSortedBreakdown empty" {
    const gpa = std.testing.allocator;

    var no_buckets = std.StringHashMap(f64).init(gpa);
    defer no_buckets.deinit();

    const rows = try mapToSortedBreakdown(gpa, no_buckets, 100_000.0);
    defer gpa.free(rows);

    try std.testing.expectEqual(@as(usize, 0), rows.len);
}
// A file holding nothing but the srf header yields a map with no entries.
test "parseAccountsFile empty" {
    const header_only = "#!srfv1\n";

    var accounts = try parseAccountsFile(std.testing.allocator, header_only);
    defer accounts.deinit();

    try std.testing.expectEqual(@as(usize, 0), accounts.entries.len);
}
// A record with an account but no tax_type cannot be converted to an
// entry and is skipped, leaving the map empty.
test "parseAccountsFile missing fields" {
    const gpa = std.testing.allocator;
    const incomplete = "#!srfv1\naccount::Test Account\n# comment\n";

    // The skipped record is expected to make srf emit a log.err. The
    // log level is overridden for the duration of this test (and
    // restored afterwards) so that expected output does not cause the
    // test runner to report failure.
    const saved_level = std.testing.log_level;
    defer std.testing.log_level = saved_level;
    std.testing.log_level = .err;

    var accounts = try parseAccountsFile(gpa, incomplete);
    defer accounts.deinit();

    try std.testing.expectEqual(@as(usize, 0), accounts.entries.len);
}
// The per-account breakdown must value each lot with the right price:
// merged allocations carry the base-ticker price (the lot's price_ratio
// still has to be applied), while unmerged allocations carry a price
// that already includes the ratio.
test "account breakdown applies price_ratio" {
    const Lot = @import("../models/portfolio.zig").Lot;
    const gpa = std.testing.allocator;
    const opened = Date.fromYmd(2020, 1, 1);

    // Three lots across two accounts:
    //   - Brokerage: direct SPY (ratio 1.0)
    //   - 401(k):    CIT mapped to SPY (ratio 0.25, merged allocation)
    //   - 401(k):    CUSIP with ticker=VTTHX (ratio 5.0, unmerged allocation)
    var lots = [_]Lot{
        .{
            .symbol = "SPY",
            .shares = 100,
            .open_date = opened,
            .open_price = 400,
            .account = "Brokerage",
        },
        .{
            .symbol = "CIT-SPY",
            .shares = 500,
            .open_date = opened,
            .open_price = 100,
            .ticker = "SPY",
            .price_ratio = 0.25,
            .account = "401(k)",
        },
        .{
            .symbol = "CUSIP123",
            .shares = 200,
            .open_date = opened,
            .open_price = 50,
            .ticker = "VTTHX",
            .price_ratio = 5.0,
            .account = "401(k)",
        },
    };
    const portfolio = Portfolio{ .lots = &lots, .allocator = gpa };

    // Allocations as produced by portfolioSummary + mergeAllocsBySymbol:
    //   SPY:   merged (direct + CIT). current_price = base SPY price = 500, price_ratio = 1.0
    //   VTTHX: unmerged. current_price = 30 * 5.0 = 150 (already includes ratio), price_ratio = 5.0
    const allocations = [_]Allocation{
        .{
            .symbol = "SPY",
            .display_symbol = "SPY",
            .shares = 225, // 100 + 500*0.25
            .avg_cost = 300,
            .current_price = 500, // base-ticker price (merged, ratio=1.0)
            .market_value = 112_500,
            .cost_basis = 67_500,
            .weight = 0.789,
            .unrealized_gain_loss = 45_000,
            .unrealized_return = 0.667,
            .price_ratio = 1.0, // merged
        },
        .{
            .symbol = "VTTHX",
            .display_symbol = "VTTHX",
            .shares = 200,
            .avg_cost = 50,
            .current_price = 150, // already includes price_ratio (30 * 5.0)
            .market_value = 30_000, // 200 * 150
            .cost_basis = 10_000,
            .weight = 0.211,
            .unrealized_gain_loss = 20_000,
            .unrealized_return = 2.0,
            .price_ratio = 5.0, // unmerged, ratio preserved
        },
    };

    const no_classifications = ClassificationMap{ .entries = &.{}, .allocator = gpa };
    var result = try analyzePortfolio(
        gpa,
        &allocations,
        no_classifications,
        portfolio,
        142_500,
        null,
        null,
    );
    defer result.deinit(gpa);

    // Expected account values:
    //   Brokerage: SPY direct, 100 shares * $500 * 1.0 = $50,000
    //   401(k):    CIT-SPY 500 shares * $500 * 0.25 = $62,500
    //            + CUSIP123 200 shares * $150 (already includes ratio) = $30,000
    //            = $92,500
    //   Total: $142,500
    var account_sum: f64 = 0;
    for (result.account) |item| {
        account_sum += item.value;
        const expected: f64 = if (std.mem.eql(u8, item.label, "Brokerage"))
            50_000
        else if (std.mem.eql(u8, item.label, "401(k)"))
            92_500
        else
            continue;
        try std.testing.expectApproxEqAbs(expected, item.value, 1.0);
    }

    // Sum of accounts must equal total portfolio value
    try std.testing.expectApproxEqAbs(@as(f64, 142_500), account_sum, 1.0);
}