enrich enrich command, remove AlphaVantage

This (huge) commit pulls out AlphaVantage in favor of utilizing
Wikidata and SEC EDGAR data sources (both free). It uses some
built-in heuristics to fill in gaps, and it is not 100% accurate
(and never will be), but it should get close enough to allow
hand-editing of metadata.srf afterwards without too much labor.
This commit is contained in:
Emil Lerch 2026-05-30 10:40:34 -07:00
parent 52afd75696
commit 7fb674f467
18 changed files with 4966 additions and 1094 deletions

View file

@ -35,7 +35,6 @@ pub const default_watchlist_filename = "watchlist.srf";
twelvedata_key: ?[]const u8 = null,
polygon_key: ?[]const u8 = null,
fmp_key: ?[]const u8 = null,
alphavantage_key: ?[]const u8 = null,
tiingo_key: ?[]const u8 = null,
openfigi_key: ?[]const u8 = null,
/// User contact email used as the User-Agent / From header for
@ -94,7 +93,6 @@ pub fn fromEnv(io: std.Io, allocator: std.mem.Allocator, environ_map: *const std
self.twelvedata_key = self.resolve("TWELVEDATA_API_KEY");
self.polygon_key = self.resolve("POLYGON_API_KEY");
self.fmp_key = self.resolve("FMP_API_KEY");
self.alphavantage_key = self.resolve("ALPHAVANTAGE_API_KEY");
self.tiingo_key = self.resolve("TIINGO_API_KEY");
self.openfigi_key = self.resolve("OPENFIGI_API_KEY");
self.user_email = self.resolve("ZFIN_USER_EMAIL");
@ -372,7 +370,6 @@ pub fn hasAnyKey(self: @This()) bool {
return self.twelvedata_key != null or
self.polygon_key != null or
self.fmp_key != null or
self.alphavantage_key != null or
self.tiingo_key != null;
}
@ -488,15 +485,14 @@ test "hasAnyKey: true when any single provider key is set" {
// Each key should independently flip the result to true. Iterating
// through each variant catches a future field addition that forgets
// to update hasAnyKey().
const KeyField = enum { tiingo, twelvedata, polygon, fmp, alphavantage };
for ([_]KeyField{ .tiingo, .twelvedata, .polygon, .fmp, .alphavantage }) |which| {
const KeyField = enum { tiingo, twelvedata, polygon, fmp };
for ([_]KeyField{ .tiingo, .twelvedata, .polygon, .fmp }) |which| {
var c: @This() = .{ .cache_dir = "/tmp" };
switch (which) {
.tiingo => c.tiingo_key = "abc",
.twelvedata => c.twelvedata_key = "abc",
.polygon => c.polygon_key = "abc",
.fmp => c.fmp_key = "abc",
.alphavantage => c.alphavantage_key = "abc",
}
try testing.expect(c.hasAnyKey());
}

View file

@ -6,6 +6,7 @@ const std = @import("std");
const srf = @import("srf");
const Allocation = @import("valuation.zig").Allocation;
const ClassificationMap = @import("../models/classification.zig").ClassificationMap;
const ClassificationEntry = @import("../models/classification.zig").ClassificationEntry;
const Portfolio = @import("../models/portfolio.zig").Portfolio;
const Date = @import("../Date.zig");
@ -217,6 +218,11 @@ pub fn parseAccountsFile(allocator: std.mem.Allocator, data: []const u8) !Accoun
/// Complete portfolio analysis result.
pub const AnalysisResult = struct {
/// Coarse 4-bucket breakdown: Equity / Fixed Income / Cash / Other.
/// Built by mapping each fine-grained sector through `bucketSector`
/// before aggregation. The right field for portfolio-level
/// debt-to-equity analysis.
asset_category: []BreakdownItem,
/// Breakdown by asset class (US Large Cap, Bonds, Cash & CDs, etc.)
asset_class: []BreakdownItem,
/// Breakdown by sector (Technology, Healthcare, etc.) -- equities only
@ -233,6 +239,7 @@ pub const AnalysisResult = struct {
total_value: f64,
pub fn deinit(self: *AnalysisResult, allocator: std.mem.Allocator) void {
allocator.free(self.asset_category);
allocator.free(self.asset_class);
allocator.free(self.sector);
allocator.free(self.geo);
@ -242,6 +249,124 @@ pub const AnalysisResult = struct {
}
};
/// One section of an analysis breakdown for renderer-agnostic
/// display. Both the CLI (`commands/analysis.zig`) and the TUI
/// (`tui/analysis_tab.zig`) walk the section list returned by
/// `breakdownSections` to build their output. The section list
/// is the single source of truth for which breakdowns appear and
/// in what order; renderers apply their own indent and styling.
pub const Section = struct {
/// Rows of this breakdown. `breakdownSections` fills this with a
/// slice taken from the matching `AnalysisResult` field.
items: []const BreakdownItem,
/// Title with no leading whitespace. Renderers indent.
title: []const u8,
};
/// List the breakdown sections of an analysis result in display
/// order. The CLI display and the TUI tab both call this, so
/// adding or reordering a section is a one-place edit. Sections
/// run from the coarsest view (Asset Category, 4 buckets) to the
/// finest (per-account / per-tax-type).
///
/// Each section's `items` is the corresponding slice of `r`; no
/// rows are copied.
pub fn breakdownSections(r: *const AnalysisResult) [6]Section {
    const sections = [6]Section{
        .{ .title = "Asset Category", .items = r.asset_category },
        .{ .title = "Asset Class", .items = r.asset_class },
        .{ .title = "Sector (Equities)", .items = r.sector },
        .{ .title = "Geographic", .items = r.geo },
        .{ .title = "By Account", .items = r.account },
        .{ .title = "By Tax Type", .items = r.tax_type },
    };
    return sections;
}
// Sector → asset-category bucket
/// Labels of the four coarse asset-category buckets. `bucketSector`
/// returns exactly these static `[]const u8` values, so callers can
/// use the result as a HashMap key without duping it.
pub const bucket_equity: []const u8 = "Equity";
pub const bucket_fixed_income: []const u8 = "Fixed Income";
pub const bucket_cash: []const u8 = "Cash";
pub const bucket_other: []const u8 = "Other";

/// Classify a sector string into one of the four coarse
/// asset-category buckets. Three input shapes are recognized:
///
/// - **NPORT-P fund-decomposition sectors** written as
///   `"<assetCat> / <issuerCat>"` (e.g. `"Debt / US Treasury"`,
///   `"Equity / Corporate"`,
///   `"Short-Term Investment Vehicle / Registered Fund"`).
///   These come from EDGAR fund-holdings data via `enrich`.
///
/// - **GICS-style stock sector names** (e.g. `"Technology"`,
///   `"Healthcare"`, `"Financial Services"`). These come from
///   Wikidata via `enrich`'s `canonicalizeSector`.
///
/// - **Plain-English asset-class words** (e.g. `"Bonds"`,
///   `"Diversified"`) that hand-written `metadata.srf` files use
///   for legacy entries. `"Bonds"` → Fixed Income;
///   `"Diversified"` → Equity (in practice the word means "S&P
///   500 / total-market index fund holding all sectors", which is
///   overwhelmingly equity).
///
/// Returns one of `bucket_equity`, `bucket_fixed_income`,
/// `bucket_cash`, or `bucket_other`. Matching is case-sensitive.
/// Anything unrecognized (sentinels like `"TODO"`, the empty
/// string, future label changes) falls through to `bucket_other`.
///
/// Note: `Equity Preferred / *` rolls up to Equity, not Fixed
/// Income. Preferreds trade between stocks and bonds; we lean
/// equity to match how most retail asset-allocation views treat
/// them.
pub fn bucketSector(sector: []const u8) []const u8 {
    const Rule = struct { text: []const u8, bucket: []const u8 };

    // NPORT-P shapes: prefix-match on the assetCat half. The
    // "Equity" prefix covers both `Equity / *` and
    // `Equity Preferred / *`.
    //
    // Dividend-equity ETFs (SCHD, VYM, DGRO, etc.) therefore bucket
    // as Equity, not Fixed Income, despite their bond-like income
    // shape. The Asset Category breakdown answers "what's exposed
    // to equity drawdowns?", and dividend funds drop with the
    // market in a 2008-style crash. The income-feels-like-bonds
    // intuition belongs in a separate yield-weighted analysis (see
    // TODO.md "Dividend equity / income-shaped equity"), not in the
    // asset-class taxonomy.
    const prefix_rules = [_]Rule{
        .{ .text = "Equity", .bucket = bucket_equity },
        .{ .text = "Debt", .bucket = bucket_fixed_income },
        .{ .text = "Loan", .bucket = bucket_fixed_income },
        .{ .text = "Asset-Backed", .bucket = bucket_fixed_income },
        .{ .text = "Short-Term Investment Vehicle", .bucket = bucket_cash },
        .{ .text = "Repurchase Agreement", .bucket = bucket_cash },
    };
    for (prefix_rules) |rule| {
        if (std.mem.startsWith(u8, sector, rule.text)) return rule.bucket;
    }

    const exact_rules = [_]Rule{
        // Plain-English asset-class words (hand-written metadata).
        .{ .text = "Bonds", .bucket = bucket_fixed_income },
        .{ .text = "Cash", .bucket = bucket_cash },
        // "Diversified" means "broad equity fund holding all
        // sectors" — S&P 500 ETF, total-market index, etc.
        .{ .text = "Diversified", .bucket = bucket_equity },
        // GICS stock sector names: the canonical 11 returned by
        // `Wikidata.canonicalizeSector`, plus the legacy
        // `"Financials"` (with 's') from old hand-written entries.
        .{ .text = "Technology", .bucket = bucket_equity },
        .{ .text = "Healthcare", .bucket = bucket_equity },
        .{ .text = "Financial Services", .bucket = bucket_equity },
        .{ .text = "Financials", .bucket = bucket_equity },
        .{ .text = "Consumer Cyclical", .bucket = bucket_equity },
        .{ .text = "Consumer Defensive", .bucket = bucket_equity },
        .{ .text = "Energy", .bucket = bucket_equity },
        .{ .text = "Utilities", .bucket = bucket_equity },
        .{ .text = "Real Estate", .bucket = bucket_equity },
        .{ .text = "Industrials", .bucket = bucket_equity },
        .{ .text = "Basic Materials", .bucket = bucket_equity },
        .{ .text = "Communication Services", .bucket = bucket_equity },
    };
    for (exact_rules) |rule| {
        if (std.mem.eql(u8, sector, rule.text)) return rule.bucket;
    }

    // Everything else: derivatives, real property, sentinels
    // (TODO/Unknown/empty), unrecognized future labels.
    return bucket_other;
}
/// Compute portfolio analysis from allocations and classification metadata.
/// `allocations` are the stock/ETF positions with market values.
/// `classifications` is the metadata file data.
@ -266,6 +391,10 @@ pub fn analyzePortfolio(
defer ac_map.deinit();
var sector_map = std.StringHashMap(f64).init(allocator);
defer sector_map.deinit();
// 4-bucket coarse breakdown (Equity/Fixed Income/Cash/Other).
// Keys are static literals from `bucketSector`, no dupe needed.
var asset_cat_map = std.StringHashMap(f64).init(allocator);
defer asset_cat_map.deinit();
var geo_map = std.StringHashMap(f64).init(allocator);
defer geo_map.deinit();
var acct_map = std.StringHashMap(f64).init(allocator);
@ -296,9 +425,20 @@ pub fn analyzePortfolio(
const prev = ac_map.get(ac) orelse 0;
try ac_map.put(ac, prev + portion);
}
// Asset-category bucket: prefer `sector` (richer
// signal). Fall back to `asset_class` for legacy
// hand-written entries that didn't include a
// sector. Counted exactly once per entry.
if (entry.sector) |s| {
const prev = sector_map.get(s) orelse 0;
try sector_map.put(s, prev + portion);
const bucket = bucketSector(s);
const bprev = asset_cat_map.get(bucket) orelse 0;
try asset_cat_map.put(bucket, bprev + portion);
} else if (entry.asset_class) |ac| {
const bucket = bucketAssetClass(ac);
const bprev = asset_cat_map.get(bucket) orelse 0;
try asset_cat_map.put(bucket, bprev + portion);
}
if (entry.geo) |g| {
const prev = geo_map.get(g) orelse 0;
@ -358,11 +498,17 @@ pub fn analyzePortfolio(
try ac_map.put("Cash & CDs", prev + cash_cd_total);
const gprev = geo_map.get("US") orelse 0;
try geo_map.put("US", gprev + cash_cd_total);
// Literal cash and CDs roll into the coarse Cash bucket.
const bprev = asset_cat_map.get(bucket_cash) orelse 0;
try asset_cat_map.put(bucket_cash, bprev + cash_cd_total);
}
const opt_total = portfolio.totalOptionCost(as_of);
if (opt_total > 0) {
const prev = ac_map.get("Options") orelse 0;
try ac_map.put("Options", prev + opt_total);
// Options are derivatives; coarse bucket is Other.
const bprev = asset_cat_map.get(bucket_other) orelse 0;
try asset_cat_map.put(bucket_other, bprev + opt_total);
}
// Tax type breakdown: map each account's total to its tax type
@ -379,6 +525,7 @@ pub fn analyzePortfolio(
const total = if (total_portfolio_value > 0) total_portfolio_value else 1.0;
return .{
.asset_category = try mapToSortedBreakdown(allocator, asset_cat_map, total),
.asset_class = try mapToSortedBreakdown(allocator, ac_map, total),
.sector = try mapToSortedBreakdown(allocator, sector_map, total),
.geo = try mapToSortedBreakdown(allocator, geo_map, total),
@ -654,3 +801,627 @@ test "account breakdown applies price_ratio" {
}
try std.testing.expectApproxEqAbs(@as(f64, 142_500), account_sum, 1.0);
}
// bucketSector
test "bucketSector: NPORT-P Debt / * → Fixed Income" {
    // Every issuer category listed under the `Debt` asset category
    // must collapse into the single Fixed Income bucket.
    const debt_sectors = [_][]const u8{
        "Debt / Corporate",
        "Debt / US Treasury",
        "Debt / Municipal",
        "Debt / Non-US Sovereign",
        "Debt / US Gov Agency",
        "Debt / US GSE",
    };
    for (debt_sectors) |sector| {
        const got = bucketSector(sector);
        try std.testing.expectEqualStrings(bucket_fixed_income, got);
    }
}
test "bucketSector: NPORT-P Equity / * and Equity Preferred / * → Equity" {
    // Plain and preferred equity share the "Equity" prefix and
    // therefore the same bucket.
    const equity_sectors = [_][]const u8{
        "Equity / Corporate",
        "Equity / Other",
        "Equity / Registered Fund",
        "Equity Preferred / Corporate",
    };
    for (equity_sectors) |sector| {
        try std.testing.expectEqualStrings(bucket_equity, bucketSector(sector));
    }
}
test "bucketSector: NPORT-P Loan / * → Fixed Income" {
    const got = bucketSector("Loan / Corporate");
    try std.testing.expectEqualStrings(bucket_fixed_income, got);
}
test "bucketSector: NPORT-P Asset-Backed variants → Fixed Income" {
    // The three asset-backed prefixes ("Asset-Backed",
    // "Asset-Backed CBO/CDO", "Asset-Backed Other") must all bucket
    // the same way. Asset-backed securities are bond-like by
    // structure.
    const asset_backed = [_][]const u8{
        "Asset-Backed / Corporate Mortgage",
        "Asset-Backed / US GSE Mortgage",
        "Asset-Backed CBO/CDO / Corporate",
        "Asset-Backed Other / Corporate",
    };
    for (asset_backed) |sector| {
        try std.testing.expectEqualStrings(bucket_fixed_income, bucketSector(sector));
    }
}
test "bucketSector: Short-Term Investment Vehicle / * → Cash" {
    const stiv_sectors = [_][]const u8{
        "Short-Term Investment Vehicle / Corporate",
        "Short-Term Investment Vehicle / Registered Fund",
        "Short-Term Investment Vehicle / Private Fund",
    };
    for (stiv_sectors) |sector| {
        try std.testing.expectEqualStrings(bucket_cash, bucketSector(sector));
    }
}
test "bucketSector: Repurchase Agreement / * → Cash" {
    // Repo lines (the PTY-style leverage liability sleeve) bucket
    // as Cash; a negative pct on such a line is summed as-is by the
    // bucket math.
    const got = bucketSector("Repurchase Agreement / Other");
    try std.testing.expectEqualStrings(bucket_cash, got);
}
test "bucketSector: Derivative variants → Other" {
    // No rule matches a "Derivative" prefix, so these take the
    // fall-through bucket.
    const derivative_sectors = [_][]const u8{
        "Derivative / Corporate",
        "Derivative / Other",
        "Derivative-FX / Other",
        "Derivative-FX / Corporate",
    };
    for (derivative_sectors) |sector| {
        try std.testing.expectEqualStrings(bucket_other, bucketSector(sector));
    }
}
test "bucketSector: Direct Real Property and Direct Credit Risk → Other" {
    const direct_sectors = [_][]const u8{
        "Direct Real Property / Other",
        "Direct Credit Risk / Other",
    };
    for (direct_sectors) |sector| {
        try std.testing.expectEqualStrings(bucket_other, bucketSector(sector));
    }
}
test "bucketSector: GICS sector names → Equity" {
    // The eleven canonical stock-sector names; each one is an
    // exact-match Equity rule.
    const sector_names = [_][]const u8{
        "Technology",
        "Healthcare",
        "Financial Services",
        "Consumer Cyclical",
        "Consumer Defensive",
        "Energy",
        "Utilities",
        "Real Estate",
        "Industrials",
        "Basic Materials",
        "Communication Services",
    };
    for (sector_names) |name| {
        const got = bucketSector(name);
        try std.testing.expectEqualStrings(bucket_equity, got);
    }
}
test "bucketSector: sentinels and unrecognized → Other" {
    // Placeholders, the empty string and labels nobody has defined
    // yet all take the fall-through bucket.
    const unknown_inputs = [_][]const u8{
        "TODO",
        "Unknown",
        "",
        "Fintech",
        "Some Future Label",
    };
    for (unknown_inputs) |input| {
        try std.testing.expectEqualStrings(bucket_other, bucketSector(input));
    }
}
test "bucketSector: returns same pointer for repeated calls (static-string property)" {
    // Callers keep the returned slice as a HashMap key without
    // duping it, so the result must be the static bucket constant
    // itself (same address), not merely equal bytes.
    const corporate = bucketSector("Debt / Corporate");
    const treasury = bucketSector("Debt / US Treasury");
    try std.testing.expectEqual(corporate.ptr, treasury.ptr);

    const equity = bucketSector("Equity / Corporate");
    try std.testing.expectEqual(bucket_equity.ptr, equity.ptr);

    const fallthrough = bucketSector("TODO");
    try std.testing.expectEqual(bucket_other.ptr, fallthrough.ptr);
}
test "bucketSector: case-sensitive (defensive — bad input lands in Other, not crash)" {
    // Case is not normalized: only the canonical spelling is
    // recognized, so wrongly-cased input falls through to Other.
    const miscased = [_][]const u8{
        "debt / corporate",
        "EQUITY / CORPORATE",
    };
    for (miscased) |input| {
        try std.testing.expectEqualStrings(bucket_other, bucketSector(input));
    }
}
test "bucketSector: legacy hand-written 'Bonds' → Fixed Income" {
    // metadata.srf entries that pre-date EDGAR fund decomposition
    // use the literal word `Bonds` as the sector. It must map to
    // Fixed Income so the Asset Category breakdown counts those
    // entries alongside the NPORT-P `Debt / *` rows.
    const got = bucketSector("Bonds");
    try std.testing.expectEqualStrings(bucket_fixed_income, got);
}
test "bucketSector: legacy hand-written 'Cash' → Cash" {
    const got = bucketSector("Cash");
    try std.testing.expectEqualStrings(bucket_cash, got);
}
test "bucketSector: legacy 'Diversified' → Equity (broad equity fund)" {
    // "Diversified" in practice means an S&P 500 / total-market
    // index fund holding all sectors — overwhelmingly equity.
    const got = bucketSector("Diversified");
    try std.testing.expectEqualStrings(bucket_equity, got);
}
test "bucketSector: legacy 'Financials' (with s) → Equity" {
    // Wikidata's canonical name is "Financial Services"; older
    // hand-written entries use "Financials". Both spellings must
    // map to Equity so legacy data doesn't silently land in Other.
    const spellings = [_][]const u8{ "Financials", "Financial Services" };
    for (spellings) |spelling| {
        try std.testing.expectEqualStrings(bucket_equity, bucketSector(spelling));
    }
}
/// Classify an `asset_class` string into one of the four
/// asset-category buckets. This is the fallback for classification
/// entries that carry an `asset_class` but no `sector` (legacy
/// hand-written entries for CITs / CUSIPs / blended funds where
/// the user wrote `asset_class::Bonds,pct:num:30` without a
/// sector).
///
/// Matching is exact and case-sensitive. Returns `bucket_other`
/// for anything unrecognized.
pub fn bucketAssetClass(asset_class: []const u8) []const u8 {
    if (std.mem.eql(u8, asset_class, "Bonds")) return bucket_fixed_income;

    const cash_classes = [_][]const u8{ "Cash", "Cash & CDs" };
    for (cash_classes) |name| {
        if (std.mem.eql(u8, asset_class, name)) return bucket_cash;
    }

    // US size buckets and international/EM buckets are all equity.
    const equity_classes = [_][]const u8{
        "US Large Cap",
        "US Mid Cap",
        "US Small Cap",
        "International Developed",
        "Emerging Markets",
    };
    for (equity_classes) |name| {
        if (std.mem.eql(u8, asset_class, name)) return bucket_equity;
    }

    // Mutual Fund / ETF / Fund are too generic to bucket without
    // sector data — fall through to Other rather than guess wrong.
    // The companion `sector` field should already have bucketed
    // these via `bucketSector`; if it didn't, that's a
    // metadata-quality signal (TODO sector that needs filling in)
    // and Other is the right label.
    return bucket_other;
}
// bucketAssetClass
test "bucketAssetClass: Bonds → Fixed Income" {
    const got = bucketAssetClass("Bonds");
    try std.testing.expectEqualStrings(bucket_fixed_income, got);
}
test "bucketAssetClass: Cash variants → Cash" {
    const cash_names = [_][]const u8{ "Cash", "Cash & CDs" };
    for (cash_names) |name| {
        try std.testing.expectEqualStrings(bucket_cash, bucketAssetClass(name));
    }
}
test "bucketAssetClass: US size buckets → Equity" {
    const size_classes = [_][]const u8{
        "US Large Cap",
        "US Mid Cap",
        "US Small Cap",
    };
    for (size_classes) |name| {
        try std.testing.expectEqualStrings(bucket_equity, bucketAssetClass(name));
    }
}
test "bucketAssetClass: international + EM → Equity" {
    const foreign_classes = [_][]const u8{
        "International Developed",
        "Emerging Markets",
    };
    for (foreign_classes) |name| {
        try std.testing.expectEqualStrings(bucket_equity, bucketAssetClass(name));
    }
}
test "bucketAssetClass: generic Fund/ETF/Mutual Fund → Other (not enough info)" {
    // The companion `sector` field is what disambiguates Fund-typed
    // entries. If the sector is missing too, calling these "Equity"
    // would be a guess; Other is the honest label that signals a
    // metadata-quality issue (sector::TODO needs filling in).
    const generic_classes = [_][]const u8{ "Fund", "ETF", "Mutual Fund" };
    for (generic_classes) |name| {
        try std.testing.expectEqualStrings(bucket_other, bucketAssetClass(name));
    }
}
test "bucketAssetClass: unknown / sentinels → Other" {
    const unknown_inputs = [_][]const u8{
        "",
        "TODO",
        "Unknown",
        "Some Future Class",
    };
    for (unknown_inputs) |input| {
        try std.testing.expectEqualStrings(bucket_other, bucketAssetClass(input));
    }
}
test "bucketAssetClass: case-sensitive — bad case lands in Other" {
    const miscased = [_][]const u8{ "bonds", "US LARGE CAP" };
    for (miscased) |input| {
        try std.testing.expectEqualStrings(bucket_other, bucketAssetClass(input));
    }
}
test "bucketAssetClass: returns same pointer for same bucket (static-string property)" {
    // Same invariant as bucketSector: the result is the static
    // bucket constant itself, usable as a HashMap key without a
    // dupe.
    const equity = bucketAssetClass("US Large Cap");
    try std.testing.expectEqual(bucket_equity.ptr, equity.ptr);

    const fixed_income = bucketAssetClass("Bonds");
    try std.testing.expectEqual(bucket_fixed_income.ptr, fixed_income.ptr);

    const cash = bucketAssetClass("Cash");
    try std.testing.expectEqual(bucket_cash.ptr, cash.ptr);

    const other = bucketAssetClass("Fund");
    try std.testing.expectEqual(bucket_other.ptr, other.ptr);
}
// breakdownSections
test "breakdownSections: returns 6 sections" {
    // An all-empty result is enough: only the section count matters.
    var category_rows = [_]BreakdownItem{};
    var class_rows = [_]BreakdownItem{};
    var sector_rows = [_]BreakdownItem{};
    var geo_rows = [_]BreakdownItem{};
    var account_rows = [_]BreakdownItem{};
    var tax_rows = [_]BreakdownItem{};
    const analysis = AnalysisResult{
        .asset_category = &category_rows,
        .asset_class = &class_rows,
        .sector = &sector_rows,
        .geo = &geo_rows,
        .account = &account_rows,
        .tax_type = &tax_rows,
        .unclassified = &.{},
        .total_value = 0,
    };

    const got = breakdownSections(&analysis);
    const expected_count: usize = 6;
    try std.testing.expectEqual(expected_count, got.len);
}
test "breakdownSections: titles in expected order, no leading whitespace, unique" {
    var category_rows = [_]BreakdownItem{};
    var class_rows = [_]BreakdownItem{};
    var sector_rows = [_]BreakdownItem{};
    var geo_rows = [_]BreakdownItem{};
    var account_rows = [_]BreakdownItem{};
    var tax_rows = [_]BreakdownItem{};
    const analysis = AnalysisResult{
        .asset_category = &category_rows,
        .asset_class = &class_rows,
        .sector = &sector_rows,
        .geo = &geo_rows,
        .account = &account_rows,
        .tax_type = &tax_rows,
        .unclassified = &.{},
        .total_value = 0,
    };

    const want_titles = [_][]const u8{
        "Asset Category",
        "Asset Class",
        "Sector (Equities)",
        "Geographic",
        "By Account",
        "By Tax Type",
    };
    const got = breakdownSections(&analysis);

    for (got, want_titles) |section, want| {
        const title = section.title;
        try std.testing.expectEqualStrings(want, title);
        // No leading whitespace baked into the title — renderers
        // own the indent.
        try std.testing.expect(title.len > 0);
        try std.testing.expect(title[0] != ' ');
        try std.testing.expect(title[0] != '\t');
    }

    // Titles must be unique: compare every pair once.
    var i: usize = 0;
    while (i < got.len) : (i += 1) {
        var j: usize = i + 1;
        while (j < got.len) : (j += 1) {
            try std.testing.expect(!std.mem.eql(u8, got[i].title, got[j].title));
        }
    }
}
test "breakdownSections: items.ptr points to AnalysisResult fields" {
    // The single-source-of-truth promise: each section borrows the
    // slice of the corresponding AnalysisResult field. This catches
    // anyone sliding in a copy or reordering the fields.
    var category_rows = [_]BreakdownItem{
        .{ .label = "Equity", .weight = 1.0, .value = 100.0 },
    };
    var class_rows = [_]BreakdownItem{
        .{ .label = "US Large Cap", .weight = 0.5, .value = 50.0 },
    };
    var sector_rows = [_]BreakdownItem{};
    var geo_rows = [_]BreakdownItem{};
    var account_rows = [_]BreakdownItem{};
    var tax_rows = [_]BreakdownItem{};
    const analysis = AnalysisResult{
        .asset_category = &category_rows,
        .asset_class = &class_rows,
        .sector = &sector_rows,
        .geo = &geo_rows,
        .account = &account_rows,
        .tax_type = &tax_rows,
        .unclassified = &.{},
        .total_value = 100,
    };

    const got = breakdownSections(&analysis);
    try std.testing.expectEqual(analysis.asset_category.ptr, got[0].items.ptr);
    try std.testing.expectEqual(analysis.asset_class.ptr, got[1].items.ptr);
    try std.testing.expectEqual(analysis.sector.ptr, got[2].items.ptr);
    try std.testing.expectEqual(analysis.geo.ptr, got[3].items.ptr);
    try std.testing.expectEqual(analysis.account.ptr, got[4].items.ptr);
    try std.testing.expectEqual(analysis.tax_type.ptr, got[5].items.ptr);
}
test "breakdownSections: Asset Category is first (coarse-to-fine ordering)" {
    var category_rows = [_]BreakdownItem{};
    var class_rows = [_]BreakdownItem{};
    var sector_rows = [_]BreakdownItem{};
    var geo_rows = [_]BreakdownItem{};
    var account_rows = [_]BreakdownItem{};
    var tax_rows = [_]BreakdownItem{};
    const analysis = AnalysisResult{
        .asset_category = &category_rows,
        .asset_class = &class_rows,
        .sector = &sector_rows,
        .geo = &geo_rows,
        .account = &account_rows,
        .tax_type = &tax_rows,
        .unclassified = &.{},
        .total_value = 0,
    };

    // Asset Category (4 buckets) is the coarsest view; it should
    // come first so the user sees the headline number before the
    // finer breakdowns.
    const first = breakdownSections(&analysis)[0];
    try std.testing.expectEqualStrings("Asset Category", first.title);
}
// analyzePortfolio: asset_category aggregation
/// Test helper: build a one-share `Allocation` for `symbol` worth
/// `mv`. Price, cost and market value are all set to `mv`, so the
/// position shows zero unrealized gain; the weight is fixed at 1.0.
/// Only the fields read by `analyzePortfolio`'s sector loop matter
/// to the asset-category tests.
fn mkAlloc(symbol: []const u8, mv: f64) Allocation {
    const position: Allocation = .{
        // Identity.
        .symbol = symbol,
        .display_symbol = symbol,
        // A single share priced at `mv`.
        .shares = 1,
        .current_price = mv,
        .market_value = mv,
        // Bought at the same price, so nothing gained or lost.
        .avg_cost = mv,
        .cost_basis = mv,
        .unrealized_gain_loss = 0.0,
        .unrealized_return = 0.0,
        .weight = 1.0,
    };
    return position;
}
test "analyzePortfolio: multi-sector fund (FAGIX shape) splits asset_category buckets" {
    const gpa = std.testing.allocator;
    const held = [_]Allocation{mkAlloc("FAGIX", 100_000)};
    var rows = [_]ClassificationEntry{
        .{ .symbol = "FAGIX", .sector = "Debt / Corporate", .pct = 47.69 },
        .{ .symbol = "FAGIX", .sector = "Equity / Corporate", .pct = 22.49 },
        .{ .symbol = "FAGIX", .sector = "Short-Term Investment Vehicle / Registered Fund", .pct = 13.37 },
        .{ .symbol = "FAGIX", .sector = "Loan / Corporate", .pct = 9.99 },
        .{ .symbol = "FAGIX", .sector = "Equity Preferred / Corporate", .pct = 3.59 },
    };
    const classifications = ClassificationMap{ .entries = &rows, .allocator = gpa };
    const no_lots = Portfolio{ .lots = &.{}, .allocator = gpa };

    var analysis = try analyzePortfolio(
        gpa,
        &held,
        classifications,
        no_lots,
        100_000,
        null,
        Date.fromYmd(2024, 6, 1),
    );
    defer analysis.deinit(gpa);

    // Pick out the value reported for each bucket.
    var equity_total: f64 = 0;
    var fixed_income_total: f64 = 0;
    var cash_total: f64 = 0;
    for (analysis.asset_category) |row| {
        if (std.mem.eql(u8, row.label, bucket_equity)) {
            equity_total = row.value;
        } else if (std.mem.eql(u8, row.label, bucket_fixed_income)) {
            fixed_income_total = row.value;
        } else if (std.mem.eql(u8, row.label, bucket_cash)) {
            cash_total = row.value;
        }
    }

    // Equity = 22.49 + 3.59 = 26.08% of $100K = $26,080
    try std.testing.expectApproxEqAbs(@as(f64, 26_080), equity_total, 1.0);
    // Fixed Income = 47.69 + 9.99 = 57.68% of $100K = $57,680
    try std.testing.expectApproxEqAbs(@as(f64, 57_680), fixed_income_total, 1.0);
    // Cash = 13.37% of $100K = $13,370
    try std.testing.expectApproxEqAbs(@as(f64, 13_370), cash_total, 1.0);
}
test "analyzePortfolio: pure-stock fund (SCHD shape) lands in Equity + tiny Cash" {
    const gpa = std.testing.allocator;
    const held = [_]Allocation{mkAlloc("SCHD", 100_000)};
    var rows = [_]ClassificationEntry{
        .{ .symbol = "SCHD", .sector = "Equity / Corporate", .pct = 99.70 },
        .{ .symbol = "SCHD", .sector = "Short-Term Investment Vehicle / Registered Fund", .pct = 0.19 },
    };
    const classifications = ClassificationMap{ .entries = &rows, .allocator = gpa };
    const no_lots = Portfolio{ .lots = &.{}, .allocator = gpa };

    var analysis = try analyzePortfolio(
        gpa,
        &held,
        classifications,
        no_lots,
        100_000,
        null,
        Date.fromYmd(2024, 6, 1),
    );
    defer analysis.deinit(gpa);

    var equity_total: f64 = 0;
    var cash_total: f64 = 0;
    for (analysis.asset_category) |row| {
        if (std.mem.eql(u8, row.label, bucket_equity)) {
            equity_total = row.value;
        } else if (std.mem.eql(u8, row.label, bucket_cash)) {
            cash_total = row.value;
        }
    }

    // 99.70% of $100K in Equity, 0.19% of $100K in Cash.
    try std.testing.expectApproxEqAbs(@as(f64, 99_700), equity_total, 1.0);
    try std.testing.expectApproxEqAbs(@as(f64, 190), cash_total, 1.0);
}
test "analyzePortfolio: GICS-sectored stock lands in Equity bucket" {
    const gpa = std.testing.allocator;
    const held = [_]Allocation{mkAlloc("NVDA", 50_000)};
    var rows = [_]ClassificationEntry{
        .{ .symbol = "NVDA", .sector = "Technology" },
    };
    const classifications = ClassificationMap{ .entries = &rows, .allocator = gpa };
    const no_lots = Portfolio{ .lots = &.{}, .allocator = gpa };

    var analysis = try analyzePortfolio(
        gpa,
        &held,
        classifications,
        no_lots,
        50_000,
        null,
        Date.fromYmd(2024, 6, 1),
    );
    defer analysis.deinit(gpa);

    // A single stock yields exactly one bucket row: Equity, holding
    // the whole position.
    const buckets = analysis.asset_category;
    try std.testing.expectEqual(@as(usize, 1), buckets.len);
    try std.testing.expectEqualStrings(bucket_equity, buckets[0].label);
    try std.testing.expectApproxEqAbs(@as(f64, 50_000), buckets[0].value, 1.0);
}
test "analyzePortfolio: empty portfolio produces empty asset_category" {
    const gpa = std.testing.allocator;
    const no_lots = Portfolio{ .lots = &.{}, .allocator = gpa };
    const no_classifications = ClassificationMap{ .entries = &.{}, .allocator = gpa };

    // No allocations, no lots, zero total value.
    var analysis = try analyzePortfolio(
        gpa,
        &.{},
        no_classifications,
        no_lots,
        0,
        null,
        Date.fromYmd(2024, 6, 1),
    );
    defer analysis.deinit(gpa);

    const bucket_count = analysis.asset_category.len;
    try std.testing.expectEqual(@as(usize, 0), bucket_count);
}
test "analyzePortfolio: PTY-shape negative repo flows honestly into Cash bucket" {
    // The portfolio holds only PTY. Its repo line carries a
    // negative pct, and the bucket math sums it as-is: the Cash
    // bucket ends up being that (negative) repo contribution alone,
    // because this fund has no Short-Term Investment Vehicle sleeve.
    const gpa = std.testing.allocator;
    const held = [_]Allocation{mkAlloc("PTY", 10_000)};
    var rows = [_]ClassificationEntry{
        .{ .symbol = "PTY", .sector = "Debt / Corporate", .pct = 41.65 },
        .{ .symbol = "PTY", .sector = "Loan / Corporate", .pct = 40.05 },
        .{ .symbol = "PTY", .sector = "Equity / Corporate", .pct = 5.78 },
        .{ .symbol = "PTY", .sector = "Repurchase Agreement / Other", .pct = -29.72 },
    };
    const classifications = ClassificationMap{ .entries = &rows, .allocator = gpa };
    const no_lots = Portfolio{ .lots = &.{}, .allocator = gpa };

    var analysis = try analyzePortfolio(
        gpa,
        &held,
        classifications,
        no_lots,
        10_000,
        null,
        Date.fromYmd(2024, 6, 1),
    );
    defer analysis.deinit(gpa);

    var equity_total: f64 = 0;
    var fixed_income_total: f64 = 0;
    var cash_total: f64 = 0;
    for (analysis.asset_category) |row| {
        if (std.mem.eql(u8, row.label, bucket_cash)) {
            cash_total = row.value;
        } else if (std.mem.eql(u8, row.label, bucket_fixed_income)) {
            fixed_income_total = row.value;
        } else if (std.mem.eql(u8, row.label, bucket_equity)) {
            equity_total = row.value;
        }
    }

    // Cash = -29.72% × $10,000 = -$2,972 (honest negative).
    try std.testing.expectApproxEqAbs(@as(f64, -2_972), cash_total, 1.0);
    // Fixed Income = (41.65 + 40.05)% × $10,000 = $8,170.
    try std.testing.expectApproxEqAbs(@as(f64, 8_170), fixed_income_total, 1.0);
    // Equity = 5.78% × $10,000 = $578.
    try std.testing.expectApproxEqAbs(@as(f64, 578), equity_total, 1.0);
}
test "analyzePortfolio: asset_category includes literal cash + CD totals in Cash bucket" {
    // Literal cash and CD lots must show up in the coarse Cash
    // bucket, not only under "Cash & CDs" in the asset_class
    // breakdown.
    const gpa = std.testing.allocator;
    const Lot = @import("../models/portfolio.zig").Lot;

    const cash_lot: Lot = .{
        .symbol = "CASH",
        .shares = 50_000,
        .open_date = Date.fromYmd(2020, 1, 1),
        .open_price = 1.0,
        .security_type = .cash,
        .account = "Brokerage",
    };
    const cd_lot: Lot = .{
        .symbol = "CD-1",
        .shares = 10_000, // face value
        .open_date = Date.fromYmd(2024, 1, 1),
        .open_price = 1.0,
        .security_type = .cd,
        .account = "Brokerage",
        .maturity_date = Date.fromYmd(2027, 1, 1),
    };
    var lots = [_]Lot{ cash_lot, cd_lot };

    const portfolio = Portfolio{ .lots = &lots, .allocator = gpa };
    const no_classifications = ClassificationMap{ .entries = &.{}, .allocator = gpa };

    var analysis = try analyzePortfolio(
        gpa,
        &.{},
        no_classifications,
        portfolio,
        60_000,
        null,
        Date.fromYmd(2024, 6, 1),
    );
    defer analysis.deinit(gpa);

    var cash_total: f64 = 0;
    for (analysis.asset_category) |row| {
        if (std.mem.eql(u8, row.label, bucket_cash)) cash_total = row.value;
    }

    // $50,000 cash + $10,000 CD.
    try std.testing.expectApproxEqAbs(@as(f64, 60_000), cash_total, 1.0);
}
test "analyzePortfolio: legacy entry (asset_class only, no sector) buckets via fallback" {
// Hand-written CIT/CUSIP entries in metadata.srf often have
// `asset_class::Bonds,pct:num:30` with no sector. The
// fallback path through `bucketAssetClass` must pick these
// up so they land in Fixed Income, not Other.
const allocator = std.testing.allocator;
const allocations = [_]Allocation{mkAlloc("LEGACY-CIT", 100_000)};
var entries = [_]ClassificationEntry{
.{ .symbol = "LEGACY-CIT", .asset_class = "Bonds", .pct = 60 },
.{ .symbol = "LEGACY-CIT", .asset_class = "US Large Cap", .pct = 40 },
};
const cm = ClassificationMap{ .entries = &entries, .allocator = allocator };
const portfolio = Portfolio{ .lots = &.{}, .allocator = allocator };
var result = try analyzePortfolio(
allocator,
&allocations,
cm,
portfolio,
100_000,
null,
Date.fromYmd(2024, 6, 1),
);
defer result.deinit(allocator);
var equity_val: f64 = 0;
var fi_val: f64 = 0;
for (result.asset_category) |item| {
if (std.mem.eql(u8, item.label, bucket_equity)) equity_val = item.value;
if (std.mem.eql(u8, item.label, bucket_fixed_income)) fi_val = item.value;
}
// 60% Bonds → Fixed Income = $60,000.
try std.testing.expectApproxEqAbs(@as(f64, 60_000), fi_val, 1.0);
// 40% US Large Cap → Equity = $40,000.
try std.testing.expectApproxEqAbs(@as(f64, 40_000), equity_val, 1.0);
}
test "analyzePortfolio: sector wins over asset_class when both present" {
// Defensive: we should not double-count. If both fields are
// present, only the sector-based bucket fires.
const allocator = std.testing.allocator;
const allocations = [_]Allocation{mkAlloc("FOO", 100_000)};
var entries = [_]ClassificationEntry{
// sector says Fixed Income (Debt / *), asset_class says
// Equity (US Large Cap). sector should win.
.{ .symbol = "FOO", .sector = "Debt / Corporate", .asset_class = "US Large Cap" },
};
const cm = ClassificationMap{ .entries = &entries, .allocator = allocator };
const portfolio = Portfolio{ .lots = &.{}, .allocator = allocator };
var result = try analyzePortfolio(
allocator,
&allocations,
cm,
portfolio,
100_000,
null,
Date.fromYmd(2024, 6, 1),
);
defer result.deinit(allocator);
// Exactly one row, in Fixed Income.
try std.testing.expectEqual(@as(usize, 1), result.asset_category.len);
try std.testing.expectEqualStrings(bucket_fixed_income, result.asset_category[0].label);
try std.testing.expectApproxEqAbs(@as(f64, 100_000), result.asset_category[0].value, 1.0);
}

View file

@ -65,31 +65,54 @@ pub const PositionReturn = struct {
// Allocation split
/// Result of deriving the stock/bond/unclassified allocation split.
/// Result of deriving the equity / fixed-income / cash / other allocation split.
pub const AllocationSplit = struct {
/// Fraction of portfolio in equities (0.0–1.0).
/// Fraction of portfolio in equities (0.0–1.0). Sum of every
/// classification entry whose `bucketSector(sector)` is "Equity",
/// weighted by `entry.pct`.
stock_pct: f64,
/// Fraction of portfolio in bonds + cash + CDs (0.0–1.0).
/// Fraction of portfolio in fixed income (0.0–1.0). Excludes
/// cash. The header line displays cash separately as `cash_pct`.
bond_pct: f64,
/// Total market value classified as bonds.
/// Fraction of portfolio in cash + CDs + fund-internal cash
/// equivalents (0.0–1.0).
cash_pct: f64,
/// Fraction of portfolio in derivatives, real property,
/// sentinels, and unrecognized sectors (0.0–1.0).
other_pct: f64,
/// Total dollar value classified as fixed income (excludes cash).
bond_value: f64,
/// Total cash + CD face value.
/// Total cash + CD face value + fund-internal cash sleeves.
cash_cd_value: f64,
/// Total market value that could not be classified (no metadata entry).
unclassified_value: f64,
};
/// Derive the stock/bond allocation split from portfolio allocations and
/// classification metadata.
/// Derive the equity / fixed-income / cash / other allocation
/// split from portfolio allocations and classification metadata.
///
/// Positions are classified using `classifications`:
/// - asset_class == "Bonds" → bond
/// - Everything else with a classification entry → stock
/// - No classification entry → unclassified
/// For each allocation, every matching classification entry
/// contributes `market_value × (pct / 100)` into the bucket
/// returned by `analysis.bucketSector(entry.sector)`. This means:
///
/// Cash and CDs are always counted as bonds (fixed-income side).
/// Unclassified positions are reported separately so the caller can
/// decide how to handle them (e.g. treat as stock, warn, etc.).
/// - Multi-sector funds (e.g. FAGIX with 48% Debt / Corporate
/// and 22% Equity / Corporate) split correctly across buckets
/// proportional to their NPORT-P sector decomposition.
/// - Pure-debt funds (VBTLX) land in `bond_pct` even when their
/// `asset_class` is `Fund` rather than `Bonds`.
/// - GICS-sectored stocks (NVDA → Technology) land in `stock_pct`.
/// - Derivatives, real property, and sentinel sectors land in
/// `other_pct` and are silently excluded from the binary
/// stock/bond header.
///
/// Negative weights from leveraged funds (PTY's
/// `Repurchase Agreement / Other` repo liability) flow through
/// honestly into bucket math. Diluted across a diversified
/// portfolio, the visual effect is negligible.
///
/// Literal cash + CDs are added directly to `cash_pct` (and
/// `cash_cd_value`). Allocations not found in `classifications`
/// are reported via `unclassified_value`.
pub fn deriveAllocationSplit(
allocations: []const Allocation,
classifications: []const ClassificationEntry,
@ -97,20 +120,37 @@ pub fn deriveAllocationSplit(
cash_value: f64,
cd_value: f64,
) AllocationSplit {
const analysis = @import("analysis.zig");
var stock_value: f64 = 0;
var bond_value: f64 = 0;
var cash_classified_value: f64 = 0;
var other_value: f64 = 0;
var classified_value: f64 = 0;
for (allocations) |a| {
var found = false;
for (classifications) |entry| {
if (std.mem.eql(u8, entry.symbol, a.symbol)) {
found = true;
if (entry.asset_class) |ac| {
if (std.mem.eql(u8, ac, "Bonds")) {
bond_value += a.market_value;
}
}
break;
if (!std.mem.eql(u8, entry.symbol, a.symbol)) continue;
found = true;
const portion = a.market_value * (entry.pct / 100.0);
// Bucket via `sector` if present (richer signal).
// Fall back to `asset_class` for legacy hand-written
// entries with no sector. Last resort: Other.
const bucket = if (entry.sector) |s|
analysis.bucketSector(s)
else if (entry.asset_class) |ac|
analysis.bucketAssetClass(ac)
else
analysis.bucket_other;
if (std.mem.eql(u8, bucket, analysis.bucket_equity)) {
stock_value += portion;
} else if (std.mem.eql(u8, bucket, analysis.bucket_fixed_income)) {
bond_value += portion;
} else if (std.mem.eql(u8, bucket, analysis.bucket_cash)) {
cash_classified_value += portion;
} else {
other_value += portion;
}
}
if (found) {
@ -118,20 +158,34 @@ pub fn deriveAllocationSplit(
}
}
const cash_cd_value = cash_value + cd_value;
const bond_plus_cash = bond_value + cash_cd_value;
const literal_cash = cash_value + cd_value;
const total_cash = cash_classified_value + literal_cash;
// Unclassified = allocations not found in classifications (options, new positions, etc.)
// Note: cash/CDs are not in allocations, so total_value includes them separately.
const unclassified_value = total_value - classified_value - cash_cd_value;
const unclassified_value = total_value - classified_value - literal_cash;
const stock_pct = if (total_value > 0) (total_value - bond_plus_cash - @max(unclassified_value, 0)) / total_value else 0.75;
const bond_pct = if (total_value > 0) bond_plus_cash / total_value else 0.25;
if (total_value <= 0) {
// Empty portfolio: fall back to a sensible default for
// benchmark blending math (75/25 stock/bond).
return .{
.stock_pct = 0.75,
.bond_pct = 0.25,
.cash_pct = 0,
.other_pct = 0,
.bond_value = 0,
.cash_cd_value = 0,
.unclassified_value = 0,
};
}
return .{
.stock_pct = stock_pct,
.bond_pct = bond_pct,
.stock_pct = stock_value / total_value,
.bond_pct = bond_value / total_value,
.cash_pct = total_cash / total_value,
.other_pct = other_value / total_value,
.bond_value = bond_value,
.cash_cd_value = cash_cd_value,
.cash_cd_value = total_cash,
.unclassified_value = @max(unclassified_value, 0),
};
}
@ -617,24 +671,28 @@ fn makeAlloc(symbol: []const u8, mv: f64, weight: f64) Allocation {
};
}
test "deriveAllocationSplit basic stock/bond split" {
test "deriveAllocationSplit basic stock/bond split via sector" {
// BND has Debt sector → Fixed Income bucket. SPY/AAPL have
// GICS sectors → Equity bucket. Cash/CDs add to cash_pct.
const allocs = [_]Allocation{
makeAlloc("SPY", 700_000, 0.70),
makeAlloc("AAPL", 100_000, 0.10),
makeAlloc("BND", 150_000, 0.15),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "SPY", .asset_class = "US Large Cap" },
.{ .symbol = "AAPL", .asset_class = "US Large Cap" },
.{ .symbol = "BND", .asset_class = "Bonds" },
.{ .symbol = "SPY", .sector = "Financial Services" },
.{ .symbol = "AAPL", .sector = "Technology" },
.{ .symbol = "BND", .sector = "Debt / Corporate" },
};
const result = deriveAllocationSplit(&allocs, &classes, 1_000_000, 40_000, 10_000);
// Bonds: BND $150K + cash $40K + CD $10K = $200K → 20%
// Bonds: BND $150K → 15%
try std.testing.expectApproxEqAbs(@as(f64, 150_000), result.bond_value, 1.0);
try std.testing.expectApproxEqAbs(@as(f64, 0.15), result.bond_pct, 0.01);
// Cash: $40K + $10K = $50K → 5%
try std.testing.expectApproxEqAbs(@as(f64, 50_000), result.cash_cd_value, 1.0);
try std.testing.expectApproxEqAbs(@as(f64, 0.20), result.bond_pct, 0.01);
// Stock: $800K → 80% (no unclassified since all are in metadata)
try std.testing.expectApproxEqAbs(@as(f64, 0.05), result.cash_pct, 0.01);
// Stock: SPY $700K + AAPL $100K = $800K → 80%
try std.testing.expectApproxEqAbs(@as(f64, 0.80), result.stock_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.unclassified_value, 1.0);
}
@ -645,20 +703,20 @@ test "deriveAllocationSplit with unclassified positions" {
makeAlloc("MYSTERY", 100_000, 0.10),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "SPY", .asset_class = "US Large Cap" },
.{ .symbol = "SPY", .sector = "Financial Services" },
// MYSTERY has no classification entry
};
const result = deriveAllocationSplit(&allocs, &classes, 800_000, 50_000, 50_000);
// Bonds: $0 + cash $50K + CD $50K = $100K
// Cash: $50K + $50K = $100K → 12.5%
try std.testing.expectApproxEqAbs(@as(f64, 100_000), result.cash_cd_value, 1.0);
try std.testing.expectApproxEqAbs(@as(f64, 0.125), result.cash_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.bond_value, 1.0);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.bond_pct, 0.01);
// Unclassified: MYSTERY $100K
try std.testing.expectApproxEqAbs(@as(f64, 100_000), result.unclassified_value, 1.0);
// Stock: $800K - $100K bonds - $100K unclassified = $600K → 75%
// Stock: SPY $600K → 75%
try std.testing.expectApproxEqAbs(@as(f64, 0.75), result.stock_pct, 0.01);
// Bond pct: $100K / $800K → 12.5%
try std.testing.expectApproxEqAbs(@as(f64, 0.125), result.bond_pct, 0.01);
}
test "deriveAllocationSplit empty portfolio" {
@ -666,9 +724,11 @@ test "deriveAllocationSplit empty portfolio" {
const classes = [_]ClassificationEntry{};
const result = deriveAllocationSplit(&allocs, &classes, 0, 0, 0);
// Default fallback
// Default fallback for blending math
try std.testing.expectApproxEqAbs(@as(f64, 0.75), result.stock_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.25), result.bond_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.cash_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.other_pct, 0.01);
}
test "deriveAllocationSplit no metadata" {
@ -679,12 +739,163 @@ test "deriveAllocationSplit no metadata" {
const classes = [_]ClassificationEntry{}; // no metadata at all
const result = deriveAllocationSplit(&allocs, &classes, 1_000_000, 100_000, 100_000);
// Everything is unclassified except cash/CDs
// Cash: $200K → 20%
try std.testing.expectApproxEqAbs(@as(f64, 200_000), result.cash_cd_value, 1.0);
try std.testing.expectApproxEqAbs(@as(f64, 0.20), result.cash_pct, 0.01);
// Everything except cash is unclassified
try std.testing.expectApproxEqAbs(@as(f64, 800_000), result.unclassified_value, 1.0);
// Stock = total - bonds - unclassified = $1M - $200K - $800K = $0 → 0%
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.stock_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.20), result.bond_pct, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.bond_pct, 0.01);
}
test "deriveAllocationSplit: pure-debt fund with asset_class Fund maps via sector" {
// VBTLX shape: asset_class is "Fund" (not "Bonds"), but every
// sector entry is a Debt / * variant. Should land entirely
// in bond_pct via bucketSector(sector), not in unclassified.
const allocs = [_]Allocation{
makeAlloc("VBTLX", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "VBTLX", .asset_class = "Fund", .sector = "Debt / Corporate", .pct = 60.0 },
.{ .symbol = "VBTLX", .asset_class = "Fund", .sector = "Debt / US Treasury", .pct = 30.0 },
.{ .symbol = "VBTLX", .asset_class = "Fund", .sector = "Debt / Municipal", .pct = 10.0 },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
try std.testing.expectApproxEqAbs(@as(f64, 1.0), result.bond_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.stock_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.cash_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 100_000), result.bond_value, 1.0);
}
test "deriveAllocationSplit: pure-equity fund maps via sector" {
// SCHD shape: 99.7% Equity / Corporate + 0.19% short-term
// investment vehicle. Stock_pct ≈ 0.997, cash_pct ≈ 0.0019.
const allocs = [_]Allocation{
makeAlloc("SCHD", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "SCHD", .asset_class = "Fund", .sector = "Equity / Corporate", .pct = 99.70 },
.{ .symbol = "SCHD", .asset_class = "Fund", .sector = "Short-Term Investment Vehicle / Registered Fund", .pct = 0.19 },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
try std.testing.expectApproxEqAbs(@as(f64, 0.997), result.stock_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0019), result.cash_pct, 0.0005);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.bond_pct, 0.001);
}
test "deriveAllocationSplit: multi-asset fund splits across buckets" {
// FAGIX-shape: ~48% Debt + ~22% Equity + others. Should split
// across stock_pct, bond_pct, cash_pct rather than landing
// entirely in one bucket.
const allocs = [_]Allocation{
makeAlloc("FAGIX", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Debt / Corporate", .pct = 47.69 },
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Equity / Corporate", .pct = 22.49 },
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Short-Term Investment Vehicle / Registered Fund", .pct = 13.37 },
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Loan / Corporate", .pct = 9.99 },
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Equity Preferred / Corporate", .pct = 3.59 },
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Equity / Registered Fund", .pct = 2.38 },
.{ .symbol = "FAGIX", .asset_class = "Fund", .sector = "Asset-Backed CBO/CDO / Corporate", .pct = 0.32 },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
// Equity: 22.49 + 3.59 + 2.38 = 28.46%
try std.testing.expectApproxEqAbs(@as(f64, 0.2846), result.stock_pct, 0.001);
// Fixed Income: 47.69 + 9.99 + 0.32 = 58.00%
try std.testing.expectApproxEqAbs(@as(f64, 0.5800), result.bond_pct, 0.001);
// Cash: 13.37%
try std.testing.expectApproxEqAbs(@as(f64, 0.1337), result.cash_pct, 0.001);
}
test "deriveAllocationSplit: PTY-shape leveraged fund honestly sums negative repo" {
// PTY uses ~30% repo leverage. The negative pct flows
// through honestly into the Cash bucket (Repurchase
// Agreement → Cash); the long sleeves stay positive.
const allocs = [_]Allocation{
makeAlloc("PTY", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "PTY", .asset_class = "Fund", .sector = "Debt / Corporate", .pct = 41.65 },
.{ .symbol = "PTY", .asset_class = "Fund", .sector = "Loan / Corporate", .pct = 40.05 },
.{ .symbol = "PTY", .asset_class = "Fund", .sector = "Equity / Corporate", .pct = 5.78 },
.{ .symbol = "PTY", .asset_class = "Fund", .sector = "Repurchase Agreement / Other", .pct = -29.72 },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
// Bond bucket: 41.65 + 40.05 = 81.70%
try std.testing.expectApproxEqAbs(@as(f64, 0.8170), result.bond_pct, 0.001);
// Stock bucket: 5.78%
try std.testing.expectApproxEqAbs(@as(f64, 0.0578), result.stock_pct, 0.001);
// Cash bucket: -29.72% (honest, negative).
try std.testing.expectApproxEqAbs(@as(f64, -0.2972), result.cash_pct, 0.001);
}
test "deriveAllocationSplit: derivatives go into Other (excluded from header math)" {
// A fund split 90% `Equity / Corporate` and 10%
// `Derivative / Corporate`: the derivative sleeve is expected
// in `other_pct`, the equity sleeve in `stock_pct`, and
// neither is expected to touch `bond_pct` or `cash_pct`.
const allocs = [_]Allocation{
makeAlloc("FOO", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "FOO", .asset_class = "Fund", .sector = "Equity / Corporate", .pct = 90.0 },
.{ .symbol = "FOO", .asset_class = "Fund", .sector = "Derivative / Corporate", .pct = 10.0 },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
try std.testing.expectApproxEqAbs(@as(f64, 0.90), result.stock_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.10), result.other_pct, 0.001);
// Bonds and cash unaffected.
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.bond_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.cash_pct, 0.001);
}
test "deriveAllocationSplit: legacy entry with asset_class only (no sector) buckets via fallback" {
// Hand-written `metadata.srf` entries pre-EDGAR-decomposition
// sometimes have asset_class but no sector. The fallback
// path through `bucketAssetClass` should bucket these
// correctly rather than dumping them in Other.
const allocs = [_]Allocation{
makeAlloc("LEGACY", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "LEGACY", .asset_class = "US Large Cap" },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
try std.testing.expectApproxEqAbs(@as(f64, 1.0), result.stock_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.other_pct, 0.001);
}
test "deriveAllocationSplit: legacy asset_class::Bonds (no sector) maps to Fixed Income" {
// An entry carrying only `asset_class = "Bonds"` (no sector) is
// expected to land entirely in `bond_pct` rather than `other_pct`.
// NOTE(review): the entry omits `pct`; the 1.0 expectation
// presumably relies on `ClassificationEntry.pct` defaulting to
// the whole position — confirm against the struct definition.
const allocs = [_]Allocation{
makeAlloc("LEGACY", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "LEGACY", .asset_class = "Bonds" },
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
try std.testing.expectApproxEqAbs(@as(f64, 1.0), result.bond_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.other_pct, 0.001);
}
test "deriveAllocationSplit: entry with neither sector nor asset_class lands in Other" {
// Genuinely unclassifiable: classification entry exists for
// the symbol but has neither a sector nor an asset_class.
// No fallback path is possible; goes to Other.
const allocs = [_]Allocation{
makeAlloc("BARE", 100_000, 1.0),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "BARE", .geo = "US" }, // no sector, no asset_class
};
const result = deriveAllocationSplit(&allocs, &classes, 100_000, 0, 0);
try std.testing.expectApproxEqAbs(@as(f64, 1.0), result.other_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.stock_pct, 0.001);
try std.testing.expectApproxEqAbs(@as(f64, 0.0), result.bond_pct, 0.001);
}
test "deriveAllocationSplit stock and bond pct sum with unclassified" {
@ -694,12 +905,16 @@ test "deriveAllocationSplit stock and bond pct sum with unclassified" {
makeAlloc("NEW", 50_000, 0.05),
};
const classes = [_]ClassificationEntry{
.{ .symbol = "SPY", .asset_class = "US Large Cap" },
.{ .symbol = "BND", .asset_class = "Bonds" },
.{ .symbol = "SPY", .sector = "Financial Services" },
.{ .symbol = "BND", .sector = "Debt / Corporate" },
};
const result = deriveAllocationSplit(&allocs, &classes, 1_000_000, 200_000, 50_000);
// stock + bond + unclassified/total should account for everything
// stock + bond + cash + other + unclassified should account for everything
const unclass_pct = result.unclassified_value / 1_000_000;
try std.testing.expectApproxEqAbs(@as(f64, 1.0), result.stock_pct + result.bond_pct + unclass_pct, 0.01);
try std.testing.expectApproxEqAbs(
@as(f64, 1.0),
result.stock_pct + result.bond_pct + result.cash_pct + result.other_pct + unclass_pct,
0.01,
);
}

10
src/cache/store.zig vendored
View file

@ -187,9 +187,9 @@ pub const DataType = enum {
/// has one shared facts file.
entity_facts,
/// EDGAR's `company_tickers_mf.json` index, cached at
/// `<cache_dir>/_edgar/tickers_funds.srf`. Single-record file
/// (one MutualFundTickerMapBlob) under a synthetic `_edgar` key.
/// Updated daily upstream; refreshes monthly with jitter.
/// `<cache_dir>/_edgar/tickers_funds.srf` as a slice of
/// `MutualFundTickerEntry` records under a synthetic `_edgar`
/// key. Updated daily upstream; refreshes monthly with jitter.
tickers_funds,
/// EDGAR's `company_tickers.json` index, cached at
/// `<cache_dir>/_edgar/tickers_companies.srf`. Same shape as
@ -269,8 +269,8 @@ pub const Store = struct {
Wikidata.ClassificationRecord => .classification,
Edgar.EtfMetricRecord => .etf_metrics,
Edgar.EntityFactRecord => .entity_facts,
Edgar.MutualFundTickerMapBlob => .tickers_funds,
Edgar.CompanyTickerMapBlob => .tickers_companies,
Edgar.MutualFundTickerEntry => .tickers_funds,
Edgar.CompanyTickerEntry => .tickers_companies,
else => @compileError("unsupported type for Store"),
};
}

View file

@ -133,35 +133,39 @@ pub fn run(ctx: *framework.RunCtx, _: ParsedArgs) !void {
else
anchor_path;
try display(result, split.stock_pct, split.bond_pct, pf_data.summary.total_value, display_label, color, out);
try display(result, split.stock_pct, split.bond_pct, split.cash_pct, pf_data.summary.total_value, display_label, color, out);
}
pub fn display(result: zfin.analysis.AnalysisResult, stock_pct: f64, bond_pct: f64, total_value: f64, file_path: []const u8, color: bool, out: *std.Io.Writer) !void {
fn display(result: zfin.analysis.AnalysisResult, stock_pct: f64, bond_pct: f64, cash_pct: f64, total_value: f64, file_path: []const u8, color: bool, out: *std.Io.Writer) !void {
const label_width = fmt.analysis_label_width;
const bar_width = fmt.analysis_bar_width;
try cli.printBold(out, color, "\nPortfolio Analysis ({s})\n", .{file_path});
try out.print("========================================\n\n", .{});
// Equities vs Fixed Income summary
// Equities / Fixed Income / Cash header summary. The Other
// bucket (derivatives, real property, sentinels) is excluded
// from this header but appears as its own row in the
// Asset Category breakdown below.
{
try cli.printFg(out, color, cli.CLR_MUTED, " Equities {d:.1}% ({f}) / Fixed Income {d:.1}% ({f})\n\n", .{ stock_pct * 100, Money.from(stock_pct * total_value), bond_pct * 100, Money.from(bond_pct * total_value) });
try cli.printFg(out, color, cli.CLR_MUTED, " Equities {d:.1}% ({f}) / Fixed Income {d:.1}% ({f}) / Cash {d:.1}% ({f})\n\n", .{
stock_pct * 100,
Money.from(stock_pct * total_value),
bond_pct * 100,
Money.from(bond_pct * total_value),
cash_pct * 100,
Money.from(cash_pct * total_value),
});
}
const sections = [_]struct { items: []const zfin.analysis.BreakdownItem, title: []const u8 }{
.{ .items = result.asset_class, .title = " Asset Class" },
.{ .items = result.sector, .title = " Sector (Equities)" },
.{ .items = result.geo, .title = " Geographic" },
.{ .items = result.account, .title = " By Account" },
.{ .items = result.tax_type, .title = " By Tax Type" },
};
const sections = zfin.analysis.breakdownSections(&result);
for (sections, 0..) |sec, si| {
if (si > 0 and sec.items.len == 0) continue;
if (si > 0) try out.print("\n", .{});
// Bold + header color reset at end of printFg clears both.
try cli.setBold(out, color);
try cli.printFg(out, color, cli.CLR_HEADER, "{s}\n", .{sec.title});
try cli.printFg(out, color, cli.CLR_HEADER, " {s}\n", .{sec.title});
try printBreakdownSection(out, sec.items, label_width, bar_width, color);
}
@ -274,6 +278,11 @@ test "printBreakdownSection with color emits ANSI" {
test "display shows all sections" {
var buf: [8192]u8 = undefined;
var w: std.Io.Writer = .fixed(&buf);
const asset_category = [_]zfin.analysis.BreakdownItem{
.{ .label = "Equity", .weight = 0.80, .value = 80000.0 },
.{ .label = "Fixed Income", .weight = 0.15, .value = 15000.0 },
.{ .label = "Cash", .weight = 0.05, .value = 5000.0 },
};
const asset_class = [_]zfin.analysis.BreakdownItem{
.{ .label = "US Large Cap", .weight = 0.60, .value = 60000.0 },
.{ .label = "International", .weight = 0.40, .value = 40000.0 },
@ -287,6 +296,7 @@ test "display shows all sections" {
const empty = [_]zfin.analysis.BreakdownItem{};
const unclassified = [_][]const u8{"WEIRD"};
const result: zfin.analysis.AnalysisResult = .{
.asset_category = @constCast(&asset_category),
.asset_class = @constCast(&asset_class),
.sector = @constCast(&sector),
.geo = @constCast(&geo),
@ -295,9 +305,14 @@ test "display shows all sections" {
.unclassified = @constCast(&unclassified),
.total_value = 100000.0,
};
try display(result, 0.80, 0.20, 100000.0, "test.srf", false, &w);
try display(result, 0.80, 0.15, 0.05, 100000.0, "test.srf", false, &w);
const out = w.buffered();
try std.testing.expect(std.mem.indexOf(u8, out, "Portfolio Analysis") != null);
// 3-up header includes Cash.
try std.testing.expect(std.mem.indexOf(u8, out, "Equities 80.0%") != null);
try std.testing.expect(std.mem.indexOf(u8, out, "Fixed Income 15.0%") != null);
try std.testing.expect(std.mem.indexOf(u8, out, "Cash 5.0%") != null);
try std.testing.expect(std.mem.indexOf(u8, out, "Asset Category") != null);
try std.testing.expect(std.mem.indexOf(u8, out, "Asset Class") != null);
try std.testing.expect(std.mem.indexOf(u8, out, "US Large Cap") != null);
try std.testing.expect(std.mem.indexOf(u8, out, "Sector") != null);

File diff suppressed because it is too large Load diff

View file

@ -11,19 +11,23 @@ pub const ParsedArgs = struct {
pub const meta: framework.Meta = .{
.name = "etf",
.group = .symbol_lookup,
.synopsis = "Show ETF profile (holdings, sectors, expense ratio)",
.synopsis = "Show ETF profile (holdings, sectors, AUM, inception)",
.uppercase_first_arg = true,
.help =
\\Usage: zfin etf <SYMBOL>
\\
\\Show the ETF profile (expense ratio, AUM, dividend yield,
\\sector allocation, top holdings) for a fund symbol from
\\Alpha Vantage. Cached for 30 days. Leveraged funds are
\\flagged in red.
\\Show the ETF profile for a fund symbol, assembled from
\\public SEC EDGAR (NPORT-P holdings + sectors + AUM) and
\\Wikidata (inception date + fund name). Cached for ~90 days.
\\
\\Several legacy fields (expense ratio, dividend yield,
\\portfolio turnover, leveraged flag) come from a fund's
\\prospectus and are not currently surfaced — those will
\\appear once a prospectus parser lands.
\\
\\Examples:
\\ zfin etf VTI # broad market index
\\ zfin etf TQQQ # leveraged (warning surfaced)
\\ zfin etf SPY # S&P 500 ETF
\\
,
.user_errors = error{ MissingSymbol, UnexpectedArg },
@ -46,11 +50,17 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
const opts = cli.fetchOptionsFromPolicy(ctx.globals.refresh_policy);
const result = svc.getEtfProfile(parsed.symbol, opts) catch |err| switch (err) {
zfin.DataError.NoApiKey => {
cli.stderrPrint(ctx.io, "Error: ALPHAVANTAGE_API_KEY not set. Get a free key at https://alphavantage.co\n");
cli.stderrPrint(ctx.io, "Error: ZFIN_USER_EMAIL not set. Add it to .env (SEC EDGAR requires a contact email in the User-Agent header).\n");
return;
},
zfin.DataError.NotFound => {
cli.stderrPrint(ctx.io, "Error: symbol not found in EDGAR. Either it's not an ETF/fund, or the ticker map needs refreshing.\n");
return;
},
else => {
cli.stderrPrint(ctx.io, "Error fetching ETF profile.\n");
var buf: [128]u8 = undefined;
const msg = std.fmt.bufPrint(&buf, "Error fetching ETF profile ({t}).\n", .{err}) catch "Error fetching ETF profile.\n";
cli.stderrPrint(ctx.io, msg);
return;
},
};

View file

@ -225,14 +225,17 @@ pub fn fmtTimeAgo(buf: []u8, before_s: i64, after_s: i64) []const u8 {
/// Format large numbers with T/B/M suffixes (e.g. "1.5B", "45.6M").
///
/// Returns a 15-byte array holding the formatted text left-aligned,
/// with the remainder padded with spaces. Values below one million
/// are printed as whole numbers with no suffix.
///
/// The fixed buffer cannot hold every `f64`. A quotient with 13 or
/// more integer digits (values around 1e24 and above on the "T"
/// branch), or a large-magnitude negative value on the unsuffixed
/// branch, needs more than 15 bytes. Those values fall back to
/// compact scientific notation rather than panicking or leaving a
/// truncated number in the buffer.
pub fn fmtLargeNum(val: f64) [15]u8 {
    var result: [15]u8 = @splat(' ');

    // Try the human-friendly suffixed form first. `fits` is false
    // only when bufPrint reported NoSpaceLeft for this value.
    const fits = blk: {
        if (val >= 1_000_000_000_000) {
            _ = std.fmt.bufPrint(&result, "{d:.1}T", .{val / 1_000_000_000_000}) catch break :blk false;
        } else if (val >= 1_000_000_000) {
            _ = std.fmt.bufPrint(&result, "{d:.1}B", .{val / 1_000_000_000}) catch break :blk false;
        } else if (val >= 1_000_000) {
            _ = std.fmt.bufPrint(&result, "{d:.1}M", .{val / 1_000_000}) catch break :blk false;
        } else {
            _ = std.fmt.bufPrint(&result, "{d:.0}", .{val}) catch break :blk false;
        }
        break :blk true;
    };

    if (!fits) {
        // A failed bufPrint may have left partial digits behind;
        // restore the padding before writing the fallback.
        result = @splat(' ');
        // Scientific notation with two fractional digits is at most
        // ~10 bytes for any finite f64 (e.g. "-1.80e308"), so this
        // cannot run out of space in a 15-byte buffer.
        _ = std.fmt.bufPrint(&result, "{e:.2}", .{val}) catch |err| std.debug.panic("fmtLargeNum buffer too small: {t}", .{err});
    }
    return result;
}
@ -877,6 +880,11 @@ pub fn computeBrailleChart(
const price_range = max_price - min_price;
// Price labels
// SAFETY: every field of `result` is initialized below before
// it is read or returned. Treating it as `undefined` here is
// a deliberate "stack-allocate, then write each field"
// pattern — Zig requires the variable to exist before
// bufPrint can take a slice of one of its fields.
var result: BrailleChart = undefined;
const max_str = std.fmt.bufPrint(&result.max_label, "{f}", .{Money.from(max_price)}) catch "";
result.max_label_len = max_str.len;
@ -1465,6 +1473,34 @@ test "buildBlockBar" {
try std.testing.expectEqual(@as(usize, 20), half.len);
}
test "buildBlockBar: negative weight clamps to empty bar (no crash)" {
    // NPORT-P emits negative pct values for leveraged-fund
    // liability sleeves (e.g. PTY's repurchase agreement at
    // -29.72%). After portfolio-wide aggregation and dilution
    // these tend to produce small-magnitude negative weights in
    // the Sector breakdown. The renderer must handle them
    // safely — render as a 0-width (all-spaces) bar with no
    // panic on @intFromFloat.
    var buf: [256]u8 = undefined;

    // An empty 10-cell bar is exactly ten ASCII spaces. Building it
    // with `**` keeps the expectation in step with the length
    // assertion and immune to whitespace collapsing in the literal.
    const empty_bar = " " ** 10;

    const small_neg = buildBlockBar(&buf, -0.003, 10);
    try std.testing.expectEqual(@as(usize, 10), small_neg.len);
    try std.testing.expectEqualStrings(empty_bar, small_neg);

    const large_neg = buildBlockBar(&buf, -1.5, 10);
    try std.testing.expectEqual(@as(usize, 10), large_neg.len);
    try std.testing.expectEqualStrings(empty_bar, large_neg);
}
test "buildBlockBar: weight > 1.0 clamps to full bar (no overflow)" {
// Symmetric defensive case: if for any reason the caller
// hands us a weight above 1.0 (e.g. the per-fund rather than
// per-portfolio side of the math), the bar should clamp
// rather than write past `total_chars`.
var buf: [256]u8 = undefined;
const overshoot = buildBlockBar(&buf, 1.5, 10);
// NOTE(review): 30 bytes for a 10-cell bar presumably means each
// filled cell is a 3-byte UTF-8 block glyph — confirm against
// buildBlockBar.
try std.testing.expectEqual(@as(usize, 30), overshoot.len);
}
test "fmtHistoricalChange" {
var buf: [16]u8 = undefined;
try std.testing.expectEqualStrings("--", fmtHistoricalChange(&buf, 0, 0));

View file

@ -96,7 +96,8 @@ const usage_footer =
\\ TWELVEDATA_API_KEY Twelve Data API key (primary: prices)
\\ POLYGON_API_KEY Polygon.io API key (dividends, splits)
\\ FMP_API_KEY Financial Modeling Prep API key (earnings)
\\ ALPHAVANTAGE_API_KEY Alpha Vantage API key (ETF profiles)
\\ ZFIN_USER_EMAIL Contact email for SEC EDGAR + Wikidata User-Agent
\\ (required for ETF profiles + portfolio enrichment)
\\ OPENFIGI_API_KEY OpenFIGI API key (CUSIP lookup, optional)
\\ ZFIN_CACHE_DIR Cache directory (default: ~/.cache/zfin)
\\ ZFIN_HOME User file directory (portfolio, watchlist, .env)

View file

@ -102,3 +102,180 @@ test "parse classification file" {
try std.testing.expectEqualStrings("US Large Cap", cm.entries[1].asset_class.?);
try std.testing.expectApproxEqAbs(@as(f64, 55.0), cm.entries[1].pct, 0.01);
}
// ClassificationRecord
//
// Distinct from `ClassificationEntry` above: that one represents
// a row in the user's `metadata.srf` (already-curated portfolio
// data). `ClassificationRecord` is the upstream-fetched
// per-symbol shape that flows OUT of `DataService.getClassification`.
// `enrich` reads it to write the metadata.srf row that becomes
// a `ClassificationEntry` later.
//
// Lives here (not in `providers/Wikidata.zig`) because the shape
// is provider-agnostic: any future classification source (FMP,
// Alpha Vantage, hand-written) populates the same record. The
// fact that today the only producer is Wikidata is incidental.
/// One upstream-fetched classification result for a single symbol.
///
/// Optional fields default to `null`, so a producer only fills in
/// what it actually has data for. `symbol`, `as_of` and `source`
/// have no default and must always be supplied; `source` in
/// particular is always emitted per the project's source-pure
/// invariant. Every string field is owned by the record and is
/// released by `deinit`.
pub const ClassificationRecord = struct {
    symbol: []const u8, // owned
    name: ?[]const u8 = null, // owned
    sector: ?[]const u8 = null, // owned
    industry: ?[]const u8 = null, // owned
    /// ISO-3166 alpha-2 country code (e.g. "US", "GB", "DE").
    country: ?[]const u8 = null, // owned
    asset_class: ?[]const u8 = null, // owned
    is_etf: bool = false,
    /// YYYY-MM-DD; trimmed from upstream's ISO-8601 date.
    inception_date: ?[]const u8 = null, // owned
    /// Wikidata's P5531: the SEC CIK as a digit string. Already
    /// zero-padded to 10 digits, matching the project-wide CIK
    /// normalization convention.
    cik: ?[]const u8 = null, // owned
    /// YYYY-MM-DD when this provider ran, NOT when upstream last
    /// updated the underlying entity.
    as_of: []const u8, // owned
    source: []const u8, // owned; no default, provenance is always emitted

    /// Release every string owned by this record. `allocator` must
    /// be the allocator the strings were allocated with.
    pub fn deinit(self: ClassificationRecord, allocator: std.mem.Allocator) void {
        allocator.free(self.symbol);

        // The nullable strings are released in declaration order;
        // absent ones are simply skipped.
        const nullable_strings = [_]?[]const u8{
            self.name,
            self.sector,
            self.industry,
            self.country,
            self.asset_class,
            self.inception_date,
            self.cik,
        };
        for (nullable_strings) |maybe_owned| {
            if (maybe_owned) |owned| allocator.free(owned);
        }

        allocator.free(self.as_of);
        allocator.free(self.source);
    }

    /// Deinit every record in `recs`, then free the slice itself.
    pub fn freeSlice(allocator: std.mem.Allocator, recs: []const ClassificationRecord) void {
        for (recs) |rec| {
            rec.deinit(allocator);
        }
        allocator.free(recs);
    }
};
// Geographic taxonomy
/// Geo-bucket labels returned by the country-to-geo lookup.
///
/// Kept as named constants (rather than inline string literals
/// in the map) so callers can reference them without typo risk
/// and the taxonomy can be tweaked in one place.
pub const geo = struct {
    /// Bucket for United States entries.
    pub const us = "US";
    /// Bucket for developed markets outside the US.
    pub const developed = "International Developed";
    /// Bucket for emerging markets.
    pub const emerging = "Emerging Markets";
    /// Fallback bucket for null, empty, or unmapped country codes.
    pub const unknown = "Unknown";
};
/// Country-code-to-geo-bucket lookup. Producers (Wikidata today,
/// others tomorrow) hand us ISO-3166 alpha-2 codes via the
/// `ClassificationRecord.country` field; we map them to the geo
/// taxonomy (`geo.us` / `geo.developed` / `geo.emerging` /
/// `geo.unknown`). Keys are matched exactly as written here, so
/// codes must already be upper-case.
///
/// MSCI conventions used as the developed/emerging split. Taiwan
/// and South Korea are MSCI-emerging despite FTSE classifying
/// them developed. Israel is MSCI-developed (upgraded 2010).
/// Canada is folded into International Developed (some users
/// prefer a separate Canada bucket; override in `metadata.srf`).
///
/// NOTE(review): GR and IS are listed as developed below; MSCI
/// has presumably carried Greece as emerging and Iceland as
/// frontier in recent years. Confirm these two placements are
/// intentional.
const country_to_geo = std.StaticStringMap([]const u8).initComptime(.{
    // United States
    .{ "US", geo.us },
    // Alpha-3 fallback for entries that use the longer form.
    .{ "USA", geo.us },
    // International Developed: Europe ex-CIS
    .{ "GB", geo.developed },
    .{ "DE", geo.developed },
    .{ "FR", geo.developed },
    .{ "NL", geo.developed },
    .{ "CH", geo.developed },
    .{ "SE", geo.developed },
    .{ "DK", geo.developed },
    .{ "NO", geo.developed },
    .{ "FI", geo.developed },
    .{ "IT", geo.developed },
    .{ "ES", geo.developed },
    .{ "BE", geo.developed },
    .{ "AT", geo.developed },
    .{ "IE", geo.developed },
    .{ "LU", geo.developed },
    .{ "PT", geo.developed },
    .{ "GR", geo.developed },
    .{ "IS", geo.developed },
    // International Developed: Asia-Pacific + Israel + Canada
    .{ "JP", geo.developed },
    .{ "AU", geo.developed },
    .{ "NZ", geo.developed },
    .{ "SG", geo.developed },
    .{ "HK", geo.developed },
    .{ "IL", geo.developed },
    .{ "CA", geo.developed },
    // Emerging Markets (MSCI)
    .{ "CN", geo.emerging },
    .{ "TW", geo.emerging },
    .{ "KR", geo.emerging },
    .{ "IN", geo.emerging },
    .{ "BR", geo.emerging },
    .{ "MX", geo.emerging },
    .{ "RU", geo.emerging },
    .{ "TR", geo.emerging },
    .{ "ZA", geo.emerging },
    .{ "TH", geo.emerging },
    .{ "MY", geo.emerging },
    .{ "ID", geo.emerging },
    .{ "PH", geo.emerging },
    .{ "VN", geo.emerging },
    .{ "AR", geo.emerging },
    .{ "CL", geo.emerging },
    .{ "CO", geo.emerging },
    .{ "PE", geo.emerging },
    .{ "EG", geo.emerging },
});
/// Resolve an ISO-3166 alpha-2 country code to its geo bucket.
///
/// Returns `geo.unknown` when `iso2` is null, empty, or not
/// present in `country_to_geo`, leaving the user free to
/// override the bucket in `metadata.srf`. The returned slice is
/// one of the `geo` constants, never a copy.
pub fn geoFor(iso2: ?[]const u8) []const u8 {
    if (iso2) |code| {
        if (code.len != 0) {
            if (country_to_geo.get(code)) |bucket| return bucket;
        }
    }
    return geo.unknown;
}
test "geoFor maps known ISO-3166 codes to bucket" {
    // One row per (input code, expected bucket), covering the US
    // aliases plus a sample of developed and emerging markets.
    const Case = struct { code: []const u8, bucket: []const u8 };
    const cases = [_]Case{
        .{ .code = "US", .bucket = geo.us },
        .{ .code = "USA", .bucket = geo.us },
        .{ .code = "GB", .bucket = geo.developed },
        .{ .code = "DE", .bucket = geo.developed },
        .{ .code = "CA", .bucket = geo.developed },
        .{ .code = "IL", .bucket = geo.developed },
        .{ .code = "CN", .bucket = geo.emerging },
        .{ .code = "TW", .bucket = geo.emerging },
        .{ .code = "KR", .bucket = geo.emerging },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.bucket, geoFor(case.code));
    }
}
test "geoFor returns Unknown for null/empty/unmapped" {
    // Null, the empty string, and two codes absent from the map
    // ("ZZ" is an unassigned ISO-2 code) must all fall through to
    // the Unknown bucket.
    const inputs = [_]?[]const u8{ null, "", "ZZ", "XX" };
    for (inputs) |input| {
        try std.testing.expectEqualStrings(geo.unknown, geoFor(input));
    }
}
test "geo bucket labels are stable strings (not byte copies)" {
    // Callers stash these in HashMap keys without duping, so the
    // slice returned by geoFor must point at the very same bytes
    // as the matching `geo` constant. Compare addresses, not
    // contents.
    const Pair = struct { label: []const u8, looked_up: []const u8 };
    const pairs = [_]Pair{
        .{ .label = geo.us, .looked_up = geoFor("US") },
        .{ .label = geo.developed, .looked_up = geoFor("GB") },
        .{ .label = geo.emerging, .looked_up = geoFor("CN") },
        .{ .label = geo.unknown, .looked_up = geoFor(null) },
    };
    for (pairs) |pair| {
        try std.testing.expectEqual(@intFromPtr(pair.label.ptr), @intFromPtr(pair.looked_up.ptr));
    }
}

View file

@ -13,36 +13,42 @@ pub const SectorWeight = struct {
weight: f64,
};
/// ETF profile and metadata.
/// ETF profile and metadata. Assembled from public EDGAR
/// (NPORT-P holdings + sectors + AUM) plus Wikidata
/// (inception_date + name fallback). The legacy AlphaVantage
/// fields (`expense_ratio`, `dividend_yield`,
/// `portfolio_turnover`, `leveraged`) remain on the type but
/// stay at their defaults (null / false) in the current
/// pipeline; they'll fill in once a prospectus parser lands.
pub const EtfProfile = struct {
symbol: []const u8,
/// Fund name (preferred from EDGAR series_name; fallback to
/// Wikidata name). Owned by the caller (via `deinit`).
name: ?[]const u8 = null,
asset_class: ?[]const u8 = null,
/// Expense ratio as a decimal (e.g., 0.0003 for 0.03%)
/// Expense ratio as a decimal (e.g., 0.0003 for 0.03%).
/// Currently unset needs a prospectus parser.
expense_ratio: ?f64 = null,
/// Net assets in USD
/// Net assets in USD (from NPORT-P).
net_assets: ?f64 = null,
/// Morningstar-style category (e.g., "Large Blend")
category: ?[]const u8 = null,
/// Investment focus description
description: ?[]const u8 = null,
/// Top holdings
/// Top holdings (from NPORT-P).
holdings: ?[]const Holding = null,
/// Number of total holdings in the fund
/// Number of top holdings retained from NPORT-P.
total_holdings: ?u32 = null,
/// Sector allocations
/// Sector allocations (from NPORT-P).
sectors: ?[]const SectorWeight = null,
/// Dividend yield as decimal (e.g., 0.0111 for 1.11%)
/// Dividend yield as decimal. Currently unset.
dividend_yield: ?f64 = null,
/// Portfolio turnover as decimal
/// Portfolio turnover as decimal. Currently unset.
portfolio_turnover: ?f64 = null,
/// Fund inception date
/// Fund inception date (from Wikidata).
inception_date: ?Date = null,
/// Whether the fund is leveraged
/// Whether the fund is leveraged. Currently always false
/// pending prospectus parsing.
leveraged: bool = false,
/// Returns true if the profile contains meaningful ETF data.
/// Non-ETF symbols return empty profiles from Alpha Vantage.
/// Non-ETF symbols return empty profiles.
pub fn isEtf(self: EtfProfile) bool {
return self.expense_ratio != null or
self.net_assets != null or
@ -53,15 +59,14 @@ pub const EtfProfile = struct {
/// Free any owned fields on this profile.
///
/// Matches the inline cleanup previously inlined in
/// `src/commands/etf.zig`. Only `holdings` and `sectors` are
/// freed here the top-level optional strings (`name`,
/// `asset_class`, `category`, `description`) are borrowed from
/// the cache store's shared buffer in the provider-fetched path
/// and don't need freeing. If that changes (e.g., a provider
/// starts allocating each field separately), extend this
/// function accordingly.
/// Frees: `symbol`, `name`, holdings (each holding's strings +
/// the slice), sectors (each sector's name + the slice).
/// Other optional strings (`asset_class`) currently stay null
/// in the EDGAR-backed pipeline; if a future code path
/// allocates them, extend this function.
pub fn deinit(self: EtfProfile, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
if (self.name) |n| allocator.free(n);
if (self.holdings) |h| {
for (h) |holding| {
if (holding.symbol) |s| allocator.free(s);

View file

@ -246,9 +246,30 @@ pub const Client = struct {
};
const ms_uri_parse = stageElapsedMs(&t_stage, self.io);
// If the caller supplied a `User-Agent` in extra_headers,
// route it to `headers.user_agent.override` so it REPLACES
// Zig's default "zig/0.x.y (std.http)" UA rather than
// sitting alongside it. Some servers (notably SEC EDGAR)
// reject requests where a default-library UA is present
// even when a descriptive UA is also provided. Same logic
// applies to other "default-then-override" stdlib headers
// (Host, Accept-Encoding, Connection, Content-Type) but
// User-Agent is the only one the EDGAR/Wikidata politeness
// contract cares about today.
var std_headers: std.http.Client.Request.Headers = .{};
var filtered: std.ArrayList(std.http.Header) = .empty;
defer filtered.deinit(self.allocator);
for (extra_headers) |h| {
if (std.ascii.eqlIgnoreCase(h.name, "user-agent"))
std_headers.user_agent = .{ .override = h.value }
else
filtered.append(self.allocator, h) catch return error.OutOfMemory;
}
var req = self.http_client.request(method, uri, .{
.redirect_behavior = @enumFromInt(3),
.extra_headers = extra_headers,
.headers = std_headers,
.extra_headers = filtered.items,
}) catch |err| {
// The connect stage covers DNS lookup, TCP connect, and
// TLS handshake. Logging at warn level (rather than debug)
@ -365,6 +386,16 @@ pub const Client = struct {
switch (response.status) {
.ok => return response,
else => {
// Surface the rejection body many providers
// ship actionable diagnostic text in non-2xx
// bodies (Akamai/SEC's "Request Rate Threshold
// Exceeded" page, Polygon's "free tier exceeded
// 5 calls/min" hints, Wikidata's SPARQL syntax
// errors, etc.). Without this, the caller only
// sees the mapped HttpError variant
// (`Unauthorized`, `RateLimited`, ...) and has no
// path back to the upstream's reason.
log.warn("http rejection body status={d} body={s}", .{ @intFromEnum(response.status), response.body });
response.allocator.free(response.body);
if (response.etag) |e| response.allocator.free(e);
return switch (response.status) {

File diff suppressed because it is too large Load diff

View file

@ -29,7 +29,7 @@
//! primary read API.
//! P-number Property identifier in Wikidata (P249 = ticker symbol,
//! P414 = stock exchange, P31 = instance of, ...).
//! Q-number Entity identifier in Wikidata (Q40244 = ETF as a
//! Q-number Entity identifier in Wikidata (Q845477 = ETF as a
//! concept, Q13677 = NYSE the entity, Q312 = Apple Inc.
//! the entity).
//! wdt:Pxxx Truthy/direct property statement the simple shape.
@ -50,74 +50,35 @@
const std = @import("std");
const http = @import("../net/http.zig");
const fmt = @import("../format.zig");
const classification = @import("../models/classification.zig");
// `ClassificationRecord`, `geo`, and `geoFor` are domain-level
// types (any classification source could populate them), so they
// live in `models/classification.zig`. Re-export here so existing
// internal references compile unchanged.
pub const ClassificationRecord = classification.ClassificationRecord;
pub const geo = classification.geo;
pub const geoFor = classification.geoFor;
const sparql_endpoint = "https://query.wikidata.org/sparql";
/// Per-symbol classification record produced by parsing a Wikidata
/// SPARQL response. Fields are nullable when Wikidata has no value
/// for that property; the `source` field always emits per the
/// project's source-pure invariant.
pub const ClassificationRecord = struct {
symbol: []const u8, // owned
name: ?[]const u8 = null, // owned
sector: ?[]const u8 = null, // owned
industry: ?[]const u8 = null, // owned
/// ISO-3166 alpha-2 country code (e.g. "US", "GB", "DE").
country: ?[]const u8 = null, // owned
asset_class: ?[]const u8 = null, // owned
is_etf: bool = false,
/// YYYY-MM-DD; trimmed from Wikidata's ISO-8601 date.
inception_date: ?[]const u8 = null, // owned
/// Wikidata's P5531 the SEC CIK as a digit string. Wikidata
/// already zero-pads to 10 digits, matching the project-wide
/// CIK normalization convention.
cik: ?[]const u8 = null, // owned
/// YYYY-MM-DD when this provider ran, NOT when Wikidata last
/// updated the underlying entity.
as_of: []const u8, // owned
source: []const u8, // no default provenance always emitted
pub fn deinit(self: ClassificationRecord, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
if (self.name) |s| allocator.free(s);
if (self.sector) |s| allocator.free(s);
if (self.industry) |s| allocator.free(s);
if (self.country) |s| allocator.free(s);
if (self.asset_class) |s| allocator.free(s);
if (self.inception_date) |s| allocator.free(s);
if (self.cik) |s| allocator.free(s);
allocator.free(self.as_of);
allocator.free(self.source);
}
/// Free a slice of records, calling deinit on each element first.
pub fn freeSlice(allocator: std.mem.Allocator, recs: []const ClassificationRecord) void {
for (recs) |r| r.deinit(allocator);
allocator.free(recs);
}
};
/// Geo-bucket constants used by the country geo lookup. Kept as
/// named constants (rather than inline string literals in the map)
/// so callers can reference them without typo risk and the
/// taxonomy is tweakable in one place.
pub const geo = struct {
pub const us = "US";
pub const developed = "International Developed";
pub const emerging = "Emerging Markets";
pub const unknown = "Unknown";
};
/// Wikidata Q-IDs we test against `instance of` (P31) to classify
/// fund-shaped securities. Curated, not exhaustive.
/// Wikidata Q-IDs for fund-shaped securities. Used to set
/// `is_etf` and `asset_class` based on the `instance of` (P31)
/// statement on the security entity.
///
/// These were verified by querying Wikidata's `rdfs:label` for
/// each Q-ID (the previous list had stale/incorrect IDs that
/// matched unrelated entities like "marathon" and silently
/// disabled the is_etf detection for every ETF in the corpus).
const etf_q_ids = [_][]const u8{
"Q40244", // exchange-traded fund
"Q4118901", // exchange-traded bond fund
"Q104638128", // ETF tracking specific index
"Q845477", // exchange-traded fund
"Q1383049", // exchange-traded note
};
const mutual_fund_q_ids = [_][]const u8{
"Q1752230", // mutual fund
"Q11644608", // open-end fund
"Q791974", // mutual fund
"Q55598711", // mutual fund (alternate / class-of)
};
/// US stock exchanges accepted by the SPARQL exchange filter.
@ -136,81 +97,6 @@ const us_exchanges = [_][]const u8{
"wd:Q1666011",
};
/// Country-code-to-geo-bucket lookup. Wikidata returns ISO-3166
/// alpha-2 codes via P17 P297; we map them to the geo taxonomy
/// (`geo.us` / `geo.developed` / `geo.emerging` / `geo.unknown`).
///
/// MSCI conventions used as the developed/emerging split. Taiwan
/// and South Korea are MSCI-emerging despite FTSE classifying them
/// developed. Israel is MSCI-developed (upgraded 2010). Canada is
/// folded into International Developed (some users prefer separate
/// Canada bucket; override in `metadata.srf` if so).
const country_to_geo = std.StaticStringMap([]const u8).initComptime(.{
// United States
.{ "US", geo.us },
// Alpha-3 fallback for entries that use the longer form.
.{ "USA", geo.us },
// International Developed Europe ex-CIS
.{ "GB", geo.developed },
.{ "DE", geo.developed },
.{ "FR", geo.developed },
.{ "NL", geo.developed },
.{ "CH", geo.developed },
.{ "SE", geo.developed },
.{ "DK", geo.developed },
.{ "NO", geo.developed },
.{ "FI", geo.developed },
.{ "IT", geo.developed },
.{ "ES", geo.developed },
.{ "BE", geo.developed },
.{ "AT", geo.developed },
.{ "IE", geo.developed },
.{ "LU", geo.developed },
.{ "PT", geo.developed },
.{ "GR", geo.developed },
.{ "IS", geo.developed },
// International Developed Asia-Pacific + Israel + Canada
.{ "JP", geo.developed },
.{ "AU", geo.developed },
.{ "NZ", geo.developed },
.{ "SG", geo.developed },
.{ "HK", geo.developed },
.{ "IL", geo.developed },
.{ "CA", geo.developed },
// Emerging Markets (MSCI)
.{ "CN", geo.emerging },
.{ "TW", geo.emerging },
.{ "KR", geo.emerging },
.{ "IN", geo.emerging },
.{ "BR", geo.emerging },
.{ "MX", geo.emerging },
.{ "RU", geo.emerging },
.{ "TR", geo.emerging },
.{ "ZA", geo.emerging },
.{ "TH", geo.emerging },
.{ "MY", geo.emerging },
.{ "ID", geo.emerging },
.{ "PH", geo.emerging },
.{ "VN", geo.emerging },
.{ "AR", geo.emerging },
.{ "CL", geo.emerging },
.{ "CO", geo.emerging },
.{ "PE", geo.emerging },
.{ "EG", geo.emerging },
});
/// Map an ISO-3166 alpha-2 country code to one of the geo buckets.
/// Null/empty input or an unknown code returns `geo.unknown` so the
/// user can override in `metadata.srf`.
pub fn geoFor(iso2: ?[]const u8) []const u8 {
const code = iso2 orelse return geo.unknown;
if (code.len == 0) return geo.unknown;
return country_to_geo.get(code) orelse geo.unknown;
}
// Wikidata provider state (file-as-struct)
//
// Callers do `const wikidata = @import("providers/Wikidata.zig");`
@ -336,6 +222,122 @@ fn buildQuery(allocator: std.mem.Allocator, symbols: []const []const u8) ![]u8 {
return aw.toOwnedSlice();
}
/// Canonical sector taxonomy (GICS-aligned 11-sector model).
/// Wikidata's `wdt:P452` (industry) values are noisy, often
/// returning multiple long-tail sub-industries per company in
/// arbitrary SPARQL order. `canonicalizeSector` maps each raw
/// industry label to one of these buckets so the user gets a
/// stable sector choice rather than whichever sub-industry
/// SPARQL surfaced first.
///
/// `parse` decides whether a record's sector is already
/// canonical by comparing it against every public declaration
/// in this struct, so only sector labels belong here.
pub const sector = struct {
    pub const technology = "Technology";
    pub const communication_services = "Communication Services";
    pub const consumer_cyclical = "Consumer Cyclical";
    pub const consumer_defensive = "Consumer Defensive";
    pub const healthcare = "Healthcare";
    pub const financial_services = "Financial Services";
    pub const energy = "Energy";
    pub const industrials = "Industrials";
    pub const basic_materials = "Basic Materials";
    pub const real_estate = "Real Estate";
    pub const utilities = "Utilities";
};
/// Map a Wikidata `wdt:P452` industry label (any letter case) to
/// one of the canonical `sector` buckets, or return null when no
/// keyword matches or the label is longer than 128 bytes. On
/// null the caller falls back to the raw industry string.
///
/// Matching is a plain substring test against the ASCII-lowercased
/// label. Rules are tried top to bottom and the FIRST sector with
/// a matching keyword wins, so rule order is significant for
/// labels that hit more than one sector (e.g. "health insurance"
/// resolves to Healthcare because that rule precedes Financial
/// Services and its "insurance" keyword).
fn canonicalizeSector(industry: []const u8) ?[]const u8 {
    const Rule = struct {
        label: []const u8,
        keywords: []const []const u8,
    };
    const rules = [_]Rule{
        // Technology. Covers both "tech-as-the-product"
        // (semiconductors, software, hardware, computing) and
        // "tech-as-the-platform" (web hosting, cloud, internet
        // services, SaaS, data centers). Amazon's Wikidata
        // industry is "web hosting service"; without these
        // keywords it would fall through to Consumer Cyclical via
        // "online retail" / "e-commerce", which is valid but less
        // useful for a portfolio-level sector breakdown.
        .{ .label = sector.technology, .keywords = &.{
            "semiconductor",
            "software",
            "computer hardware",
            "consumer electronics",
            "internet company",
            "internet service",
            "technology industry",
            "computing",
            "cloud",
            "web hosting",
            "saas",
            "software as a service",
            "data center",
            "information technology",
        } },
        // Communication Services: telecom and media. Labels
        // containing "internet company" / "internet service" have
        // already been claimed by Technology above.
        .{ .label = sector.communication_services, .keywords = &.{ "telecom", "broadcast", "media industry", "publishing", "advertising", "social network", "video game" } },
        // Healthcare.
        .{ .label = sector.healthcare, .keywords = &.{ "pharmaceutical", "biotech", "medical", "healthcare", "health care", "health insurance", "drug" } },
        // Financial Services.
        .{ .label = sector.financial_services, .keywords = &.{ "bank", "insurance", "asset management", "financial services", "financial industry", "investment", "brokerage", "credit card" } },
        // Energy.
        .{ .label = sector.energy, .keywords = &.{ "oil and gas", "petroleum", "natural gas", "renewable energy", "solar power", "wind power", "energy industry", "coal" } },
        // Real Estate / REITs.
        .{ .label = sector.real_estate, .keywords = &.{ "real estate", "reit", "property" } },
        // Utilities.
        .{ .label = sector.utilities, .keywords = &.{ "electric utility", "water utility", "gas utility", "utilities", "power generation" } },
        // Basic Materials.
        .{ .label = sector.basic_materials, .keywords = &.{ "chemical industry", "mining", "metals", "steel", "basic materials", "forestry", "paper industry" } },
        // Consumer Cyclical / Discretionary: apparel, retail,
        // automotive, hospitality.
        .{ .label = sector.consumer_cyclical, .keywords = &.{ "retail", "clothing", "apparel", "automotive", "automobile", "hospitality", "restaurant", "luxury", "consumer cyclical", "consumer discretionary", "leisure", "e-commerce" } },
        // Consumer Defensive / Staples: food, beverage, tobacco,
        // household products.
        .{ .label = sector.consumer_defensive, .keywords = &.{ "food industry", "beverage", "tobacco", "household products", "consumer staples", "consumer defensive", "grocery", "personal care" } },
        // Industrials. Deliberately last so a generic keyword such
        // as "industrial sector" cannot win over a more specific
        // bucket when both appear in the same label.
        .{ .label = sector.industrials, .keywords = &.{ "aerospace", "defense industry", "construction", "machinery", "transportation", "logistics", "shipping", "airline", "railway", "industrial sector", "industrials" } },
    };

    // Wikidata mixes title case ("Semiconductor Industry") with
    // lowercase ("software development"); normalize into a fixed
    // stack buffer so the keyword table can stay lowercase.
    var scratch: [128]u8 = undefined;
    if (industry.len > scratch.len) return null;
    const lowered = std.ascii.lowerString(scratch[0..industry.len], industry);

    for (rules) |rule| {
        if (containsAny(lowered, rule.keywords)) return rule.label;
    }
    return null;
}
/// Report whether at least one entry of `needles` occurs in
/// `haystack` as a substring. The comparison is case-sensitive,
/// so callers lowercase first when they need case-insensitive
/// matching. An empty `needles` list yields false.
fn containsAny(haystack: []const u8, needles: []const []const u8) bool {
    return for (needles) |candidate| {
        if (std.mem.indexOf(u8, haystack, candidate)) |_| break true;
    } else false;
}
/// Parse the SPARQL JSON response into `ClassificationRecord` values.
/// Multiple bindings for the same ticker (e.g. multiple `instance of`
/// values) get merged into one record first-non-null wins.
@ -408,10 +410,36 @@ fn parse(
rec.name = try allocator.dupe(u8, label);
}
}
if (rec.industry == null) {
if (sparqlValue(obj, "industryLabel")) |ind| {
if (sparqlValue(obj, "industryLabel")) |ind| {
// Always remember the first industry verbatim (debug
// / display only).
if (rec.industry == null) {
rec.industry = try allocator.dupe(u8, ind);
rec.sector = try allocator.dupe(u8, ind);
}
// For sector, prefer a canonical mapping. Multiple
// bindings can fire for the same security (Wikidata
// returns one row per industry value), so we keep
// overwriting until we find a canonical match. Once
// we have a canonical sector, we don't downgrade to
// a non-canonical one.
const sector_is_canonical = blk: {
if (rec.sector) |current| {
inline for (@typeInfo(sector).@"struct".decls) |d| {
if (std.mem.eql(u8, current, @field(sector, d.name))) break :blk true;
}
}
break :blk false;
};
if (!sector_is_canonical) {
if (canonicalizeSector(ind)) |canon| {
if (rec.sector) |old| allocator.free(old);
rec.sector = try allocator.dupe(u8, canon);
} else if (rec.sector == null) {
// No canonical match yet; keep the raw
// label as a fallback so downstream display
// has something rather than null.
rec.sector = try allocator.dupe(u8, ind);
}
}
}
if (rec.country == null) {
@ -433,7 +461,7 @@ fn parse(
}
if (sparqlValue(obj, "instance")) |inst_iri| {
// The "instance" value is a Q-ID URI like
// "http://www.wikidata.org/entity/Q40244". Extract the
// "http://www.wikidata.org/entity/Q845477". Extract the
// Q-ID suffix and test against our known sets.
const last_slash = std.mem.lastIndexOfScalar(u8, inst_iri, '/');
const q_id = if (last_slash) |i| inst_iri[i + 1 ..] else inst_iri;
@ -441,16 +469,15 @@ fn parse(
if (std.mem.eql(u8, q_id, target)) {
rec.is_etf = true;
if (rec.asset_class == null) {
rec.asset_class = try allocator.dupe(u8, "ETF (uncategorized)");
rec.asset_class = try allocator.dupe(u8, "ETF");
}
break;
}
}
for (mutual_fund_q_ids) |target| {
if (std.mem.eql(u8, q_id, target)) {
rec.is_etf = true;
if (rec.asset_class == null) {
rec.asset_class = try allocator.dupe(u8, "Mutual Fund (uncategorized)");
rec.asset_class = try allocator.dupe(u8, "Mutual Fund");
}
break;
}
@ -544,8 +571,11 @@ test "parse: AAPL fixture round-trips name + industry + country" {
try std.testing.expectEqual(@as(usize, 1), recs.len);
try std.testing.expectEqualStrings("AAPL", recs[0].symbol);
try std.testing.expectEqualStrings("Apple Inc.", recs[0].name.?);
// Industry is preserved verbatim from Wikidata (debug /
// display only); sector is canonicalized via the keyword
// taxonomy.
try std.testing.expectEqualStrings("consumer electronics", recs[0].industry.?);
try std.testing.expectEqualStrings("consumer electronics", recs[0].sector.?);
try std.testing.expectEqualStrings("Technology", recs[0].sector.?);
try std.testing.expectEqualStrings("US", recs[0].country.?);
try std.testing.expect(!recs[0].is_etf);
}
@ -560,7 +590,7 @@ test "parse: ETF fixture sets is_etf=true and asset_class" {
\\ "ticker": {"type": "literal", "value": "VTI"},
\\ "security": {"type": "uri", "value": "http://www.wikidata.org/entity/Q1809462"},
\\ "securityLabel": {"type": "literal", "value": "Vanguard Total Stock Market ETF"},
\\ "instance": {"type": "uri", "value": "http://www.wikidata.org/entity/Q40244"}
\\ "instance": {"type": "uri", "value": "http://www.wikidata.org/entity/Q845477"}
\\ }
\\ ]
\\ }
@ -580,7 +610,7 @@ test "parse: ETF fixture sets is_etf=true and asset_class" {
try std.testing.expectEqual(@as(usize, 1), recs.len);
try std.testing.expect(recs[0].is_etf);
try std.testing.expectEqualStrings("ETF (uncategorized)", recs[0].asset_class.?);
try std.testing.expectEqualStrings("ETF", recs[0].asset_class.?);
}
test "parse: bindings for symbols not requested are dropped" {
@ -605,21 +635,286 @@ test "parse: bindings for symbols not requested are dropped" {
try std.testing.expectEqual(@as(usize, 0), recs.len);
}
test "geoFor maps known ISO-3166 codes to bucket" {
try std.testing.expectEqualStrings(geo.us, geoFor("US"));
try std.testing.expectEqualStrings(geo.us, geoFor("USA"));
try std.testing.expectEqualStrings(geo.developed, geoFor("GB"));
try std.testing.expectEqualStrings(geo.developed, geoFor("DE"));
try std.testing.expectEqualStrings(geo.developed, geoFor("CA"));
try std.testing.expectEqualStrings(geo.developed, geoFor("IL"));
try std.testing.expectEqualStrings(geo.emerging, geoFor("CN"));
try std.testing.expectEqualStrings(geo.emerging, geoFor("TW"));
try std.testing.expectEqualStrings(geo.emerging, geoFor("KR"));
test "parse: multiple industry bindings canonicalize to most-specific sector (NKE shape)" {
// NKE has three industry values in Wikidata: "industrial
// sector", "retail", "clothing industry". Two of those
// canonicalize to Consumer Cyclical and one to Industrials.
// The parser should pick a canonical sector once it sees
// one and not downgrade. Order in this fixture matches what
// SPARQL returned for NKE during enrich testing.
const fixture =
\\{
\\ "head": {"vars": ["ticker", "security", "securityLabel", "industryLabel", "countryCode"]},
\\ "results": {
\\ "bindings": [
\\ {"ticker": {"type": "literal", "value": "NKE"},
\\ "security": {"type": "uri", "value": "http://example/Q14790"},
\\ "securityLabel": {"type": "literal", "value": "Nike"},
\\ "industryLabel": {"type": "literal", "value": "industrial sector"},
\\ "countryCode": {"type": "literal", "value": "US"}},
\\ {"ticker": {"type": "literal", "value": "NKE"},
\\ "security": {"type": "uri", "value": "http://example/Q14790"},
\\ "securityLabel": {"type": "literal", "value": "Nike"},
\\ "industryLabel": {"type": "literal", "value": "retail"},
\\ "countryCode": {"type": "literal", "value": "US"}},
\\ {"ticker": {"type": "literal", "value": "NKE"},
\\ "security": {"type": "uri", "value": "http://example/Q14790"},
\\ "securityLabel": {"type": "literal", "value": "Nike"},
\\ "industryLabel": {"type": "literal", "value": "clothing industry"},
\\ "countryCode": {"type": "literal", "value": "US"}}
\\ ]
\\ }
\\}
;
const allocator = std.testing.allocator;
const expected = [_][]const u8{"NKE"};
const recs = try parse(std.testing.io, allocator, fixture, &expected);
defer {
for (recs) |*r| {
var m = r.*;
m.deinit(allocator);
}
allocator.free(recs);
}
try std.testing.expectEqual(@as(usize, 1), recs.len);
// Sector: first binding ("industrial sector") sets
// Industrials. Second binding ("retail") canonicalizes to
// Consumer Cyclical and (per current logic) overrides
// because "industrial sector" was the LAST keyword fallback.
// Once a canonical sector is set, subsequent canonical
// matches don't downgrade (Consumer Cyclical stays put for
// "clothing industry").
//
// The expected outcome is Consumer Cyclical OR Industrials
// depending on binding order — but the user-visible
// answer should always be a canonical sector, NOT a raw
// Wikidata label like "industrial sector". This test
// asserts the canonical-only invariant.
const s = recs[0].sector.?;
try std.testing.expect(
std.mem.eql(u8, s, sector.industrials) or
std.mem.eql(u8, s, sector.consumer_cyclical),
);
// Industry is the FIRST raw label (preserves the original
// Wikidata data for debug/display).
try std.testing.expectEqualStrings("industrial sector", recs[0].industry.?);
}
test "geoFor returns Unknown for null/empty/unmapped" {
try std.testing.expectEqualStrings(geo.unknown, geoFor(null));
try std.testing.expectEqualStrings(geo.unknown, geoFor(""));
try std.testing.expectEqualStrings(geo.unknown, geoFor("ZZ")); // unassigned ISO-2
try std.testing.expectEqualStrings(geo.unknown, geoFor("XX"));
test "parse: multiple industry bindings — canonical match overrides earlier raw-label fallback" {
    // Two bindings for the same ticker. The first industry label
    // ("xyz industry") has no canonical sector, so the parser can
    // only fall back to the raw label; the second ("software
    // industry") is canonical and must replace that fallback.
    const gpa = std.testing.allocator;
    const wanted_tickers = [_][]const u8{"TEST"};
    const sparql_json =
        \\{
        \\ "head": {"vars": ["ticker", "security", "securityLabel", "industryLabel", "countryCode"]},
        \\ "results": {
        \\ "bindings": [
        \\ {"ticker": {"type": "literal", "value": "TEST"},
        \\ "security": {"type": "uri", "value": "http://example/Q1"},
        \\ "securityLabel": {"type": "literal", "value": "Test Co"},
        \\ "industryLabel": {"type": "literal", "value": "xyz industry"},
        \\ "countryCode": {"type": "literal", "value": "US"}},
        \\ {"ticker": {"type": "literal", "value": "TEST"},
        \\ "security": {"type": "uri", "value": "http://example/Q1"},
        \\ "securityLabel": {"type": "literal", "value": "Test Co"},
        \\ "industryLabel": {"type": "literal", "value": "software industry"},
        \\ "countryCode": {"type": "literal", "value": "US"}}
        \\ ]
        \\ }
        \\}
    ;

    const parsed_recs = try parse(std.testing.io, gpa, sparql_json, &wanted_tickers);
    defer {
        // Each record owns its strings; deinit needs a mutable copy.
        for (parsed_recs) |rec| {
            var owned = rec;
            owned.deinit(gpa);
        }
        gpa.free(parsed_recs);
    }

    try std.testing.expectEqual(@as(usize, 1), parsed_recs.len);
    const only = parsed_recs[0];
    try std.testing.expectEqualStrings(sector.technology, only.sector.?);
    // `industry` keeps the first raw label it saw.
    try std.testing.expectEqualStrings("xyz industry", only.industry.?);
}
test "parse: canonical match never downgrades to non-canonical" {
    // Binding 1: "software industry" -> Technology (canonical).
    // Binding 2: "xyz industry" -> no canonical match.
    // The sector must remain Technology rather than being replaced
    // by the later raw label.
    const gpa = std.testing.allocator;
    const wanted_tickers = [_][]const u8{"TEST"};
    const sparql_json =
        \\{
        \\ "head": {"vars": ["ticker", "security", "securityLabel", "industryLabel", "countryCode"]},
        \\ "results": {
        \\ "bindings": [
        \\ {"ticker": {"type": "literal", "value": "TEST"},
        \\ "security": {"type": "uri", "value": "http://example/Q1"},
        \\ "securityLabel": {"type": "literal", "value": "Test Co"},
        \\ "industryLabel": {"type": "literal", "value": "software industry"},
        \\ "countryCode": {"type": "literal", "value": "US"}},
        \\ {"ticker": {"type": "literal", "value": "TEST"},
        \\ "security": {"type": "uri", "value": "http://example/Q1"},
        \\ "securityLabel": {"type": "literal", "value": "Test Co"},
        \\ "industryLabel": {"type": "literal", "value": "xyz industry"},
        \\ "countryCode": {"type": "literal", "value": "US"}}
        \\ ]
        \\ }
        \\}
    ;

    const parsed_recs = try parse(std.testing.io, gpa, sparql_json, &wanted_tickers);
    defer {
        // Each record owns its strings; deinit needs a mutable copy.
        for (parsed_recs) |rec| {
            var owned = rec;
            owned.deinit(gpa);
        }
        gpa.free(parsed_recs);
    }

    try std.testing.expectEqual(@as(usize, 1), parsed_recs.len);
    try std.testing.expectEqualStrings(sector.technology, parsed_recs[0].sector.?);
}
// canonicalizeSector
test "canonicalizeSector: technology keywords map to Technology" {
    // Every label below must canonicalize to Technology.
    const inputs = [_][]const u8{
        "semiconductor industry",
        "software development",
        "software industry",
        "Technology Industry",
        "computing",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.technology, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: tech-platform keywords (cloud / web hosting / SaaS) map to Technology" {
    // AMZN regression: Wikidata lists "web hosting service" as
    // Amazon's first industry triple. Before the keyword list was
    // expanded that label fell through to Consumer Cyclical via
    // "online retail" / "e-commerce"; now web hosting -> Technology
    // directly, along with the other platform-style labels below.
    const inputs = [_][]const u8{
        "web hosting service",
        "cloud computing",
        "cloud services",
        "internet service provider",
        "internet services",
        "SaaS",
        "software as a service",
        "data center",
        "information technology",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.technology, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: e-commerce still maps to Consumer Cyclical (priority order matters)" {
    // Guards against the expanded Technology keyword list
    // swallowing Consumer Cyclical labels: none of these contain a
    // Technology keyword, so they must still land in Consumer
    // Cyclical.
    const inputs = [_][]const u8{
        "e-commerce",
        "online retail",
        "retail",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.consumer_cyclical, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: communication services" {
    // Telecom, media, games and publishing labels.
    const inputs = [_][]const u8{
        "telecom",
        "media industry",
        "video game industry",
        "publishing",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.communication_services, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: healthcare" {
    // Pharma, biotech, devices and generic healthcare labels.
    const inputs = [_][]const u8{
        "pharmaceutical industry",
        "biotech",
        "medical device",
        "healthcare industry",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.healthcare, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: financial services" {
    // Banking, insurance and asset-management labels.
    const inputs = [_][]const u8{
        "bank",
        "insurance company",
        "asset management",
        "financial services",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.financial_services, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: energy" {
    // Fossil and renewable energy labels.
    const inputs = [_][]const u8{
        "oil and gas industry",
        "petroleum industry",
        "renewable energy",
        "solar power",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.energy, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: real estate" {
    // Includes the upper-case "REIT" abbreviation.
    const inputs = [_][]const u8{
        "real estate",
        "REIT",
        "commercial real estate",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.real_estate, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: utilities" {
    // Electric, water and generation labels.
    const inputs = [_][]const u8{
        "electric utility",
        "water utility",
        "power generation",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.utilities, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: basic materials" {
    // Chemicals, mining and steel labels.
    const inputs = [_][]const u8{
        "chemical industry",
        "mining",
        "steel industry",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.basic_materials, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: consumer cyclical (NKE / AMZN keywords)" {
    // Retail, apparel, autos, e-commerce and hospitality labels.
    const inputs = [_][]const u8{
        "retail",
        "clothing industry",
        "automotive industry",
        "e-commerce",
        "hospitality",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.consumer_cyclical, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: consumer defensive" {
    // Food, beverage, tobacco and household-goods labels.
    const inputs = [_][]const u8{
        "food industry",
        "beverage industry",
        "tobacco",
        "household products",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.consumer_defensive, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: industrials (last-fallback for industrial sector)" {
    // Aerospace / transport labels plus the generic
    // "industrial sector" label.
    const inputs = [_][]const u8{
        "aerospace",
        "transportation",
        "airline",
        "industrial sector",
    };
    for (inputs) |label| {
        try std.testing.expectEqualStrings(sector.industrials, canonicalizeSector(label).?);
    }
}
test "canonicalizeSector: NKE 'industrial sector' is overridden by 'clothing industry' in parser" {
    // NKE's Wikidata bindings include both "industrial sector"
    // (-> Industrials) and "clothing industry" (-> Consumer
    // Cyclical). This test only pins the per-label keyword mapping;
    // how the parser chooses between the two bindings is covered
    // by the `parse:` tests.
    const Case = struct { label: []const u8, want: []const u8 };
    const cases = [_]Case{
        .{ .label = "clothing industry", .want = sector.consumer_cyclical },
        .{ .label = "industrial sector", .want = sector.industrials },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.want, canonicalizeSector(case.label).?);
    }
}
test "canonicalizeSector: returns null for unknown / non-industry strings" {
    // None of these may be mapped to a sector, including the empty
    // string.
    const inputs = [_][]const u8{
        "International Standard Industrial Classification",
        "Unknown",
        "",
        "xyzzy",
    };
    for (inputs) |label| {
        try std.testing.expect(canonicalizeSector(label) == null);
    }
}
test "canonicalizeSector: input longer than 128 bytes returns null (no false matches)" {
    // The lowercasing scratch buffer inside canonicalizeSector is
    // 128 bytes. A longer label must yield null instead of being
    // matched against a truncated copy. Real Wikidata labels are
    // far shorter; this is a defensive bound.
    const oversized = [_]u8{'a'} ** 200;
    try std.testing.expect(canonicalizeSector(&oversized) == null);
}

View file

@ -1,405 +0,0 @@
//! Alpha Vantage API provider -- used for ETF profiles (free endpoint).
//! API docs: https://www.alphavantage.co/documentation/
//!
//! Free tier: 25 requests/day. Only used for data other providers don't have.
//!
//! ETF Profile endpoint: GET /query?function=ETF_PROFILE&symbol=X&apikey=KEY
//! Returns net assets, expense ratio, sector weights, top holdings, etc.
const std = @import("std");
const http = @import("../net/http.zig");
const RateLimiter = @import("../net/RateLimiter.zig");
const Date = @import("../Date.zig");
const EtfProfile = @import("../models/etf_profile.zig").EtfProfile;
const Holding = @import("../models/etf_profile.zig").Holding;
const SectorWeight = @import("../models/etf_profile.zig").SectorWeight;
const json_utils = @import("json_utils.zig");
const jsonStr = json_utils.jsonStr;
const base_url = "https://www.alphavantage.co/query";
/// Company overview data from Alpha Vantage OVERVIEW endpoint.
///
/// Produced by `parseCompanyOverview`. Every optional field is a
/// copy made with the allocator passed to the parser, so the caller
/// frees each non-null field; `symbol` is the caller's own slice
/// and is not copied.
pub const CompanyOverview = struct {
    /// Symbol the overview was requested for (borrowed from the caller).
    symbol: []const u8,
    /// Value of the response's "Name" key, if present.
    name: ?[]const u8 = null,
    /// Value of the response's "Sector" key, if present.
    sector: ?[]const u8 = null,
    /// Value of the response's "Industry" key, if present.
    industry: ?[]const u8 = null,
    /// Value of the response's "Country" key, if present.
    country: ?[]const u8 = null,
    /// Value of the response's "MarketCapitalization" key, kept as
    /// the raw string rather than parsed into a number.
    market_cap: ?[]const u8 = null,
    /// Value of the response's "AssetType" key, if present.
    asset_type: ?[]const u8 = null,
};
// -- Tests --
test "parseEtfProfileResponse basic" {
    // A fully-populated ETF_PROFILE payload: scalar fields arrive
    // as strings, plus two sectors and two holdings.
    const gpa = std.testing.allocator;
    const payload =
        \\{
        \\ "net_assets": "323000000000",
        \\ "net_expense_ratio": "0.03",
        \\ "portfolio_turnover": "4.00",
        \\ "dividend_yield": "1.25",
        \\ "inception_date": "2010-09-09",
        \\ "leveraged": "NO",
        \\ "sectors": [
        \\ {"sector": "Technology", "weight": "31.50"},
        \\ {"sector": "Healthcare", "weight": "12.80"}
        \\ ],
        \\ "holdings": [
        \\ {"symbol": "AAPL", "description": "Apple Inc", "weight": "7.10"},
        \\ {"symbol": "MSFT", "description": "Microsoft Corp", "weight": "6.50"}
        \\ ]
        \\}
    ;

    const profile = try parseEtfProfileResponse(gpa, payload, "VTI");
    // The parser hands ownership of the slices and the strings
    // inside them to the caller.
    defer {
        if (profile.sectors) |sector_list| {
            for (sector_list) |entry| gpa.free(entry.name);
            gpa.free(sector_list);
        }
        if (profile.holdings) |holding_list| {
            for (holding_list) |entry| {
                if (entry.symbol) |ticker| gpa.free(ticker);
                gpa.free(entry.name);
            }
            gpa.free(holding_list);
        }
    }

    // Scalar fields.
    try std.testing.expectEqualStrings("VTI", profile.symbol);
    try std.testing.expectApproxEqAbs(@as(f64, 323000000000), profile.net_assets.?, 1.0);
    try std.testing.expectApproxEqAbs(@as(f64, 0.03), profile.expense_ratio.?, 0.001);
    try std.testing.expectApproxEqAbs(@as(f64, 4.0), profile.portfolio_turnover.?, 0.01);
    try std.testing.expectApproxEqAbs(@as(f64, 1.25), profile.dividend_yield.?, 0.01);
    try std.testing.expect(profile.inception_date != null);
    try std.testing.expect(!profile.leveraged);

    // Sector weights.
    const got_sectors = profile.sectors.?;
    try std.testing.expectEqual(@as(usize, 2), got_sectors.len);
    try std.testing.expectEqualStrings("Technology", got_sectors[0].name);
    try std.testing.expectApproxEqAbs(@as(f64, 31.50), got_sectors[0].weight, 0.01);

    // Holdings.
    const got_holdings = profile.holdings.?;
    try std.testing.expectEqual(@as(usize, 2), got_holdings.len);
    try std.testing.expectEqualStrings("AAPL", got_holdings[0].symbol.?);
    try std.testing.expectEqualStrings("Apple Inc", got_holdings[0].name);
    try std.testing.expectEqual(@as(u32, 2), profile.total_holdings.?);
}
test "parseEtfProfileResponse leveraged ETF" {
    // "leveraged": "YES" must set the flag; the empty arrays still
    // produce owned (zero-length) slices that need freeing.
    const gpa = std.testing.allocator;
    const payload =
        \\{
        \\ "net_assets": "5000000000",
        \\ "leveraged": "YES",
        \\ "sectors": [],
        \\ "holdings": []
        \\}
    ;

    const profile = try parseEtfProfileResponse(gpa, payload, "TQQQ");
    defer {
        if (profile.sectors) |sector_list| gpa.free(sector_list);
        if (profile.holdings) |holding_list| gpa.free(holding_list);
    }

    try std.testing.expect(profile.leveraged);
}
test "parseEtfProfileResponse error response" {
    // An "Error Message" key maps to error.RequestFailed.
    const payload =
        \\{"Error Message": "Invalid API call"}
    ;
    try std.testing.expectError(
        error.RequestFailed,
        parseEtfProfileResponse(std.testing.allocator, payload, "BAD"),
    );
}
test "parseEtfProfileResponse rate limited" {
    // A "Note" key is Alpha Vantage's rate-limit signal and maps to
    // error.RateLimited.
    const payload =
        \\{"Note": "Thank you for using Alpha Vantage! Please visit..."}
    ;
    try std.testing.expectError(
        error.RateLimited,
        parseEtfProfileResponse(std.testing.allocator, payload, "SPY"),
    );
}
test "parseCompanyOverview basic" {
    // A complete OVERVIEW payload: every optional field should be
    // copied out verbatim.
    const gpa = std.testing.allocator;
    const payload =
        \\{
        \\ "Symbol": "AAPL",
        \\ "Name": "Apple Inc",
        \\ "Sector": "Technology",
        \\ "Industry": "Consumer Electronics",
        \\ "Country": "USA",
        \\ "MarketCapitalization": "2900000000000",
        \\ "AssetType": "Common Stock"
        \\}
    ;

    const overview = try parseCompanyOverview(gpa, payload, "AAPL");
    // Every non-null optional field is an owned copy.
    defer {
        if (overview.name) |text| gpa.free(text);
        if (overview.sector) |text| gpa.free(text);
        if (overview.industry) |text| gpa.free(text);
        if (overview.country) |text| gpa.free(text);
        if (overview.market_cap) |text| gpa.free(text);
        if (overview.asset_type) |text| gpa.free(text);
    }

    try std.testing.expectEqualStrings("AAPL", overview.symbol);
    try std.testing.expectEqualStrings("Apple Inc", overview.name.?);
    try std.testing.expectEqualStrings("Technology", overview.sector.?);
    try std.testing.expectEqualStrings("Consumer Electronics", overview.industry.?);
    try std.testing.expectEqualStrings("USA", overview.country.?);
    try std.testing.expectEqualStrings("2900000000000", overview.market_cap.?);
    try std.testing.expectEqualStrings("Common Stock", overview.asset_type.?);
}
test "parseCompanyOverview missing fields" {
    // Only "Symbol" is present, so the parse succeeds but the
    // descriptive fields stay null.
    const payload =
        \\{
        \\ "Symbol": "XYZ"
        \\}
    ;
    const overview = try parseCompanyOverview(std.testing.allocator, payload, "XYZ");

    try std.testing.expect(overview.name == null);
    try std.testing.expect(overview.sector == null);
    try std.testing.expect(overview.industry == null);
}
test "parseCompanyOverview empty body returns NotFound" {
    // For unrecognized symbols AlphaVantage answers HTTP 200 with a
    // bare `{}`: no "Error Message", nothing at all. That must
    // surface as NotFound. Succeeding with an all-null overview
    // would let downstream code render "Sector: Unknown, Geo: US,
    // Asset class: US Large Cap" for a ticker that does not exist.
    const empty_object = "{}";
    try std.testing.expectError(
        error.NotFound,
        parseCompanyOverview(std.testing.allocator, empty_object, "ZZQQXX99"),
    );
}
/// Alpha Vantage client: holds the API key, an HTTP client and a
/// rate limiter built with `RateLimiter.perDay(io, 25)`.
pub const AlphaVantage = struct {
    api_key: []const u8,
    client: http.Client,
    rate_limiter: RateLimiter,
    allocator: std.mem.Allocator,

    /// Build a client. `api_key` is stored as given (not copied).
    pub fn init(io: std.Io, allocator: std.mem.Allocator, api_key: []const u8) AlphaVantage {
        return .{
            .api_key = api_key,
            .client = http.Client.init(io, allocator),
            .rate_limiter = RateLimiter.perDay(io, 25),
            .allocator = allocator,
        };
    }

    /// Release the underlying HTTP client.
    pub fn deinit(self: *AlphaVantage) void {
        self.client.deinit();
    }

    /// Fetch company overview (sector, industry, country) for a stock symbol.
    /// Acquires the rate limiter before the request; the response is
    /// parsed with `parseCompanyOverview` using `allocator`.
    pub fn fetchCompanyOverview(
        self: *AlphaVantage,
        allocator: std.mem.Allocator,
        symbol: []const u8,
    ) !CompanyOverview {
        self.rate_limiter.acquire();

        const request_url = try http.buildUrl(allocator, base_url, &.{
            .{ "function", "OVERVIEW" },
            .{ "symbol", symbol },
            .{ "apikey", self.api_key },
        });
        defer allocator.free(request_url);

        var reply = try self.client.get(request_url);
        defer reply.deinit();

        return parseCompanyOverview(allocator, reply.body, symbol);
    }

    /// Fetch ETF profile data: expense ratio, holdings, sectors, etc.
    /// Acquires the rate limiter before the request; the response is
    /// parsed with `parseEtfProfileResponse` using `allocator`.
    pub fn fetchEtfProfile(
        self: *AlphaVantage,
        allocator: std.mem.Allocator,
        symbol: []const u8,
    ) !EtfProfile {
        self.rate_limiter.acquire();

        const request_url = try http.buildUrl(allocator, base_url, &.{
            .{ "function", "ETF_PROFILE" },
            .{ "symbol", symbol },
            .{ "apikey", self.api_key },
        });
        defer allocator.free(request_url);

        var reply = try self.client.get(request_url);
        defer reply.deinit();

        return parseEtfProfileResponse(allocator, reply.body, symbol);
    }
};
// -- JSON parsing --
/// Parse an Alpha Vantage ETF_PROFILE response body into an `EtfProfile`.
///
/// `symbol` is stored in the result as given (not copied). Sector
/// names, holding names and holding symbols are duplicated with
/// `allocator`; the caller owns the returned `sectors` / `holdings`
/// slices and the strings inside them. Only the first 20 holdings
/// are kept, while `total_holdings` records the full array length.
///
/// Errors:
/// - `error.ParseError`: body is not valid JSON, or its root is not
///   a JSON object.
/// - `error.RequestFailed`: the response has an "Error Message" key.
/// - `error.RateLimited`: the response has a "Note" or "Information" key.
/// - `error.OutOfMemory`: an allocation failed; everything allocated
///   by this call is released before returning.
fn parseEtfProfileResponse(
    allocator: std.mem.Allocator,
    body: []const u8,
    symbol: []const u8,
) !EtfProfile {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, body, .{}) catch
        return error.ParseError;
    defer parsed.deinit();

    // A valid-JSON but non-object root (array, string, ...) must not
    // reach `.object`: accessing an inactive union field is illegal.
    const root = switch (parsed.value) {
        .object => |obj| obj,
        else => return error.ParseError,
    };

    // Alpha Vantage returns {"Error Message": "..."} or {"Note": "..."} on error/rate limit
    if (root.get("Error Message")) |_| return error.RequestFailed;
    if (root.get("Note")) |_| return error.RateLimited;
    if (root.get("Information")) |_| return error.RateLimited;

    var profile = EtfProfile{
        .symbol = symbol,
    };

    profile.net_assets = if (root.get("net_assets")) |v| parseStrFloat(v) else profile.net_assets;
    profile.expense_ratio = if (root.get("net_expense_ratio")) |v| parseStrFloat(v) else profile.expense_ratio;
    profile.portfolio_turnover = if (root.get("portfolio_turnover")) |v| parseStrFloat(v) else profile.portfolio_turnover;
    profile.dividend_yield = if (root.get("dividend_yield")) |v| parseStrFloat(v) else profile.dividend_yield;

    if (root.get("inception_date")) |v| {
        if (jsonStr(v)) |s| {
            // An unparseable date is treated as "no date", not an error.
            profile.inception_date = Date.parse(s) catch null;
        }
    }
    if (root.get("leveraged")) |v| {
        if (jsonStr(v)) |s| {
            profile.leveraged = std.mem.eql(u8, s, "YES");
        }
    }

    // Once the sector list has been converted to an owned slice it is
    // no longer covered by the list's own errdefer, so track it here
    // and release it if holdings parsing fails afterwards.
    var owned_sectors: ?[]SectorWeight = null;
    errdefer {
        if (owned_sectors) |list| {
            for (list) |entry| allocator.free(entry.name);
            allocator.free(list);
        }
    }

    // Parse sectors
    if (root.get("sectors")) |sectors_val| {
        if (sectors_val == .array) {
            var sectors: std.ArrayList(SectorWeight) = .empty;
            errdefer {
                for (sectors.items) |entry| allocator.free(entry.name);
                sectors.deinit(allocator);
            }

            for (sectors_val.array.items) |item| {
                const obj = switch (item) {
                    .object => |o| o,
                    else => continue,
                };
                const name = jsonStr(obj.get("sector")) orelse continue;
                const weight = parseStrFloat(obj.get("weight") orelse continue) orelse continue;

                const duped_name = try allocator.dupe(u8, name);
                // Not yet in the list: free it ourselves if append fails.
                errdefer allocator.free(duped_name);
                try sectors.append(allocator, .{
                    .name = duped_name,
                    .weight = weight,
                });
            }
            const sector_slice = try sectors.toOwnedSlice(allocator);
            owned_sectors = sector_slice;
            profile.sectors = sector_slice;
        }
    }

    // Parse top holdings (limit to top 20 to keep output manageable)
    if (root.get("holdings")) |holdings_val| {
        if (holdings_val == .array) {
            const max_holdings: usize = 20;
            var holdings: std.ArrayList(Holding) = .empty;
            errdefer {
                for (holdings.items) |entry| {
                    if (entry.symbol) |ticker| allocator.free(ticker);
                    allocator.free(entry.name);
                }
                holdings.deinit(allocator);
            }

            const total: u32 = @intCast(holdings_val.array.items.len);
            profile.total_holdings = total;

            const limit = @min(holdings_val.array.items.len, max_holdings);
            for (holdings_val.array.items[0..limit]) |item| {
                const obj = switch (item) {
                    .object => |o| o,
                    else => continue,
                };
                const desc = jsonStr(obj.get("description")) orelse continue;
                const weight = parseStrFloat(obj.get("weight") orelse continue) orelse continue;

                const duped_sym: ?[]const u8 = if (jsonStr(obj.get("symbol"))) |s|
                    (try allocator.dupe(u8, s))
                else
                    null;
                errdefer if (duped_sym) |ticker| allocator.free(ticker);

                const duped_name = try allocator.dupe(u8, desc);
                errdefer allocator.free(duped_name);

                try holdings.append(allocator, .{
                    .symbol = duped_sym,
                    .name = duped_name,
                    .weight = weight,
                });
            }
            profile.holdings = try holdings.toOwnedSlice(allocator);
        }
    }

    return profile;
}
// -- Helpers --
/// Interpret a JSON value as an `f64`.
///
/// Accepts numeric strings (Alpha Vantage encodes numbers as
/// strings), JSON floats, JSON integers, and `number_string` values
/// (numeric text that std.json could not represent exactly as
/// `i64`/`f64`). Returns null for a missing value, for text that does
/// not parse as a float, and for every other JSON type.
fn parseStrFloat(val: ?std.json.Value) ?f64 {
    const v = val orelse return null;
    return switch (v) {
        // Both variants carry text; unparseable text means "no value".
        .string, .number_string => |text| std.fmt.parseFloat(f64, text) catch null,
        .float => |f| f,
        .integer => |i| @as(f64, @floatFromInt(i)),
        else => null,
    };
}
/// Parse an Alpha Vantage OVERVIEW response body into a `CompanyOverview`.
///
/// `symbol` is stored in the result as given (not copied). Each
/// optional field present in the response is duplicated with
/// `allocator`; the caller frees every non-null field.
///
/// Errors:
/// - `error.ParseError`: body is not valid JSON, or its root is not
///   a JSON object.
/// - `error.RequestFailed`: the response has an "Error Message" key.
/// - `error.RateLimited`: the response has a "Note" or "Information" key.
/// - `error.NotFound`: the response has no "Symbol" key.
/// - `error.OutOfMemory`: copying a field failed; fields already
///   copied by this call are released before returning.
fn parseCompanyOverview(
    allocator: std.mem.Allocator,
    body: []const u8,
    symbol: []const u8,
) !CompanyOverview {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, body, .{}) catch
        return error.ParseError;
    defer parsed.deinit();

    // A valid-JSON but non-object root must not reach `.object`:
    // accessing an inactive union field is illegal.
    const root = switch (parsed.value) {
        .object => |obj| obj,
        else => return error.ParseError,
    };

    if (root.get("Error Message")) |_| return error.RequestFailed;
    if (root.get("Note")) |_| return error.RateLimited;
    if (root.get("Information")) |_| return error.RateLimited;

    // AlphaVantage returns an empty `{}` body (HTTP 200) for
    // symbols it doesn't recognize. There's no `Error Message`
    // key in this case, just nothing. Detect by checking for the
    // canonical "this is a real overview" key (`Symbol`); if
    // absent, the response carries no useful data and we should
    // surface that as NotFound.
    if (root.get("Symbol") == null) return error.NotFound;

    var overview: CompanyOverview = .{ .symbol = symbol };
    // An allocation failure is reported as OutOfMemory rather than
    // disguised as a missing field, so undo partial copies first.
    errdefer {
        if (overview.name) |text| allocator.free(text);
        if (overview.sector) |text| allocator.free(text);
        if (overview.industry) |text| allocator.free(text);
        if (overview.country) |text| allocator.free(text);
        if (overview.market_cap) |text| allocator.free(text);
        if (overview.asset_type) |text| allocator.free(text);
    }

    if (jsonStr(root.get("Name"))) |s| overview.name = try allocator.dupe(u8, s);
    if (jsonStr(root.get("Sector"))) |s| overview.sector = try allocator.dupe(u8, s);
    if (jsonStr(root.get("Industry"))) |s| overview.industry = try allocator.dupe(u8, s);
    if (jsonStr(root.get("Country"))) |s| overview.country = try allocator.dupe(u8, s);
    if (jsonStr(root.get("MarketCapitalization"))) |s| overview.market_cap = try allocator.dupe(u8, s);
    if (jsonStr(root.get("AssetType"))) |s| overview.asset_type = try allocator.dupe(u8, s);

    return overview;
}

View file

@ -101,8 +101,5 @@ pub const DataError = @import("service.zig").DataError;
/// Drives the `--refresh-data` global flag.
pub const FetchOptions = @import("service.zig").FetchOptions;
/// Company overview data (sector, industry, country, market cap) from Alpha Vantage.
pub const CompanyOverview = @import("service.zig").CompanyOverview;
/// Result of a CUSIP-to-ticker lookup (ticker, name, security type).
pub const CusipResult = @import("service.zig").CusipResult;

View file

@ -19,6 +19,8 @@ const OptionsChain = @import("models/option.zig").OptionsChain;
const EarningsEvent = @import("models/earnings.zig").EarningsEvent;
const Quote = @import("models/quote.zig").Quote;
const EtfProfile = @import("models/etf_profile.zig").EtfProfile;
const Holding = @import("models/etf_profile.zig").Holding;
const SectorWeight = @import("models/etf_profile.zig").SectorWeight;
const Config = @import("Config.zig");
const cache = @import("cache/store.zig");
const srf = @import("srf");
@ -28,8 +30,6 @@ const TwelveData = @import("providers/twelvedata.zig").TwelveData;
const Polygon = @import("providers/polygon.zig").Polygon;
const Fmp = @import("providers/fmp.zig").Fmp;
const Cboe = @import("providers/cboe.zig").Cboe;
const AlphaVantage = @import("providers/alphavantage.zig").AlphaVantage;
const alphavantage = @import("providers/alphavantage.zig");
const OpenFigi = @import("providers/openfigi.zig");
const Yahoo = @import("providers/yahoo.zig").Yahoo;
const Tiingo = @import("providers/tiingo.zig").Tiingo;
@ -62,9 +62,10 @@ pub const DataError = error{
TransientError,
/// Provider auth failure (bad API key). Entire refresh should stop.
AuthError,
/// Provider returned a rate-limit response (e.g. AlphaVantage's
/// free-tier 5-calls/min or 25-calls/day). Caller should stop
/// the current batch and surface a "try again later" message;
/// Provider returned a rate-limit response (e.g. SEC EDGAR's
/// 10-req/sec ceiling, or a free-tier candle API's per-minute
/// cap). Caller should stop the current batch and surface a
/// "try again later" message;
/// retrying immediately will just hit the same limit.
RateLimited,
/// Provider responded but doesn't have data for the requested
@ -126,12 +127,80 @@ pub fn isPermanentProviderFailure(err: anyerror) bool {
return err == error.NotFound;
}
/// Re-exported provider types needed by commands via DataService.
pub const CompanyOverview = alphavantage.CompanyOverview;
/// Result of a CUSIP-to-ticker lookup (provider-agnostic).
pub const CusipResult = OpenFigi.FigiResult;
/// Result of an EDGAR ticker-map fallback lookup. Returned by
/// `DataService.lookupEdgarFallback` so commands consume a
/// digested shape instead of pulling in `TickerMap` /
/// `MutualFundTickerEntry` / `CompanyTickerEntry` (those are
/// provider-internal).
///
/// `enrich` uses this to decide what metadata.srf line to emit
/// when Wikidata had no match for a symbol.
pub const EdgarLookup = union(enum) {
    /// Symbol matched the EDGAR mutual-fund / managed-fund map.
    /// Generic "Fund" label (the `tickers_funds.srf` file mixes
    /// mutual funds and series-of-trust ETFs; we can't tell
    /// which without digging into submissions metadata).
    managed_fund,
    /// Symbol matched the EDGAR company / UIT map. `title` is
    /// the entry's `title` (e.g. "SPDR S&P 500 ETF TRUST"),
    /// allocated by the service's allocator — caller frees with
    /// `freeEdgarLookup` when done. The `is_etf` flag is set
    /// when the title contains "ETF" or "TRUST" — operating
    /// companies usually have Wikidata coverage and wouldn't
    /// reach this fallback, so a UIT-style hit is almost
    /// certainly an ETF.
    company_or_uit: struct { title: ?[]const u8, is_etf: bool },
    /// Symbol not in either EDGAR map.
    none,
};
/// Release the heap memory owned by an `EdgarLookup`.
///
/// The only owned data is the optional `title` of a
/// `.company_or_uit` hit; `.managed_fund` and `.none` own nothing,
/// so calling this on them does nothing.
pub fn freeEdgarLookup(allocator: std.mem.Allocator, lookup: EdgarLookup) void {
    const owned_title: ?[]const u8 = switch (lookup) {
        .company_or_uit => |hit| hit.title,
        .managed_fund, .none => null,
    };
    if (owned_title) |title| allocator.free(title);
}
/// Look up `sym` in the supplied EDGAR ticker maps. Pure data
/// transform; no I/O.
///
/// The mutual-fund map is consulted first; a hit there returns
/// `.managed_fund` without looking at the company map.
///
/// Both maps may be null (caller failed to load one or both).
/// A null map simply contributes no match for that pass.
///
/// On `.company_or_uit`, the returned `title` is duped from the
/// underlying entry using `allocator` so the caller can use it
/// after the maps are freed. Free with `freeEdgarLookup`. If the
/// copy cannot be allocated, `title` is null; `is_etf` is still
/// computed from the entry's own title, so an allocation failure
/// never changes the classification.
fn lookupInTickerMaps(
    allocator: std.mem.Allocator,
    sym: []const u8,
    mf_map: ?*const Edgar.TickerMap(Edgar.MutualFundTickerEntry),
    co_map: ?*const Edgar.TickerMap(Edgar.CompanyTickerEntry),
) EdgarLookup {
    if (mf_map) |m| {
        if (m.get(sym)) |_| return .managed_fund;
    }
    if (co_map) |m| {
        if (m.get(sym)) |entry| {
            // Classify from the map's own title, not from the copy:
            // the copy is best-effort and may be null on OOM.
            const source_title: []const u8 = entry.title orelse "";
            const is_etf =
                std.ascii.indexOfIgnoreCase(source_title, "ETF") != null or
                std.ascii.indexOfIgnoreCase(source_title, "TRUST") != null;

            // This function cannot return an error, so a failed copy
            // degrades to "no title" instead of failing the lookup.
            const title_owned: ?[]const u8 = if (entry.title) |t|
                allocator.dupe(u8, t) catch null
            else
                null;

            return .{ .company_or_uit = .{ .title = title_owned, .is_etf = is_etf } };
        }
    }
    return .none;
}
/// Indicates whether the returned data came from cache or was freshly fetched.
pub const Source = enum {
cached,
@ -239,7 +308,6 @@ pub const DataService = struct {
pg: ?Polygon = null,
fmp: ?Fmp = null,
cboe: ?Cboe = null,
av: ?AlphaVantage = null,
yh: ?Yahoo = null,
tg: ?Tiingo = null,
wikidata: ?Wikidata = null,
@ -279,9 +347,11 @@ pub const DataService = struct {
if (self.config.fmp_key == null) {
log.warn("FMP_API_KEY not set — earnings data unavailable", .{});
}
// ETF profiles
if (self.config.alphavantage_key == null) {
log.warn("ALPHAVANTAGE_API_KEY not set — ETF profiles unavailable", .{});
// ETF profiles + portfolio enrichment now go through public
// SEC EDGAR + Wikidata. Both require a contact email in
// outbound User-Agents (SEC's policy).
if (self.config.user_email == null) {
log.warn("ZFIN_USER_EMAIL not set — ETF profiles + enrichment unavailable", .{});
}
// Candle fallback
if (self.config.twelvedata_key == null and self.config.tiingo_key == null) {
@ -298,7 +368,6 @@ pub const DataService = struct {
if (self.pg) |*pg| pg.deinit();
if (self.fmp) |*fmp| fmp.deinit();
if (self.cboe) |*c| c.deinit();
if (self.av) |*av| av.deinit();
if (self.yh) |*yh| yh.deinit();
if (self.tg) |*tg| tg.deinit();
if (self.wikidata) |*w| w.deinit();
@ -321,7 +390,7 @@ pub const DataService = struct {
@field(self, field_name) = T.init(self.io, self.allocator, email);
} else {
// All we're doing here is lower casing the type name, then
// appending _key to it, so AlphaVantage -> alphavantage_key
// appending _key to it, so Tiingo -> tiingo_key
const config_key = comptime blk: {
const full = @typeName(T);
var start: usize = 0;
@ -897,46 +966,120 @@ pub const DataService = struct {
return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
/// Fetch ETF profile for a symbol. Assembles a unified
/// `EtfProfile` view from the EDGAR `etf_metrics` cache (profile
/// + sectors + holdings) plus the Wikidata `classification`
/// cache (inception_date, fund name fallback). Both underlying
/// caches are managed by `getEtfMetrics` / `getClassification`;
/// this function does not maintain its own cache.
///
/// Several legacy fields that AlphaVantage used to populate
/// (`expense_ratio`, `dividend_yield`, `portfolio_turnover`,
/// `leveraged`) remain on `EtfProfile` but stay null here —
/// EDGAR NPORT-P doesn't carry them. They'll fill in once a
/// prospectus parser lands.
///
/// `opts` (including `skip_network` / `force_refresh`) is forwarded
/// unchanged to `getEtfMetrics` and `getClassification`.
///
/// Errors from `getEtfMetrics` and allocation failures propagate.
/// A failed classification lookup is tolerated (best-effort).
/// Every string and slice in the returned profile is allocated
/// with `self.allocator`; on any error return, everything
/// allocated here is released again.
pub fn getEtfProfile(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EtfProfile) {
    const gpa = self.allocator;

    // Primary source: EDGAR ETF metrics. If the lookup fails (e.g.
    // the symbol isn't a fund or isn't in EDGAR) the error is
    // propagated to the caller.
    const metrics = try self.getEtfMetrics(symbol, opts);
    defer metrics.deinit();

    // Walk the EtfMetricRecord slice to extract profile + sectors
    // + holdings. The slice shape is "one .profile, then N
    // .sector, then M .holding" per `appendEtfMetricRecords`.
    var name: ?[]const u8 = null;
    errdefer if (name) |n| gpa.free(n);
    var net_assets: ?f64 = null;

    var sectors_buf: std.ArrayList(SectorWeight) = .empty;
    errdefer {
        for (sectors_buf.items) |sector| gpa.free(sector.name);
        sectors_buf.deinit(gpa);
    }
    var holdings_buf: std.ArrayList(Holding) = .empty;
    errdefer {
        for (holdings_buf.items) |holding| {
            gpa.free(holding.name);
            if (holding.symbol) |sym| gpa.free(sym);
        }
        holdings_buf.deinit(gpa);
    }

    for (metrics.data) |rec| switch (rec) {
        .profile => |p| {
            if (p.series_name) |sn| {
                // Duplicate first, then release any earlier name, so a
                // second .profile record can neither leak nor leave
                // `name` dangling if the allocation fails.
                const dup = try gpa.dupe(u8, sn);
                if (name) |previous| gpa.free(previous);
                name = dup;
            }
            net_assets = p.net_assets;
        },
        .sector => |sec| {
            // Reserve the slot before duplicating the string: once the
            // dupe succeeds the append can no longer fail, so the new
            // string is always owned by the list (and its errdefer).
            try sectors_buf.ensureUnusedCapacity(gpa, 1);
            sectors_buf.appendAssumeCapacity(.{
                .name = try gpa.dupe(u8, sec.description),
                .weight = sec.pct_of_portfolio / 100.0,
            });
        },
        .holding => |h| {
            try holdings_buf.ensureUnusedCapacity(gpa, 1);
            const sym_dup: ?[]const u8 = if (h.ticker) |t|
                try gpa.dupe(u8, t)
            else
                null;
            // Covers the window where the ticker copy exists but the
            // name copy fails.
            errdefer if (sym_dup) |sd| gpa.free(sd);
            const name_dup = try gpa.dupe(u8, h.name);
            holdings_buf.appendAssumeCapacity(.{
                .symbol = sym_dup,
                .name = name_dup,
                .weight = h.pct_of_portfolio / 100.0,
            });
        },
    };

    // Wikidata classification provides inception_date and a
    // fallback name. Best-effort: if the fetch fails we still
    // return the EDGAR-only profile.
    var inception_date: ?Date = null;
    if (self.getClassification(symbol, opts)) |classification| {
        defer classification.deinit();
        for (classification.data) |c| {
            if (c.inception_date) |idate_str| {
                // An unparseable date is simply ignored.
                if (Date.parse(idate_str)) |d| inception_date = d else |_| {}
            }
            // Use Wikidata's name only if EDGAR didn't provide one.
            if (name == null) {
                if (c.name) |n| name = try gpa.dupe(u8, n);
            }
        }
    } else |err| {
        log.debug("{s}: classification unavailable for etf_profile: {s}", .{ symbol, @errorName(err) });
    }

    // Perform every remaining fallible step BEFORE building the
    // result, each guarded by an errdefer, so a late allocation
    // failure cannot strand an already-converted slice.
    const symbol_owned = try gpa.dupe(u8, symbol);
    errdefer gpa.free(symbol_owned);

    const sectors_count = sectors_buf.items.len;
    const holdings_count = holdings_buf.items.len;

    const sectors_owned: []SectorWeight = if (sectors_count > 0)
        try sectors_buf.toOwnedSlice(gpa)
    else
        &.{};
    errdefer {
        for (sectors_owned) |sector| gpa.free(sector.name);
        if (sectors_owned.len > 0) gpa.free(sectors_owned);
    }

    // Last fallible operation. On failure `holdings_buf` is left
    // intact and is released by its own errdefer above.
    const holdings_owned: []Holding = if (holdings_count > 0)
        try holdings_buf.toOwnedSlice(gpa)
    else
        &.{};

    // Lists that were not handed off via toOwnedSlice are empty;
    // deinit them for symmetry. No error can occur past this point.
    if (holdings_count == 0) holdings_buf.deinit(gpa);
    if (sectors_count == 0) sectors_buf.deinit(gpa);

    const profile: EtfProfile = .{
        .symbol = symbol_owned,
        .name = name,
        .net_assets = net_assets,
        .holdings = if (holdings_count > 0) holdings_owned else null,
        .total_holdings = if (holdings_count > 0) @intCast(holdings_count) else null,
        .sectors = if (sectors_count > 0) sectors_owned else null,
        .inception_date = inception_date,
    };

    return .{
        .data = profile,
        .source = metrics.source,
        .timestamp = metrics.timestamp,
        .allocator = gpa,
    };
}
// Wikidata + EDGAR providers
@ -1196,14 +1339,24 @@ pub const DataService = struct {
if (!opts.force_refresh) {
if (s.read(Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
log.debug("{s}: etf_metrics fresh in local cache", .{symbol});
return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
return .{
.data = cached.data,
.source = .cached,
.timestamp = cached.timestamp,
.allocator = self.allocator,
};
}
}
if (opts.skip_network) {
if (s.read(Edgar.EtfMetricRecord, symbol, null, .any)) |cached| {
log.info("{s}: etf_metrics stale-cached returned (skip_network)", .{symbol});
return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
return .{
.data = cached.data,
.source = .cached,
.timestamp = cached.timestamp,
.allocator = self.allocator,
};
}
return DataError.FetchFailed;
}
@ -1211,7 +1364,12 @@ pub const DataService = struct {
if (!opts.force_refresh and self.syncFromServer(symbol, .etf_metrics)) {
if (s.read(Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
log.debug("{s}: etf_metrics synced from server", .{symbol});
return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
return .{
.data = cached.data,
.source = .cached,
.timestamp = cached.timestamp,
.allocator = self.allocator,
};
}
}
@ -1234,7 +1392,14 @@ pub const DataService = struct {
defer co_map.deinit();
var edgar = try self.getProvider(Edgar);
const result = edgar.fetchEtfMetrics(self.io, self.allocator, &mf_map, &co_map, symbol, 20) catch |err| {
const result = edgar.fetchEtfMetrics(
self.io,
self.allocator,
&mf_map,
&co_map,
symbol,
20,
) catch |err| {
log.warn("{s}: etf_metrics fetch failed: {s}", .{ symbol, @errorName(err) });
return DataError.FetchFailed;
};
@ -1284,19 +1449,26 @@ pub const DataService = struct {
}
}
/// Load and parse the EDGAR mutual-fund ticker map, going
/// through the `Store`-backed cache. Caller deinits the result.
fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap {
/// Load the EDGAR mutual-fund ticker map. Reads `[]MutualFundTickerEntry`
/// from cache when fresh; otherwise fetches via the provider
/// and writes the parsed slice to cache. The returned
/// `TickerMap` takes ownership of the entries; caller frees via
/// a single `mf_map.deinit()`.
///
/// Heavy: ~28k entries. Cheap on cache hit (fast SRF read);
/// expensive on miss (one HTTP round-trip + JSON parse).
/// Exposed publicly so commands like `enrich` can use the
/// ticker map as a fallback classifier when Wikidata returns
/// no rows for a symbol.
pub fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.MutualFundTickerEntry) {
var s = self.store();
if (!opts.force_refresh) {
if (s.read(Edgar.MutualFundTickerMapBlob, "_edgar", null, .fresh_only)) |cached| {
defer self.allocator.free(cached.data);
if (s.read(Edgar.MutualFundTickerEntry, "_edgar", null, .fresh_only)) |cached| {
if (cached.data.len > 0) {
const blob = cached.data[0];
defer self.allocator.free(blob.json);
return Edgar.parseTickerMap(self.allocator, blob.json);
return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, cached.data);
}
Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
}
}
@ -1304,35 +1476,27 @@ pub const DataService = struct {
self.assertNetworkAllowed("loadMutualFundTickerMap edgar.fetchMutualFundTickerMap");
var edgar = try self.getProvider(Edgar);
// Fetch the raw JSON via a separate call so we can write
// the blob to cache; the parsed map gets returned to the
// caller.
var resp = try edgar.client.request(.GET, "https://www.sec.gov/files/company_tickers_mf.json", null, &.{
.{ .name = "User-Agent", .value = "zfin/0.1" },
.{ .name = "From", .value = self.config.user_email orelse "" },
});
defer resp.deinit();
const json = try self.allocator.dupe(u8, resp.body);
var blob = [_]Edgar.MutualFundTickerMapBlob{.{ .json = json }};
s.write(Edgar.MutualFundTickerMapBlob, "_edgar", blob[0..], .{ .seconds = cache.Ttl.tickers_funds, .jitter_pct = 8 });
defer self.allocator.free(json);
return Edgar.parseTickerMap(self.allocator, json);
// Fetch + parse via the provider (correct UA + From + Accept
// + rate-limit token), cache the parsed slice, then build
// the lookup map (which takes ownership of the slice).
const entries = try edgar.fetchMutualFundTickerMap(self.allocator);
s.write(Edgar.MutualFundTickerEntry, "_edgar", entries, .{ .seconds = cache.Ttl.tickers_funds, .jitter_pct = 8 });
return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, entries);
}
/// Load and parse the EDGAR company ticker map (stocks + UITs).
fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap {
/// Load the EDGAR company ticker map (stocks + UITs). Same shape
/// as `loadMutualFundTickerMap` for the `CompanyTickerEntry`
/// type. See that function's doc-comment for cost / use-case
/// guidance.
pub fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.CompanyTickerEntry) {
var s = self.store();
if (!opts.force_refresh) {
if (s.read(Edgar.CompanyTickerMapBlob, "_edgar", null, .fresh_only)) |cached| {
defer self.allocator.free(cached.data);
if (s.read(Edgar.CompanyTickerEntry, "_edgar", null, .fresh_only)) |cached| {
if (cached.data.len > 0) {
const blob = cached.data[0];
defer self.allocator.free(blob.json);
return Edgar.parseStockTickerMap(self.allocator, blob.json);
return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, cached.data);
}
Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
}
}
@ -1340,18 +1504,41 @@ pub const DataService = struct {
self.assertNetworkAllowed("loadCompanyTickerMap edgar.fetchCompanyTickerMap");
var edgar = try self.getProvider(Edgar);
var resp = try edgar.client.request(.GET, "https://www.sec.gov/files/company_tickers.json", null, &.{
.{ .name = "User-Agent", .value = "zfin/0.1" },
.{ .name = "From", .value = self.config.user_email orelse "" },
});
defer resp.deinit();
const entries = try edgar.fetchCompanyTickerMap(self.allocator);
s.write(Edgar.CompanyTickerEntry, "_edgar", entries, .{ .seconds = cache.Ttl.tickers_companies, .jitter_pct = 8 });
return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, entries);
}
const json = try self.allocator.dupe(u8, resp.body);
var blob = [_]Edgar.CompanyTickerMapBlob{.{ .json = json }};
s.write(Edgar.CompanyTickerMapBlob, "_edgar", blob[0..], .{ .seconds = cache.Ttl.tickers_companies, .jitter_pct = 8 });
defer self.allocator.free(json);
/// Look up a symbol in the EDGAR ticker maps. Used by the
/// `enrich` command as a fallback classifier when Wikidata
/// returns no rows for the symbol. Loads both maps (cache or
/// network), runs the lookup, frees the maps, returns the
/// digested `EdgarLookup` union.
///
/// Commands consume the union directly — they never see
/// `TickerMap` / `MutualFundTickerEntry` / `CompanyTickerEntry`
/// shapes. Provider details stay inside the service layer.
///
/// A map that fails to load is treated as absent: the failure is
/// logged at debug level and the lookup proceeds with whichever
/// map(s) did load.
///
/// Caller owns the `title` string when the result is
/// `.company_or_uit{ .title = non-null }`. It is allocated with
/// the service's allocator (`self.allocator`); release it with
/// that allocator, e.g. via `freeEdgarLookup`.
pub fn lookupEdgarFallback(
    self: *DataService,
    sym: []const u8,
    opts: FetchOptions,
) EdgarLookup {
    var mf_opt: ?Edgar.TickerMap(Edgar.MutualFundTickerEntry) = self.loadMutualFundTickerMap(opts) catch |err| blk: {
        log.debug("{s}: EDGAR mutual-fund ticker map unavailable: {s}", .{ sym, @errorName(err) });
        break :blk null;
    };
    defer if (mf_opt) |*m| m.deinit();

    var co_opt: ?Edgar.TickerMap(Edgar.CompanyTickerEntry) = self.loadCompanyTickerMap(opts) catch |err| blk: {
        log.debug("{s}: EDGAR company ticker map unavailable: {s}", .{ sym, @errorName(err) });
        break :blk null;
    };
    defer if (co_opt) |*m| m.deinit();

    // The maps are released by the defers above when this function
    // returns, i.e. after `lookupInTickerMaps` has produced its result.
    return lookupInTickerMaps(
        self.allocator,
        sym,
        if (mf_opt) |*m| m else null,
        if (co_opt) |*m| m else null,
    );
}
//
@ -1386,49 +1573,6 @@ pub const DataService = struct {
return DataError.FetchFailed;
}
/// Retrieve the company overview (sector, industry, country, market
/// cap) for `symbol` from Alpha Vantage. Nothing is cached: every
/// call goes to the provider. The caller must free the string fields
/// of the returned value.
///
/// On failure the upstream error name is written to the log and the
/// error is translated through `mapAlphaVantageError`, so callers
/// (notably `enrich`) can tell "unknown symbol" apart from
/// "rate-limited", "auth failed" and generic transport errors.
pub fn getCompanyOverview(self: *DataService, symbol: []const u8) DataError!CompanyOverview {
    var provider = try self.getProvider(AlphaVantage);
    if (provider.fetchCompanyOverview(self.allocator, symbol)) |overview| {
        return overview;
    } else |err| {
        log.warn("{s}: getCompanyOverview failed: {s}", .{ symbol, @errorName(err) });
        return mapAlphaVantageError(err);
    }
}
/// Convert an error raised by the AlphaVantage provider into the
/// service-level `DataError` set, preserving the rate-limit /
/// not-found / auth distinctions so CLI messages can be specific
/// rather than a generic "FetchFailed".
fn mapAlphaVantageError(err: anyerror) DataError {
    return switch (err) {
        error.OutOfMemory => DataError.OutOfMemory,
        error.ParseError => DataError.ParseError,
        error.RateLimited => DataError.RateLimited,
        error.Unauthorized => DataError.AuthError,
        error.ServerError => DataError.TransientError,
        // `RequestFailed` is what the AlphaVantage parser throws when
        // the response body contains an `"Error Message"` key (sent
        // for unknown / malformed symbols); the HTTP layer also uses
        // it as a last-resort transport collapse — rare in practice.
        // Both are reported as NotFound, same as a literal NotFound:
        // "AlphaVantage doesn't recognize this symbol" is the wanted
        // user-facing meaning in the common case, and the caller's
        // log line carries the raw error name for the rare
        // transport-failure case.
        error.NotFound, error.RequestFailed => DataError.NotFound,
        else => DataError.FetchFailed,
    };
}
/// Compute trailing returns for a symbol (fetches candles + dividends).
/// Returns both as-of-date and month-end trailing returns.
/// As-of-date: end = latest close. Matches Morningstar "Trailing Returns" page.
@ -2942,3 +3086,159 @@ test "DataService getProvider returns NoApiKey for Wikidata without user_email"
const ed_result = svc.getProvider(Edgar);
try std.testing.expectError(DataError.NoApiKey, ed_result);
}
// lookupInTickerMaps
//
// Pure function — no I/O. Consumed by `lookupEdgarFallback`,
// which loads the maps then calls this. Tests construct
// synthetic ticker-map data directly to exercise every branch
// without touching the cache or network.
/// Test helper: build a `MutualFundTickerEntry` whose `symbol` and
/// `cik` are fresh copies allocated with `allocator`. If the second
/// copy fails, the first is released, so nothing leaks on the error
/// path. On success the caller owns both strings.
fn testNewMfEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8) !Edgar.MutualFundTickerEntry {
    const symbol_owned = try allocator.dupe(u8, symbol);
    errdefer allocator.free(symbol_owned);
    const cik_owned = try allocator.dupe(u8, cik);
    return .{
        .symbol = symbol_owned,
        .cik = cik_owned,
    };
}
/// Test helper: build a `CompanyTickerEntry` whose `symbol`, `cik`
/// and (when given) `title` are fresh copies allocated with
/// `allocator`. Earlier copies are released if a later one fails, so
/// nothing leaks on the error path. On success the caller owns every
/// string in the returned entry.
fn testNewCoEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8, title: ?[]const u8) !Edgar.CompanyTickerEntry {
    const symbol_owned = try allocator.dupe(u8, symbol);
    errdefer allocator.free(symbol_owned);
    const cik_owned = try allocator.dupe(u8, cik);
    errdefer allocator.free(cik_owned);
    // Last fallible step; nothing after it can fail.
    const title_owned: ?[]const u8 = if (title) |t| try allocator.dupe(u8, t) else null;
    return .{
        .symbol = symbol_owned,
        .cik = cik_owned,
        .title = title_owned,
    };
}
test "lookupInTickerMaps: both maps null -> .none" {
    // With neither ticker map supplied there is nothing to search.
    const gpa = std.testing.allocator;

    const lookup = lookupInTickerMaps(gpa, "ANY", null, null);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .none);
}
test "lookupInTickerMaps: symbol in MF map -> .managed_fund" {
    const gpa = std.testing.allocator;
    const FundMap = Edgar.TickerMap(Edgar.MutualFundTickerEntry);

    // One-row mutual-fund map containing the symbol under test.
    const rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    rows[0] = try testNewMfEntry(gpa, "FAGIX", "0000225322");
    var mf_map = try FundMap.fromEntries(gpa, rows);
    defer mf_map.deinit();

    const lookup = lookupInTickerMaps(gpa, "FAGIX", &mf_map, null);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .managed_fund);
}
test "lookupInTickerMaps: symbol in company map with TRUST title -> ETF hint" {
    const gpa = std.testing.allocator;
    const CompanyMap = Edgar.TickerMap(Edgar.CompanyTickerEntry);

    // One-row company map whose title looks like an ETF trust.
    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "SPY", "0000884394", "SPDR S&P 500 ETF TRUST");
    var co_map = try CompanyMap.fromEntries(gpa, rows);
    defer co_map.deinit();

    const lookup = lookupInTickerMaps(gpa, "SPY", null, &co_map);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .company_or_uit);
    const hit = lookup.company_or_uit;
    try std.testing.expect(hit.is_etf);
    try std.testing.expectEqualStrings("SPDR S&P 500 ETF TRUST", hit.title.?);
}
test "lookupInTickerMaps: company map with operating-company title -> not ETF" {
    const gpa = std.testing.allocator;
    const CompanyMap = Edgar.TickerMap(Edgar.CompanyTickerEntry);

    // One-row company map with an ordinary operating-company title.
    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "AAPL", "0000320193", "Apple Inc.");
    var co_map = try CompanyMap.fromEntries(gpa, rows);
    defer co_map.deinit();

    const lookup = lookupInTickerMaps(gpa, "AAPL", null, &co_map);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .company_or_uit);
    try std.testing.expect(!lookup.company_or_uit.is_etf);
}
test "lookupInTickerMaps: not in either map -> .none" {
    const gpa = std.testing.allocator;
    const FundMap = Edgar.TickerMap(Edgar.MutualFundTickerEntry);

    // The map holds a different symbol than the one looked up.
    const rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    rows[0] = try testNewMfEntry(gpa, "FAGIX", "0000225322");
    var fund_map = try FundMap.fromEntries(gpa, rows);
    defer fund_map.deinit();

    const lookup = lookupInTickerMaps(gpa, "MISSING", &fund_map, null);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .none);
}
test "lookupInTickerMaps: MF map takes precedence over company map" {
    // A symbol can appear in both maps (rare but possible — class
    // shares of an open-end fund vs the fund's parent company).
    // The mutual-fund answer is preferred; this test locks in
    // that contract.
    const gpa = std.testing.allocator;

    const fund_rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    fund_rows[0] = try testNewMfEntry(gpa, "DUP", "0000000001");
    const company_rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    company_rows[0] = try testNewCoEntry(gpa, "DUP", "0000000002", "DUP TRUST");

    var fund_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, fund_rows);
    defer fund_map.deinit();
    var company_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, company_rows);
    defer company_map.deinit();

    const lookup = lookupInTickerMaps(gpa, "DUP", &fund_map, &company_map);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .managed_fund);
}
test "lookupInTickerMaps: company map with null title -> .company_or_uit, no ETF" {
    // Defensive case: a company row without a title still yields a
    // lookup hit, but ETF status cannot be inferred from a missing
    // string.
    const gpa = std.testing.allocator;
    const CompanyMap = Edgar.TickerMap(Edgar.CompanyTickerEntry);

    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "BARE", "0000000001", null);
    var co_map = try CompanyMap.fromEntries(gpa, rows);
    defer co_map.deinit();

    const lookup = lookupInTickerMaps(gpa, "BARE", null, &co_map);
    defer freeEdgarLookup(gpa, lookup);

    try std.testing.expect(lookup == .company_or_uit);
    const hit = lookup.company_or_uit;
    try std.testing.expect(!hit.is_etf);
    try std.testing.expect(hit.title == null);
}
test "lookupInTickerMaps: returned title is owned (survives map deinit)" {
    // Critical for the service.lookupEdgarFallback contract: the
    // maps are freed before the EdgarLookup reaches the caller, so
    // the title must outlive the map it was read from.
    const gpa = std.testing.allocator;

    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "VTI", "0000884394", "VANGUARD TOTAL STOCK MARKET ETF");

    var co_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    const lookup = lookupInTickerMaps(gpa, "VTI", null, &co_map);
    // Tear the map down right away, before any assertion runs.
    co_map.deinit();
    defer freeEdgarLookup(gpa, lookup);

    // Map is gone. Title must still be readable.
    try std.testing.expect(lookup == .company_or_uit);
    try std.testing.expectEqualStrings("VANGUARD TOTAL STOCK MARKET ETF", lookup.company_or_uit.title.?);
    try std.testing.expect(lookup.company_or_uit.is_etf);
}
test "freeEdgarLookup: handles all three union variants without leak" {
    const gpa = std.testing.allocator;

    // Variants that carry no owned memory: each call is a no-op.
    freeEdgarLookup(gpa, .managed_fund);
    freeEdgarLookup(gpa, .none);
    freeEdgarLookup(gpa, .{ .company_or_uit = .{ .title = null, .is_etf = false } });

    // .company_or_uit with a non-null title — the title gets freed.
    const title_copy = try gpa.dupe(u8, "Some Title");
    freeEdgarLookup(gpa, .{ .company_or_uit = .{ .title = title_copy, .is_etf = true } });

    // std.testing.allocator reports leaked allocations as a test
    // failure — passing this test means the title was freed.
}

View file

@ -162,9 +162,10 @@ fn loadDataFinish(state: *State, app: *App, pf: zfin.Portfolio, summary: zfin.va
// Rendering
pub fn buildStyledLines(state: *State, app: *App, arena: std.mem.Allocator) ![]const StyledLine {
// Compute equity/fixed split from classification + portfolio
// Compute equity/fixed-income/cash split from classification + portfolio
var stock_pct: f64 = 0;
var bond_pct: f64 = 0;
var cash_pct: f64 = 0;
var total_value: f64 = 0;
if (app.portfolio.summary) |summary| {
total_value = summary.total_value;
@ -180,9 +181,10 @@ pub fn buildStyledLines(state: *State, app: *App, arena: std.mem.Allocator) ![]c
);
stock_pct = split.stock_pct;
bond_pct = split.bond_pct;
cash_pct = split.cash_pct;
}
}
return renderAnalysisLines(arena, app.theme, state.result, stock_pct, bond_pct, total_value);
return renderAnalysisLines(arena, app.theme, state.result, stock_pct, bond_pct, cash_pct, total_value);
}
/// Render analysis tab content. Pure function — no App dependency.
@ -192,6 +194,7 @@ pub fn renderAnalysisLines(
analysis_result: ?zfin.analysis.AnalysisResult,
stock_pct: f64,
bond_pct: f64,
cash_pct: f64,
total_value: f64,
) ![]const StyledLine {
var lines: std.ArrayList(StyledLine) = .empty;
@ -206,14 +209,19 @@ pub fn renderAnalysisLines(
return lines.toOwnedSlice(arena);
};
// Equities vs Fixed Income summary
if (stock_pct > 0 or bond_pct > 0) {
// Equities / Fixed Income / Cash header summary. The Other
// bucket (derivatives, real property) is excluded from this
// summary but appears as its own row in the Asset Category
// breakdown.
if (stock_pct > 0 or bond_pct > 0 or cash_pct > 0) {
try lines.append(arena, .{
.text = try std.fmt.allocPrint(arena, " Equities {d:.1}% ({f}) / Fixed Income {d:.1}% ({f})", .{
.text = try std.fmt.allocPrint(arena, " Equities {d:.1}% ({f}) / Fixed Income {d:.1}% ({f}) / Cash {d:.1}% ({f})", .{
stock_pct * 100,
Money.from(stock_pct * total_value),
bond_pct * 100,
Money.from(bond_pct * total_value),
cash_pct * 100,
Money.from(cash_pct * total_value),
}),
.style = th.mutedStyle(),
});
@ -223,18 +231,15 @@ pub fn renderAnalysisLines(
const bar_width: usize = 30;
const label_width: usize = 24;
const sections = [_]struct { items: []const zfin.analysis.BreakdownItem, title: []const u8 }{
.{ .items = result.asset_class, .title = " Asset Class" },
.{ .items = result.sector, .title = " Sector (Equities)" },
.{ .items = result.geo, .title = " Geographic" },
.{ .items = result.account, .title = " By Account" },
.{ .items = result.tax_type, .title = " By Tax Type" },
};
const sections = zfin.analysis.breakdownSections(&result);
for (sections, 0..) |sec, si| {
if (si > 0 and sec.items.len == 0) continue;
if (si > 0) try lines.append(arena, .{ .text = "", .style = th.contentStyle() });
try lines.append(arena, .{ .text = sec.title, .style = th.headerStyle() });
// Indent the title (renderer-level, not baked into the
// section's title string).
const title_text = try std.fmt.allocPrint(arena, " {s}", .{sec.title});
try lines.append(arena, .{ .text = title_text, .style = th.headerStyle() });
try lines.append(arena, .{ .text = "", .style = th.contentStyle() });
for (sec.items) |item| {
const text = try fmtBreakdownLine(arena, item, bar_width, label_width);
@ -333,6 +338,7 @@ test "renderAnalysisLines with data" {
.{ .label = "Int'l Stock", .weight = 0.40, .value = 80000 },
};
const result = zfin.analysis.AnalysisResult{
.asset_category = &.{},
.asset_class = &asset_class,
.sector = &.{},
.geo = &.{},
@ -341,15 +347,18 @@ test "renderAnalysisLines with data" {
.unclassified = &.{},
.total_value = 200000,
};
const lines = try renderAnalysisLines(arena, th, result, 0.80, 0.20, 200000);
const lines = try renderAnalysisLines(arena, th, result, 0.80, 0.15, 0.05, 200000);
// Should have header section + asset class items
try testing.expect(lines.len >= 5);
// Find "Portfolio Analysis" header
var found_header = false;
var found_cash_in_summary = false;
for (lines) |l| {
if (std.mem.indexOf(u8, l.text, "Portfolio Analysis") != null) found_header = true;
if (std.mem.indexOf(u8, l.text, "Cash 5.0%") != null) found_cash_in_summary = true;
}
try testing.expect(found_header);
try testing.expect(found_cash_in_summary);
// Find asset class data
var found_us = false;
for (lines) |l| {
@ -364,7 +373,7 @@ test "renderAnalysisLines no data" {
const arena = arena_state.allocator();
const th = theme.default_theme;
const lines = try renderAnalysisLines(arena, th, null, 0, 0, 0);
const lines = try renderAnalysisLines(arena, th, null, 0, 0, 0, 0);
try testing.expectEqual(@as(usize, 5), lines.len);
try testing.expect(std.mem.indexOf(u8, lines[3].text, "No analysis data") != null);
}