zfin/src/commands/enrich.zig
Emil Lerch b796a46699
All checks were successful
Generic zig build / build (push) Successful in 4m20s
Generic zig build / deploy (push) Successful in 17s
Generic zig build / publish-macos (push) Successful in 41s
move Wikidata -> edgar logic into getClassification
2026-06-01 16:11:09 -07:00

1412 lines
59 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const std = @import("std");
const zfin = @import("../root.zig");
const cli = @import("common.zig");
const framework = @import("framework.zig");
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
const ClassificationRecord = zfin.classification.ClassificationRecord;
/// Arguments for the `enrich` command after positional parsing.
/// Built by `parseArgs` and consumed by `run`, which dispatches on
/// whether `symbol` is set.
pub const ParsedArgs = struct {
/// Optional symbol (e.g. "AAPL"). Null = portfolio mode (uses
/// `-p` resolution to find the user's portfolio file(s)).
/// The slice is the CLI argument itself (not copied by `parseArgs`).
symbol: ?[]const u8,
};
/// Command registration record for `enrich`: name, help group,
/// one-line synopsis, and the full `--help` text. The help text is
/// user-facing output; keep it in sync with the behavior of
/// `enrichSymbol` / `enrichPortfolio` below.
pub const meta: framework.Meta = .{
.name = "enrich",
.group = .hygiene,
.synopsis = "Bootstrap metadata.srf from Wikidata + EDGAR",
.help =
\\Usage: zfin enrich [SYMBOL]
\\
\\Bootstrap a `metadata.srf` classification file from public
\\Wikidata + SEC EDGAR data. Two modes:
\\
\\ - Portfolio mode (no argument): enrich every stock symbol
\\ in your portfolio. Honors the global `-p`/`--portfolio`
\\ flag for selecting which portfolio file(s) to use; with
\\ no flag, falls back to the standard portfolio resolution
\\ (portfolio.srf in cwd, or $ZFIN_HOME/portfolio.srf).
\\ Output is a complete SRF file written to stdout —
\\ redirect into metadata.srf and edit by hand for accuracy.
\\ - Symbol mode (single SYMBOL argument): enrich one symbol
\\ and emit one appendable SRF line. Useful for adding to
\\ an existing metadata.srf without rerunning the whole file.
\\
\\Sources used:
\\ - Wikidata SPARQL: sector / industry / country / asset class
\\ + CIK lookup for the EDGAR call below.
\\ - SEC EDGAR XBRL company facts: shares outstanding, used
\\ with the latest cached close price to derive market-cap
\\ size buckets (Large/Mid/Small) for US-domiciled stocks.
\\ - SEC EDGAR mutual-fund ticker map: fallback when Wikidata
\\ has no entry. Open-end mutual funds aren't exchange-listed
\\ and are usually missing from Wikidata; this fills in
\\ `geo::US,asset_class::Fund` (or `ETF` for company-map
\\ UIT entries with title hints).
\\
\\Always review the output before saving as `metadata.srf`.
\\Wikidata + EDGAR are free and have no per-day quota; the SEC
\\caps EDGAR at 10 req/sec which the rate limiter respects.
\\Requires ZFIN_USER_EMAIL in `.env` (SEC requires a contact in
\\the User-Agent header).
\\
\\Examples:
\\ zfin enrich > metadata.srf # whole portfolio (default file)
\\ zfin -p sample enrich > metadata.srf # whole portfolio (named file)
\\ zfin enrich AAPL >> metadata.srf # single symbol append
\\ zfin enrich fagix >> metadata.srf # symbol auto-uppercased
\\
,
// The framework uppercases the first positional argument before
// `parseArgs` sees it, so `enrich fagix` behaves like `enrich FAGIX`.
.uppercase_first_arg = true,
// `parseArgs` returns `error.UnexpectedArg` for too many arguments.
// NOTE(review): presumably the framework treats errors listed here
// as user mistakes rather than internal failures — confirm in
// framework.zig.
.user_errors = error{UnexpectedArg},
};
/// Validate the positional arguments of `enrich`.
///
/// - zero arguments: portfolio mode (`symbol == null`)
/// - one argument: symbol mode (`symbol` is that argument, not copied)
/// - anything more: prints a usage error to stderr via `ctx.io` and
///   fails with `error.UnexpectedArg`
pub fn parseArgs(ctx: *framework.RunCtx, cmd_args: []const []const u8) !ParsedArgs {
    switch (cmd_args.len) {
        0 => return ParsedArgs{ .symbol = null },
        1 => return ParsedArgs{ .symbol = cmd_args[0] },
        else => {
            cli.stderrPrint(ctx.io, "Error: 'enrich' takes at most one argument (a symbol). For portfolio-mode, omit the argument and use the global -p flag if needed.\n");
            return error.UnexpectedArg;
        },
    }
}
/// The three metadata columns `deriveMetadata` computes for a single
/// non-fund SRF row. All fields are borrowed string slices; none are
/// owned by this struct.
const DerivedMeta = struct {
/// Best-effort sector text: the title-cased Wikidata sector, or
/// the literal "TODO" when the record is an ETF or has no sector.
/// When title-cased, the text is produced via the caller's
/// `sector_buf` (presumably a slice into it — confirm against
/// `cli.fmt.toTitleCase`), so it must not outlive that buffer.
sector: []const u8,
/// Geo bucket as returned by `zfin.classification.geoFor`, e.g.
/// "US", "International Developed", "Emerging Markets", or
/// "Unknown".
geo: []const u8,
/// Asset class: "ETF", "Mutual Fund", or one of the size-shaped
/// US-stock buckets ("US Large Cap" / "US Mid Cap" / "US Small
/// Cap"). For non-US stocks where size data is unavailable, we
/// fall back to the geo bucket itself.
asset_class: []const u8,
};
/// Turn a `ClassificationRecord` plus an optional market-cap estimate
/// (dollars) into the sector / geo / asset-class strings for one SRF
/// row. Pure data transform; performs no I/O.
///
/// Rules, in priority order:
///
/// - geo: whatever `zfin.classification.geoFor` returns for the
///   record's country.
/// - sector: "TODO" for ETFs (funds are multi-sector, so the user
///   supplies the breakdown) and for records with no sector;
///   otherwise the record's sector title-cased through `sector_buf`.
///   "TODO" is used rather than "Unknown" so it cannot be mistaken
///   for a real taxonomy value.
/// - asset class: "ETF" for ETFs; "Mutual Fund" when the record's
///   asset class says so; for US geo a size bucket from `market_cap`
///   (>= $10B large, >= $2B mid, else small; "US Large Cap" when no
///   estimate is available, matching the old AlphaVantage default);
///   for every other geo, the geo bucket itself.
///
/// `sector_buf` is caller-provided scratch for the title-cased
/// sector; the returned `sector` must not outlive it.
fn deriveMetadata(
    classification: ClassificationRecord,
    market_cap: ?f64,
    sector_buf: []u8,
) DerivedMeta {
    // Size thresholds for US-domiciled stocks, kept together so the
    // cut-offs read as one table.
    const size = struct {
        fn bucket(cap: f64) []const u8 {
            if (cap >= 10_000_000_000) return "US Large Cap";
            if (cap >= 2_000_000_000) return "US Mid Cap";
            return "US Small Cap";
        }
    };

    const geo_bucket = zfin.classification.geoFor(classification.country);

    const sector_label: []const u8 = if (classification.is_etf)
        "TODO"
    else if (classification.sector) |raw_sector|
        cli.fmt.toTitleCase(sector_buf, raw_sector)
    else
        "TODO";

    const is_mutual_fund = if (classification.asset_class) |label|
        std.mem.eql(u8, label, "Mutual Fund")
    else
        false;
    const is_us = std.mem.eql(u8, geo_bucket, zfin.classification.geo.us);

    const asset_class_label: []const u8 = if (classification.is_etf)
        "ETF"
    else if (is_mutual_fund)
        "Mutual Fund"
    else if (!is_us)
        // Non-US fallback: the geo bucket doubles as the asset class;
        // the user can refine it in metadata.srf.
        geo_bucket
    else if (market_cap) |cap|
        size.bucket(cap)
    else
        // US stock without a market-cap estimate.
        "US Large Cap";

    return .{
        .sector = sector_label,
        .geo = geo_bucket,
        .asset_class = asset_class_label,
    };
}
/// Entry point of the `enrich` command: bootstrap metadata.srf rows
/// from Wikidata + EDGAR data.
///
/// Dispatches on `parsed.symbol`:
///
/// - set: symbol mode — enrich that one symbol. The framework has
///   already uppercased the argument, so `enrich fagix` and
///   `enrich FAGIX` produce identical output.
/// - null: portfolio mode — load the portfolio through the standard
///   `cli.loadPortfolio` flow (honoring `-p`/`--portfolio`) and
///   enrich every symbol in it.
///
/// Fails with `error.MissingDataService` when the run context carries
/// no data service.
pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
    const svc = ctx.svc orelse return error.MissingDataService;
    const sym = parsed.symbol orelse return enrichPortfolio(ctx, svc);
    return enrichSymbol(ctx.io, ctx.allocator, svc, sym, ctx.out);
}
/// Whether the caller should continue with the next symbol or
/// abort the whole batch after a fetch failure. Hard-stop errors
/// (no API key, auth rejected, rate-limited) will recur on every
/// subsequent symbol; soft-skip errors are per-symbol and other
/// symbols may still succeed.
///
/// Produced by `reportFetchError`, which maps `NoApiKey`,
/// `AuthError` and `RateLimited` to `hard_stop` and every other
/// error to `soft_skip`.
const FetchErrorAction = enum { hard_stop, soft_skip };
/// Explain on stderr why fetching `sym` failed and tell the caller
/// what to do next: `hard_stop` (the same failure will hit every
/// remaining symbol) or `soft_skip` (only this symbol is affected).
///
/// This is the one place a `DataError` is translated into user
/// output. Per AGENTS.md "Errors carry information", each message
/// names the specific failure — never a bare "fetch failed" — so the
/// user can act without reading the source. Errors outside the known
/// set are reported with their error name and soft-skipped.
///
/// Messages are formatted into a 256-byte stack buffer; if a symbol
/// is too long to fit, a shorter symbol-less message is printed
/// instead.
fn reportFetchError(io: std.Io, sym: []const u8, err: anyerror) FetchErrorAction {
    const Outcome = struct {
        text: []const u8,
        action: FetchErrorAction,
    };

    var scratch: [256]u8 = undefined;
    const outcome: Outcome = switch (err) {
        zfin.DataError.NoApiKey => .{
            .text = "Error: ZFIN_USER_EMAIL not set. Add it to .env (SEC EDGAR requires a contact email in the User-Agent header).\n",
            .action = .hard_stop,
        },
        zfin.DataError.AuthError => .{
            .text = "Error: SEC EDGAR rejected the request. Check ZFIN_USER_EMAIL in .env\n",
            .action = .hard_stop,
        },
        zfin.DataError.RateLimited => .{
            .text = std.fmt.bufPrint(
                &scratch,
                "Error: rate-limited on {s}. Wikidata/EDGAR have generous limits; check for upstream throttling.\n",
                .{sym},
            ) catch "Error: rate-limited. Try again later.\n",
            .action = .hard_stop,
        },
        zfin.DataError.NotFound => .{
            .text = std.fmt.bufPrint(
                &scratch,
                " {s}: not in Wikidata; mark sector/geo/asset_class manually\n",
                .{sym},
            ) catch " not in Wikidata; mark manually\n",
            .action = .soft_skip,
        },
        zfin.DataError.TransientError => .{
            .text = std.fmt.bufPrint(
                &scratch,
                " {s}: transient upstream failure; will need re-run\n",
                .{sym},
            ) catch " transient upstream failure; will need re-run\n",
            .action = .soft_skip,
        },
        else => .{
            .text = std.fmt.bufPrint(
                &scratch,
                " {s}: fetch failed ({t})\n",
                .{ sym, err },
            ) catch " fetch failed\n",
            .action = .soft_skip,
        },
    };

    cli.stderrPrint(io, outcome.text);
    return outcome.action;
}
/// Best-effort market-cap estimate for `sym`, in the currency of the
/// close price: shares outstanding (from the EDGAR entity facts keyed
/// by `cik`) multiplied by the close of the last candle the service
/// returns.
///
/// Returns null — never an error — when any input is missing: no CIK,
/// the facts fetch fails, no shares-outstanding fact is present, the
/// candle fetch fails, or there are no candles. Failures are swallowed
/// silently; the caller falls back to a default size bucket.
///
/// When several shares-outstanding facts are present, the last one in
/// the service's order wins.
fn estimateMarketCap(
    svc: *zfin.DataService,
    sym: []const u8,
    cik: ?[]const u8,
    opts: zfin.FetchOptions,
) ?f64 {
    const cik_str = if (cik) |value| value else return null;

    const facts = svc.getEntityFacts(cik_str, opts) catch return null;
    defer facts.deinit();

    var share_count: ?u64 = null;
    for (facts.data) |fact| {
        // Exhaustive on purpose: a new fact kind forces a decision here.
        switch (fact) {
            .shares_outstanding => |entry| {
                share_count = entry.shares_outstanding;
            },
        }
    }
    if (share_count == null) return null;

    const candles = svc.getCandles(sym, opts) catch return null;
    defer candles.deinit();

    const bars = candles.data;
    if (bars.len == 0) return null;
    const last_close = bars[bars.len - 1].close;

    return @as(f64, @floatFromInt(share_count.?)) * last_close;
}
/// Which summary counter a portfolio-mode symbol bumps once its
/// classification attempt has resolved. Computed by
/// `classifyForCounter` from a `FallbackKind` plus a flag saying
/// whether the classification fetch errored upstream.
///
/// - `wikidata_hit`: classified from Wikidata.
/// - `edgar_fallback`: classified from the EDGAR fallback.
/// - `failed`: no classification AND the fetch errored — a true
///   failure (no data anywhere).
/// - `manual_todo`: no classification after a fetch that succeeded
///   but came back empty/sparse — the symbol exists, it just needs
///   human attention.
const SummaryCounter = enum { wikidata_hit, edgar_fallback, failed, manual_todo };
/// Map a symbol's provenance to the summary bucket it belongs in.
///
/// A classified symbol (`.wikidata` / `.edgar_fallback`) maps to the
/// bucket of the same meaning regardless of `wikidata_errored`. For
/// `.none`, the flag decides: an upstream error means `.failed`, a
/// clean-but-empty fetch means `.manual_todo`.
fn classifyForCounter(kind: FallbackKind, wikidata_errored: bool) SummaryCounter {
    switch (kind) {
        .wikidata => return .wikidata_hit,
        .edgar_fallback => return .edgar_fallback,
        .none => {},
    }
    return if (wikidata_errored) .failed else .manual_todo;
}
/// Render the one-line provenance breadcrumb for `sym` into `buf`
/// and return the written slice.
///
/// The wording depends on `kind`; for `.none` it additionally depends
/// on whether an upstream error `err` is supplied (its name is then
/// included). `err` is ignored for the other kinds.
///
/// Returns null only when `buf` is too small for the message. The
/// 256-byte buffers used by callers in this file are enough for
/// ordinary symbols and error names, so null is a safety valve
/// rather than a normal outcome.
fn formatProvenanceMessage(buf: []u8, sym: []const u8, kind: FallbackKind, err: ?anyerror) ?[]const u8 {
    const rendered = switch (kind) {
        .wikidata => std.fmt.bufPrint(buf, " {s}: classified via Wikidata\n", .{sym}),
        .edgar_fallback => std.fmt.bufPrint(buf, " {s}: classified via EDGAR fallback (Wikidata sparse or empty)\n", .{sym}),
        .none => blk: {
            if (err) |cause| {
                break :blk std.fmt.bufPrint(buf, " {s}: no classification (Wikidata errored {t}, EDGAR had no entry); fill in by hand\n", .{ sym, cause });
            }
            break :blk std.fmt.bufPrint(buf, " {s}: no Wikidata or EDGAR entry; fill in by hand\n", .{sym});
        },
    };
    return rendered catch null;
}
/// Write a one-line breadcrumb to stderr saying how `sym` was
/// classified (Wikidata, EDGAR fallback, or not at all).
///
/// Used by single-symbol mode (`zfin enrich AAPL`), which has no
/// end-of-run summary; without it the user cannot tell where the SRF
/// row came from or whether it is a TODO stub. Portfolio mode does
/// not call this — it prints its own summary line instead.
///
/// Prints nothing if the message does not fit the 256-byte buffer.
fn stderrSymbolProvenance(io: std.Io, sym: []const u8, kind: FallbackKind, err: ?anyerror) void {
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, sym, kind, err) orelse return;
    cli.stderrPrint(io, line);
}
/// Enrich a single symbol and write appendable SRF line(s) to `out`.
///
/// Progress and provenance go to stderr through `io`; only SRF
/// content (data rows and `#` comment rows) goes to `out`.
///
/// Outcomes:
///
/// - classified ETF/fund: a header comment plus one row per sleeve
///   (see `emitEtfRows`).
/// - classified stock: an optional `# name` comment plus one row from
///   `deriveMetadata`.
/// - `NotFound`, or a successful fetch with zero records: a commented
///   all-TODO stub row.
/// - soft-skip fetch error: a commented stub row naming the error.
/// - hard-stop fetch error (no API key / auth / rate limit): the
///   error is reported on stderr and nothing is written to `out`.
///
/// Only write failures on `out` are returned as errors.
fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, sym: []const u8, out: *std.Io.Writer) !void {
    // Symbol is already uppercase: the framework's
    // `uppercase_first_arg = true` normalizes the CLI arg before
    // it reaches `parseArgs`.
    {
        var msg_buf: [128]u8 = undefined;
        const msg = std.fmt.bufPrint(&msg_buf, " Fetching {s}...\n", .{sym}) catch " ...\n";
        cli.stderrPrint(io, msg);
    }

    const opts: zfin.FetchOptions = .{};

    // `getClassification` runs the full Wikidata -> EDGAR fallback
    // chain inside the service; sparse-Wikidata symbols are merged
    // with EDGAR data before it returns.
    const result = svc.getClassification(sym, opts) catch |err| {
        switch (err) {
            zfin.DataError.NotFound => {
                // Neither Wikidata nor EDGAR knows this symbol.
                try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
                stderrSymbolProvenance(io, sym, .none, null);
                return;
            },
            else => {
                const action = reportFetchError(io, sym, err);
                switch (action) {
                    .hard_stop => return,
                    .soft_skip => {
                        try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
                        try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
                        stderrSymbolProvenance(io, sym, .none, err);
                        return;
                    },
                }
            },
        }
    };
    defer result.deinit();

    // A successful fetch is expected to carry one record, but indexing
    // an empty slice would be an out-of-bounds access. Treat "no
    // records" like "not found" and emit the same stub.
    if (result.data.len == 0) {
        try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
        try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
        stderrSymbolProvenance(io, sym, .none, null);
        return;
    }

    const c = result.data[0];
    if (c.is_etf) {
        try emitEtfRows(svc, allocator, sym, c, opts, out);
    } else {
        const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
        var sector_buf: [64]u8 = undefined;
        const derived = deriveMetadata(c, market_cap, &sector_buf);
        if (c.name) |name| {
            try out.print("# {s}\n", .{name});
        }
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
            sym, derived.sector, derived.geo, derived.asset_class,
        });
    }
    stderrSymbolProvenance(io, sym, kindFromSource(c.source), null);
}
/// Map a classification record's `source` string onto the
/// `FallbackKind` used by the progress/summary plumbing.
///
/// "wikidata" and "edgar_fallback" map to the tags of the same name
/// (exact, case-sensitive match); any other string maps to `.none`.
fn kindFromSource(source: []const u8) FallbackKind {
    const Mapping = struct { tag: []const u8, kind: FallbackKind };
    const known = [_]Mapping{
        .{ .tag = "wikidata", .kind = .wikidata },
        .{ .tag = "edgar_fallback", .kind = .edgar_fallback },
    };
    for (known) |entry| {
        if (std.mem.eql(u8, source, entry.tag)) return entry.kind;
    }
    return .none;
}
/// Write the SRF rows for an ETF/fund: one header comment followed by
/// the body rows produced by `emitFundLines`.
///
/// The sleeve breakdown comes from `loadFundEtfData` (NPORT-P sector
/// decomposition; absent when that load yields nothing). The
/// classification record `c` supplies the asset class (default
/// "Fund"), the geo (default "US"), and the inferred sector that
/// `emitFundLines` may substitute for a dominant "Equity / Corporate"
/// sleeve.
///
/// The header is `# SYM -- NAME` when the record has a name and
/// `# SYM` otherwise; records sourced from the EDGAR fallback get a
/// "(Wikidata had no entry)" marker.
fn emitEtfRows(
    svc: *zfin.DataService,
    allocator: std.mem.Allocator,
    sym: []const u8,
    c: ClassificationRecord,
    opts: zfin.FetchOptions,
    out: *std.Io.Writer,
) !void {
    const fund_data = loadFundEtfData(svc, allocator, sym, opts);
    defer if (fund_data) |owned| freeFundEtfData(allocator, owned);

    const sleeves: ?[]const FundSector = if (fund_data) |loaded| loaded.sectors else null;
    const fund_asset_class = c.asset_class orelse "Fund";
    const fund_geo = c.geo orelse "US";

    // Header comment: four variants, keyed on provenance then name.
    if (std.mem.eql(u8, c.source, "edgar_fallback")) {
        if (c.name) |fund_name| {
            try out.print("# {s} -- {s} (Wikidata had no entry)\n", .{ sym, fund_name });
        } else {
            try out.print("# {s} -- (Wikidata had no entry)\n", .{sym});
        }
    } else {
        if (c.name) |fund_name| {
            try out.print("# {s} -- {s}\n", .{ sym, fund_name });
        } else {
            try out.print("# {s}\n", .{sym});
        }
    }

    try emitFundLines(sym, fund_asset_class, sleeves, c.sector, fund_geo, out);
}
/// One sector slice of a fund's NPORT-P breakdown.
///
/// `description` is NPORT-P's human-readable category (e.g.
/// "Equity / Corporate", "Debt / US Treasury"), not a GICS sector.
/// Pure-equity funds collapse to a single ~100% "Equity / Corporate"
/// row; multi-asset funds (FAGIX-shape) have a meaningfully diverse
/// breakdown. The user can refine rows to GICS by hand if they want
/// to track stock-fund decomposition.
pub const FundSector = struct {
    /// Category text. This struct does not manage the memory:
    /// slices built by `loadFundEtfData` hold allocator-owned copies
    /// that are released by `freeFundSectors` / `freeFundEtfData`;
    /// hand-built slices may point at any memory that outlives them.
    description: []const u8,
    /// Share of the portfolio, in percent (compared against 95.0 by
    /// `hasDominantEquitySector`).
    pct: f64,
};

/// Report whether a fund's NPORT-P breakdown has a dominant
/// "Equity / Corporate" sleeve — the precondition for replacing that
/// sleeve with an inferred sector.
///
/// "Dominant" means a row whose description is exactly
/// "Equity / Corporate" with `pct` of at least 95.0. Multi-asset
/// funds (FAGIX-shape: 48% Debt + 22% Equity + ...) do not qualify
/// and keep their NPORT-P decomposition. A null or empty breakdown
/// yields false.
fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool {
    if (fund_sectors) |rows| {
        for (rows) |row| {
            // Written as a negated `>=` so a NaN pct is rejected,
            // exactly as a plain `>=` test would reject it.
            if (!(row.pct >= 95.0)) continue;
            if (std.mem.eql(u8, row.description, "Equity / Corporate")) return true;
        }
    }
    return false;
}
/// Write the body rows for a fund-classified symbol.
///
/// With a non-empty `fund_sectors` breakdown, one row is written per
/// sector, each carrying its `pct:num:` value. Without one (null or
/// empty — e.g. the NPORT-P load failed), a single row is written
/// whose sector is `inferred_sector`, or "TODO" when there is none.
///
/// `asset_class` is decided by the caller (e.g. "Fund" or "ETF") and
/// copied onto every row.
///
/// `inferred_sector`, when non-null AND the breakdown has a dominant
/// "Equity / Corporate" sleeve (see `hasDominantEquitySector`),
/// replaces the sector text of every "Equity / Corporate" row while
/// keeping that row's pct. All other rows (cash sleeves, dust
/// derivatives) keep their NPORT-P category. When inference does not
/// apply, the decomposition is emitted unchanged.
///
/// `geo` is applied uniformly to every row and defaults to "US" when
/// null; NPORT-P does not describe the holdings' geography, so the
/// caller passes an inferred geo when it has one.
fn emitFundLines(
    sym: []const u8,
    asset_class: []const u8,
    fund_sectors: ?[]const FundSector,
    inferred_sector: ?[]const u8,
    geo: ?[]const u8,
    out: *std.Io.Writer,
) !void {
    const region = geo orelse "US";

    const has_rows = if (fund_sectors) |candidate| candidate.len > 0 else false;
    if (!has_rows) {
        // No breakdown at all: one row, using the inferred sector if
        // there is one and "TODO" otherwise.
        const fallback_sector = inferred_sector orelse "TODO";
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, fallback_sector, region, asset_class });
        return;
    }

    const rows = fund_sectors.?;
    // Non-null only when inference is allowed to fire for this fund.
    const replacement: ?[]const u8 = if (hasDominantEquitySector(rows)) inferred_sector else null;

    for (rows) |row| {
        const label = blk: {
            if (replacement) |inferred| {
                if (std.mem.eql(u8, row.description, "Equity / Corporate")) break :blk inferred;
            }
            break :blk row.description;
        };
        try out.print(
            "symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n",
            .{ sym, label, region, asset_class, row.pct },
        );
    }
}
/// What `getEtfMetrics` provides that `enrich` actually uses:
/// the canonical fund name (NPORT-P `<seriesName>`, falling back
/// to the submissions-feed `entity_name`) plus the per-sector
/// portfolio breakdown. Either field may be null if NPORT-P data
/// didn't include it. Both fields are owned by the allocator
/// passed to the loader; free via `freeFundEtfData`.
///
/// `loadFundEtfData` never returns a value with both fields null;
/// it returns null instead.
pub const FundEtfData = struct {
// Allocator-owned copy of the first profile record's series name.
series_name: ?[]const u8,
// Allocator-owned slice; each entry's `description` is also an
// allocator-owned copy. Null when there were no sector records.
sectors: ?[]FundSector,
};
/// Pull NPORT-P data for `sym` through `svc.getEtfMetrics` and copy
/// the parts `enrich` uses into memory owned by `allocator`.
///
/// Returns null when the fetch fails, when an allocation fails while
/// copying, or when the data contains neither a series name nor any
/// sector record. Otherwise returns a `FundEtfData` whose fields are
/// independent — a fund may have a name but no sectors, or the
/// reverse. Release the result with `freeFundEtfData`.
///
/// Ownership: every copy made here is released again on all null
/// paths. The cleanup uses plain `defer`, not `errdefer`: this
/// function returns an optional rather than an error union, so an
/// `errdefer` would never run and partial copies would leak.
fn loadFundEtfData(svc: *zfin.DataService, allocator: std.mem.Allocator, sym: []const u8, opts: zfin.FetchOptions) ?FundEtfData {
    const result = svc.getEtfMetrics(sym, opts) catch return null;
    defer result.deinit();

    // Frees whatever is still in `list` at exit. After a successful
    // `toOwnedSlice` the list is empty, so this becomes a no-op and
    // the returned slice is left alone.
    var list: std.ArrayList(FundSector) = .empty;
    defer {
        for (list.items) |s| allocator.free(s.description);
        list.deinit(allocator);
    }

    // Frees the name unless ownership was handed to the return value
    // (signalled by resetting the variable to null below).
    var series_name: ?[]const u8 = null;
    defer if (series_name) |s| allocator.free(s);

    for (result.data) |rec| switch (rec) {
        .profile => |p| {
            // Take the first profile record's series_name.
            // `parseNportP` already filters "N/A" sentinels and
            // empty strings before populating this field; the
            // submissions-feed fallback (`entity_name`) is also
            // already applied. Whatever lands here is the most
            // authoritative name we have for the fund.
            if (series_name == null) {
                if (p.series_name) |sn| {
                    series_name = allocator.dupe(u8, sn) catch return null;
                }
            }
        },
        .sector => |s| {
            const desc = allocator.dupe(u8, s.description) catch return null;
            list.append(allocator, .{ .description = desc, .pct = s.pct_of_portfolio }) catch {
                // Not yet in the list, so the deferred cleanup
                // would miss it.
                allocator.free(desc);
                return null;
            };
        },
        .holding => {},
    };

    // If converting to an owned slice fails, `sectors` is null and the
    // deferred cleanup frees the list contents.
    const sectors: ?[]FundSector = if (list.items.len == 0)
        null
    else
        list.toOwnedSlice(allocator) catch null;

    // If both fields are null there's nothing useful to return.
    if (series_name == null and sectors == null) return null;

    // Hand the name to the caller and disarm its deferred free.
    const owned_name = series_name;
    series_name = null;
    return .{
        .series_name = owned_name,
        .sectors = sectors,
    };
}
/// Release a sector slice whose entries hold allocator-owned
/// `description` strings: frees every description, then the slice.
///
/// `freeFundEtfData` calls this for the `sectors` field; it is also
/// kept as a separate function because tests construct such slices
/// directly. Every description and the slice itself must have been
/// allocated with `allocator`.
fn freeFundSectors(allocator: std.mem.Allocator, sectors: []FundSector) void {
    var idx: usize = 0;
    while (idx < sectors.len) : (idx += 1) {
        allocator.free(sectors[idx].description);
    }
    allocator.free(sectors);
}
/// Release everything owned by a `FundEtfData` produced by
/// `loadFundEtfData`: the `sectors` slice (each entry's description,
/// then the slice) and the `series_name` string. Null fields are
/// skipped. `allocator` must be the one given to the loader.
fn freeFundEtfData(allocator: std.mem.Allocator, data: FundEtfData) void {
    if (data.sectors) |rows| {
        freeFundSectors(allocator, rows);
    }
    if (data.series_name) |fund_name| {
        allocator.free(fund_name);
    }
}
/// Provenance tag derived from a `ClassificationRecord.source`
/// string. Used for per-symbol summary counters and progress
/// messages.
///
/// `kindFromSource` maps the source string "wikidata" to
/// `.wikidata`, "edgar_fallback" to `.edgar_fallback`, and anything
/// else to `.none`. `.none` is also passed directly when no
/// classification was obtained at all (stub rows).
const FallbackKind = enum { wikidata, edgar_fallback, none };
/// Sort `syms` in place into ascending byte-wise order (so uppercase
/// ASCII sorts before lowercase, and a prefix sorts before any longer
/// string that starts with it).
///
/// `enrichPortfolio` uses this to make its output stable and
/// diff-friendly. Only the slice's element order changes; nothing is
/// allocated and the strings themselves are untouched.
fn sortSymbolsAlphabetically(syms: [][]const u8) void {
    const ByteOrder = struct {
        fn ascending(_: void, lhs: []const u8, rhs: []const u8) bool {
            return std.mem.order(u8, lhs, rhs) == .lt;
        }
    };
    std.mem.sort([]const u8, syms, {}, ByteOrder.ascending);
}
/// Enrich every symbol in the resolved portfolio and write a complete
/// SRF file to `ctx.out`.
///
/// The portfolio is loaded through `cli.loadPortfolio`, so global
/// `-p`/`--portfolio` patterns are honored — the same multi-file
/// union-merge as the rest of the CLI. If loading yields nothing, the
/// function returns without writing anything.
///
/// Per symbol, in alphabetical order:
///
/// - CUSIP-like symbols are not fetched; a commented stub is written
///   (with the position's note, if any) for manual fill-in.
/// - `NotFound`, or a successful fetch with zero records: commented
///   all-TODO stub, counted as "manual fill-in".
/// - any other fetch error: reported on stderr, commented stub naming
///   the error, counted as a failure. A hard-stop error (no API key /
///   auth / rate limit) additionally ends the loop, since every
///   remaining symbol would fail the same way.
/// - otherwise: ETF/fund rows via `emitEtfRows`, or one stock row via
///   `deriveMetadata`; counted by provenance.
///
/// A summary comment closes the file. After a hard stop it is
/// followed by a line stating how many symbols were never attempted.
/// Progress goes to stderr; only write failures on `ctx.out` are
/// returned as errors.
fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void {
    const io = ctx.io;
    const allocator = ctx.allocator;
    const out = ctx.out;

    var loaded = cli.loadPortfolio(ctx, ctx.today) orelse return;
    defer loaded.deinit(allocator);
    const positions = loaded.positions;
    const syms = loaded.syms;

    // Sort symbols alphabetically for stable, diff-friendly output;
    // the incoming order is not stable across portfolio edits. The
    // sort mutates `loaded.syms` in place (hence the `@constCast`);
    // nothing else in this function depends on the original order.
    sortSymbolsAlphabetically(@constCast(syms));

    // The Wikidata -> EDGAR fallback chain runs inside
    // `svc.getClassification`; this loop only consumes the record.
    const opts: zfin.FetchOptions = .{};

    try out.print("#!srfv1\n", .{});
    try out.print("# Portfolio classification metadata\n", .{});
    try out.print("# Generated from Wikidata + SEC EDGAR data\n", .{});
    try out.print("# Edit as needed: sector, geo, asset_class, pct:num:N\n", .{});
    try out.print("#\n", .{});
    try out.print("# For ETFs/funds with multi-class exposure, add multiple lines\n", .{});
    try out.print("# with pct:num: values that sum to ~100\n\n", .{});

    var wikidata_hits: usize = 0;
    var edgar_fallback: usize = 0;
    var manual_todo: usize = 0;
    var cusip_skipped: usize = 0;
    var failed: usize = 0;
    // Non-zero only when a hard-stop error ended the loop early.
    var not_attempted: usize = 0;

    for (syms, 0..) |sym, i| {
        // CUSIPs cannot be looked up; emit a stub for manual fill-in.
        if (isCusipLike(sym)) {
            // Use the first matching position's note, if it has one,
            // as a human-readable hint next to the CUSIP.
            var note: ?[]const u8 = null;
            for (positions) |pos| {
                if (std.mem.eql(u8, pos.symbol, sym)) {
                    if (pos.note) |n| {
                        note = n;
                    }
                    break;
                }
            }
            try out.print("# CUSIP {s}", .{sym});
            if (note) |n| try out.print(" ({s})", .{n});
            try out.print(" -- fill in manually\n", .{});
            try out.print("# symbol::{s},asset_class::TODO,geo::TODO\n\n", .{sym});
            cusip_skipped += 1;
            continue;
        }

        // Progress to stderr
        {
            var msg_buf: [128]u8 = undefined;
            const msg = std.fmt.bufPrint(&msg_buf, " [{d}/{d}] {s}...\n", .{ i + 1, syms.len, sym }) catch " ...\n";
            cli.stderrPrint(io, msg);
        }

        const result = svc.getClassification(sym, opts) catch |err| {
            switch (err) {
                zfin.DataError.NotFound => {
                    // Neither Wikidata nor EDGAR knows this
                    // symbol -- fill in by hand.
                    try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                    try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                    manual_todo += 1;
                    continue;
                },
                else => {
                    const action = reportFetchError(io, sym, err);
                    try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
                    try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                    failed += 1;
                    switch (action) {
                        .hard_stop => {
                            // Every remaining symbol will hit the
                            // same condition (no API key / auth
                            // fail / rate limit). Stop the batch
                            // with a clear note so the user knows
                            // how many were skipped.
                            var rem_buf: [256]u8 = undefined;
                            not_attempted = syms.len - i - 1;
                            const rem_msg = std.fmt.bufPrint(
                                &rem_buf,
                                "Stopping enrichment: {d} symbol(s) not yet fetched. Rerun once the issue is resolved.\n",
                                .{not_attempted},
                            ) catch "Stopping enrichment.\n";
                            cli.stderrPrint(io, rem_msg);
                            break;
                        },
                        .soft_skip => continue,
                    }
                },
            }
        };
        // Runs at the end of each iteration, including via `continue`.
        defer result.deinit();

        // A successful fetch is expected to carry one record, but
        // indexing an empty slice would be an out-of-bounds access.
        // Treat "no records" like "not found".
        if (result.data.len == 0) {
            try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
            try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
            manual_todo += 1;
            continue;
        }

        const c = result.data[0];
        if (c.is_etf) {
            try emitEtfRows(svc, allocator, sym, c, opts, out);
            try out.print("\n", .{});
        } else {
            const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
            var sector_buf: [64]u8 = undefined;
            const derived = deriveMetadata(c, market_cap, &sector_buf);
            if (c.name) |name| {
                try out.print("# {s}\n", .{name});
            }
            try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
                sym, derived.sector, derived.geo, derived.asset_class,
            });
        }
        switch (kindFromSource(c.source)) {
            .wikidata => wikidata_hits += 1,
            .edgar_fallback => edgar_fallback += 1,
            .none => manual_todo += 1, // shouldn't happen for a successful return
        }
    }

    // Summary. Every symbol that was attempted lands in exactly one
    // bucket. `failed` counts symbols whose classification fetch
    // returned an error other than NotFound. After a hard stop the
    // buckets cover only the attempted symbols; the remainder is
    // reported separately below so the summary is not misleading.
    const filled = wikidata_hits + edgar_fallback;
    try out.print("# ---\n", .{});
    try out.print("# Enriched {d} symbols: {d} fully classified ({d} from Wikidata, {d} from EDGAR fallback), {d} need manual fill-in, {d} CUSIP-skipped, {d} unrecoverable failures\n", .{
        syms.len, filled, wikidata_hits, edgar_fallback, manual_todo, cusip_skipped, failed,
    });
    if (not_attempted > 0) {
        try out.print("# Stopped early: {d} symbol(s) were not attempted and are missing from this file\n", .{not_attempted});
    }
    try out.print("# Review and edit this file, then save as metadata.srf\n", .{});
}
// ── Tests ────────────────────────────────────────────────────
test "parseArgs: accepts a symbol argument" {
    // parseArgs only touches `io` (and only on its error path), so
    // the rest of the context may stay undefined.
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const argv = [_][]const u8{"AAPL"};
    const result = try parseArgs(&run_ctx, &argv);

    const symbol = result.symbol orelse return error.MissingSymbol;
    try std.testing.expectEqualStrings("AAPL", symbol);
}
test "parseArgs: no argument means portfolio mode" {
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    // An empty argument list selects portfolio mode: no symbol.
    const argv = [_][]const u8{};
    const result = try parseArgs(&run_ctx, &argv);

    try std.testing.expect(result.symbol == null);
}
test "parseArgs: extra args error" {
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    // More than one positional argument is rejected.
    const argv = [_][]const u8{ "AAPL", "extra" };
    const outcome = parseArgs(&run_ctx, &argv);

    try std.testing.expectError(error.UnexpectedArg, outcome);
}
test "deriveMetadata: US large cap stock" {
    const record: ClassificationRecord = .{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .sector = "technology",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var scratch: [64]u8 = undefined;

    // $3T is well above the $10B large-cap threshold.
    const got = deriveMetadata(record, 3_000_000_000_000, &scratch);

    // The lower-case Wikidata sector is title-cased for display.
    try std.testing.expectEqualStrings("Technology", got.sector);
    try std.testing.expectEqualStrings("US", got.geo);
    try std.testing.expectEqualStrings("US Large Cap", got.asset_class);
}
test "deriveMetadata: US small cap stock" {
    const record: ClassificationRecord = .{
        .symbol = "TINY",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var scratch: [64]u8 = undefined;

    // $500M is below the $2B mid-cap threshold.
    const got = deriveMetadata(record, 500_000_000, &scratch);

    try std.testing.expectEqualStrings("US Small Cap", got.asset_class);
}
test "deriveMetadata: US mid cap stock" {
    const record: ClassificationRecord = .{
        .symbol = "MID",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var scratch: [64]u8 = undefined;

    // $5B sits between the $2B and $10B thresholds.
    const got = deriveMetadata(record, 5_000_000_000, &scratch);

    try std.testing.expectEqualStrings("US Mid Cap", got.asset_class);
}
test "deriveMetadata: ETF sets asset_class to ETF regardless of size" {
    const record: ClassificationRecord = .{
        .symbol = "VTI",
        .name = "Vanguard Total Stock Market ETF",
        .country = "US",
        .is_etf = true,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var scratch: [64]u8 = undefined;

    // The market cap would qualify as large cap; the ETF flag wins.
    const got = deriveMetadata(record, 1_000_000_000_000, &scratch);

    try std.testing.expectEqualStrings("ETF", got.asset_class);
}
test "deriveMetadata: international stock falls back to geo bucket" {
    const record: ClassificationRecord = .{
        .symbol = "TM",
        .country = "JP",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var scratch: [64]u8 = undefined;

    // Size buckets are US-only, so the market cap is ignored here.
    const got = deriveMetadata(record, 200_000_000_000, &scratch);

    try std.testing.expectEqualStrings("International Developed", got.geo);
    try std.testing.expectEqualStrings("International Developed", got.asset_class);
}
test "deriveMetadata: emerging-market stock geo bucket" {
    const record: ClassificationRecord = .{
        .symbol = "BABA",
        .country = "CN",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var scratch: [64]u8 = undefined;

    const got = deriveMetadata(record, 200_000_000_000, &scratch);

    // For non-US stocks the geo bucket doubles as the asset class.
    try std.testing.expectEqualStrings("Emerging Markets", got.geo);
    try std.testing.expectEqualStrings("Emerging Markets", got.asset_class);
}
test "deriveMetadata: missing market cap defaults US to Large Cap" {
    // No market cap is available (null); a US record is expected to
    // default to "US Large Cap".
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .country = "US",
        .symbol = "UNK",
    };
    const result = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings("US Large Cap", result.asset_class);
}
test "deriveMetadata: unknown country -> Unknown geo" {
    // With a null country and no market cap, both geo and asset_class
    // are expected to read "Unknown".
    const testing = std.testing;
    const placeholder = "Unknown";
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .country = null,
        .symbol = "WEIRD",
    };
    const result = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings(placeholder, result.geo);
    try testing.expectEqualStrings(placeholder, result.asset_class);
}
test "deriveMetadata: ETF gets sector::TODO regardless of Wikidata sector field" {
    // An ETF spans multiple sectors, so any sector value that came in
    // on the record (Wikidata occasionally attaches an industry to an
    // ETF entity) must be replaced with TODO. The user is expected to
    // supply their own sector breakdown instead of inheriting a stray
    // industry string.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .sector = "stale industry value",
        .is_etf = true,
        .country = "US",
        .symbol = "VTI",
    };
    const result = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings("TODO", result.sector);
    try testing.expectEqualStrings("ETF", result.asset_class);
}
test "deriveMetadata: missing sector -> TODO (not 'Unknown')" {
    // Sparse-entity case: the record arrives with a null sector.
    // The derived sector must be TODO so the user knows a manual
    // fill-in is required; "Unknown" could be mistaken for a real
    // taxonomy bucket.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .sector = null,
        .country = "US",
        .symbol = "SPARSE",
    };
    const result = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings("TODO", result.sector);
}
test "deriveMetadata: stock with sector preserved (canonical sector pass-through)" {
    // Sectors are canonicalized by the Wikidata parser before they
    // reach deriveMetadata, which only title-cases them. A canonical
    // (already title-cased) sector string must therefore come back
    // unchanged.
    const testing = std.testing;
    const canonical_sector = "Technology";
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .sector = canonical_sector,
        .country = "US",
        .symbol = "MSFT",
    };
    const result = deriveMetadata(record, 3_000_000_000_000, &scratch);
    try testing.expectEqualStrings(canonical_sector, result.sector);
}
test "deriveMetadata: asset_class == 'Mutual Fund' short-circuits before size buckets" {
    // A record already labelled "Mutual Fund" is trusted as-is:
    // size-bucket derivation is skipped even though a market cap is
    // supplied.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .asset_class = "Mutual Fund",
        .sector = "Financial Services",
        .country = "US",
        .symbol = "VFORX",
    };
    const result = deriveMetadata(record, 50_000_000_000, &scratch);
    try testing.expectEqualStrings("Mutual Fund", result.asset_class);
    // Mutual funds are not ETFs, so the sector is kept (title-cased)
    // rather than being overridden to TODO.
    try testing.expectEqualStrings("Financial Services", result.sector);
}
test "deriveMetadata: asset_class == 'Mutual Fund' with no market cap" {
    // In practice mutual funds come with a null market cap (there is
    // no shares-outstanding XBRL tag). The Mutual Fund branch must
    // still win over the "default to Large Cap" behavior.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .asset_class = "Mutual Fund",
        .country = "US",
        .symbol = "FAGIX",
    };
    const result = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings("Mutual Fund", result.asset_class);
}
test "deriveMetadata: asset_class set but not 'Mutual Fund' -> falls through to size buckets" {
    // Defensive check: "Mutual Fund" is the only asset_class literal
    // that short-circuits today. Any other string must be ignored and
    // the usual size-bucket derivation applied.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .source = "wikidata",
        .as_of = "2026-05-29",
        .sector = "Technology",
        .asset_class = "Open-End Fund", // hypothetical non-recognized value
        .country = "US",
        .symbol = "AAPL",
    };
    const result = deriveMetadata(record, 3_000_000_000_000, &scratch);
    try testing.expectEqualStrings("US Large Cap", result.asset_class);
}
// ── reportFetchError ────────────────────────────────────────
//
// `reportFetchError` writes a user-facing diagnostic to stderr
// (no-op under `builtin.is_test`) and returns either
// `.hard_stop` (every subsequent symbol will hit the same
// condition; abort the batch) or `.soft_skip` (per-symbol; keep
// going). The tests verify the action classification per error
// variant — the stderr text isn't asserted because stderr is
// suppressed in test mode.
test "reportFetchError: NoApiKey -> hard_stop" {
    // A missing API key is classified as a batch-aborting condition.
    const testing = std.testing;
    const expected: FetchErrorAction = .hard_stop;
    const err = zfin.DataError.NoApiKey;
    try testing.expectEqual(expected, reportFetchError(testing.io, "AAPL", err));
}
test "reportFetchError: AuthError -> hard_stop" {
    // An authentication failure is classified as a batch-aborting
    // condition.
    const testing = std.testing;
    const expected: FetchErrorAction = .hard_stop;
    const err = zfin.DataError.AuthError;
    try testing.expectEqual(expected, reportFetchError(testing.io, "AAPL", err));
}
test "reportFetchError: RateLimited -> hard_stop" {
    // Rate limiting is classified as a batch-aborting condition.
    const testing = std.testing;
    const expected: FetchErrorAction = .hard_stop;
    const err = zfin.DataError.RateLimited;
    try testing.expectEqual(expected, reportFetchError(testing.io, "AAPL", err));
}
test "reportFetchError: NotFound -> soft_skip" {
    // A not-found symbol is a per-symbol problem; the batch continues.
    const testing = std.testing;
    const expected: FetchErrorAction = .soft_skip;
    const err = zfin.DataError.NotFound;
    try testing.expectEqual(expected, reportFetchError(testing.io, "AAPL", err));
}
test "reportFetchError: TransientError -> soft_skip" {
    // A transient failure is a per-symbol problem; the batch continues.
    const testing = std.testing;
    const expected: FetchErrorAction = .soft_skip;
    const err = zfin.DataError.TransientError;
    try testing.expectEqual(expected, reportFetchError(testing.io, "AAPL", err));
}
test "reportFetchError: unknown error variant -> soft_skip (catch-all)" {
    // Errors without an explicit prong (a generic FetchFailed, for
    // instance) land in the `else` branch and are soft-skipped.
    // Continuing past a single symbol's failure is the safer default
    // compared with aborting the whole batch on an unexpected error
    // class.
    const testing = std.testing;
    const expected: FetchErrorAction = .soft_skip;
    const err = zfin.DataError.FetchFailed;
    try testing.expectEqual(expected, reportFetchError(testing.io, "AAPL", err));
}
test "reportFetchError: long symbol still classifies correctly (bufPrint fallback)" {
    // reportFetchError formats into a 256-byte internal msg_buf, so a
    // symbol close to that length exercises the bufPrint-failed
    // fallback. The returned action must be unaffected by any message
    // truncation.
    const testing = std.testing;
    const expected: FetchErrorAction = .soft_skip;
    const oversized_symbol = "X" ** 200;
    const outcome = reportFetchError(testing.io, oversized_symbol, zfin.DataError.NotFound);
    try testing.expectEqual(expected, outcome);
}
// ── formatProvenanceMessage ────────────────────────────────────
test "formatProvenanceMessage: wikidata -> 'classified via Wikidata' line" {
    // The Wikidata breadcrumb must mention the symbol and the source,
    // and be newline-terminated.
    const testing = std.testing;
    var storage: [256]u8 = undefined;
    const line = formatProvenanceMessage(&storage, "AAPL", .wikidata, null) orelse return error.Format;
    const needles = [_][]const u8{ "AAPL", "Wikidata" };
    for (needles) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
    try testing.expect(std.mem.endsWith(u8, line, "\n"));
}
test "formatProvenanceMessage: edgar_fallback -> 'classified via EDGAR fallback' line" {
    // The EDGAR-fallback breadcrumb must mention the symbol and the
    // phrase "EDGAR fallback".
    const testing = std.testing;
    var storage: [256]u8 = undefined;
    const line = formatProvenanceMessage(&storage, "SOXX", .edgar_fallback, null) orelse return error.Format;
    const needles = [_][]const u8{ "SOXX", "EDGAR fallback" };
    for (needles) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}
test "formatProvenanceMessage: none with no error -> 'no Wikidata or EDGAR entry'" {
    // With no source and no upstream error, the breadcrumb names the
    // symbol, states that neither source had an entry, and tells the
    // user to fill the row in by hand.
    const testing = std.testing;
    var storage: [256]u8 = undefined;
    const line = formatProvenanceMessage(&storage, "MISSING", .none, null) orelse return error.Format;
    const needles = [_][]const u8{ "MISSING", "no Wikidata or EDGAR entry", "fill in by hand" };
    for (needles) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}
test "formatProvenanceMessage: none with error -> includes error name" {
    // Wikidata errored and EDGAR had nothing: the breadcrumb carries
    // the upstream error's name so the user can react to it (for
    // RateLimited, wait and rerun).
    const testing = std.testing;
    var storage: [256]u8 = undefined;
    const line = formatProvenanceMessage(&storage, "FOO", .none, error.RateLimited) orelse return error.Format;
    const needles = [_][]const u8{ "FOO", "RateLimited", "Wikidata errored" };
    for (needles) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}
test "formatProvenanceMessage: small buffer returns null (safety valve)" {
    // None of the message variants fit in 16 bytes. The formatter must
    // report that with null instead of crashing; callers interpret
    // null as "skip the breadcrumb".
    const testing = std.testing;
    var tiny: [16]u8 = undefined;
    const maybe_line = formatProvenanceMessage(&tiny, "AAPL", .edgar_fallback, null);
    try testing.expect(maybe_line == null);
}
test "formatProvenanceMessage: messages have leading two-space indent" {
    // The breadcrumb is indented like the rest of enrich's stderr
    // output (progress messages and fetch breadcrumbs share the " "
    // prefix).
    const testing = std.testing;
    var storage: [256]u8 = undefined;
    const line = formatProvenanceMessage(&storage, "X", .edgar_fallback, null) orelse return error.Format;
    const has_prefix = std.mem.startsWith(u8, line, " ");
    try testing.expect(has_prefix);
}
// ── classifyForCounter ────────────────────────────────────────
test "classifyForCounter: wikidata -> wikidata_hit regardless of error arg" {
    // A Wikidata classification counts as a hit for either value of
    // the error flag.
    const testing = std.testing;
    for ([_]bool{ false, true }) |errored| {
        try testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, errored));
    }
}
test "classifyForCounter: edgar_fallback -> edgar_fallback regardless of wikidata error" {
    // When EDGAR rescues a symbol the file ends up with a usable line,
    // so it is tallied as edgar_fallback whether or not Wikidata
    // errored upstream.
    const testing = std.testing;
    for ([_]bool{ true, false }) |errored| {
        try testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, errored));
    }
}
test "classifyForCounter: none + wikidata errored -> failed (no data anywhere)" {
    // Genuine failure: the Wikidata request errored and EDGAR has no
    // row, so the file holds nothing usable for the symbol. The user
    // has to rerun or fill it in manually.
    const testing = std.testing;
    const verdict = classifyForCounter(.none, true);
    try testing.expectEqual(SummaryCounter.failed, verdict);
}
test "classifyForCounter: none + wikidata succeeded but empty -> manual_todo" {
    // Wikidata answered but with empty/useless data, and EDGAR has no
    // row. metadata.srf gets a TODO stub for the user to complete.
    // This is distinct from `failed`: a retry would not help because
    // Wikidata simply has no entry.
    const testing = std.testing;
    const verdict = classifyForCounter(.none, false);
    try testing.expectEqual(SummaryCounter.manual_todo, verdict);
}
test "classifyForCounter: covers all (FallbackKind, bool) input combinations" {
    // Full truth table in one place, so any policy change is forced
    // to touch this test.
    const testing = std.testing;
    const flags = [_]bool{ false, true };
    // The first two kinds ignore the error flag entirely.
    for (flags) |errored| {
        try testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, errored));
    }
    for (flags) |errored| {
        try testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, errored));
    }
    // `.none` is the only kind whose counter depends on the flag.
    try testing.expectEqual(SummaryCounter.manual_todo, classifyForCounter(.none, false));
    try testing.expectEqual(SummaryCounter.failed, classifyForCounter(.none, true));
}
// ── hasDominantEquitySector ──────────────────────────────────
test "hasDominantEquitySector: single 99% Equity / Corporate -> true" {
    // One "Equity / Corporate" row at 99.7% plus a negligible cash-like
    // row: the fund counts as equity-dominant.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.7 },
        .{ .description = "Short-Term Investment Vehicle / Registered Fund", .pct = 0.19 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try testing.expect(dominant);
}
test "hasDominantEquitySector: 95% threshold is inclusive" {
    // Exactly 95.0% must already satisfy the dominance check.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 95.0 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try testing.expect(dominant);
}
test "hasDominantEquitySector: 94.99% does NOT trigger" {
    // Just under the threshold: 94.99% must not count as dominant.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 94.99 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try testing.expect(!dominant);
}
test "hasDominantEquitySector: multi-asset fund (FAGIX-shape) -> false" {
    // FAGIX-like mix: "Equity / Corporate" is only about 22% alongside
    // debt and loan sleeves, far below the 95% threshold.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 47.69 },
        .{ .description = "Equity / Corporate", .pct = 22.49 },
        .{ .description = "Loan / Corporate", .pct = 9.99 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try testing.expect(!dominant);
}
test "hasDominantEquitySector: pure-debt fund -> false" {
    // VBTLX-like shape: every row is "Debt / *" and there is no
    // "Equity / Corporate" row whatsoever.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 50.0 },
        .{ .description = "Debt / US Treasury", .pct = 30.0 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try testing.expect(!dominant);
}
test "hasDominantEquitySector: null and empty -> false" {
    // Neither a null sector list nor a zero-length one can be
    // equity-dominant.
    const testing = std.testing;
    const from_null = hasDominantEquitySector(null);
    try testing.expect(!from_null);
    const no_rows = [_]FundSector{};
    const from_empty = hasDominantEquitySector(&no_rows);
    try testing.expect(!from_empty);
}
test "emitFundLines: null sectors -> single TODO line" {
    // With no sector data, exactly one line with sector::TODO is
    // written.
    const testing = std.testing;
    var storage: [256]u8 = undefined;
    var sink = std.Io.Writer.fixed(&storage);
    try emitFundLines("VTI", "ETF", null, null, null, &sink);
    const expected = "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n";
    try testing.expectEqualStrings(expected, sink.buffered());
}
test "emitFundLines: populated sectors -> one line per sector with pct" {
    // Each sector row must yield its own SRF line carrying the pct
    // value, and no TODO placeholder may appear.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 47.69 },
        .{ .description = "Equity / Corporate", .pct = 22.49 },
    };
    var storage: [512]u8 = undefined;
    var sink = std.Io.Writer.fixed(&storage);
    try emitFundLines("FAGIX", "Fund", &rows, null, null, &sink);
    const emitted = sink.buffered();
    const expected_lines = [_][]const u8{
        "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69",
        "symbol::FAGIX,sector::Equity / Corporate,geo::US,asset_class::Fund,pct:num:22.49",
    };
    for (expected_lines) |line| {
        try testing.expect(std.mem.indexOf(u8, emitted, line) != null);
    }
    try testing.expect(std.mem.indexOf(u8, emitted, "TODO") == null);
}
test "emitFundLines: empty slice -> single TODO line (treats empty as null)" {
    // A zero-length sector list must produce the same single TODO line
    // as a null one.
    const testing = std.testing;
    const no_rows = [_]FundSector{};
    var storage: [256]u8 = undefined;
    var sink = std.Io.Writer.fixed(&storage);
    try emitFundLines("VTI", "ETF", &no_rows, null, null, &sink);
    const expected = "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n";
    try testing.expectEqualStrings(expected, sink.buffered());
}
test "emitFundLines: negative pct values render correctly" {
    // NPORT-P data really does contain negative percentages (short
    // positions, derivatives); they have to be written out with their
    // sign intact.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Repurchase Agreement / Other", .pct = -29.72 },
        .{ .description = "Derivative-FX / Other", .pct = -0.84 },
    };
    var storage: [512]u8 = undefined;
    var sink = std.Io.Writer.fixed(&storage);
    try emitFundLines("PTY", "Fund", &rows, null, null, &sink);
    const emitted = sink.buffered();
    const expected_fields = [_][]const u8{ "pct:num:-29.72", "pct:num:-0.84" };
    for (expected_fields) |field| {
        try testing.expect(std.mem.indexOf(u8, emitted, field) != null);
    }
}
test "emitFundLines: ETF asset_class flows through" {
    // The asset_class argument ("ETF") must appear verbatim on the
    // emitted sector line.
    const testing = std.testing;
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.86 },
    };
    var storage: [512]u8 = undefined;
    var sink = std.Io.Writer.fixed(&storage);
    try emitFundLines("SOXX", "ETF", &rows, null, null, &sink);
    const expected = "symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n";
    try testing.expectEqualStrings(expected, sink.buffered());
}
test "freeFundSectors: frees slice + each description, no leak" {
    // Build a heap-allocated sector slice of the shape that
    // `loadFundSectors` produces and hand it to `freeFundSectors`.
    // There is no explicit assertion: `std.testing.allocator` fails
    // the test if anything is leaked.
    const gpa = std.testing.allocator;
    var rows: std.ArrayList(FundSector) = .empty;
    errdefer rows.deinit(gpa);

    const debt_label = try gpa.dupe(u8, "Debt / Corporate");
    errdefer gpa.free(debt_label);
    try rows.append(gpa, .{ .description = debt_label, .pct = 47.69 });

    const equity_label = try gpa.dupe(u8, "Equity / Corporate");
    errdefer gpa.free(equity_label);
    try rows.append(gpa, .{ .description = equity_label, .pct = 22.49 });

    const owned = try rows.toOwnedSlice(gpa);
    freeFundSectors(gpa, owned);
}
test "freeFundSectors: empty slice is a no-op" {
    // Freeing a zero-length allocation must neither crash nor leak.
    const gpa = std.testing.allocator;
    const no_rows = try gpa.alloc(FundSector, 0);
    freeFundSectors(gpa, no_rows);
}
test "freeFundEtfData: frees both name and sectors without leak" {
    // Construct the same shape `loadFundEtfData` produces, then
    // free via the paired free function. testing.allocator
    // catches any leak.
    //
    // Every allocation is paired with its own `errdefer` so that a
    // failure in a later `try` cannot leak `name` or `desc`. The list
    // never owns the description strings on the error path, so
    // `list.deinit` only releases the backing array and nothing is
    // freed twice.
    const alloc = std.testing.allocator;
    const name = try alloc.dupe(u8, "Vanguard Total Bond Market Index Fund");
    errdefer alloc.free(name);
    var list: std.ArrayList(FundSector) = .empty;
    errdefer list.deinit(alloc);
    const desc = try alloc.dupe(u8, "Debt / Corporate");
    errdefer alloc.free(desc);
    try list.append(alloc, .{ .description = desc, .pct = 50.0 });
    const sectors = try list.toOwnedSlice(alloc);
    // Ownership of `name` and `sectors` passes to freeFundEtfData; no
    // fallible call follows, so the errdefers above cannot fire.
    freeFundEtfData(alloc, .{ .series_name = name, .sectors = sectors });
}
test "freeFundEtfData: handles null series_name (only sectors freed)" {
    // With `series_name` null, only the sector slice and its
    // description strings need to be released.
    //
    // `desc` gets its own `errdefer` so a failing `append` cannot leak
    // it. The list does not own the string on the error path, so
    // `list.deinit` only releases the backing array.
    const alloc = std.testing.allocator;
    var list: std.ArrayList(FundSector) = .empty;
    errdefer list.deinit(alloc);
    const desc = try alloc.dupe(u8, "Equity / Corporate");
    errdefer alloc.free(desc);
    try list.append(alloc, .{ .description = desc, .pct = 100.0 });
    const sectors = try list.toOwnedSlice(alloc);
    // Ownership of `sectors` passes to freeFundEtfData; no fallible
    // call follows, so the errdefers above cannot fire.
    freeFundEtfData(alloc, .{ .series_name = null, .sectors = sectors });
}
test "freeFundEtfData: handles null sectors (only series_name freed)" {
    // With `sectors` null, only the heap-allocated series name needs
    // to be released; testing.allocator flags a leak otherwise.
    const gpa = std.testing.allocator;
    const series_name = try gpa.dupe(u8, "Some Fund");
    freeFundEtfData(gpa, .{ .sectors = null, .series_name = series_name });
}
test "freeFundEtfData: both null is a no-op" {
    // Nothing was allocated, so there is nothing to free; the call
    // must simply return.
    const gpa = std.testing.allocator;
    freeFundEtfData(gpa, .{ .sectors = null, .series_name = null });
}
// ── sortSymbolsAlphabetically ────────────────────────────────
test "sortSymbolsAlphabetically: shuffled input -> alphabetical output" {
    // An unordered symbol list must come back in alphabetical order.
    const testing = std.testing;
    const expected = [_][]const u8{ "AAPL", "BND", "QQQ", "SPY", "VTI" };
    var symbols = [_][]const u8{ "QQQ", "AAPL", "VTI", "BND", "SPY" };
    sortSymbolsAlphabetically(&symbols);
    for (expected, symbols) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}
test "sortSymbolsAlphabetically: already-sorted input is stable" {
    // Sorting a list that is already in order must leave it as it was.
    const testing = std.testing;
    const expected = [_][]const u8{ "AAPL", "BND", "VTI" };
    var symbols = expected; // arrays are values: this is a mutable copy
    sortSymbolsAlphabetically(&symbols);
    for (expected, symbols) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}
test "sortSymbolsAlphabetically: empty slice is a no-op" {
    // Sorting zero symbols must not crash and leaves zero symbols.
    const testing = std.testing;
    const expected_len: usize = 0;
    var symbols = [_][]const u8{};
    sortSymbolsAlphabetically(&symbols);
    try testing.expectEqual(expected_len, symbols.len);
}
test "sortSymbolsAlphabetically: single element is unchanged" {
    // A one-element list has nothing to reorder.
    const testing = std.testing;
    var symbols = [_][]const u8{"AAPL"};
    sortSymbolsAlphabetically(&symbols);
    const only = symbols[0];
    try testing.expectEqualStrings("AAPL", only);
}
test "sortSymbolsAlphabetically: case-sensitive ordering (uppercase < lowercase)" {
    // Defensive check. Symbols are expected to be uppercased upstream
    // (portfolio.srf canonicalizes them; single-symbol mode relies on
    // the framework's `uppercase_first_arg`). Should mixed case ever
    // slip through, this pins the comparator as byte-lexicographic:
    // uppercase letters have smaller byte values than lowercase ones,
    // so they sort first.
    const testing = std.testing;
    const expected = [_][]const u8{ "AAPL", "BND", "aapl" };
    var symbols = [_][]const u8{ "aapl", "AAPL", "BND" };
    sortSymbolsAlphabetically(&symbols);
    for (expected, symbols) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}
test "sortSymbolsAlphabetically: numbers and digits sort before letters" {
    // CUSIPs (9-character alphanumeric) and share-class tickers such
    // as "BRK.B" can show up in the list. Byte-lexicographic ordering
    // places digit-led entries ahead of letter-led ones, so numbered
    // identifiers cluster at the top.
    const testing = std.testing;
    const expected = [_][]const u8{ "02315N600", "AAPL", "BRK.B" };
    var symbols = [_][]const u8{ "AAPL", "02315N600", "BRK.B" };
    sortSymbolsAlphabetically(&symbols);
    for (expected, symbols) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}
test "sortSymbolsAlphabetically: duplicate symbols stay together" {
    // Defensive check. stockSymbols is meant to deduplicate, but if a
    // duplicate ever gets through, the copies must end up adjacent in
    // the sorted output and sorting must not crash or scramble.
    const testing = std.testing;
    const expected = [_][]const u8{ "AAPL", "BND", "VTI", "VTI" };
    var symbols = [_][]const u8{ "VTI", "AAPL", "VTI", "BND" };
    sortSymbolsAlphabetically(&symbols);
    for (expected, symbols) |want, got| {
        try testing.expectEqualStrings(want, got);
    }
}