1412 lines
59 KiB
Zig
1412 lines
59 KiB
Zig
const std = @import("std");
|
||
const zfin = @import("../root.zig");
|
||
const cli = @import("common.zig");
|
||
const framework = @import("framework.zig");
|
||
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
|
||
const ClassificationRecord = zfin.classification.ClassificationRecord;
|
||
|
||
/// Arguments accepted by the `enrich` command after parsing.
pub const ParsedArgs = struct {
    /// Ticker to enrich (e.g. "AAPL"). `null` selects portfolio
    /// mode, in which the portfolio file(s) are located through
    /// the global `-p` resolution.
    symbol: ?[]const u8,
};
|
||
|
||
/// Command registration record for `enrich`: name, help group,
/// one-line synopsis, full help text, and argument handling flags.
pub const meta: framework.Meta = .{
    .name = "enrich",
    .group = .hygiene,
    .synopsis = "Bootstrap metadata.srf from Wikidata + EDGAR",
    // The first positional argument is uppercased before `parseArgs`
    // sees it, so `enrich fagix` and `enrich FAGIX` are equivalent.
    .uppercase_first_arg = true,
    // Errors from `parseArgs` that represent user mistakes.
    .user_errors = error{UnexpectedArg},
    .help =
    \\Usage: zfin enrich [SYMBOL]
    \\
    \\Bootstrap a `metadata.srf` classification file from public
    \\Wikidata + SEC EDGAR data. Two modes:
    \\
    \\ - Portfolio mode (no argument): enrich every stock symbol
    \\ in your portfolio. Honors the global `-p`/`--portfolio`
    \\ flag for selecting which portfolio file(s) to use; with
    \\ no flag, falls back to the standard portfolio resolution
    \\ (portfolio.srf in cwd, or $ZFIN_HOME/portfolio.srf).
    \\ Output is a complete SRF file written to stdout —
    \\ redirect into metadata.srf and edit by hand for accuracy.
    \\ - Symbol mode (single SYMBOL argument): enrich one symbol
    \\ and emit one appendable SRF line. Useful for adding to
    \\ an existing metadata.srf without rerunning the whole file.
    \\
    \\Sources used:
    \\ - Wikidata SPARQL: sector / industry / country / asset class
    \\ + CIK lookup for the EDGAR call below.
    \\ - SEC EDGAR XBRL company facts: shares outstanding, used
    \\ with the latest cached close price to derive market-cap
    \\ size buckets (Large/Mid/Small) for US-domiciled stocks.
    \\ - SEC EDGAR mutual-fund ticker map: fallback when Wikidata
    \\ has no entry. Open-end mutual funds aren't exchange-listed
    \\ and are usually missing from Wikidata; this fills in
    \\ `geo::US,asset_class::Fund` (or `ETF` for company-map
    \\ UIT entries with title hints).
    \\
    \\Always review the output before saving as `metadata.srf`.
    \\Wikidata + EDGAR are free and have no per-day quota; the SEC
    \\caps EDGAR at 10 req/sec which the rate limiter respects.
    \\Requires ZFIN_USER_EMAIL in `.env` (SEC requires a contact in
    \\the User-Agent header).
    \\
    \\Examples:
    \\ zfin enrich > metadata.srf # whole portfolio (default file)
    \\ zfin -p sample enrich > metadata.srf # whole portfolio (named file)
    \\ zfin enrich AAPL >> metadata.srf # single symbol append
    \\ zfin enrich fagix >> metadata.srf # symbol auto-uppercased
    \\
    ,
};
|
||
|
||
/// Turn the command's positional arguments into `ParsedArgs`.
///
/// Zero arguments selects portfolio mode (`symbol == null`); exactly
/// one argument is taken as the symbol. Anything more prints an
/// explanation to stderr and fails with `error.UnexpectedArg`.
pub fn parseArgs(ctx: *framework.RunCtx, cmd_args: []const []const u8) !ParsedArgs {
    return switch (cmd_args.len) {
        0 => .{ .symbol = null },
        1 => .{ .symbol = cmd_args[0] },
        else => {
            cli.stderrPrint(ctx.io, "Error: 'enrich' takes at most one argument (a symbol). For portfolio-mode, omit the argument and use the global -p flag if needed.\n");
            return error.UnexpectedArg;
        },
    };
}
|
||
|
||
/// The three classification columns written on a stock row of
/// `metadata.srf`, as produced by `deriveMetadata`.
const DerivedMeta = struct {
    /// Sector label. `deriveMetadata` writes either the title-cased
    /// sector from the classification record or the literal `TODO`
    /// (for ETFs and for records without a sector).
    sector: []const u8,
    /// Geo bucket, as returned by `zfin.classification.geoFor` for
    /// the record's country.
    geo: []const u8,
    /// Asset class: "ETF", "Mutual Fund", one of the US size buckets
    /// ("US Large Cap" / "US Mid Cap" / "US Small Cap"), or — for
    /// non-US stocks — the geo bucket itself.
    asset_class: []const u8,
};
|
||
|
||
/// Build the `DerivedMeta` row values for one symbol from its
/// `ClassificationRecord` and an optional market-cap estimate
/// (dollars). Performs no I/O.
///
/// `sector_buf` is scratch space handed to `cli.fmt.toTitleCase` for
/// the normalized sector text; the returned `sector` may point into
/// it, so the buffer must outlive the result.
fn deriveMetadata(
    classification: ClassificationRecord,
    market_cap: ?f64,
    sector_buf: []u8,
) DerivedMeta {
    const geo_bucket = zfin.classification.geoFor(classification.country);

    // Sector. ETFs always get `TODO` (funds span sectors, so the
    // user supplies the breakdown), and so do records that carry no
    // sector at all — `TODO` signals "fill this in" rather than
    // looking like a real taxonomy value.
    const sector_label: []const u8 = pick: {
        if (!classification.is_etf) {
            if (classification.sector) |raw| {
                break :pick cli.fmt.toTitleCase(sector_buf, raw);
            }
        }
        break :pick "TODO";
    };

    // Asset class, decided by the first rule that matches.
    const class_label: []const u8 = pick: {
        if (classification.is_etf) break :pick "ETF";

        const declared = classification.asset_class orelse "";
        if (std.mem.eql(u8, declared, "Mutual Fund")) break :pick "Mutual Fund";

        // Non-US stocks: no size bucket is computed; reuse the geo
        // bucket and let the user refine it in metadata.srf.
        const is_us = std.mem.eql(u8, geo_bucket, zfin.classification.geo.us);
        if (!is_us) break :pick geo_bucket;

        // US stocks without a market-cap estimate default to large
        // cap (matches the old AlphaVantage flow's default).
        const cap = market_cap orelse break :pick "US Large Cap";
        if (cap >= 10_000_000_000) break :pick "US Large Cap";
        if (cap >= 2_000_000_000) break :pick "US Mid Cap";
        break :pick "US Small Cap";
    };

    return .{ .sector = sector_label, .geo = geo_bucket, .asset_class = class_label };
}
|
||
|
||
/// Entry point for the `enrich` command: bootstrap metadata.srf
/// content from Wikidata + EDGAR data.
///
/// Dispatches on `parsed.symbol`:
///   - non-null → `enrichSymbol` for that single symbol (the
///     framework has already uppercased it);
///   - null → `enrichPortfolio`, which loads the portfolio through
///     `cli.loadPortfolio` and enriches every symbol in it.
///
/// Fails with `error.MissingDataService` when `ctx.svc` is null.
pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
    const svc = ctx.svc orelse return error.MissingDataService;
    const sym = parsed.symbol orelse return enrichPortfolio(ctx, svc);
    return enrichSymbol(ctx.io, ctx.allocator, svc, sym, ctx.out);
}
|
||
|
||
/// What a caller should do after a fetch failure.
///
/// `hard_stop`: the condition (no API key, auth rejected,
/// rate-limited) would recur for every later symbol, so the batch
/// should end. `soft_skip`: the failure is specific to one symbol
/// and the remaining symbols may still succeed.
const FetchErrorAction = enum {
    hard_stop,
    soft_skip,
};
|
||
|
||
/// Explain on stderr why fetching `sym` failed and return whether
/// the caller should keep going (`soft_skip`) or end the batch
/// (`hard_stop`).
///
/// This is the one place where a fetch error is translated into
/// user-facing text. Per AGENTS.md "Errors carry information", each
/// message names the specific condition rather than a bare
/// "fetch failed". If formatting into the 256-byte scratch buffer
/// fails, a shorter fixed message is printed instead.
fn reportFetchError(io: std.Io, sym: []const u8, err: anyerror) FetchErrorAction {
    var scratch: [256]u8 = undefined;

    const Report = struct { text: []const u8, action: FetchErrorAction };
    const report: Report = switch (err) {
        zfin.DataError.NoApiKey => .{
            .text = "Error: ZFIN_USER_EMAIL not set. Add it to .env (SEC EDGAR requires a contact email in the User-Agent header).\n",
            .action = .hard_stop,
        },
        zfin.DataError.AuthError => .{
            .text = "Error: SEC EDGAR rejected the request. Check ZFIN_USER_EMAIL in .env\n",
            .action = .hard_stop,
        },
        zfin.DataError.RateLimited => .{
            .text = std.fmt.bufPrint(
                &scratch,
                "Error: rate-limited on {s}. Wikidata/EDGAR have generous limits; check for upstream throttling.\n",
                .{sym},
            ) catch "Error: rate-limited. Try again later.\n",
            .action = .hard_stop,
        },
        zfin.DataError.NotFound => .{
            .text = std.fmt.bufPrint(
                &scratch,
                " {s}: not in Wikidata; mark sector/geo/asset_class manually\n",
                .{sym},
            ) catch " not in Wikidata; mark manually\n",
            .action = .soft_skip,
        },
        zfin.DataError.TransientError => .{
            .text = std.fmt.bufPrint(
                &scratch,
                " {s}: transient upstream failure; will need re-run\n",
                .{sym},
            ) catch " transient upstream failure; will need re-run\n",
            .action = .soft_skip,
        },
        // Anything else is reported with the error's tag name and
        // treated as a per-symbol problem.
        else => .{
            .text = std.fmt.bufPrint(
                &scratch,
                " {s}: fetch failed ({t})\n",
                .{ sym, err },
            ) catch " fetch failed\n",
            .action = .soft_skip,
        },
    };

    cli.stderrPrint(io, report.text);
    return report.action;
}
|
||
|
||
/// Best-effort market-cap estimate for `sym`: shares outstanding
/// (from `svc.getEntityFacts`, keyed by `cik`) multiplied by the
/// close of the last candle returned by `svc.getCandles`.
///
/// Returns null — without reporting anything — when `cik` is null,
/// either service call fails, no shares-outstanding fact is present,
/// or there are no candles. The caller then falls back to a default
/// size bucket.
fn estimateMarketCap(
    svc: *zfin.DataService,
    sym: []const u8,
    cik: ?[]const u8,
    opts: zfin.FetchOptions,
) ?f64 {
    const cik_str = cik orelse return null;

    const facts = svc.getEntityFacts(cik_str, opts) catch return null;
    defer facts.deinit();

    // When several shares-outstanding facts are present, the last
    // one in the slice is the one used.
    var latest_shares: ?u64 = null;
    for (facts.data) |fact| {
        switch (fact) {
            .shares_outstanding => |payload| latest_shares = payload.shares_outstanding,
        }
    }
    const share_count = latest_shares orelse return null;

    const candles = svc.getCandles(sym, opts) catch return null;
    defer candles.deinit();

    const bars = candles.data;
    if (bars.len == 0) return null;
    const last_close = bars[bars.len - 1].close;

    const shares_f: f64 = @floatFromInt(share_count);
    return shares_f * last_close;
}
|
||
|
||
/// The summary bucket a portfolio-mode symbol is tallied under once
/// its classification attempt has resolved; see `classifyForCounter`
/// for the mapping.
///
/// `failed` and `manual_todo` both describe a symbol with no
/// classification. They differ in cause: `failed` follows an
/// upstream Wikidata error (no data anywhere), while `manual_todo`
/// follows a fetch that succeeded but came back empty or sparse
/// (the symbol exists and just needs human attention).
const SummaryCounter = enum {
    wikidata_hit,
    edgar_fallback,
    failed,
    manual_todo,
};
|
||
|
||
/// Map a symbol's provenance to the summary bucket it increments.
///
/// `wikidata_errored` only matters for `.none`: true means the
/// Wikidata fetch itself errored (`.failed`); false means it
/// returned nothing usable (`.manual_todo`).
fn classifyForCounter(kind: FallbackKind, wikidata_errored: bool) SummaryCounter {
    switch (kind) {
        .wikidata => return .wikidata_hit,
        .edgar_fallback => return .edgar_fallback,
        .none => {},
    }
    return if (wikidata_errored) .failed else .manual_todo;
}
|
||
|
||
/// Render the one-line provenance breadcrumb for `sym` into `buf`
/// and return the written slice.
///
/// `err` is consulted only for `.none`, where it selects between
/// the "Wikidata errored" and the plain "no entry" wording. Returns
/// null when `buf` cannot hold the message; callers use a 256-byte
/// buffer, so null is a safety valve rather than an expected path.
fn formatProvenanceMessage(buf: []u8, sym: []const u8, kind: FallbackKind, err: ?anyerror) ?[]const u8 {
    switch (kind) {
        .wikidata => {
            return std.fmt.bufPrint(buf, " {s}: classified via Wikidata\n", .{sym}) catch null;
        },
        .edgar_fallback => {
            return std.fmt.bufPrint(buf, " {s}: classified via EDGAR fallback (Wikidata sparse or empty)\n", .{sym}) catch null;
        },
        .none => {
            const cause = err orelse {
                return std.fmt.bufPrint(buf, " {s}: no Wikidata or EDGAR entry; fill in by hand\n", .{sym}) catch null;
            };
            return std.fmt.bufPrint(buf, " {s}: no classification (Wikidata errored {t}, EDGAR had no entry); fill in by hand\n", .{ sym, cause }) catch null;
        },
    }
}
|
||
|
||
/// Write a one-line breadcrumb to stderr saying how `sym` was
/// classified (Wikidata, EDGAR fallback, or nothing).
///
/// Intended for single-symbol mode (`zfin enrich AAPL`), which has
/// no end-of-run summary: without it the user cannot tell where the
/// SRF row came from. Prints nothing if the message does not fit
/// the local 256-byte buffer.
fn stderrSymbolProvenance(io: std.Io, sym: []const u8, kind: FallbackKind, err: ?anyerror) void {
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, sym, kind, err) orelse return;
    cli.stderrPrint(io, line);
}
|
||
|
||
/// Enrich a single symbol and write appendable SRF line(s) to `out`.
///
/// Progress and provenance notes go to stderr via `io`. Outcomes:
///   - classification found, ETF → rows from `emitEtfRows`;
///   - classification found, otherwise → an optional `# name`
///     comment plus one `symbol::...` row from `deriveMetadata`;
///   - `NotFound` or a soft-skip error → a commented-out all-`TODO`
///     stub row;
///   - hard-stop error → only the stderr message from
///     `reportFetchError`; nothing is written to `out`.
///
/// Errors from writing to `out` are propagated.
fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, sym: []const u8, out: *std.Io.Writer) !void {
    // Symbol is already uppercase: the framework's
    // `uppercase_first_arg = true` normalizes the CLI arg before
    // it reaches `parseArgs`.
    {
        var msg_buf: [128]u8 = undefined;
        const msg = std.fmt.bufPrint(&msg_buf, " Fetching {s}...\n", .{sym}) catch " ...\n";
        cli.stderrPrint(io, msg);
    }

    const opts: zfin.FetchOptions = .{};

    // `getClassification` runs the full Wikidata -> EDGAR fallback
    // chain inside the service, so a successful return already
    // carries the merged record.
    const result = svc.getClassification(sym, opts) catch |err| {
        switch (err) {
            zfin.DataError.NotFound => {
                // Neither Wikidata nor EDGAR knows this symbol.
                try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
                stderrSymbolProvenance(io, sym, .none, null);
                return;
            },
            else => {
                const action = reportFetchError(io, sym, err);
                switch (action) {
                    // The stderr message already explains the
                    // problem; there is nothing useful to emit.
                    .hard_stop => return,
                    .soft_skip => {
                        try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
                        try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
                        stderrSymbolProvenance(io, sym, .none, err);
                        return;
                    },
                }
            },
        }
    };
    defer result.deinit();
    const c = result.data[0];

    if (c.is_etf) {
        try emitEtfRows(svc, allocator, sym, c, opts, out);
    } else {
        const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
        // Scratch space for the title-cased sector; `derived.sector`
        // may point into it, so it must stay alive until the row
        // has been printed.
        var sector_buf: [64]u8 = undefined;
        const derived = deriveMetadata(c, market_cap, &sector_buf);
        if (c.name) |name| {
            try out.print("# {s}\n", .{name});
        }
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
            sym, derived.sector, derived.geo, derived.asset_class,
        });
    }

    stderrSymbolProvenance(io, sym, kindFromSource(c.source), null);
}
|
||
|
||
/// Convert a classification record's `source` string into the
/// `FallbackKind` used by the progress/summary plumbing. Any value
/// other than "wikidata" or "edgar_fallback" maps to `.none`.
fn kindFromSource(source: []const u8) FallbackKind {
    const Entry = struct { tag: []const u8, kind: FallbackKind };
    const known = [_]Entry{
        .{ .tag = "wikidata", .kind = .wikidata },
        .{ .tag = "edgar_fallback", .kind = .edgar_fallback },
    };
    for (known) |entry| {
        if (std.mem.eql(u8, source, entry.tag)) return entry.kind;
    }
    return .none;
}
|
||
|
||
/// Write the header comment and row(s) for an ETF/fund symbol.
///
/// The sector sleeves come from `loadFundEtfData` (null when no
/// data could be loaded). The classification record `c` supplies
/// the asset class (default "Fund"), the geo (default "US"), and an
/// optional sector that `emitFundLines` may substitute for a
/// dominant "Equity / Corporate" sleeve. The header notes when the
/// record came from the EDGAR fallback.
fn emitEtfRows(
    svc: *zfin.DataService,
    allocator: std.mem.Allocator,
    sym: []const u8,
    c: ClassificationRecord,
    opts: zfin.FetchOptions,
    out: *std.Io.Writer,
) !void {
    const fund_data = loadFundEtfData(svc, allocator, sym, opts);
    defer if (fund_data) |d| freeFundEtfData(allocator, d);

    const sleeves: ?[]const FundSector = if (fund_data) |d| d.sectors else null;
    const class_label = c.asset_class orelse "Fund";
    const geo_label = c.geo orelse "US";
    const via_edgar = std.mem.eql(u8, c.source, "edgar_fallback");

    // Header comment: four shapes depending on whether a name is
    // known and whether the record came from the EDGAR fallback.
    if (c.name) |name| {
        switch (via_edgar) {
            true => try out.print("# {s} -- {s} (Wikidata had no entry)\n", .{ sym, name }),
            false => try out.print("# {s} -- {s}\n", .{ sym, name }),
        }
    } else {
        switch (via_edgar) {
            true => try out.print("# {s} -- (Wikidata had no entry)\n", .{sym}),
            false => try out.print("# {s}\n", .{sym}),
        }
    }

    try emitFundLines(sym, class_label, sleeves, c.sector, geo_label, out);
}
|
||
|
||
/// One sector slice of a fund's NPORT-P breakdown.
///
/// `description` is NPORT-P's human-readable category (e.g.
/// "Equity / Corporate", "Debt / US Treasury") rather than a GICS
/// sector. For pure-equity funds NPORT-P collapses to
/// "100% Equity / Corporate"; for multi-asset funds (FAGIX-shape)
/// the breakdown is meaningfully diverse. The user can refine to
/// GICS by hand to track stock-fund decomposition.
pub const FundSector = struct {
    /// Category text. The struct itself does not own it; whoever
    /// builds the value decides the lifetime.
    description: []const u8,
    /// Share of the portfolio, in percent (compared against 95.0
    /// and printed as `pct:num:` by the emitters in this file).
    pct: f64,
};
|
||
|
||
/// Report whether the breakdown contains an "Equity / Corporate"
/// sleeve of at least 95% — the precondition for replacing that
/// sleeve with an inferred sector. Multi-asset funds (FAGIX-shape:
/// 48% Debt + 22% Equity + ...) fail this test and keep their
/// NPORT-P decomposition. A null breakdown yields false.
fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool {
    const sleeves = fund_sectors orelse return false;
    for (sleeves) |sleeve| {
        // Written as a negated `>=` so a NaN pct is skipped.
        if (!(sleeve.pct >= 95.0)) continue;
        if (std.mem.eql(u8, sleeve.description, "Equity / Corporate")) return true;
    }
    return false;
}
|
||
|
||
/// Write the data row(s) for a fund-classified symbol.
///
/// With a non-empty `fund_sectors`, one row per sleeve is written,
/// each carrying `pct:num:N`. With a null or empty breakdown a
/// single row is written whose sector is `inferred_sector` or, when
/// that is null, `TODO`.
///
/// `inferred_sector` replaces the description of "Equity /
/// Corporate" sleeves only when `hasDominantEquitySector` holds for
/// the breakdown; the pct is kept and every other sleeve (cash,
/// derivatives, ...) is printed with its raw NPORT-P category.
///
/// `asset_class` is supplied by the caller. `geo` is applied to
/// every row and defaults to "US" when null.
fn emitFundLines(
    sym: []const u8,
    asset_class: []const u8,
    fund_sectors: ?[]const FundSector,
    inferred_sector: ?[]const u8,
    geo: ?[]const u8,
    out: *std.Io.Writer,
) !void {
    const geo_str = geo orelse "US";
    const rows: []const FundSector = fund_sectors orelse &.{};

    // No breakdown at all: one row, using the inferred sector if
    // there is one.
    if (rows.len == 0) {
        const sector_str = inferred_sector orelse "TODO";
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, sector_str, geo_str, asset_class });
        return;
    }

    // Non-null only when inference applies: an inferred sector
    // exists AND the breakdown has a dominant equity sleeve.
    const gics_override: ?[]const u8 = if (hasDominantEquitySector(rows)) inferred_sector else null;

    for (rows) |row| {
        const is_equity_sleeve = std.mem.eql(u8, row.description, "Equity / Corporate");
        const label = if (is_equity_sleeve) (gics_override orelse row.description) else row.description;
        try out.print(
            "symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n",
            .{ sym, label, geo_str, asset_class, row.pct },
        );
    }
}
|
||
|
||
/// The subset of `getEtfMetrics` output that `enrich` keeps: the
/// fund's series name and its per-sector portfolio breakdown.
///
/// Either field may be null when the upstream data lacked it. Both
/// are allocated with the allocator given to `loadFundEtfData` and
/// are released with `freeFundEtfData`.
pub const FundEtfData = struct {
    /// Fund name taken from the first profile record that has one.
    series_name: ?[]const u8,
    /// Sector sleeves; each `description` is an owned copy.
    sectors: ?[]FundSector,
};
|
||
|
||
/// Load the series name and sector breakdown for `sym` through
/// `svc.getEtfMetrics`, copying what is kept into `allocator`.
///
/// Returns null when the service call fails, when an allocation
/// fails while copying, or when the data contains neither a series
/// name nor any sector record. Otherwise returns a `FundEtfData`
/// whose fields are independently nullable; the caller releases it
/// with `freeFundEtfData`.
///
/// Cleanup uses plain `defer`, not `errdefer`: this function
/// returns an optional, so a `return null` bail-out is not an error
/// return and would not trigger `errdefer`.
fn loadFundEtfData(svc: *zfin.DataService, allocator: std.mem.Allocator, sym: []const u8, opts: zfin.FetchOptions) ?FundEtfData {
    const result = svc.getEtfMetrics(sym, opts) catch return null;
    defer result.deinit();

    var list: std.ArrayList(FundSector) = .empty;
    // Runs on every exit. After a successful `toOwnedSlice` the
    // list is empty, so this only releases entries that were never
    // handed to the caller (early bail-out, or a failed
    // `toOwnedSlice`).
    defer {
        for (list.items) |s| allocator.free(s.description);
        list.deinit(allocator);
    }

    var series_name: ?[]const u8 = null;
    var name_handed_off = false;
    defer {
        if (!name_handed_off) {
            if (series_name) |s| allocator.free(s);
        }
    }

    for (result.data) |rec| switch (rec) {
        .profile => |p| {
            // Keep the series name of the first profile record
            // that carries one.
            if (series_name == null) {
                if (p.series_name) |sn| {
                    series_name = allocator.dupe(u8, sn) catch return null;
                }
            }
        },
        .sector => |s| {
            const desc = allocator.dupe(u8, s.description) catch return null;
            list.append(allocator, .{ .description = desc, .pct = s.pct_of_portfolio }) catch {
                // `desc` never made it into the list, so the
                // deferred cleanup will not see it.
                allocator.free(desc);
                return null;
            };
        },
        .holding => {},
    };

    // If `toOwnedSlice` fails the entries stay in `list` and are
    // released by the deferred cleanup; the result then simply has
    // no sector data.
    const sectors: ?[]FundSector = if (list.items.len == 0)
        null
    else
        list.toOwnedSlice(allocator) catch null;

    // Nothing useful to return; signal "no data" so the caller
    // takes its fallback path.
    if (series_name == null and sectors == null) return null;

    name_handed_off = true;
    return .{
        .series_name = series_name,
        .sectors = sectors,
    };
}
|
||
|
||
/// Release a sector slice: every entry's `description`, then the
/// slice itself. Retained because tests build such slices directly;
/// production code goes through `freeFundEtfData`.
fn freeFundSectors(allocator: std.mem.Allocator, sectors: []FundSector) void {
    defer allocator.free(sectors);
    for (sectors) |entry| {
        allocator.free(entry.description);
    }
}
|
||
|
||
/// Release everything owned by a `FundEtfData` from
/// `loadFundEtfData`: the sector slice (with each entry's
/// `description`) and the `series_name` string, whichever are
/// non-null.
fn freeFundEtfData(allocator: std.mem.Allocator, data: FundEtfData) void {
    if (data.sectors) |owned_sectors| {
        freeFundSectors(allocator, owned_sectors);
    }
    if (data.series_name) |owned_name| {
        allocator.free(owned_name);
    }
}
|
||
|
||
/// Where a symbol's classification came from, as decoded from
/// `ClassificationRecord.source` by `kindFromSource`. Drives the
/// per-symbol summary counters and progress messages.
const FallbackKind = enum {
    wikidata,
    edgar_fallback,
    none,
};
|
||
|
||
/// Sort `syms` in place into ascending byte-wise order. Called by
/// `enrichPortfolio` so the output is stable and diff-friendly.
/// Does not allocate.
fn sortSymbolsAlphabetically(syms: [][]const u8) void {
    const ByBytes = struct {
        fn ascending(_: void, lhs: []const u8, rhs: []const u8) bool {
            return std.mem.order(u8, lhs, rhs) == .lt;
        }
    };
    std.mem.sort([]const u8, syms, {}, ByBytes.ascending);
}
|
||
|
||
/// Enrich every symbol in the resolved portfolio and write a
/// complete SRF file to `ctx.out`.
///
/// The portfolio is loaded through `cli.loadPortfolio`, so global
/// `-p`/`--portfolio` patterns are honored — same multi-file
/// union-merge as the rest of the CLI. If loading yields nothing the
/// function returns without writing.
///
/// Per symbol (in alphabetical order):
///   - CUSIP-like symbols are not fetched; a commented stub is
///     written, annotated with the position's note when present;
///   - `NotFound` → commented all-`TODO` stub, counted as manual;
///   - other fetch errors → commented stub, counted as failed; a
///     hard-stop error additionally ends the loop;
///   - success → ETF rows via `emitEtfRows`, or one stock row via
///     `deriveMetadata`.
///
/// Progress goes to stderr; a summary comment closes the output.
/// Errors from writing to `ctx.out` are propagated.
fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void {
    const io = ctx.io;
    const allocator = ctx.allocator;
    const out = ctx.out;

    var loaded = cli.loadPortfolio(ctx, ctx.today) orelse return;
    defer loaded.deinit(allocator);

    const positions = loaded.positions;
    const syms = loaded.syms;

    // Sort symbols alphabetically for stable, diff-friendly
    // output. Without this, `stockSymbols` returns symbols in
    // `std.StringHashMap` bucket order — unstable across Zig
    // versions and across portfolio edits. The sort mutates
    // `loaded.syms` in place (hence the `@constCast`); nothing
    // else in this function depends on the original order.
    sortSymbolsAlphabetically(@constCast(syms));

    const opts: zfin.FetchOptions = .{};

    try out.print("#!srfv1\n", .{});
    try out.print("# Portfolio classification metadata\n", .{});
    try out.print("# Generated from Wikidata + SEC EDGAR data\n", .{});
    try out.print("# Edit as needed: sector, geo, asset_class, pct:num:N\n", .{});
    try out.print("#\n", .{});
    try out.print("# For ETFs/funds with multi-class exposure, add multiple lines\n", .{});
    try out.print("# with pct:num: values that sum to ~100\n\n", .{});

    var wikidata_hits: usize = 0;
    var edgar_fallback: usize = 0;
    var manual_todo: usize = 0;
    var cusip_skipped: usize = 0;
    var failed: usize = 0;

    for (syms, 0..) |sym, i| {
        // CUSIPs are not looked up; emit a stub for manual entry.
        if (isCusipLike(sym)) {
            // Use the note of the first position with this symbol
            // (if it has one) as a human-readable hint.
            var note: ?[]const u8 = null;
            for (positions) |pos| {
                if (std.mem.eql(u8, pos.symbol, sym)) {
                    if (pos.note) |n| {
                        note = n;
                    }
                    break;
                }
            }
            try out.print("# CUSIP {s}", .{sym});
            if (note) |n| try out.print(" ({s})", .{n});
            try out.print(" -- fill in manually\n", .{});
            try out.print("# symbol::{s},asset_class::TODO,geo::TODO\n\n", .{sym});
            cusip_skipped += 1;
            continue;
        }

        // Progress to stderr.
        {
            var msg_buf: [128]u8 = undefined;
            const msg = std.fmt.bufPrint(&msg_buf, " [{d}/{d}] {s}...\n", .{ i + 1, syms.len, sym }) catch " ...\n";
            cli.stderrPrint(io, msg);
        }

        const result = svc.getClassification(sym, opts) catch |err| {
            switch (err) {
                zfin.DataError.NotFound => {
                    // Neither Wikidata nor EDGAR knows this
                    // symbol -- fill in by hand.
                    try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                    try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                    manual_todo += 1;
                    continue;
                },
                else => {
                    const action = reportFetchError(io, sym, err);
                    try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
                    try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                    failed += 1;
                    switch (action) {
                        .hard_stop => {
                            // Every remaining symbol will hit the
                            // same condition (no API key / auth
                            // fail / rate limit). Stop the batch
                            // with a clear note so the user knows
                            // how many were skipped.
                            var rem_buf: [256]u8 = undefined;
                            const remaining = syms.len - i - 1;
                            const rem_msg = std.fmt.bufPrint(
                                &rem_buf,
                                "Stopping enrichment: {d} symbol(s) not yet fetched. Rerun once the issue is resolved.\n",
                                .{remaining},
                            ) catch "Stopping enrichment.\n";
                            cli.stderrPrint(io, rem_msg);
                            break;
                        },
                        .soft_skip => continue,
                    }
                },
            }
        };
        // Released at the end of each loop iteration.
        defer result.deinit();
        const c = result.data[0];

        if (c.is_etf) {
            try emitEtfRows(svc, allocator, sym, c, opts, out);
            try out.print("\n", .{});
        } else {
            const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
            // Scratch space for the title-cased sector;
            // `derived.sector` may point into it.
            var sector_buf: [64]u8 = undefined;
            const derived = deriveMetadata(c, market_cap, &sector_buf);
            if (c.name) |name| {
                try out.print("# {s}\n", .{name});
            }
            try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
                sym, derived.sector, derived.geo, derived.asset_class,
            });
        }

        switch (kindFromSource(c.source)) {
            .wikidata => wikidata_hits += 1,
            .edgar_fallback => edgar_fallback += 1,
            .none => manual_todo += 1, // shouldn't happen for a successful return
        }
    }

    // Summary. Every processed symbol lands in exactly one bucket,
    // so on a run that is not cut short the buckets sum to
    // `syms.len`; after a hard stop the symbols that were never
    // attempted are in no bucket. `failed` counts symbols whose
    // fetch errored with something other than `NotFound`.
    const filled = wikidata_hits + edgar_fallback;
    try out.print("# ---\n", .{});
    try out.print("# Enriched {d} symbols: {d} fully classified ({d} from Wikidata, {d} from EDGAR fallback), {d} need manual fill-in, {d} CUSIP-skipped, {d} unrecoverable failures\n", .{
        syms.len, filled, wikidata_hits, edgar_fallback, manual_todo, cusip_skipped, failed,
    });
    try out.print("# Review and edit this file, then save as metadata.srf\n", .{});
}
|
||
|
||
// ── Tests ────────────────────────────────────────────────────
|
||
|
||
test "parseArgs: accepts a symbol argument" {
    // Only `io` is assigned on the context; every other field is
    // left undefined.
    const argv = [_][]const u8{"AAPL"};
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const result = try parseArgs(&run_ctx, &argv);
    // A null symbol here fails the test with error.MissingSymbol.
    const symbol = result.symbol orelse return error.MissingSymbol;
    try std.testing.expectEqualStrings("AAPL", symbol);
}
|
||
|
||
test "parseArgs: no argument means portfolio mode" {
    // An empty argument list must parse successfully and leave
    // `symbol` null.
    const argv = [_][]const u8{};
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const result = try parseArgs(&run_ctx, &argv);
    try std.testing.expect(result.symbol == null);
}
|
||
|
||
test "parseArgs: extra args error" {
    // Two positional arguments: parsing is expected to fail with
    // error.UnexpectedArg.
    const argv = [_][]const u8{ "AAPL", "extra" };
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const outcome = parseArgs(&run_ctx, &argv);
    try std.testing.expectError(error.UnexpectedArg, outcome);
}
|
||
|
||
test "deriveMetadata: US large cap stock" {
    // A US record with a lowercase sector and a 3-trillion market
    // cap: expect the sector title-cased and the Large Cap bucket.
    const c: ClassificationRecord = .{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .sector = "technology",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    // Scratch buffer handed to deriveMetadata for the sector string.
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 3_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Technology", derived.sector);
    try std.testing.expectEqualStrings("US", derived.geo);
    try std.testing.expectEqualStrings("US Large Cap", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: US small cap stock" {
    // A US record with a 500-million market cap: expect the Small
    // Cap bucket.
    const c: ClassificationRecord = .{
        .symbol = "TINY",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 500_000_000, &sector_buf);
    try std.testing.expectEqualStrings("US Small Cap", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: US mid cap stock" {
    // A US record with a 5-billion market cap: expect the Mid Cap
    // bucket.
    const c: ClassificationRecord = .{
        .symbol = "MID",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 5_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("US Mid Cap", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: ETF sets asset_class to ETF regardless of size" {
    // `is_etf` is set and a 1-trillion market cap is supplied; the
    // asset class must still come back as "ETF", not a size bucket.
    const c: ClassificationRecord = .{
        .symbol = "VTI",
        .name = "Vanguard Total Stock Market ETF",
        .country = "US",
        .is_etf = true,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 1_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("ETF", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: international stock falls back to geo bucket" {
    // Country "JP": both geo and asset_class are expected to be
    // the "International Developed" bucket, even with a market cap.
    const c: ClassificationRecord = .{
        .symbol = "TM",
        .country = "JP",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 200_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("International Developed", derived.geo);
    try std.testing.expectEqualStrings("International Developed", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: emerging-market stock geo bucket" {
    // Country "CN": both geo and asset_class are expected to be
    // the "Emerging Markets" bucket.
    const c: ClassificationRecord = .{
        .symbol = "BABA",
        .country = "CN",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 200_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Emerging Markets", derived.geo);
    try std.testing.expectEqualStrings("Emerging Markets", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: missing market cap defaults US to Large Cap" {
    // A null market cap on a US record is expected to land in the
    // Large Cap bucket.
    const c: ClassificationRecord = .{
        .symbol = "UNK",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("US Large Cap", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: unknown country -> Unknown geo" {
    // With no country and no market cap, both geo and asset_class
    // are expected to read "Unknown".
    const c: ClassificationRecord = .{
        .symbol = "WEIRD",
        .country = null,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("Unknown", derived.geo);
    try std.testing.expectEqualStrings("Unknown", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: ETF gets sector::TODO regardless of Wikidata sector field" {
    // ETFs are multi-sector by definition. Wikidata sometimes
    // attaches an industry to an ETF entity; we override to
    // TODO so the user fills in their own sector breakdown
    // rather than copying whatever stray industry slipped
    // through.
    const c: ClassificationRecord = .{
        .symbol = "VTI",
        .country = "US",
        .is_etf = true,
        .sector = "stale industry value",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("TODO", derived.sector);
    try std.testing.expectEqualStrings("ETF", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: missing sector -> TODO (not 'Unknown')" {
    // SOXX-style: Wikidata returned an entity but no industry,
    // no country, no instance-of statements. The sector field
    // is null. We emit TODO so the user knows to fill it in
    // manually rather than seeing "Unknown" and assuming it's
    // a valid taxonomy bucket.
    const c: ClassificationRecord = .{
        .symbol = "SPARSE",
        .country = "US",
        .sector = null,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("TODO", derived.sector);
}
|
||
|
||
test "deriveMetadata: stock with sector preserved (canonical sector pass-through)" {
    // Wikidata's parser canonicalizes sectors before they reach
    // deriveMetadata; the function just title-cases them. Verify
    // that a canonical string (already title-cased) round-trips
    // unchanged.
    const c: ClassificationRecord = .{
        .symbol = "MSFT",
        .country = "US",
        .sector = "Technology",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 3_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Technology", derived.sector);
}
|
||
|
||
test "deriveMetadata: asset_class == 'Mutual Fund' short-circuits before size buckets" {
    // When Wikidata says it's a mutual fund, we trust that and
    // skip size-bucket derivation, even though a market cap is
    // supplied here.
    const c: ClassificationRecord = .{
        .symbol = "VFORX",
        .country = "US",
        .sector = "Financial Services",
        .asset_class = "Mutual Fund",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 50_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Mutual Fund", derived.asset_class);
    // Sector still gets title-cased (not overridden to TODO);
    // mutual funds aren't ETFs.
    try std.testing.expectEqualStrings("Financial Services", derived.sector);
}
|
||
|
||
test "deriveMetadata: asset_class == 'Mutual Fund' with no market cap" {
    // Mutual funds have null market caps in practice (no
    // shares-outstanding XBRL tag). Confirm we still take the
    // Mutual Fund branch and don't default to Large Cap.
    const c: ClassificationRecord = .{
        .symbol = "FAGIX",
        .country = "US",
        .asset_class = "Mutual Fund",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("Mutual Fund", derived.asset_class);
}
|
||
|
||
test "deriveMetadata: asset_class set but not 'Mutual Fund' -> falls through to size buckets" {
    // Defensive: any non-"Mutual Fund" string in asset_class
    // should NOT short-circuit. Today only "Mutual Fund" is
    // a recognized literal; anything else falls through.
    const c: ClassificationRecord = .{
        .symbol = "AAPL",
        .country = "US",
        .asset_class = "Open-End Fund", // hypothetical other value
        .sector = "Technology",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 3_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("US Large Cap", derived.asset_class);
}
|
||
|
||
// ── reportFetchError ────────────────────────────────────────
//
// `reportFetchError` writes a user-facing diagnostic to stderr
// (a no-op under `builtin.is_test`) and returns either
// `.hard_stop` (every subsequent symbol will hit the same
// condition, so the batch is aborted) or `.soft_skip` (the
// failure is per-symbol, so the batch keeps going). The tests
// verify the action classification for each error variant; the
// stderr text isn't asserted because stderr is suppressed in
// test mode.
|
||
|
||
test "reportFetchError: NoApiKey -> hard_stop" {
    // NoApiKey is expected to stop the whole batch.
    try std.testing.expectEqual(
        FetchErrorAction.hard_stop,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.NoApiKey),
    );
}
|
||
|
||
test "reportFetchError: AuthError -> hard_stop" {
    // AuthError is expected to stop the whole batch.
    try std.testing.expectEqual(
        FetchErrorAction.hard_stop,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.AuthError),
    );
}
|
||
|
||
test "reportFetchError: RateLimited -> hard_stop" {
    // RateLimited is expected to stop the whole batch.
    try std.testing.expectEqual(
        FetchErrorAction.hard_stop,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.RateLimited),
    );
}
|
||
|
||
test "reportFetchError: NotFound -> soft_skip" {
    // NotFound is expected to skip only the current symbol.
    try std.testing.expectEqual(
        FetchErrorAction.soft_skip,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.NotFound),
    );
}
|
||
|
||
test "reportFetchError: TransientError -> soft_skip" {
    // TransientError is expected to skip only the current symbol.
    try std.testing.expectEqual(
        FetchErrorAction.soft_skip,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.TransientError),
    );
}
|
||
|
||
test "reportFetchError: unknown error variant -> soft_skip (catch-all)" {
    // An error without an explicit prong (here a generic
    // FetchFailed) is expected to take the `else` branch and
    // soft-skip. That is the safer default: a per-symbol failure
    // keeps the batch going instead of aborting everything on an
    // unexpected error class.
    try std.testing.expectEqual(
        FetchErrorAction.soft_skip,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.FetchFailed),
    );
}
|
||
|
||
test "reportFetchError: long symbol still classifies correctly (bufPrint fallback)" {
    // The internal message buffer is 256 bytes, so a symbol
    // approaching that size is meant to exercise the
    // bufPrint-failed fallback path. The returned action must
    // still be classified correctly even if the message is
    // truncated.
    const oversized_symbol = "X" ** 200;
    try std.testing.expectEqual(
        FetchErrorAction.soft_skip,
        reportFetchError(std.testing.io, oversized_symbol, zfin.DataError.NotFound),
    );
}
|
||
|
||
// ── formatProvenanceMessage ────────────────────────────────────
|
||
|
||
test "formatProvenanceMessage: wikidata -> 'classified via Wikidata' line" {
    const testing = std.testing;
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "AAPL", .wikidata, null) orelse return error.Format;

    // The line must mention both the symbol and the source...
    for ([_][]const u8{ "AAPL", "Wikidata" }) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
    // ...and be newline-terminated.
    try testing.expect(std.mem.endsWith(u8, line, "\n"));
}
|
||
|
||
test "formatProvenanceMessage: edgar_fallback -> 'classified via EDGAR fallback' line" {
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "SOXX", .edgar_fallback, null) orelse return error.Format;

    // Both the symbol and the fallback source must appear.
    for ([_][]const u8{ "SOXX", "EDGAR fallback" }) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}
|
||
|
||
test "formatProvenanceMessage: none with no error -> 'no Wikidata or EDGAR entry'" {
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "MISSING", .none, null) orelse return error.Format;

    // Symbol, the "nothing found" explanation, and the manual
    // fill-in hint must all be present.
    const needles = [_][]const u8{
        "MISSING",
        "no Wikidata or EDGAR entry",
        "fill in by hand",
    };
    for (needles) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}
|
||
|
||
test "formatProvenanceMessage: none with error -> includes error name" {
    // When Wikidata errored AND EDGAR had no entry, the message
    // carries the upstream error name so the user can act on it
    // (e.g. RateLimited → wait and rerun).
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "FOO", .none, error.RateLimited) orelse return error.Format;

    const needles = [_][]const u8{ "FOO", "RateLimited", "Wikidata errored" };
    for (needles) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}
|
||
|
||
test "formatProvenanceMessage: small buffer returns null (safety valve)" {
    // A 16-byte buffer can't hold any of the message variants.
    // The function should return null rather than crash; the
    // caller treats null as "skip the breadcrumb" instead of
    // panicking.
    var tiny: [16]u8 = undefined;
    const maybe_line = formatProvenanceMessage(&tiny, "AAPL", .edgar_fallback, null);
    try std.testing.expect(maybe_line == null);
}
|
||
|
||
test "formatProvenanceMessage: messages have leading two-space indent" {
    // Match the rest of enrich's stderr output (progress
    // messages and fetch breadcrumbs all use a "  " prefix).
    var buf: [256]u8 = undefined;
    const msg = formatProvenanceMessage(&buf, "X", .edgar_fallback, null) orelse return error.Format;
    // Check for exactly the two-space prefix the test name
    // promises; a single-space check would also accept a
    // one-space indent.
    try std.testing.expect(std.mem.startsWith(u8, msg, "  "));
}
|
||
|
||
// ── classifyForCounter ────────────────────────────────────────
|
||
|
||
test "classifyForCounter: wikidata -> wikidata_hit regardless of error arg" {
    // The boolean argument must not influence the result when
    // the kind is `.wikidata`.
    for ([_]bool{ false, true }) |flag| {
        try std.testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, flag));
    }
}
|
||
|
||
test "classifyForCounter: edgar_fallback -> edgar_fallback regardless of wikidata error" {
    // EDGAR rescued the symbol, so the file has a usable line;
    // it counts as edgar_fallback whether or not Wikidata
    // errored upstream.
    for ([_]bool{ true, false }) |flag| {
        try std.testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, flag));
    }
}
|
||
|
||
test "classifyForCounter: none + wikidata errored -> failed (no data anywhere)" {
    // True failure: Wikidata HTTP errored and EDGAR has no row.
    // Nothing usable is in the file for this symbol; the user
    // must rerun or fill it in by hand.
    const bucket = classifyForCounter(.none, true);
    try std.testing.expectEqual(SummaryCounter.failed, bucket);
}
|
||
|
||
test "classifyForCounter: none + wikidata succeeded but empty -> manual_todo" {
    // Wikidata returned empty/useless data and EDGAR has no row.
    // The symbol exists in metadata.srf as a TODO stub that the
    // user fills in by hand. This differs from `failed` because
    // there is nothing to retry — Wikidata simply has no entry.
    const bucket = classifyForCounter(.none, false);
    try std.testing.expectEqual(SummaryCounter.manual_todo, bucket);
}
|
||
|
||
test "classifyForCounter: covers all (FallbackKind, bool) input combinations" {
    // Exhaustive truth table: any future policy change has to
    // update this test.
    const expectEqual = std.testing.expectEqual;

    // .wikidata: always a Wikidata hit.
    try expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, false));
    try expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, true));

    // .edgar_fallback: always an EDGAR fallback.
    try expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, false));
    try expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, true));

    // .none: the boolean decides between manual_todo and failed.
    try expectEqual(SummaryCounter.manual_todo, classifyForCounter(.none, false));
    try expectEqual(SummaryCounter.failed, classifyForCounter(.none, true));
}
|
||
|
||
// ── hasDominantEquitySector ──────────────────────────────────
|
||
|
||
test "hasDominantEquitySector: single 99% Equity / Corporate -> true" {
    // One overwhelmingly dominant equity row plus a tiny
    // residual row.
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.7 },
        .{ .description = "Short-Term Investment Vehicle / Registered Fund", .pct = 0.19 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try std.testing.expect(dominant);
}
|
||
|
||
test "hasDominantEquitySector: 95% threshold is inclusive" {
    // Exactly 95.0 must count as dominant.
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 95.0 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try std.testing.expect(dominant);
}
|
||
|
||
test "hasDominantEquitySector: 94.99% does NOT trigger" {
    // Just below the threshold must not count as dominant.
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 94.99 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try std.testing.expect(!dominant);
}
|
||
|
||
test "hasDominantEquitySector: multi-asset fund (FAGIX-shape) -> false" {
    // FAGIX holds about 22% Equity / Corporate alongside debt
    // and other sleeves — far below the 95% threshold.
    const rows = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 47.69 },
        .{ .description = "Equity / Corporate", .pct = 22.49 },
        .{ .description = "Loan / Corporate", .pct = 9.99 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try std.testing.expect(!dominant);
}
|
||
|
||
test "hasDominantEquitySector: pure-debt fund -> false" {
    // VBTLX-shape: every row is Debt / *; there is no
    // Equity / Corporate row at all.
    const rows = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 50.0 },
        .{ .description = "Debt / US Treasury", .pct = 30.0 },
    };
    const dominant = hasDominantEquitySector(&rows);
    try std.testing.expect(!dominant);
}
|
||
|
||
test "hasDominantEquitySector: null and empty -> false" {
    const testing = std.testing;

    // No sector data at all.
    try testing.expect(!hasDominantEquitySector(null));

    // Sector data present but with zero rows.
    const no_rows = [_]FundSector{};
    try testing.expect(!hasDominantEquitySector(&no_rows));
}
|
||
|
||
test "emitFundLines: null sectors -> single TODO line" {
    // Output is captured in a fixed in-memory writer.
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    try emitFundLines("VTI", "ETF", null, null, null, &writer);

    const expected = "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||
|
||
test "emitFundLines: populated sectors -> one line per sector with pct" {
    var storage: [512]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);
    const rows = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 47.69 },
        .{ .description = "Equity / Corporate", .pct = 22.49 },
    };
    try emitFundLines("FAGIX", "Fund", &rows, null, null, &writer);

    const emitted = writer.buffered();

    // Each sector row must show up with its percentage...
    const expected_lines = [_][]const u8{
        "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69",
        "symbol::FAGIX,sector::Equity / Corporate,geo::US,asset_class::Fund,pct:num:22.49",
    };
    for (expected_lines) |expected| {
        try std.testing.expect(std.mem.indexOf(u8, emitted, expected) != null);
    }
    // ...and no TODO placeholder may be emitted.
    try std.testing.expect(std.mem.indexOf(u8, emitted, "TODO") == null);
}
|
||
|
||
test "emitFundLines: empty slice -> single TODO line (treats empty as null)" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // A zero-length sector list must produce the same output as
    // passing null.
    const no_rows: [0]FundSector = .{};
    try emitFundLines("VTI", "ETF", &no_rows, null, null, &writer);

    const expected = "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||
|
||
test "emitFundLines: negative pct values render correctly" {
    // Real NPORT-P data carries negative percentages for short
    // positions and derivatives; they must round-trip cleanly.
    var storage: [512]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);
    const rows = [_]FundSector{
        .{ .description = "Repurchase Agreement / Other", .pct = -29.72 },
        .{ .description = "Derivative-FX / Other", .pct = -0.84 },
    };
    try emitFundLines("PTY", "Fund", &rows, null, null, &writer);

    const emitted = writer.buffered();
    for ([_][]const u8{ "pct:num:-29.72", "pct:num:-0.84" }) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, emitted, needle) != null);
    }
}
|
||
|
||
test "emitFundLines: ETF asset_class flows through" {
    var storage: [512]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);
    const rows = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.86 },
    };
    try emitFundLines("SOXX", "ETF", &rows, null, null, &writer);

    // The asset class passed in must appear verbatim on the line.
    const expected = "symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||
|
||
test "freeFundSectors: frees slice + each description, no leak" {
    // Build a heap-allocated slice whose descriptions are each
    // heap-allocated too (the shape `loadFundSectors` is said to
    // produce), then release everything via `freeFundSectors`.
    // `std.testing.allocator` reports any leak.
    const gpa = std.testing.allocator;
    var builder: std.ArrayList(FundSector) = .empty;
    errdefer builder.deinit(gpa);

    const debt_desc = try gpa.dupe(u8, "Debt / Corporate");
    errdefer gpa.free(debt_desc);
    try builder.append(gpa, .{ .description = debt_desc, .pct = 47.69 });

    const equity_desc = try gpa.dupe(u8, "Equity / Corporate");
    errdefer gpa.free(equity_desc);
    try builder.append(gpa, .{ .description = equity_desc, .pct = 22.49 });

    const owned = try builder.toOwnedSlice(gpa);
    // No explicit assertion: the testing allocator flags leaks.
    freeFundSectors(gpa, owned);
}
|
||
|
||
test "freeFundSectors: empty slice is a no-op" {
    // A zero-length allocation must be accepted without error or
    // leak.
    const gpa = std.testing.allocator;
    const no_rows = try gpa.alloc(FundSector, 0);
    freeFundSectors(gpa, no_rows);
}
|
||
|
||
test "freeFundEtfData: frees both name and sectors without leak" {
    // Construct the same shape `loadFundEtfData` produces, then
    // free via the paired free function. testing.allocator
    // catches any leak.
    const alloc = std.testing.allocator;

    const name = try alloc.dupe(u8, "Vanguard Total Bond Market Index Fund");
    // Until ownership is handed to freeFundEtfData, release the
    // name if any later allocation fails.
    errdefer alloc.free(name);

    var list: std.ArrayList(FundSector) = .empty;
    errdefer {
        for (list.items) |s| alloc.free(s.description);
        list.deinit(alloc);
    }
    {
        const desc = try alloc.dupe(u8, "Debt / Corporate");
        // Scoped to this block: once the append succeeds, the list
        // cleanup above owns `desc`, so this must not fire again
        // on a later failure (that would double-free).
        errdefer alloc.free(desc);
        try list.append(alloc, .{ .description = desc, .pct = 50.0 });
    }
    const sectors = try list.toOwnedSlice(alloc);

    freeFundEtfData(alloc, .{ .series_name = name, .sectors = sectors });
}
|
||
|
||
test "freeFundEtfData: handles null series_name (only sectors freed)" {
    // With no series name, only the sector slice and its
    // descriptions need releasing. testing.allocator catches any
    // leak.
    const alloc = std.testing.allocator;
    var list: std.ArrayList(FundSector) = .empty;
    errdefer {
        for (list.items) |s| alloc.free(s.description);
        list.deinit(alloc);
    }
    {
        const desc = try alloc.dupe(u8, "Equity / Corporate");
        // Scoped to this block: once the append succeeds, the list
        // cleanup above owns `desc`, so this must not fire again
        // on a later failure (that would double-free).
        errdefer alloc.free(desc);
        try list.append(alloc, .{ .description = desc, .pct = 100.0 });
    }
    const sectors = try list.toOwnedSlice(alloc);

    freeFundEtfData(alloc, .{ .series_name = null, .sectors = sectors });
}
|
||
|
||
test "freeFundEtfData: handles null sectors (only series_name freed)" {
    // Only the heap-allocated name is present; the testing
    // allocator reports a leak if it is not released.
    const gpa = std.testing.allocator;
    const series_name = try gpa.dupe(u8, "Some Fund");
    freeFundEtfData(gpa, .{ .series_name = series_name, .sectors = null });
}
|
||
|
||
test "freeFundEtfData: both null is a no-op" {
    // Nothing was allocated, so the call must be safe with
    // nothing to free.
    freeFundEtfData(std.testing.allocator, .{ .series_name = null, .sectors = null });
}
|
||
|
||
// ── sortSymbolsAlphabetically ────────────────────────────────
|
||
|
||
test "sortSymbolsAlphabetically: shuffled input -> alphabetical output" {
    var symbols = [_][]const u8{ "QQQ", "AAPL", "VTI", "BND", "SPY" };
    sortSymbolsAlphabetically(&symbols);

    // Compare position by position against the sorted order.
    const want = [_][]const u8{ "AAPL", "BND", "QQQ", "SPY", "VTI" };
    for (want, symbols) |expected, actual| {
        try std.testing.expectEqualStrings(expected, actual);
    }
}
|
||
|
||
test "sortSymbolsAlphabetically: already-sorted input is stable" {
    // Sorting an already-ordered list must leave it unchanged.
    var symbols = [_][]const u8{ "AAPL", "BND", "VTI" };
    sortSymbolsAlphabetically(&symbols);

    const want = [_][]const u8{ "AAPL", "BND", "VTI" };
    for (want, symbols) |expected, actual| {
        try std.testing.expectEqualStrings(expected, actual);
    }
}
|
||
|
||
test "sortSymbolsAlphabetically: empty slice is a no-op" {
    // Zero elements: the call must simply return.
    var symbols = [_][]const u8{};
    sortSymbolsAlphabetically(&symbols);

    const expected_len: usize = 0;
    try std.testing.expectEqual(expected_len, symbols.len);
}
|
||
|
||
test "sortSymbolsAlphabetically: single element is unchanged" {
    // One element: nothing to reorder.
    var symbols = [_][]const u8{"AAPL"};
    sortSymbolsAlphabetically(&symbols);

    const only = symbols[0];
    try std.testing.expectEqualStrings("AAPL", only);
}
|
||
|
||
test "sortSymbolsAlphabetically: case-sensitive ordering (uppercase < lowercase)" {
    // Defensive: symbols should always be uppercased before they
    // reach this function (portfolio.srf canonicalizes;
    // single-symbol mode uses framework's `uppercase_first_arg`).
    // Still, pin the comparator as byte-lexicographic so the
    // outcome is known if mixed case ever leaks in.
    var symbols = [_][]const u8{ "aapl", "AAPL", "BND" };
    sortSymbolsAlphabetically(&symbols);

    // Uppercase letters have lower byte values than lowercase,
    // so the lowercase entry sorts last.
    const want = [_][]const u8{ "AAPL", "BND", "aapl" };
    for (want, symbols) |expected, actual| {
        try std.testing.expectEqualStrings(expected, actual);
    }
}
|
||
|
||
test "sortSymbolsAlphabetically: numbers and digits sort before letters" {
    // CUSIPs (9-character alphanumeric) and class shares such as
    // "BRK.B" can occur. Byte-lexicographic ordering puts digit
    // prefixes ahead of letter prefixes, which matches user
    // intuition (numbered things group together at the top).
    var symbols = [_][]const u8{ "AAPL", "02315N600", "BRK.B" };
    sortSymbolsAlphabetically(&symbols);

    const want = [_][]const u8{ "02315N600", "AAPL", "BRK.B" };
    for (want, symbols) |expected, actual| {
        try std.testing.expectEqualStrings(expected, actual);
    }
}
|
||
|
||
test "sortSymbolsAlphabetically: duplicate symbols stay together" {
    // Defensive: stockSymbols is supposed to dedup, but if
    // duplicates ever leak in they should end up adjacent rather
    // than crash or scramble the order.
    var symbols = [_][]const u8{ "VTI", "AAPL", "VTI", "BND" };
    sortSymbolsAlphabetically(&symbols);

    const want = [_][]const u8{ "AAPL", "BND", "VTI", "VTI" };
    for (want, symbols) |expected, actual| {
        try std.testing.expectEqualStrings(expected, actual);
    }
}
|