const std = @import("std");
const zfin = @import("../root.zig");
const cli = @import("common.zig");
const framework = @import("framework.zig");
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
const ClassificationRecord = zfin.classification.ClassificationRecord;

/// Arguments accepted by the `enrich` command after parsing.
pub const ParsedArgs = struct {
    /// Optional symbol (e.g. "AAPL"). Null = portfolio mode (uses
    /// `-p` resolution to find the user's portfolio file(s)).
    symbol: ?[]const u8,
};

/// Command registration record: name, group, synopsis, help text,
/// and the user-facing error set (`UnexpectedArg`) that
/// `parseArgs` can return. `uppercase_first_arg` asks the
/// framework to uppercase the symbol argument before `parseArgs`
/// sees it.
pub const meta: framework.Meta = .{
    .name = "enrich",
    .group = .hygiene,
    .synopsis = "Bootstrap metadata.srf from Wikidata + EDGAR",
    .help =
    \\Usage: zfin enrich [SYMBOL]
    \\
    \\Bootstrap a `metadata.srf` classification file from public
    \\Wikidata + SEC EDGAR data. Two modes:
    \\
    \\ - Portfolio mode (no argument): enrich every stock symbol
    \\ in your portfolio. Honors the global `-p`/`--portfolio`
    \\ flag for selecting which portfolio file(s) to use; with
    \\ no flag, falls back to the standard portfolio resolution
    \\ (portfolio.srf in cwd, or $ZFIN_HOME/portfolio.srf).
    \\ Output is a complete SRF file written to stdout —
    \\ redirect into metadata.srf and edit by hand for accuracy.
    \\ - Symbol mode (single SYMBOL argument): enrich one symbol
    \\ and emit one appendable SRF line. Useful for adding to
    \\ an existing metadata.srf without rerunning the whole file.
    \\
    \\Sources used:
    \\ - Wikidata SPARQL: sector / industry / country / asset class
    \\ + CIK lookup for the EDGAR call below.
    \\ - SEC EDGAR XBRL company facts: shares outstanding, used
    \\ with the latest cached close price to derive market-cap
    \\ size buckets (Large/Mid/Small) for US-domiciled stocks.
    \\ - SEC EDGAR mutual-fund ticker map: fallback when Wikidata
    \\ has no entry. Open-end mutual funds aren't exchange-listed
    \\ and are usually missing from Wikidata; this fills in
    \\ `geo::US,asset_class::Fund` (or `ETF` for company-map
    \\ UIT entries with title hints).
    \\
    \\Always review the output before saving as `metadata.srf`.
    \\Wikidata + EDGAR are free and have no per-day quota; the SEC
    \\caps EDGAR at 10 req/sec which the rate limiter respects.
    \\Requires ZFIN_USER_EMAIL in `.env` (SEC requires a contact in
    \\the User-Agent header).
    \\
    \\Examples:
    \\ zfin enrich > metadata.srf # whole portfolio (default file)
    \\ zfin -p sample enrich > metadata.srf # whole portfolio (named file)
    \\ zfin enrich AAPL >> metadata.srf # single symbol append
    \\ zfin enrich fagix >> metadata.srf # symbol auto-uppercased
    \\
    ,
    .uppercase_first_arg = true,
    .user_errors = error{UnexpectedArg},
};

/// Parse the command's positional arguments. Zero arguments
/// selects portfolio mode (`symbol == null`); one argument is
/// taken as the symbol. More than one argument prints an
/// explanation to stderr and returns `error.UnexpectedArg`.
pub fn parseArgs(ctx: *framework.RunCtx, cmd_args: []const []const u8) !ParsedArgs {
    if (cmd_args.len > 1) {
        cli.stderrPrint(ctx.io, "Error: 'enrich' takes at most one argument (a symbol). For portfolio-mode, omit the argument and use the global -p flag if needed.\n");
        return error.UnexpectedArg;
    }
    return .{ .symbol = if (cmd_args.len == 1) cmd_args[0] else null };
}

/// The three SRF fields `deriveMetadata` computes for a non-fund
/// row. All slices are either string literals, the geo bucket
/// string, or a view into the caller's `sector_buf`.
const DerivedMeta = struct {
    /// Best-effort sector text. `deriveMetadata` emits the literal
    /// "TODO" (not "Unknown") for ETFs and when the classification
    /// carries no sector, so the user knows to fill it in by hand.
    sector: []const u8,
    /// Geo bucket: one of "US", "International Developed",
    /// "Emerging Markets", or "Unknown".
    geo: []const u8,
    /// Asset class: "ETF", "Mutual Fund", or one of the size-shaped
    /// US-stock buckets ("US Large Cap" / "US Mid Cap" / "US Small
    /// Cap"). For non-US stocks where size data is unavailable, we
    /// fall back to the geo bucket itself.
    asset_class: []const u8,
};

/// Compose a `DerivedMeta` from the per-symbol Wikidata
/// `ClassificationRecord` plus an optional `market_cap` estimate
/// (shares-outstanding × latest close, in dollars). Pure data
/// transform; no I/O.
///
/// `sector_buf` is a caller-provided buffer for the title-cased
/// sector string. Wikidata returns sectors in mixed case; we
/// normalize for display.
fn deriveMetadata(
    classification: ClassificationRecord,
    market_cap: ?f64,
    sector_buf: []u8,
) DerivedMeta {
    const geo = zfin.classification.geoFor(classification.country);

    // Sector. ETFs always get `TODO` (funds span many sectors, so
    // the user supplies the breakdown). A stock with no sector on
    // record also gets `TODO` rather than an "Unknown" placeholder
    // that could be mistaken for a real taxonomy value. Otherwise
    // the recorded sector is title-cased into `sector_buf`.
    const sector: []const u8 = if (classification.is_etf)
        "TODO"
    else if (classification.sector) |raw|
        cli.fmt.toTitleCase(sector_buf, raw)
    else
        "TODO";

    // Size bucket used for US-domiciled stocks. With no market-cap
    // estimate the default is Large Cap (the historical default of
    // the older AlphaVantage-based flow).
    const us_bucket: []const u8 = bucket: {
        const cap = market_cap orelse break :bucket "US Large Cap";
        if (cap >= 10_000_000_000) break :bucket "US Large Cap";
        break :bucket if (cap >= 2_000_000_000) "US Mid Cap" else "US Small Cap";
    };

    const is_mutual_fund = if (classification.asset_class) |ac|
        std.mem.eql(u8, ac, "Mutual Fund")
    else
        false;
    const is_us = std.mem.eql(u8, geo, zfin.classification.geo.us);

    // Asset class precedence: ETF flag, then an explicit
    // "Mutual Fund" tag, then the US size bucket. Non-US stocks
    // fall back to the geo bucket itself; the user can refine it
    // in metadata.srf.
    const asset_class: []const u8 = if (classification.is_etf)
        "ETF"
    else if (is_mutual_fund)
        "Mutual Fund"
    else if (is_us)
        us_bucket
    else
        geo;

    return .{ .sector = sector, .geo = geo, .asset_class = asset_class };
}

/// CLI `enrich` command: bootstrap a metadata.srf file from Wikidata + EDGAR data.
/// Two dispatch paths:
/// - Portfolio mode (no argument): load the user's portfolio
///   via the standard `cli.loadPortfolio` flow (which honors
///   `-p`/`--portfolio` patterns), then enrich every stock
///   symbol.
/// - Symbol mode (one argument): enrich a single symbol. The
///   framework uppercases the argument before we see it, so
///   `enrich fagix` and `enrich FAGIX` produce identical
///   output.
pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
    const service = ctx.svc orelse return error.MissingDataService;
    const symbol = parsed.symbol orelse return enrichPortfolio(ctx, service);
    return enrichSymbol(ctx.io, ctx.allocator, service, symbol, ctx.out);
}

/// What a caller should do after a fetch failure: abort the whole
/// batch (`hard_stop` — missing API key, rejected auth, rate
/// limiting; these would recur for every later symbol) or move on
/// to the next symbol (`soft_skip` — the failure is specific to
/// this symbol).
const FetchErrorAction = enum { hard_stop, soft_skip };

/// Write a stderr message explaining why fetching `sym` failed
/// and return the action the caller should take.
///
/// This is the one place a `DataError` is translated into user
/// output. Per AGENTS.md "Errors carry information", every
/// message names the specific failure rather than a generic
/// "fetch failed", so the user can act without reading source.
/// If a formatted message does not fit the 256-byte scratch
/// buffer, a shorter fixed message is printed instead.
fn reportFetchError(io: std.Io, sym: []const u8, err: anyerror) FetchErrorAction {
    var scratch: [256]u8 = undefined;

    const text: []const u8 = switch (err) {
        zfin.DataError.NoApiKey => "Error: ZFIN_USER_EMAIL not set. Add it to .env (SEC EDGAR requires a contact email in the User-Agent header).\n",
        zfin.DataError.AuthError => "Error: SEC EDGAR rejected the request. Check ZFIN_USER_EMAIL in .env\n",
        zfin.DataError.RateLimited => std.fmt.bufPrint(
            &scratch,
            "Error: rate-limited on {s}. Wikidata/EDGAR have generous limits; check for upstream throttling.\n",
            .{sym},
        ) catch "Error: rate-limited. Try again later.\n",
        zfin.DataError.NotFound => std.fmt.bufPrint(
            &scratch,
            " {s}: not in Wikidata; mark sector/geo/asset_class manually\n",
            .{sym},
        ) catch " not in Wikidata; mark manually\n",
        zfin.DataError.TransientError => std.fmt.bufPrint(
            &scratch,
            " {s}: transient upstream failure; will need re-run\n",
            .{sym},
        ) catch " transient upstream failure; will need re-run\n",
        else => std.fmt.bufPrint(
            &scratch,
            " {s}: fetch failed ({t})\n",
            .{ sym, err },
        ) catch " fetch failed\n",
    };
    cli.stderrPrint(io, text);

    return switch (err) {
        zfin.DataError.NoApiKey,
        zfin.DataError.AuthError,
        zfin.DataError.RateLimited,
        => .hard_stop,
        else => .soft_skip,
    };
}

/// Best-effort market-cap estimate for `sym`: shares outstanding
/// (from the entity facts fetched for `cik`) multiplied by the
/// close of the last candle returned for `sym`.
///
/// Returns null when `cik` is null, when either service call
/// fails, when no shares-outstanding fact is present, or when no
/// candles come back. Failures are swallowed silently (nothing is
/// printed or propagated); the caller falls back to a default
/// size bucket.
fn estimateMarketCap(
    svc: *zfin.DataService,
    sym: []const u8,
    cik: ?[]const u8,
    opts: zfin.FetchOptions,
) ?f64 {
    const entity_cik = cik orelse return null;

    const facts = svc.getEntityFacts(entity_cik, opts) catch return null;
    defer facts.deinit();

    // If several shares-outstanding facts are present, the last
    // one in the list wins.
    var latest_shares: ?u64 = null;
    for (facts.data) |fact| switch (fact) {
        .shares_outstanding => |so| latest_shares = so.shares_outstanding,
    };
    const share_total = latest_shares orelse return null;

    const candles = svc.getCandles(sym, opts) catch return null;
    defer candles.deinit();
    if (candles.data.len == 0) return null;

    const last_candle = candles.data[candles.data.len - 1];
    const share_count: f64 = @floatFromInt(share_total);
    return share_count * last_candle.close;
}

/// Summary bucket a portfolio-mode symbol is counted under once
/// classification (including any EDGAR fallback) has resolved.
const SummaryCounter = enum { wikidata_hit, edgar_fallback, failed, manual_todo };

/// Map a provenance `kind` to its summary bucket.
///
/// `wikidata_errored` only matters for `.none`: a `.none` after an
/// upstream error is a true failure (no data anywhere), while a
/// `.none` after a successful but empty/sparse fetch is a
/// manual-fill-in case (the symbol exists, it just needs human
/// attention).
fn classifyForCounter(kind: FallbackKind, wikidata_errored: bool) SummaryCounter {
    const unresolved: SummaryCounter = if (wikidata_errored) .failed else .manual_todo;
    const bucket: SummaryCounter = switch (kind) {
        .wikidata => .wikidata_hit,
        .edgar_fallback => .edgar_fallback,
        .none => unresolved,
    };
    return bucket;
}

/// Render the per-symbol provenance breadcrumb into `buf` and
/// return the written slice, or null if `buf` is too small for
/// the message. For `.none`, the wording depends on whether an
/// error (`err`) accompanied the miss; `err` is ignored for the
/// other kinds.
fn formatProvenanceMessage(buf: []u8, sym: []const u8, kind: FallbackKind, err: ?anyerror) ?[]const u8 {
    if (kind == .none) {
        if (err) |cause| {
            return std.fmt.bufPrint(
                buf,
                " {s}: no classification (Wikidata errored {t}, EDGAR had no entry); fill in by hand\n",
                .{ sym, cause },
            ) catch null;
        }
    }

    const rendered = switch (kind) {
        .wikidata => std.fmt.bufPrint(buf, " {s}: classified via Wikidata\n", .{sym}),
        .edgar_fallback => std.fmt.bufPrint(buf, " {s}: classified via EDGAR fallback (Wikidata sparse or empty)\n", .{sym}),
        .none => std.fmt.bufPrint(buf, " {s}: no Wikidata or EDGAR entry; fill in by hand\n", .{sym}),
    };
    return rendered catch null;
}

/// Print a one-line stderr breadcrumb describing how a symbol
/// was classified. Used in single-symbol mode (`zfin enrich AAPL`)
/// where there's no end-of-run summary line; the user otherwise
/// has no way to tell whether the SRF row came from Wikidata,
/// the EDGAR fallback, or is a TODO stub. Silent in portfolio
/// mode (which has its own summary line at the bottom).
fn stderrSymbolProvenance(io: std.Io, sym: []const u8, kind: FallbackKind, err: ?anyerror) void {
    var buf: [256]u8 = undefined;
    // A null message means the line did not fit in `buf`; in that
    // case nothing is printed.
    if (formatProvenanceMessage(&buf, sym, kind, err)) |msg| {
        cli.stderrPrint(io, msg);
    }
}

/// Enrich a single symbol and output appendable SRF lines to stdout.
///
/// On success writes either the ETF/fund rows (via `emitEtfRows`)
/// or one stock row, then a provenance breadcrumb to stderr.
/// When the symbol is unknown, the fetch soft-fails, or the
/// service returns an empty record list, a commented all-TODO
/// stub is written instead. A hard-stop failure (see
/// `reportFetchError`) writes nothing to `out`.
///
/// Errors: only writer errors from `out` (and whatever
/// `emitEtfRows` propagates) are returned; fetch failures are
/// reported on stderr and do not fail the command.
fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, sym: []const u8, out: *std.Io.Writer) !void {
    // Symbol is already uppercase: the framework's
    // `uppercase_first_arg = true` normalizes the CLI arg before
    // it reaches `parseArgs`. Portfolio-mode callers also pass
    // canonicalized symbols (from the parsed portfolio file).
    {
        var msg_buf: [128]u8 = undefined;
        const msg = std.fmt.bufPrint(&msg_buf, " Fetching {s}...\n", .{sym}) catch " ...\n";
        cli.stderrPrint(io, msg);
    }

    const opts: zfin.FetchOptions = .{};

    // `getClassification` runs the full Wikidata -> EDGAR fallback
    // chain inside the service. The returned record is expected to
    // carry useful data (is_etf, asset_class, country, geo,
    // source, ...); sparse-Wikidata symbols get merged with EDGAR
    // ticker-map + NPORT-P data before this returns.
    const result = svc.getClassification(sym, opts) catch |err| {
        switch (err) {
            zfin.DataError.NotFound => {
                // Neither Wikidata nor EDGAR knows this symbol.
                try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
                stderrSymbolProvenance(io, sym, .none, null);
                return;
            },
            else => {
                const action = reportFetchError(io, sym, err);
                switch (action) {
                    .hard_stop => return,
                    .soft_skip => {
                        try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
                        try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
                        stderrSymbolProvenance(io, sym, .none, err);
                        return;
                    },
                }
            },
        }
    };
    defer result.deinit();

    // Defensive: a successful fetch with zero records would make
    // `result.data[0]` an out-of-bounds index. Treat it exactly
    // like "not found" so the user still gets a stub to fill in.
    if (result.data.len == 0) {
        try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
        try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
        stderrSymbolProvenance(io, sym, .none, null);
        return;
    }

    const c = result.data[0];

    if (c.is_etf) {
        try emitEtfRows(svc, allocator, sym, c, opts, out);
    } else {
        const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
        var sector_buf: [64]u8 = undefined;
        const derived = deriveMetadata(c, market_cap, &sector_buf);
        if (c.name) |name| {
            try out.print("# {s}\n", .{name});
        }
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
            sym, derived.sector, derived.geo, derived.asset_class,
        });
    }

    stderrSymbolProvenance(io, sym, kindFromSource(c.source), null);
}

/// Translate the classification record's `source` provenance
/// into the `FallbackKind` enum used by the existing
/// progress/summary plumbing. Recognizes "wikidata" and
/// "edgar_fallback"; any other string maps to `.none`.
fn kindFromSource(source: []const u8) FallbackKind {
    if (std.mem.eql(u8, source, "wikidata")) return .wikidata;
    if (std.mem.eql(u8, source, "edgar_fallback")) return .edgar_fallback;
    return .none;
}

/// Emit multi-row sleeve breakdown for an ETF/fund. Sleeves come
/// from `getEtfMetrics` (NPORT-P sector decomposition); the
/// classification record supplies the asset_class, geo, and
/// (if title-keyword inference fired) the dominant sector to
/// override "Equity / Corporate" with.
fn emitEtfRows(
    svc: *zfin.DataService,
    allocator: std.mem.Allocator,
    sym: []const u8,
    c: ClassificationRecord,
    opts: zfin.FetchOptions,
    out: *std.Io.Writer,
) !void {
    // NPORT-P data is optional: a null here just means the rows
    // below are emitted without a sector breakdown.
    const etf_data = loadFundEtfData(svc, allocator, sym, opts);
    defer if (etf_data) |owned| freeFundEtfData(allocator, owned);

    const sleeves: ?[]const FundSector = if (etf_data) |owned| owned.sectors else null;
    const asset_class = c.asset_class orelse "Fund";
    const geo = c.geo orelse "US";

    // Header comment line: symbol, fund name when known, and a
    // marker when the record came from the EDGAR fallback.
    if (std.mem.eql(u8, c.source, "edgar_fallback")) {
        if (c.name) |fund_name| {
            try out.print("# {s} -- {s} (Wikidata had no entry)\n", .{ sym, fund_name });
        } else {
            try out.print("# {s} -- (Wikidata had no entry)\n", .{sym});
        }
    } else {
        if (c.name) |fund_name| {
            try out.print("# {s} -- {s}\n", .{ sym, fund_name });
        } else {
            try out.print("# {s}\n", .{sym});
        }
    }

    try emitFundLines(sym, asset_class, sleeves, c.sector, geo, out);
}

// NOTE(review): the comment below describes an EDGAR-fallback
// emitter (taking `lookup` and `err`) that does not appear in
// this part of the file. It is kept as a plain comment so it no
// longer attaches to `FundSector` as a doc comment — confirm
// whether it can be dropped.
//
// Wikidata didn't return a classification for `sym` (either the
// fetch errored out softly, or returned an empty result set).
// Emit a metadata line based on the EDGAR-fallback `lookup`:
//
// - `.managed_fund` → `geo::US,asset_class::Fund` (the
//   `tickers_funds.srf` file mixes mutual funds and
//   series-of-trust ETFs — generic "Fund" label since we
//   can't tell).
// - `.company_or_uit` with title-hint → `geo::US,
//   asset_class::ETF` for trust/ETF-shaped titles, else
//   `Fund`.
// - `.none` → all-TODO commented stub.
//
// `sector::TODO` is always emitted on fund hits — funds are
// multi-sector by definition; the user fills in their preferred
// breakdown.
//
// `err` is non-null when Wikidata's fetch errored (vs returning
// empty); included in the comment so the user can see why the
// auto-fill didn't work.

/// One sector slice of a fund's NPORT-P breakdown. The
/// `description` is NPORT-P's human-readable category (e.g.
/// "Equity / Corporate", "Debt / US Treasury") rather than a
/// GICS sector. For pure-equity funds NPORT-P collapses to
/// "100% Equity / Corporate"; for multi-asset funds (FAGIX-shape)
/// the breakdown is meaningfully diverse. The user can refine
/// to GICS by hand if they want to track stock-fund
/// decomposition.
pub const FundSector = struct {
    /// Category text. The struct itself does not own it; whoever
    /// builds the value decides whether the bytes are borrowed or
    /// separately allocated (and freed).
    description: []const u8,
    /// Share of the portfolio for this category, as a percentage
    /// (compared against 95.0 by `hasDominantEquitySector`).
    pct: f64,
};

/// Report whether a fund's NPORT-P breakdown contains an
/// "Equity / Corporate" sleeve of at least 95% — the
/// precondition for sector inference firing. Multi-asset funds
/// (FAGIX-shape: 48% Debt + 22% Equity + ...) don't meet this
/// guard and keep their NPORT-P decomposition. A null breakdown
/// yields false.
fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool {
    const sleeves = fund_sectors orelse return false;
    for (sleeves) |sleeve| {
        if (!(sleeve.pct >= 95.0)) continue;
        if (std.mem.eql(u8, sleeve.description, "Equity / Corporate")) return true;
    }
    return false;
}

/// Emit the body lines for a fund-classified symbol. When
/// `fund_sectors` is non-null and non-empty, emits one
/// `pct:num:N` line per sector; otherwise emits a single
/// `sector::TODO` line. The asset_class comes from the caller
/// (already determined: "Fund" or "ETF").
///
/// `inferred_sector` (when non-null AND a single dominant
/// `Equity / Corporate` sleeve exists) replaces that sleeve's
/// row with a GICS-tagged row, preserving the original pct.
/// Other rows (Cash sleeves, dust derivatives) stay as-is.
/// When inference doesn't apply (no dominant sleeve, no
/// inferred sector, or the breakdown is multi-asset like
/// FAGIX), the NPORT-P decomposition emits unchanged.
///
/// `geo` is applied uniformly to every emitted row. Defaults
/// to `"US"` when null. NPORT-P doesn't tell us the holdings'
/// geo (only the fund's domicile, which is always US for funds
/// in this map), so the caller passes the inferred geo when
/// available.
fn emitFundLines(
    sym: []const u8,
    asset_class: []const u8,
    fund_sectors: ?[]const FundSector,
    inferred_sector: ?[]const u8,
    geo: ?[]const u8,
    out: *std.Io.Writer,
) !void {
    const geo_label = geo orelse "US";
    const rows = fund_sectors orelse &[_]FundSector{};

    if (rows.len == 0) {
        // No sector breakdown at all (NPORT-P fetch failed). Emit
        // one line; if title-keyword inference returned a sector,
        // use it instead of "TODO".
        const lone_sector = inferred_sector orelse "TODO";
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, lone_sector, geo_label, asset_class });
        return;
    }

    // The override is only in effect when an inferred sector
    // exists AND the breakdown has a dominant Equity / Corporate
    // sleeve; otherwise every row keeps its raw NPORT-P category.
    const replacement: ?[]const u8 = if (hasDominantEquitySector(rows)) inferred_sector else null;

    for (rows) |row| {
        var label: []const u8 = row.description;
        if (replacement) |gics| {
            // Swap only the Equity / Corporate rows; Cash sleeves,
            // derivatives, etc. are informative as-is.
            if (std.mem.eql(u8, row.description, "Equity / Corporate")) label = gics;
        }
        try out.print(
            "symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n",
            .{ sym, label, geo_label, asset_class, row.pct },
        );
    }
}

/// What `getEtfMetrics` provides that `enrich` actually uses:
/// the canonical fund name (the NPORT-P series name, falling
/// back to the submissions-feed `entity_name`) plus the
/// per-sector portfolio breakdown. Either field may be null if
/// NPORT-P data didn't include it. Both fields are owned by the
/// allocator passed to the loader; free via `freeFundEtfData`.
pub const FundEtfData = struct {
    series_name: ?[]const u8,
    sectors: ?[]FundSector,
};

/// Pull NPORT-P data for `sym` from the EtfMetrics cache (or
/// fetch on miss). Returns null on any error fetching upstream;
/// returns a struct (with possibly-null fields) on success. The
/// fields are independent — a fund may have a series_name but no
/// sector data, or vice versa, depending on what NPORT-P
/// returned.
fn loadFundEtfData(svc: *zfin.DataService, allocator: std.mem.Allocator, sym: []const u8, opts: zfin.FetchOptions) ?FundEtfData {
    const result = svc.getEtfMetrics(sym, opts) catch return null;
    defer result.deinit();

    var list: std.ArrayList(FundSector) = .empty;
    var series_name: ?[]const u8 = null;

    // This function returns an optional, not an error union, so
    // `errdefer` would never fire on `return null`. Cleanup is
    // therefore an ordinary `defer` guarded by a flag that is set
    // only once ownership has been handed to the returned struct.
    var handed_off = false;
    defer {
        if (!handed_off) {
            for (list.items) |s| allocator.free(s.description);
            list.deinit(allocator);
            if (series_name) |s| allocator.free(s);
        }
    }

    for (result.data) |rec| switch (rec) {
        .profile => |p| {
            // Take the first profile record's series_name.
            // `parseNportP` already filters "N/A" sentinels and
            // empty strings before populating this field; the
            // submissions-feed fallback (`entity_name`) is also
            // already applied. Whatever lands here is the most
            // authoritative name we have for the fund.
            if (series_name == null) {
                if (p.series_name) |sn| {
                    series_name = allocator.dupe(u8, sn) catch return null;
                }
            }
        },
        .sector => |s| {
            const desc = allocator.dupe(u8, s.description) catch return null;
            list.append(allocator, .{ .description = desc, .pct = s.pct_of_portfolio }) catch {
                // `desc` is not in the list yet, so the deferred
                // cleanup cannot see it; free it here.
                allocator.free(desc);
                return null;
            };
        },
        .holding => {},
    };

    // If both fields would be null there's nothing useful to
    // return; signal "no data" so the caller takes the no-name
    // fallback. (The deferred cleanup releases any list capacity.)
    if (series_name == null and list.items.len == 0) return null;

    // On allocation failure `toOwnedSlice` leaves the list intact,
    // so returning null lets the deferred cleanup free everything.
    const sectors: ?[]FundSector = if (list.items.len == 0)
        null
    else
        list.toOwnedSlice(allocator) catch return null;

    handed_off = true;
    // Either the list never held items or `toOwnedSlice` emptied
    // it; release any remaining capacity.
    list.deinit(allocator);

    return .{
        .series_name = series_name,
        .sectors = sectors,
    };
}

/// Free the slice returned by an old caller pattern (each
/// entry's `description` plus the slice itself). Kept around
/// because tests construct slices directly; production callers
/// use `freeFundEtfData`.
fn freeFundSectors(allocator: std.mem.Allocator, sectors: []FundSector) void {
    for (sectors) |s| allocator.free(s.description);
    allocator.free(sectors);
}

/// Free the struct returned by `loadFundEtfData`. Frees the
/// `series_name` string (if non-null) and the `sectors` slice
/// (each entry's `description`, then the slice itself).
fn freeFundEtfData(allocator: std.mem.Allocator, data: FundEtfData) void {
    if (data.series_name) |s| allocator.free(s);
    if (data.sectors) |secs| freeFundSectors(allocator, secs);
}

/// Provenance tag derived from a `ClassificationRecord.source`
/// string. Used for per-symbol summary counters and progress
/// messages.
const FallbackKind = enum { wikidata, edgar_fallback, none };

/// Sort symbol slice alphabetically in place. Used by
/// `enrichPortfolio` to produce stable, diff-friendly output.
/// Pure data transform on a `[][]const u8`; no allocation.
/// Ordering is bytewise (`std.mem.lessThan`).
fn sortSymbolsAlphabetically(syms: [][]const u8) void {
    std.mem.sort([]const u8, syms, {}, struct {
        fn lt(_: void, a: []const u8, b: []const u8) bool {
            return std.mem.lessThan(u8, a, b);
        }
    }.lt);
}

/// Enrich every stock symbol in the resolved portfolio. Goes
/// through `cli.loadPortfolio` so global `-p`/`--portfolio`
/// patterns are honored — same multi-file union-merge as the rest
/// of the CLI.
///
/// Writes a complete SRF file (header, one stanza per symbol,
/// summary comment) to `ctx.out`, and progress lines to stderr.
/// Returns without output when the portfolio cannot be loaded.
/// Errors: writer errors from `ctx.out`, `OutOfMemory` when
/// copying the symbol list, and whatever `emitEtfRows` propagates.
fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void {
    const io = ctx.io;
    const allocator = ctx.allocator;
    const out = ctx.out;

    var loaded = cli.loadPortfolio(ctx, ctx.today) orelse return;
    defer loaded.deinit(allocator);

    const positions = loaded.positions;

    // Sort symbols alphabetically for stable, diff-friendly
    // output. Without this, `stockSymbols` returns symbols in
    // `std.StringHashMap` bucket order — unstable across Zig
    // versions and across portfolio edits. The sort runs on a
    // private copy so the loader's own (const) slice is never
    // mutated behind its back.
    const syms = try allocator.dupe([]const u8, loaded.syms);
    defer allocator.free(syms);
    sortSymbolsAlphabetically(syms);

    // EDGAR ticker-map fallback runs lazily inside
    // `svc.lookupEdgarFallback` (cache-first; only hits the
    // network on cold cache or `--refresh-data`). The service
    // handles map lifetimes; the loop here just consumes the
    // digested `EdgarLookup` shape.
    const opts: zfin.FetchOptions = .{};

    try out.print("#!srfv1\n", .{});
    try out.print("# Portfolio classification metadata\n", .{});
    try out.print("# Generated from Wikidata + SEC EDGAR data\n", .{});
    try out.print("# Edit as needed: sector, geo, asset_class, pct:num:N\n", .{});
    try out.print("#\n", .{});
    try out.print("# For ETFs/funds with multi-class exposure, add multiple lines\n", .{});
    try out.print("# with pct:num: values that sum to ~100\n\n", .{});

    var wikidata_hits: usize = 0;
    var edgar_fallback: usize = 0;
    var manual_todo: usize = 0;
    var cusip_skipped: usize = 0;
    var failed: usize = 0;

    for (syms, 0..) |sym, i| {
        // Skip CUSIPs and known non-stock symbols
        if (isCusipLike(sym)) {
            // Use the note of the first matching position (if it
            // has one) as a human-readable hint for this CUSIP.
            var note: ?[]const u8 = null;
            for (positions) |pos| {
                if (std.mem.eql(u8, pos.symbol, sym)) {
                    if (pos.note) |n| {
                        note = n;
                    }
                    break;
                }
            }
            try out.print("# CUSIP {s}", .{sym});
            if (note) |n| try out.print(" ({s})", .{n});
            try out.print(" -- fill in manually\n", .{});
            try out.print("# symbol::{s},asset_class::TODO,geo::TODO\n\n", .{sym});
            cusip_skipped += 1;
            continue;
        }

        // Progress to stderr
        {
            var msg_buf: [128]u8 = undefined;
            const msg = std.fmt.bufPrint(&msg_buf, " [{d}/{d}] {s}...\n", .{ i + 1, syms.len, sym }) catch " ...\n";
            cli.stderrPrint(io, msg);
        }

        const result = svc.getClassification(sym, opts) catch |err| {
            switch (err) {
                zfin.DataError.NotFound => {
                    // Neither Wikidata nor EDGAR knows this
                    // symbol -- fill in by hand.
                    try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                    try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                    manual_todo += 1;
                    continue;
                },
                else => {
                    const action = reportFetchError(io, sym, err);
                    try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
                    try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                    failed += 1;
                    switch (action) {
                        .hard_stop => {
                            // Every remaining symbol will hit the
                            // same condition (no API key / auth
                            // fail / rate limit). Stop the batch
                            // with a clear note so the user knows
                            // how many were skipped.
                            var rem_buf: [256]u8 = undefined;
                            const remaining = syms.len - i - 1;
                            const rem_msg = std.fmt.bufPrint(
                                &rem_buf,
                                "Stopping enrichment: {d} symbol(s) not yet fetched. Rerun once the issue is resolved.\n",
                                .{remaining},
                            ) catch "Stopping enrichment.\n";
                            cli.stderrPrint(io, rem_msg);
                            break;
                        },
                        .soft_skip => continue,
                    }
                },
            }
        };
        defer result.deinit();

        // Defensive: a successful fetch with zero records would
        // make `result.data[0]` an out-of-bounds index. Count it
        // as a manual fill-in and emit the same stub as NotFound.
        if (result.data.len == 0) {
            try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
            try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
            manual_todo += 1;
            continue;
        }

        const c = result.data[0];

        if (c.is_etf) {
            try emitEtfRows(svc, allocator, sym, c, opts, out);
            try out.print("\n", .{});
        } else {
            const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
            var sector_buf: [64]u8 = undefined;
            const derived = deriveMetadata(c, market_cap, &sector_buf);
            if (c.name) |name| {
                try out.print("# {s}\n", .{name});
            }
            try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
                sym, derived.sector, derived.geo, derived.asset_class,
            });
        }

        switch (kindFromSource(c.source)) {
            .wikidata => wikidata_hits += 1,
            .edgar_fallback => edgar_fallback += 1,
            .none => manual_todo += 1, // shouldn't happen for a successful return
        }
    }

    // Summary. Every processed symbol contributes to exactly one
    // bucket, so the buckets sum to `syms.len` unless a hard stop
    // ended the batch early (the unprocessed remainder is counted
    // nowhere). `failed` counts symbols whose classification fetch
    // errored with something other than NotFound; NotFound and
    // empty results land in `manual_todo`.
    const filled = wikidata_hits + edgar_fallback;
    try out.print("# ---\n", .{});
    try out.print("# Enriched {d} symbols: {d} fully classified ({d} from Wikidata, {d} from EDGAR fallback), {d} need manual fill-in, {d} CUSIP-skipped, {d} unrecoverable failures\n", .{
        syms.len,
        filled,
        wikidata_hits,
        edgar_fallback,
        manual_todo,
        cusip_skipped,
        failed,
    });
    try out.print("# Review and edit this file, then save as metadata.srf\n", .{});
}

// ── Tests ────────────────────────────────────────────────────

test "parseArgs: accepts a symbol argument" {
    var ctx: framework.RunCtx = undefined;
    ctx.io = std.testing.io;
    const args = [_][]const u8{"AAPL"};
    const parsed = try parseArgs(&ctx, &args);
    try std.testing.expectEqualStrings("AAPL", parsed.symbol orelse return error.MissingSymbol);
}

test "parseArgs: no argument means portfolio mode" {
    var ctx: framework.RunCtx = undefined;
    ctx.io = std.testing.io;
    const args = [_][]const u8{};
    const parsed = try parseArgs(&ctx, &args);
    try std.testing.expect(parsed.symbol == null);
}

test "parseArgs: extra args error" {
    var ctx: framework.RunCtx = undefined;
    ctx.io = std.testing.io;
    const args = [_][]const u8{ "AAPL", "extra" };
    try std.testing.expectError(error.UnexpectedArg, parseArgs(&ctx, &args));
}

test "deriveMetadata: US large cap stock" {
    const c: ClassificationRecord = .{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .sector = "technology",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 3_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Technology", derived.sector);
    try std.testing.expectEqualStrings("US", derived.geo);
    try std.testing.expectEqualStrings("US Large Cap", derived.asset_class);
}

test "deriveMetadata: US small cap stock" {
    const c: ClassificationRecord = .{
        .symbol = "TINY",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 500_000_000,
&sector_buf);
    try std.testing.expectEqualStrings("US Small Cap", derived.asset_class);
}

test "deriveMetadata: US mid cap stock" {
    const c: ClassificationRecord = .{
        .symbol = "MID",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 5_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("US Mid Cap", derived.asset_class);
}

test "deriveMetadata: ETF sets asset_class to ETF regardless of size" {
    const c: ClassificationRecord = .{
        .symbol = "VTI",
        .name = "Vanguard Total Stock Market ETF",
        .country = "US",
        .is_etf = true,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 1_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("ETF", derived.asset_class);
}

test "deriveMetadata: international stock falls back to geo bucket" {
    const c: ClassificationRecord = .{
        .symbol = "TM",
        .country = "JP",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 200_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("International Developed", derived.geo);
    try std.testing.expectEqualStrings("International Developed", derived.asset_class);
}

test "deriveMetadata: emerging-market stock geo bucket" {
    const c: ClassificationRecord = .{
        .symbol = "BABA",
        .country = "CN",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 200_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Emerging Markets", derived.geo);
    try std.testing.expectEqualStrings("Emerging Markets", derived.asset_class);
}

test "deriveMetadata: missing market cap defaults US to Large Cap" {
    const c: ClassificationRecord = .{
        .symbol = "UNK",
        .country = "US",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("US Large Cap", derived.asset_class);
}

test "deriveMetadata: unknown country -> Unknown geo" {
    const c: ClassificationRecord = .{
        .symbol = "WEIRD",
        .country = null,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("Unknown", derived.geo);
    try std.testing.expectEqualStrings("Unknown", derived.asset_class);
}

test "deriveMetadata: ETF gets sector::TODO regardless of Wikidata sector field" {
    // ETFs are multi-sector by definition. Wikidata sometimes
    // attaches an industry to an ETF entity; we override to
    // TODO so the user fills in their own sector breakdown
    // rather than copying whatever stray industry slipped
    // through.
    const c: ClassificationRecord = .{
        .symbol = "VTI",
        .country = "US",
        .is_etf = true,
        .sector = "stale industry value",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("TODO", derived.sector);
    try std.testing.expectEqualStrings("ETF", derived.asset_class);
}

test "deriveMetadata: missing sector -> TODO (not 'Unknown')" {
    // SOXX-style: Wikidata returned an entity but no industry,
    // no country, no instance-of statements. The sector field
    // is null. We emit TODO so the user knows to fill in
    // manually rather than seeing "Unknown" and assuming it's
    // a valid taxonomy bucket.
    const c: ClassificationRecord = .{
        .symbol = "SPARSE",
        .country = "US",
        .sector = null,
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("TODO", derived.sector);
}

test "deriveMetadata: stock with sector preserved (canonical sector pass-through)" {
    // Wikidata's parser canonicalizes sectors before they reach
    // deriveMetadata; the function just title-cases them. Verify
    // the canonical strings (already title-cased) round-trip
    // unchanged.
    const c: ClassificationRecord = .{
        .symbol = "MSFT",
        .country = "US",
        .sector = "Technology",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 3_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Technology", derived.sector);
}

test "deriveMetadata: asset_class == 'Mutual Fund' short-circuits before size buckets" {
    // When Wikidata says it's a mutual fund, we trust that and
    // skip size-bucket derivation. Verifies the "Mutual Fund"
    // branch of `deriveMetadata`.
    const c: ClassificationRecord = .{
        .symbol = "VFORX",
        .country = "US",
        .sector = "Financial Services",
        .asset_class = "Mutual Fund",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 50_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("Mutual Fund", derived.asset_class);
    // Sector still gets title-cased (not overridden to TODO);
    // mutual funds aren't ETFs.
    try std.testing.expectEqualStrings("Financial Services", derived.sector);
}

test "deriveMetadata: asset_class == 'Mutual Fund' with no market cap" {
    // Mutual funds have null market caps in practice (no
    // shares-outstanding XBRL tag). Confirm we still take the
    // Mutual Fund branch and don't default to Large Cap.
    const c: ClassificationRecord = .{
        .symbol = "FAGIX",
        .country = "US",
        .asset_class = "Mutual Fund",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, null, &sector_buf);
    try std.testing.expectEqualStrings("Mutual Fund", derived.asset_class);
}

test "deriveMetadata: asset_class set but not 'Mutual Fund' -> falls through to size buckets" {
    // Defensive: any non-"Mutual Fund" string in asset_class
    // should NOT short-circuit. Today only "Mutual Fund" is
    // a recognized literal; anything else falls through.
    const c: ClassificationRecord = .{
        .symbol = "AAPL",
        .country = "US",
        .asset_class = "Open-End Fund", // hypothetical other value
        .sector = "Technology",
        .as_of = "2026-05-29",
        .source = "wikidata",
    };
    var sector_buf: [64]u8 = undefined;
    const derived = deriveMetadata(c, 3_000_000_000_000, &sector_buf);
    try std.testing.expectEqualStrings("US Large Cap", derived.asset_class);
}

// ── reportFetchError ────────────────────────────────────────
//
// `reportFetchError` writes a user-facing diagnostic to stderr
// (no-op under `builtin.is_test`) and returns either
// `.hard_stop` (every subsequent symbol will hit the same
// condition; abort the batch) or `.soft_skip` (per-symbol; keep
// going). The tests verify the action classification per error
// variant — the stderr text isn't asserted because stderr is
// suppressed in test mode.

test "reportFetchError: NoApiKey -> hard_stop" {
    const action = reportFetchError(std.testing.io, "AAPL", zfin.DataError.NoApiKey);
    try std.testing.expectEqual(FetchErrorAction.hard_stop, action);
}

test "reportFetchError: AuthError -> hard_stop" {
    const action = reportFetchError(std.testing.io, "AAPL", zfin.DataError.AuthError);
    try std.testing.expectEqual(FetchErrorAction.hard_stop, action);
}

test "reportFetchError: RateLimited -> hard_stop" {
    const action = reportFetchError(std.testing.io, "AAPL", zfin.DataError.RateLimited);
    try std.testing.expectEqual(FetchErrorAction.hard_stop, action);
}

test "reportFetchError: NotFound -> soft_skip" {
    const action = reportFetchError(std.testing.io, "AAPL", zfin.DataError.NotFound);
    try std.testing.expectEqual(FetchErrorAction.soft_skip, action);
}

test "reportFetchError: TransientError -> soft_skip" {
    const action = reportFetchError(std.testing.io, "AAPL", zfin.DataError.TransientError);
    try std.testing.expectEqual(FetchErrorAction.soft_skip, action);
}

test "reportFetchError: unknown error variant -> soft_skip (catch-all)" {
    // Any error not matched by the explicit prongs (e.g. a
    // generic FetchFailed) falls through the `else` branch and
    // soft-skips. This is the safer default — better to keep
    // the batch going on a per-symbol failure than to abort
    // everything on an unexpected error class.
    const action = reportFetchError(std.testing.io, "AAPL", zfin.DataError.FetchFailed);
    try std.testing.expectEqual(FetchErrorAction.soft_skip, action);
}

test "reportFetchError: long symbol still classifies correctly (bufPrint fallback)" {
    // The internal msg_buf is 256 bytes; symbols approaching
    // that size hit the bufPrint-failed fallback path. Verify
    // the action still classifies correctly even if the message
    // truncates.
    const long_sym = "X" ** 200;
    const action = reportFetchError(std.testing.io, long_sym, zfin.DataError.NotFound);
    try std.testing.expectEqual(FetchErrorAction.soft_skip, action);
}

// ── formatProvenanceMessage ────────────────────────────────────

test "formatProvenanceMessage: wikidata -> 'classified via Wikidata' line" {
    var buf: [256]u8 = undefined;
    const msg = formatProvenanceMessage(&buf, "AAPL", .wikidata, null) orelse return error.Format;
    try std.testing.expect(std.mem.indexOf(u8, msg, "AAPL") != null);
    try std.testing.expect(std.mem.indexOf(u8, msg, "Wikidata") != null);
    try std.testing.expect(std.mem.endsWith(u8, msg, "\n"));
}

test "formatProvenanceMessage: edgar_fallback -> 'classified via EDGAR fallback' line" {
    var buf: [256]u8 = undefined;
    const msg = formatProvenanceMessage(&buf, "SOXX", .edgar_fallback, null) orelse return error.Format;
    try std.testing.expect(std.mem.indexOf(u8, msg, "SOXX") != null);
    try std.testing.expect(std.mem.indexOf(u8, msg, "EDGAR fallback") != null);
}

test "formatProvenanceMessage: none with no error -> 'no Wikidata or EDGAR entry'" {
    var buf: [256]u8 = undefined;
    const msg = formatProvenanceMessage(&buf, "MISSING", .none, null) orelse return error.Format;
    try std.testing.expect(std.mem.indexOf(u8, msg, "MISSING") != null);
    try std.testing.expect(std.mem.indexOf(u8, msg, "no Wikidata 
or EDGAR entry") != null); try std.testing.expect(std.mem.indexOf(u8, msg, "fill in by hand") != null); } test "formatProvenanceMessage: none with error -> includes error name" { // When Wikidata errored AND EDGAR had no entry, the message // includes the upstream error name so the user can act on // it (e.g. RateLimited → wait and rerun). var buf: [256]u8 = undefined; const msg = formatProvenanceMessage(&buf, "FOO", .none, error.RateLimited) orelse return error.Format; try std.testing.expect(std.mem.indexOf(u8, msg, "FOO") != null); try std.testing.expect(std.mem.indexOf(u8, msg, "RateLimited") != null); try std.testing.expect(std.mem.indexOf(u8, msg, "Wikidata errored") != null); } test "formatProvenanceMessage: small buffer returns null (safety valve)" { // 16-byte buffer can't hold any of the message variants. // Should return null rather than crash; caller treats null // as "skip the breadcrumb" rather than panicking. var buf: [16]u8 = undefined; try std.testing.expect(formatProvenanceMessage(&buf, "AAPL", .edgar_fallback, null) == null); } test "formatProvenanceMessage: messages have leading two-space indent" { // Match the rest of enrich's stderr output (progress // messages, fetch breadcrumbs all use " " prefix). var buf: [256]u8 = undefined; const msg = formatProvenanceMessage(&buf, "X", .edgar_fallback, null) orelse return error.Format; try std.testing.expect(std.mem.startsWith(u8, msg, " ")); } // ── classifyForCounter ──────────────────────────────────────── test "classifyForCounter: wikidata -> wikidata_hit regardless of error arg" { try std.testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, false)); try std.testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, true)); } test "classifyForCounter: edgar_fallback -> edgar_fallback regardless of wikidata error" { // EDGAR rescued the symbol; the file has a usable line; it // counts as edgar_fallback whether or not Wikidata errored // upstream. 
try std.testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, true)); try std.testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, false)); } test "classifyForCounter: none + wikidata errored -> failed (no data anywhere)" { // True failure: Wikidata HTTP errored, EDGAR has no row. // Nothing usable in the file for this symbol; user must // rerun or fill in by hand. try std.testing.expectEqual(SummaryCounter.failed, classifyForCounter(.none, true)); } test "classifyForCounter: none + wikidata succeeded but empty -> manual_todo" { // Wikidata returned empty/useless data, EDGAR has no row. // The symbol exists in metadata.srf as a TODO stub; user // fills in by hand. Different from `failed` because there's // nothing to retry — Wikidata simply has no entry. try std.testing.expectEqual(SummaryCounter.manual_todo, classifyForCounter(.none, false)); } test "classifyForCounter: covers all (FallbackKind, bool) input combinations" { // Exhaustive combinator test — locks in the truth table so // any future change to the policy has to update this test. 
try std.testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, false)); try std.testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, true)); try std.testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, false)); try std.testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, true)); try std.testing.expectEqual(SummaryCounter.manual_todo, classifyForCounter(.none, false)); try std.testing.expectEqual(SummaryCounter.failed, classifyForCounter(.none, true)); } // ── hasDominantEquitySector ────────────────────────────────── test "hasDominantEquitySector: single 99% Equity / Corporate -> true" { const sectors = [_]FundSector{ .{ .description = "Equity / Corporate", .pct = 99.7 }, .{ .description = "Short-Term Investment Vehicle / Registered Fund", .pct = 0.19 }, }; try std.testing.expect(hasDominantEquitySector(sectors[0..])); } test "hasDominantEquitySector: 95% threshold is inclusive" { const sectors = [_]FundSector{ .{ .description = "Equity / Corporate", .pct = 95.0 }, }; try std.testing.expect(hasDominantEquitySector(sectors[0..])); } test "hasDominantEquitySector: 94.99% does NOT trigger" { const sectors = [_]FundSector{ .{ .description = "Equity / Corporate", .pct = 94.99 }, }; try std.testing.expect(!hasDominantEquitySector(sectors[0..])); } test "hasDominantEquitySector: multi-asset fund (FAGIX-shape) -> false" { // FAGIX has 22% Equity / Corporate plus debt and other // sleeves. 22% is way under the 95% threshold. const sectors = [_]FundSector{ .{ .description = "Debt / Corporate", .pct = 47.69 }, .{ .description = "Equity / Corporate", .pct = 22.49 }, .{ .description = "Loan / Corporate", .pct = 9.99 }, }; try std.testing.expect(!hasDominantEquitySector(sectors[0..])); } test "hasDominantEquitySector: pure-debt fund -> false" { // VBTLX-shape: all Debt / *. No Equity / Corporate row at all. 
const sectors = [_]FundSector{ .{ .description = "Debt / Corporate", .pct = 50.0 }, .{ .description = "Debt / US Treasury", .pct = 30.0 }, }; try std.testing.expect(!hasDominantEquitySector(sectors[0..])); } test "hasDominantEquitySector: null and empty -> false" { try std.testing.expect(!hasDominantEquitySector(null)); const empty = [_]FundSector{}; try std.testing.expect(!hasDominantEquitySector(empty[0..])); } test "emitFundLines: null sectors -> single TODO line" { var out_buf: [256]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); try emitFundLines("VTI", "ETF", null, null, null, &out); try std.testing.expectEqualStrings( "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n", out.buffered(), ); } test "emitFundLines: populated sectors -> one line per sector with pct" { var out_buf: [512]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); const sectors = [_]FundSector{ .{ .description = "Debt / Corporate", .pct = 47.69 }, .{ .description = "Equity / Corporate", .pct = 22.49 }, }; try emitFundLines("FAGIX", "Fund", sectors[0..], null, null, &out); const written = out.buffered(); try std.testing.expect(std.mem.indexOf(u8, written, "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69") != null); try std.testing.expect(std.mem.indexOf(u8, written, "symbol::FAGIX,sector::Equity / Corporate,geo::US,asset_class::Fund,pct:num:22.49") != null); try std.testing.expect(std.mem.indexOf(u8, written, "TODO") == null); } test "emitFundLines: empty slice -> single TODO line (treats empty as null)" { var out_buf: [256]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); const empty: [0]FundSector = .{}; try emitFundLines("VTI", "ETF", empty[0..], null, null, &out); try std.testing.expectEqualStrings( "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n", out.buffered(), ); } test "emitFundLines: negative pct values render correctly" { // Real NPORT-P data has negative pcts for short positions // and derivatives. 
They must round-trip cleanly. var out_buf: [512]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); const sectors = [_]FundSector{ .{ .description = "Repurchase Agreement / Other", .pct = -29.72 }, .{ .description = "Derivative-FX / Other", .pct = -0.84 }, }; try emitFundLines("PTY", "Fund", sectors[0..], null, null, &out); const written = out.buffered(); try std.testing.expect(std.mem.indexOf(u8, written, "pct:num:-29.72") != null); try std.testing.expect(std.mem.indexOf(u8, written, "pct:num:-0.84") != null); } test "emitFundLines: ETF asset_class flows through" { var out_buf: [512]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); const sectors = [_]FundSector{ .{ .description = "Equity / Corporate", .pct = 99.86 }, }; try emitFundLines("SOXX", "ETF", sectors[0..], null, null, &out); try std.testing.expectEqualStrings( "symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n", out.buffered(), ); } test "freeFundSectors: frees slice + each description, no leak" { // Allocate the same shape `loadFundSectors` produces, then // free it via `freeFundSectors`. `std.testing.allocator` // catches any leak. const alloc = std.testing.allocator; var list: std.ArrayList(FundSector) = .empty; errdefer list.deinit(alloc); const desc1 = try alloc.dupe(u8, "Debt / Corporate"); errdefer alloc.free(desc1); try list.append(alloc, .{ .description = desc1, .pct = 47.69 }); const desc2 = try alloc.dupe(u8, "Equity / Corporate"); errdefer alloc.free(desc2); try list.append(alloc, .{ .description = desc2, .pct = 22.49 }); const slice = try list.toOwnedSlice(alloc); freeFundSectors(alloc, slice); // No assertion needed — testing.allocator panics on leak. 
} test "freeFundSectors: empty slice is a no-op" { const alloc = std.testing.allocator; const slice = try alloc.alloc(FundSector, 0); freeFundSectors(alloc, slice); } test "freeFundEtfData: frees both name and sectors without leak" { // Construct the same shape `loadFundEtfData` produces, then // free via the paired free function. testing.allocator // catches any leak. const alloc = std.testing.allocator; const name = try alloc.dupe(u8, "Vanguard Total Bond Market Index Fund"); var list: std.ArrayList(FundSector) = .empty; errdefer { for (list.items) |s| alloc.free(s.description); list.deinit(alloc); } const desc = try alloc.dupe(u8, "Debt / Corporate"); try list.append(alloc, .{ .description = desc, .pct = 50.0 }); const sectors = try list.toOwnedSlice(alloc); freeFundEtfData(alloc, .{ .series_name = name, .sectors = sectors }); } test "freeFundEtfData: handles null series_name (only sectors freed)" { const alloc = std.testing.allocator; var list: std.ArrayList(FundSector) = .empty; errdefer { for (list.items) |s| alloc.free(s.description); list.deinit(alloc); } const desc = try alloc.dupe(u8, "Equity / Corporate"); try list.append(alloc, .{ .description = desc, .pct = 100.0 }); const sectors = try list.toOwnedSlice(alloc); freeFundEtfData(alloc, .{ .series_name = null, .sectors = sectors }); } test "freeFundEtfData: handles null sectors (only series_name freed)" { const alloc = std.testing.allocator; const name = try alloc.dupe(u8, "Some Fund"); freeFundEtfData(alloc, .{ .series_name = name, .sectors = null }); } test "freeFundEtfData: both null is a no-op" { const alloc = std.testing.allocator; freeFundEtfData(alloc, .{ .series_name = null, .sectors = null }); } // ── sortSymbolsAlphabetically ──────────────────────────────── test "sortSymbolsAlphabetically: shuffled input -> alphabetical output" { var syms = [_][]const u8{ "QQQ", "AAPL", "VTI", "BND", "SPY" }; sortSymbolsAlphabetically(&syms); try std.testing.expectEqualStrings("AAPL", syms[0]); try 
std.testing.expectEqualStrings("BND", syms[1]); try std.testing.expectEqualStrings("QQQ", syms[2]); try std.testing.expectEqualStrings("SPY", syms[3]); try std.testing.expectEqualStrings("VTI", syms[4]); } test "sortSymbolsAlphabetically: already-sorted input is stable" { var syms = [_][]const u8{ "AAPL", "BND", "VTI" }; sortSymbolsAlphabetically(&syms); try std.testing.expectEqualStrings("AAPL", syms[0]); try std.testing.expectEqualStrings("BND", syms[1]); try std.testing.expectEqualStrings("VTI", syms[2]); } test "sortSymbolsAlphabetically: empty slice is a no-op" { var syms = [_][]const u8{}; sortSymbolsAlphabetically(&syms); try std.testing.expectEqual(@as(usize, 0), syms.len); } test "sortSymbolsAlphabetically: single element is unchanged" { var syms = [_][]const u8{"AAPL"}; sortSymbolsAlphabetically(&syms); try std.testing.expectEqualStrings("AAPL", syms[0]); } test "sortSymbolsAlphabetically: case-sensitive ordering (uppercase < lowercase)" { // Defensive: the symbols should always be uppercased before // they reach this function (portfolio.srf canonicalizes; // single-symbol mode uses framework's `uppercase_first_arg`). // But verify the underlying comparator is byte-lex so we // know what to expect if mixed-case ever leaks in. var syms = [_][]const u8{ "aapl", "AAPL", "BND" }; sortSymbolsAlphabetically(&syms); // Uppercase letters have lower byte values than lowercase. try std.testing.expectEqualStrings("AAPL", syms[0]); try std.testing.expectEqualStrings("BND", syms[1]); try std.testing.expectEqualStrings("aapl", syms[2]); } test "sortSymbolsAlphabetically: numbers and digits sort before letters" { // CUSIPs (9-character alphanumeric) and class shares like // "BRK.B" can occur. Confirm byte-lex ordering puts digit // prefixes before letter prefixes, which matches user // intuition (numbered things group together at the top). 
var syms = [_][]const u8{ "AAPL", "02315N600", "BRK.B" }; sortSymbolsAlphabetically(&syms); try std.testing.expectEqualStrings("02315N600", syms[0]); try std.testing.expectEqualStrings("AAPL", syms[1]); try std.testing.expectEqualStrings("BRK.B", syms[2]); } test "sortSymbolsAlphabetically: duplicate symbols stay together" { // Defensive: stockSymbols is supposed to dedup, but if // duplicates ever leak in, they should sort adjacent rather // than crash or scramble. var syms = [_][]const u8{ "VTI", "AAPL", "VTI", "BND" }; sortSymbolsAlphabetically(&syms); try std.testing.expectEqualStrings("AAPL", syms[0]); try std.testing.expectEqualStrings("BND", syms[1]); try std.testing.expectEqualStrings("VTI", syms[2]); try std.testing.expectEqualStrings("VTI", syms[3]); }