zfin/src/commands/enrich.zig

const std = @import("std");
const zfin = @import("../root.zig");
const cli = @import("common.zig");
const framework = @import("framework.zig");
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
const ClassificationRecord = zfin.classification.ClassificationRecord;

/// Validated positional arguments of `zfin enrich`, as produced by
/// `parseArgs`.
pub const ParsedArgs = struct {
    /// Optional symbol (e.g. "AAPL"). Null = portfolio mode (uses
    /// `-p` resolution to find the user's portfolio file(s)).
    /// When set, this is the argument slice itself, not a copy.
    symbol: ?[]const u8,
};

/// Command registration record for `enrich`: name, command group,
/// one-line synopsis, and the full `--help` text, plus the argument
/// handling flags the framework reads.
pub const meta: framework.Meta = .{
    .name = "enrich",
    .group = .hygiene,
    .synopsis = "Bootstrap metadata.srf from Wikidata + EDGAR",
    .help =
    \\Usage: zfin enrich [SYMBOL]
    \\
    \\Bootstrap a `metadata.srf` classification file from public
    \\Wikidata + SEC EDGAR data. Two modes:
    \\
    \\  - Portfolio mode (no argument): enrich every stock symbol
    \\    in your portfolio. Honors the global `-p`/`--portfolio`
    \\    flag for selecting which portfolio file(s) to use; with
    \\    no flag, falls back to the standard portfolio resolution
    \\    (portfolio.srf in cwd, or $ZFIN_HOME/portfolio.srf).
    \\    Output is a complete SRF file written to stdout —
    \\    redirect into metadata.srf and edit by hand for accuracy.
    \\  - Symbol mode (single SYMBOL argument): enrich one symbol
    \\    and emit one appendable SRF line. Useful for adding to
    \\    an existing metadata.srf without rerunning the whole file.
    \\
    \\Sources used:
    \\  - Wikidata SPARQL: sector / industry / country / asset class
    \\    + CIK lookup for the EDGAR call below.
    \\  - SEC EDGAR XBRL company facts: shares outstanding, used
    \\    with the latest cached close price to derive market-cap
    \\    size buckets (Large/Mid/Small) for US-domiciled stocks.
    \\  - SEC EDGAR mutual-fund ticker map: fallback when Wikidata
    \\    has no entry. Open-end mutual funds aren't exchange-listed
    \\    and are usually missing from Wikidata; this fills in
    \\    `geo::US,asset_class::Fund` (or `ETF` for company-map
    \\    UIT entries with title hints).
    \\
    \\Always review the output before saving as `metadata.srf`.
    \\Wikidata + EDGAR are free and have no per-day quota; the SEC
    \\caps EDGAR at 10 req/sec which the rate limiter respects.
    \\Requires ZFIN_USER_EMAIL in `.env` (SEC requires a contact in
    \\the User-Agent header).
    \\
    \\Examples:
    \\  zfin enrich > metadata.srf                # whole portfolio (default file)
    \\  zfin -p sample enrich > metadata.srf      # whole portfolio (named file)
    \\  zfin enrich AAPL >> metadata.srf          # single symbol append
    \\  zfin enrich fagix >> metadata.srf         # symbol auto-uppercased
    \\
    ,
    // Matches the "symbol auto-uppercased" example in the help text:
    // the symbol reaches `parseArgs` already uppercased.
    .uppercase_first_arg = true,
    // `UnexpectedArg` is the only error `parseArgs` returns itself.
    // NOTE(review): presumably the framework treats errors listed here
    // as usage errors rather than internal failures — confirm in
    // framework.zig.
    .user_errors = error{UnexpectedArg},
};

/// Interpret the positional arguments that follow `enrich`.
///
/// No argument selects portfolio mode (`symbol == null`); exactly
/// one argument is taken as the symbol, borrowed from `cmd_args`.
/// Two or more arguments print an explanation to stderr via
/// `ctx.io` and fail with `error.UnexpectedArg`.
pub fn parseArgs(ctx: *framework.RunCtx, cmd_args: []const []const u8) !ParsedArgs {
    switch (cmd_args.len) {
        0 => return .{ .symbol = null },
        1 => return .{ .symbol = cmd_args[0] },
        else => {
            cli.stderrPrint(ctx.io, "Error: 'enrich' takes at most one argument (a symbol). For portfolio-mode, omit the argument and use the global -p flag if needed.\n");
            return error.UnexpectedArg;
        },
    }
}

/// The three SRF classification fields `deriveMetadata` computes for
/// a single non-fund row. All three are plain string slices; `sector`
/// may point into the caller's `sector_buf`.
const DerivedMeta = struct {
    /// Best-effort sector text. `deriveMetadata` emits the literal
    /// "TODO" for ETFs and whenever the classification record has
    /// no sector, so the user knows to fill it in by hand.
    sector: []const u8,
    /// Geo bucket: one of "US", "International Developed",
    /// "Emerging Markets", or "Unknown".
    geo: []const u8,
    /// Asset class: "ETF", "Mutual Fund", or one of the size-shaped
    /// US-stock buckets ("US Large Cap" / "US Mid Cap" / "US Small
    /// Cap"). For non-US stocks where size data is unavailable, we
    /// fall back to the geo bucket itself.
    asset_class: []const u8,
};

/// Build the `DerivedMeta` for one symbol from its
/// `ClassificationRecord` and an optional `market_cap` estimate
/// (shares outstanding × latest close). Performs no I/O.
///
/// - `sector`: the record's sector run through
///   `cli.fmt.toTitleCase` into `sector_buf`; "TODO" for ETFs
///   (multi-sector by definition) and for records without a sector,
///   so nobody mistakes a placeholder for a real taxonomy value.
/// - `geo`: whatever `zfin.classification.geoFor` maps the record's
///   country to.
/// - `asset_class`: "ETF" for ETFs; "Mutual Fund" when the record
///   says so; for US stocks a size bucket from `market_cap`
///   (>= 10e9 Large, >= 2e9 Mid, otherwise Small; "US Large Cap"
///   when no estimate is available); for everything else the geo
///   bucket itself.
///
/// `sector_buf` must outlive the returned struct whenever the sector
/// was title-cased into it.
fn deriveMetadata(
    classification: ClassificationRecord,
    market_cap: ?f64,
    sector_buf: []u8,
) DerivedMeta {
    const geo_str = zfin.classification.geoFor(classification.country);

    const sector_str: []const u8 = if (classification.is_etf)
        "TODO"
    else if (classification.sector) |raw_sector|
        cli.fmt.toTitleCase(sector_buf, raw_sector)
    else
        "TODO";

    const asset_class_str: []const u8 = pick: {
        if (classification.is_etf) break :pick "ETF";

        // A missing asset class can never equal "Mutual Fund", so
        // the empty-string stand-in keeps this a single comparison.
        const declared = classification.asset_class orelse "";
        if (std.mem.eql(u8, declared, "Mutual Fund")) break :pick "Mutual Fund";

        // Non-US stocks: no size data, so the geo bucket doubles as
        // the asset class. The user can refine it in metadata.srf.
        const is_us = std.mem.eql(u8, geo_str, zfin.classification.geo.us);
        if (!is_us) break :pick geo_str;

        // US stock without a market-cap estimate: default bucket,
        // matching the old AlphaVantage flow's default.
        const cap = market_cap orelse break :pick "US Large Cap";

        if (cap >= 10_000_000_000) break :pick "US Large Cap";
        if (cap >= 2_000_000_000) break :pick "US Mid Cap";
        break :pick "US Small Cap";
    };

    return .{
        .sector = sector_str,
        .geo = geo_str,
        .asset_class = asset_class_str,
    };
}

/// Entry point for `zfin enrich`: bootstrap metadata.srf content
/// from Wikidata + EDGAR data and write it to `ctx.out`.
///
/// Dispatches on `parsed.symbol`:
///   - null → portfolio mode: `enrichPortfolio` loads the portfolio
///     through `cli.loadPortfolio` and enriches every symbol in it.
///   - set  → symbol mode: `enrichSymbol` handles just that symbol.
///     Per `meta.uppercase_first_arg`, the symbol arrives already
///     uppercased, so `enrich fagix` and `enrich FAGIX` behave the
///     same.
///
/// Fails with `error.MissingDataService` when `ctx.svc` is null.
pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
    const svc = ctx.svc orelse return error.MissingDataService;
    const sym = parsed.symbol orelse return enrichPortfolio(ctx, svc);
    return enrichSymbol(ctx.io, ctx.allocator, svc, sym, ctx.out);
}

/// Verdict returned by `reportFetchError`: whether the caller should
/// carry on with the next symbol (`soft_skip`) or abandon the whole
/// batch (`hard_stop`). Hard-stop errors (no API key, auth rejected,
/// rate-limited) will recur on every subsequent symbol; soft-skip
/// errors are per-symbol and other symbols may still succeed.
const FetchErrorAction = enum { hard_stop, soft_skip };

/// Explain on stderr why fetching `sym` failed and tell the caller
/// how to proceed.
///
/// This is the one place a `DataError` is turned into user-facing
/// text. Per AGENTS.md "Errors carry information", every message
/// names the specific condition — never a bare "fetch failed" — so
/// the user can act without reading source.
///
/// Returns `.hard_stop` for `NoApiKey`, `AuthError`, and
/// `RateLimited` (conditions that would repeat for every remaining
/// symbol) and `.soft_skip` for everything else. Exactly one message
/// is printed per call; if formatting into the 256-byte scratch
/// buffer fails, a shorter fixed message is printed instead.
fn reportFetchError(io: std.Io, sym: []const u8, err: anyerror) FetchErrorAction {
    const action: FetchErrorAction = switch (err) {
        zfin.DataError.NoApiKey,
        zfin.DataError.AuthError,
        zfin.DataError.RateLimited,
        => .hard_stop,
        else => .soft_skip,
    };

    var scratch: [256]u8 = undefined;
    const text: []const u8 = switch (err) {
        zfin.DataError.NoApiKey => "Error: ZFIN_USER_EMAIL not set. Add it to .env (SEC EDGAR requires a contact email in the User-Agent header).\n",
        zfin.DataError.AuthError => "Error: SEC EDGAR rejected the request. Check ZFIN_USER_EMAIL in .env\n",
        zfin.DataError.RateLimited => std.fmt.bufPrint(
            &scratch,
            "Error: rate-limited on {s}. Wikidata/EDGAR have generous limits; check for upstream throttling.\n",
            .{sym},
        ) catch "Error: rate-limited. Try again later.\n",
        zfin.DataError.NotFound => std.fmt.bufPrint(
            &scratch,
            "  {s}: not in Wikidata; mark sector/geo/asset_class manually\n",
            .{sym},
        ) catch "  not in Wikidata; mark manually\n",
        zfin.DataError.TransientError => std.fmt.bufPrint(
            &scratch,
            "  {s}: transient upstream failure; will need re-run\n",
            .{sym},
        ) catch "  transient upstream failure; will need re-run\n",
        else => std.fmt.bufPrint(
            &scratch,
            "  {s}: fetch failed ({t})\n",
            .{ sym, err },
        ) catch "  fetch failed\n",
    };

    cli.stderrPrint(io, text);
    return action;
}

/// Best-effort market-cap estimate for `sym`: shares outstanding
/// (from `svc.getEntityFacts`, keyed by `cik`) multiplied by the
/// close of the last candle returned by `svc.getCandles`.
///
/// Returns null — silently, nothing is printed or propagated — when
/// `cik` is null, either service call fails, no
/// `shares_outstanding` fact is present, or there are no candles.
/// The caller then falls back to a default size bucket.
fn estimateMarketCap(
    svc: *zfin.DataService,
    sym: []const u8,
    cik: ?[]const u8,
    opts: zfin.FetchOptions,
) ?f64 {
    const cik_str = cik orelse return null;

    const facts = svc.getEntityFacts(cik_str, opts) catch return null;
    defer facts.deinit();

    // Each fact overwrites the previous one, so the last
    // `shares_outstanding` record in the list is the one used.
    var latest_shares: ?u64 = null;
    for (facts.data) |fact| {
        switch (fact) {
            .shares_outstanding => |so| latest_shares = so.shares_outstanding,
        }
    }
    const share_count = latest_shares orelse return null;

    const candles = svc.getCandles(sym, opts) catch return null;
    defer candles.deinit();

    const bars = candles.data;
    if (bars.len == 0) return null;
    const last_close = bars[bars.len - 1].close;

    const share_count_f: f64 = @floatFromInt(share_count);
    return share_count_f * last_close;
}

/// Which summary counter a portfolio-mode symbol bumps once its
/// classification outcome is known. `classifyForCounter` maps a
/// `FallbackKind` onto this enum; its `wikidata_errored` flag is
/// true when `getClassification` errored upstream and false when it
/// returned an empty/useless result. The two cases differ only in
/// what `.none` means: a `.none` after an upstream error is a true
/// failure (`failed`: no data anywhere); a `.none` after a
/// successful fetch with empty/sparse data is a manual-fill-in case
/// (`manual_todo`: the symbol exists, just needs human attention).
const SummaryCounter = enum { wikidata_hit, edgar_fallback, failed, manual_todo };

/// Map a classification outcome onto the summary bucket it counts
/// toward. `.wikidata` and `.edgar_fallback` map to their namesake
/// counters; `.none` is `.failed` when Wikidata errored upstream
/// and `.manual_todo` when it merely came back empty.
fn classifyForCounter(kind: FallbackKind, wikidata_errored: bool) SummaryCounter {
    switch (kind) {
        .wikidata => return .wikidata_hit,
        .edgar_fallback => return .edgar_fallback,
        .none => {},
    }
    if (wikidata_errored) return .failed;
    return .manual_todo;
}

/// Render the one-line provenance breadcrumb for `sym` into `buf`
/// and return the written slice.
///
/// The wording depends on `kind`; for `.none` it additionally
/// depends on whether an upstream error `err` is supplied (the
/// error's name is then included). Returns null only when `buf` is
/// too small for the chosen message — a safety valve rather than a
/// normal path, since callers pass a 256-byte buffer.
fn formatProvenanceMessage(buf: []u8, sym: []const u8, kind: FallbackKind, err: ?anyerror) ?[]const u8 {
    switch (kind) {
        .wikidata => {
            return std.fmt.bufPrint(buf, "  {s}: classified via Wikidata\n", .{sym}) catch null;
        },
        .edgar_fallback => {
            return std.fmt.bufPrint(buf, "  {s}: classified via EDGAR fallback (Wikidata sparse or empty)\n", .{sym}) catch null;
        },
        .none => {
            if (err) |cause| {
                return std.fmt.bufPrint(buf, "  {s}: no classification (Wikidata errored {t}, EDGAR had no entry); fill in by hand\n", .{ sym, cause }) catch null;
            }
            return std.fmt.bufPrint(buf, "  {s}: no Wikidata or EDGAR entry; fill in by hand\n", .{sym}) catch null;
        },
    }
}

/// Write a one-line breadcrumb to stderr saying how `sym` was
/// classified (Wikidata, EDGAR fallback, or not at all).
///
/// Used by single-symbol mode (`zfin enrich AAPL`), which has no
/// end-of-run summary; without it the user could not tell where the
/// SRF row came from. Portfolio mode does not call this — it prints
/// its own summary instead. Prints nothing if the message cannot be
/// formatted.
fn stderrSymbolProvenance(io: std.Io, sym: []const u8, kind: FallbackKind, err: ?anyerror) void {
    var line_buf: [256]u8 = undefined;
    const line = formatProvenanceMessage(&line_buf, sym, kind, err) orelse return;
    cli.stderrPrint(io, line);
}

/// Enrich one symbol and write appendable SRF line(s) for it to
/// `out`; progress and provenance notes go to stderr via `io`.
///
/// `sym` is expected to be uppercase already (the framework's
/// `uppercase_first_arg = true` normalizes the CLI argument before
/// `parseArgs` sees it).
///
/// Outcomes:
///   - classification found: ETFs go through `emitEtfRows`; anything
///     else gets an optional `# name` comment plus one `symbol::…`
///     row built by `deriveMetadata`.
///   - `NotFound`: a commented-out all-TODO stub is written.
///   - other soft errors: `reportFetchError` explains on stderr and a
///     commented-out stub naming the error is written.
///   - hard-stop errors: `reportFetchError` explains on stderr and
///     nothing is written to `out`.
///
/// Only writer errors from `out` propagate.
fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, sym: []const u8, out: *std.Io.Writer) !void {
    var progress_buf: [128]u8 = undefined;
    const progress = std.fmt.bufPrint(&progress_buf, "  Fetching {s}...\n", .{sym}) catch "  ...\n";
    cli.stderrPrint(io, progress);

    const opts: zfin.FetchOptions = .{};

    // `getClassification` runs the Wikidata -> EDGAR fallback chain
    // inside the service, so a successful return already reflects
    // any EDGAR merge.
    const result = svc.getClassification(sym, opts) catch |err| {
        if (err == zfin.DataError.NotFound) {
            // Neither Wikidata nor EDGAR knows this symbol.
            try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
            try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
            stderrSymbolProvenance(io, sym, .none, null);
            return;
        }

        if (reportFetchError(io, sym, err) == .hard_stop) return;

        try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
        try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
        stderrSymbolProvenance(io, sym, .none, err);
        return;
    };
    defer result.deinit();

    // NOTE(review): assumes a successful result always holds at
    // least one record — confirm against `getClassification`.
    const record = result.data[0];

    if (!record.is_etf) {
        const market_cap = estimateMarketCap(svc, sym, record.cik, opts);
        var sector_buf: [64]u8 = undefined;
        const derived = deriveMetadata(record, market_cap, &sector_buf);
        if (record.name) |name| try out.print("# {s}\n", .{name});
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
            sym, derived.sector, derived.geo, derived.asset_class,
        });
    } else {
        try emitEtfRows(svc, allocator, sym, record, opts, out);
    }

    stderrSymbolProvenance(io, sym, kindFromSource(record.source), null);
}

/// Convert a classification record's `source` string into the
/// `FallbackKind` used by the progress/summary plumbing:
/// "wikidata" → `.wikidata`, "edgar_fallback" → `.edgar_fallback`,
/// anything else → `.none`. The comparison is exact and
/// case-sensitive.
fn kindFromSource(source: []const u8) FallbackKind {
    const matches = struct {
        fn tag(actual: []const u8, expected: []const u8) bool {
            return std.mem.eql(u8, actual, expected);
        }
    }.tag;

    return if (matches(source, "wikidata"))
        .wikidata
    else if (matches(source, "edgar_fallback"))
        .edgar_fallback
    else
        .none;
}

/// Write the header comment and the sleeve rows for an ETF/fund.
///
/// The sector sleeves come from `loadFundEtfData` (NPORT-P
/// decomposition, may be unavailable); the classification record
/// `c` supplies the asset class (default "Fund"), the geo (default
/// "US"), the display name, and — when set — a sector that
/// `emitFundLines` may substitute for a dominant
/// "Equity / Corporate" sleeve.
///
/// The header is `# SYM`, optionally followed by ` -- NAME`, and
/// carries a "(Wikidata had no entry)" marker when the record's
/// source is "edgar_fallback". Only writer errors propagate.
fn emitEtfRows(
    svc: *zfin.DataService,
    allocator: std.mem.Allocator,
    sym: []const u8,
    c: ClassificationRecord,
    opts: zfin.FetchOptions,
    out: *std.Io.Writer,
) !void {
    const fund_data = loadFundEtfData(svc, allocator, sym, opts);
    defer if (fund_data) |owned| freeFundEtfData(allocator, owned);

    const from_edgar = std.mem.eql(u8, c.source, "edgar_fallback");
    if (from_edgar) {
        if (c.name) |name| {
            try out.print("# {s} -- {s} (Wikidata had no entry)\n", .{ sym, name });
        } else {
            try out.print("# {s} -- (Wikidata had no entry)\n", .{sym});
        }
    } else if (c.name) |name| {
        try out.print("# {s} -- {s}\n", .{ sym, name });
    } else {
        try out.print("# {s}\n", .{sym});
    }

    const sleeves: ?[]const FundSector = if (fund_data) |loaded| loaded.sectors else null;
    const resolved_class = c.asset_class orelse "Fund";
    const resolved_geo = c.geo orelse "US";
    try emitFundLines(sym, resolved_class, sleeves, c.sector, resolved_geo, out);
}

// NOTE(review): the paragraph below appears to document an
// EDGAR-fallback emitter (taking `lookup` and `err` arguments) that
// is not defined at this point in the file. It is kept verbatim as
// a plain comment so it no longer reads as documentation for
// `FundSector`; confirm whether it should move or be dropped.
//
// Wikidata didn't return a classification for `sym` (either the
// fetch errored out softly, or returned an empty result set).
// Emit a metadata line based on the EDGAR-fallback `lookup`:
//
//   - `.managed_fund` → `geo::US,asset_class::Fund` (the
//     `tickers_funds.srf` file mixes mutual funds and
//     series-of-trust ETFs — generic "Fund" label since we
//     can't tell).
//   - `.company_or_uit` with title-hint → `geo::US,
//     asset_class::ETF` for trust/ETF-shaped titles, else
//     `Fund`.
//   - `.none` → all-TODO commented stub.
//
// `sector::TODO` is always emitted on fund hits — funds are
// multi-sector by definition; the user fills in their preferred
// breakdown.
//
// `err` is non-null when Wikidata's fetch errored (vs returning
// empty); included in the comment so the user can see why the
// auto-fill didn't work.

/// One sector slice of a fund's NPORT-P breakdown. The
/// `description` is NPORT-P's human-readable category (e.g.
/// "Equity / Corporate", "Debt / US Treasury") rather than a
/// GICS sector. For pure-equity funds NPORT-P collapses to
/// "100% Equity / Corporate"; for multi-asset funds (FAGIX-shape)
/// the breakdown is meaningfully diverse. The user can refine
/// to GICS by hand if they want to track stock-fund
/// decomposition.
pub const FundSector = struct {
    // Ownership depends on who built the value: `loadFundEtfData`
    // stores an allocator-owned copy that `freeFundSectors` releases,
    // while hand-built values may simply borrow the text.
    description: []const u8, // borrowed; caller keeps source alive
    // Share of the portfolio in percent; compared against 95.0 and
    // printed as `pct:num:` elsewhere in this file.
    pct: f64,
};

/// Report whether the breakdown contains an "Equity / Corporate"
/// sleeve holding at least 95% of the fund — the precondition for
/// replacing that sleeve with an inferred sector.
///
/// Multi-asset funds (FAGIX-shape: 48% Debt + 22% Equity + ...)
/// fail this check and keep their NPORT-P decomposition. A null
/// breakdown yields false.
fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool {
    const sleeves = fund_sectors orelse return false;
    for (sleeves) |sleeve| {
        const heavy = sleeve.pct >= 95.0;
        if (heavy and std.mem.eql(u8, sleeve.description, "Equity / Corporate")) return true;
    }
    return false;
}

/// Write the SRF body rows for a fund-classified symbol.
///
/// With a non-empty `fund_sectors`, one row per sleeve is written,
/// each ending in `pct:num:N` (two decimals). Without a breakdown a
/// single row is written whose sector is `inferred_sector`, or
/// "TODO" when that is null too.
///
/// Sector inference: when `inferred_sector` is non-null AND the
/// breakdown has a dominant "Equity / Corporate" sleeve (see
/// `hasDominantEquitySector`), every "Equity / Corporate" row is
/// relabelled with the inferred sector and keeps its original pct.
/// All other rows (cash sleeves, derivatives, ...) keep the raw
/// NPORT-P category. Multi-asset breakdowns are emitted unchanged.
///
/// `asset_class` is decided by the caller. `geo` is applied to
/// every row and defaults to "US" when null; NPORT-P does not
/// carry the holdings' geography, so the caller passes an inferred
/// value when it has one.
fn emitFundLines(
    sym: []const u8,
    asset_class: []const u8,
    fund_sectors: ?[]const FundSector,
    inferred_sector: ?[]const u8,
    geo: ?[]const u8,
    out: *std.Io.Writer,
) !void {
    const geo_str = geo orelse "US";
    const rows: []const FundSector = fund_sectors orelse &.{};

    if (rows.len == 0) {
        // No breakdown at all (NPORT-P fetch failed or was empty):
        // one row, using the inferred sector when there is one.
        const lone_sector = inferred_sector orelse "TODO";
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, lone_sector, geo_str, asset_class });
        return;
    }

    // Non-null only when inference applies: an inferred sector
    // exists and the equity sleeve dominates the fund.
    const replacement: ?[]const u8 = if (hasDominantEquitySector(rows)) inferred_sector else null;

    for (rows) |row| {
        const is_equity = std.mem.eql(u8, row.description, "Equity / Corporate");
        const label = if (is_equity) (replacement orelse row.description) else row.description;
        try out.print(
            "symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n",
            .{ sym, label, geo_str, asset_class, row.pct },
        );
    }
}

/// What `getEtfMetrics` provides that `enrich` actually uses:
/// the canonical fund name (NPORT-P `<seriesName>`, falling back
/// to the submissions-feed `entity_name`) plus the per-sector
/// portfolio breakdown. Either field may be null if NPORT-P data
/// didn't include it. Both fields are owned by the allocator
/// passed to the loader; free via `freeFundEtfData`.
pub const FundEtfData = struct {
    // Allocator-owned copy of the first profile record's series
    // name, or null when no profile record carried one.
    series_name: ?[]const u8,
    // Allocator-owned slice (each `description` is owned too), or
    // null when the data contained no sector records.
    sectors: ?[]FundSector,
};

/// Pull NPORT-P data for `sym` from the EtfMetrics cache (or
/// fetch on miss) and copy out the parts `enrich` uses.
///
/// Returns null when the upstream fetch fails, when an allocation
/// fails while copying, or when the data holds neither a series
/// name nor any sector record. Otherwise returns a struct whose
/// fields are independent — a fund may have a `series_name` but no
/// sector data, or vice versa.
///
/// Ownership: everything in the returned struct is allocated with
/// `allocator`; release it with `freeFundEtfData`. On every null
/// return, all partially built copies have already been freed.
fn loadFundEtfData(svc: *zfin.DataService, allocator: std.mem.Allocator, sym: []const u8, opts: zfin.FetchOptions) ?FundEtfData {
    const result = svc.getEtfMetrics(sym, opts) catch return null;
    defer result.deinit();

    var list: std.ArrayList(FundSector) = .empty;
    var series_name: ?[]const u8 = null;

    // This function returns an optional, not an error union, so an
    // `errdefer` would never run on `return null`. Use a plain
    // `defer` guarded by a hand-off flag instead, so every early
    // exit releases what was copied so far.
    var handed_off = false;
    defer {
        if (!handed_off) {
            for (list.items) |s| allocator.free(s.description);
            if (series_name) |s| allocator.free(s);
        }
        // After a successful `toOwnedSlice` the list is empty, so
        // this is a no-op on the success path.
        list.deinit(allocator);
    }

    for (result.data) |rec| switch (rec) {
        .profile => |p| {
            // Take the first profile record's series_name.
            // `parseNportP` already filters "N/A" sentinels and
            // empty strings before populating this field; the
            // submissions-feed fallback (`entity_name`) is also
            // already applied. Whatever lands here is the most
            // authoritative name we have for the fund.
            if (series_name == null) {
                if (p.series_name) |sn| {
                    series_name = allocator.dupe(u8, sn) catch return null;
                }
            }
        },
        .sector => |s| {
            const desc = allocator.dupe(u8, s.description) catch return null;
            list.append(allocator, .{ .description = desc, .pct = s.pct_of_portfolio }) catch {
                // `desc` is not in the list yet, so the deferred
                // cleanup would miss it.
                allocator.free(desc);
                return null;
            };
        },
        .holding => {},
    };

    // A failed `toOwnedSlice` leaves the list intact; returning null
    // lets the deferred cleanup free it rather than dropping it.
    const sectors: ?[]FundSector = if (list.items.len == 0)
        null
    else
        list.toOwnedSlice(allocator) catch return null;

    // If both fields are null there's nothing useful to return;
    // signal "no data" so the caller takes the no-name fallback.
    if (series_name == null and sectors == null) return null;

    handed_off = true;
    return .{
        .series_name = series_name,
        .sectors = sectors,
    };
}

/// Release a sector slice whose entries own their `description`
/// strings: each description is freed first, then the slice itself.
/// Production code goes through `freeFundEtfData`; this stays
/// separate because tests build such slices directly.
fn freeFundSectors(allocator: std.mem.Allocator, sectors: []FundSector) void {
    var idx: usize = 0;
    while (idx < sectors.len) : (idx += 1) {
        allocator.free(sectors[idx].description);
    }
    allocator.free(sectors);
}

/// Release everything owned by a `FundEtfData` returned from
/// `loadFundEtfData`: the sector slice (via `freeFundSectors`) and
/// the `series_name` string. Null fields are skipped.
fn freeFundEtfData(allocator: std.mem.Allocator, data: FundEtfData) void {
    if (data.sectors) |owned_sectors| {
        freeFundSectors(allocator, owned_sectors);
    }
    if (data.series_name) |owned_name| {
        allocator.free(owned_name);
    }
}

/// Provenance tag derived from a `ClassificationRecord.source`
/// string (see `kindFromSource`: "wikidata" → `.wikidata`,
/// "edgar_fallback" → `.edgar_fallback`, anything else → `.none`).
/// Used for per-symbol summary counters and progress messages.
const FallbackKind = enum { wikidata, edgar_fallback, none };

/// Sort `syms` in place into ascending byte-wise order, which is
/// alphabetical for uppercase ASCII tickers. `enrichPortfolio`
/// relies on this for stable, diff-friendly output. Does not
/// allocate.
fn sortSymbolsAlphabetically(syms: [][]const u8) void {
    const ByBytes = struct {
        fn ascending(_: void, lhs: []const u8, rhs: []const u8) bool {
            return std.mem.order(u8, lhs, rhs) == .lt;
        }
    };
    std.mem.sort([]const u8, syms, {}, ByBytes.ascending);
}

/// Enrich every symbol in the resolved portfolio and write a
/// complete SRF document to `ctx.out`.
///
/// The portfolio is loaded through `cli.loadPortfolio`, so global
/// `-p`/`--portfolio` patterns are honored — same multi-file
/// union-merge as the rest of the CLI. If loading yields nothing
/// the function returns without writing anything.
///
/// Per symbol (in ascending byte order):
///   - CUSIP-like symbols are not fetched; a commented-out stub is
///     written, including the position's note when there is one.
///   - `NotFound` writes a commented-out all-TODO stub.
///   - other fetch errors go through `reportFetchError`, write a
///     commented-out stub naming the error, and either skip to the
///     next symbol or — on a hard stop — end the batch.
///   - successes write ETF sleeve rows or a single stock row.
///
/// A trailing comment block summarizes the buckets. The summary
/// counts only symbols that were actually attempted; after a hard
/// stop an extra comment line states how many were never reached.
///
/// Progress and error messages go to stderr via `ctx.io`. Writer
/// errors and allocation failure for the sorted symbol copy
/// propagate.
fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void {
    const io = ctx.io;
    const allocator = ctx.allocator;
    const out = ctx.out;

    var loaded = cli.loadPortfolio(ctx, ctx.today) orelse return;
    defer loaded.deinit(allocator);

    const positions = loaded.positions;

    // Sort symbols for stable, diff-friendly output. Without this,
    // `stockSymbols` returns symbols in `std.StringHashMap` bucket
    // order — unstable across Zig versions and across portfolio
    // edits. The sort runs on a private copy so the loaded
    // portfolio's (const) slice is never written through.
    const syms = try allocator.dupe([]const u8, loaded.syms);
    defer allocator.free(syms);
    sortSymbolsAlphabetically(syms);

    // EDGAR ticker-map fallback runs lazily inside the service
    // (cache-first; only hits the network on cold cache or
    // `--refresh-data`), so the loop below only consumes the
    // already-merged classification record.
    const opts: zfin.FetchOptions = .{};

    try out.print("#!srfv1\n", .{});
    try out.print("# Portfolio classification metadata\n", .{});
    try out.print("# Generated from Wikidata + SEC EDGAR data\n", .{});
    try out.print("# Edit as needed: sector, geo, asset_class, pct:num:N\n", .{});
    try out.print("#\n", .{});
    try out.print("# For ETFs/funds with multi-class exposure, add multiple lines\n", .{});
    try out.print("# with pct:num: values that sum to ~100\n\n", .{});

    var wikidata_hits: usize = 0;
    var edgar_fallback: usize = 0;
    var manual_todo: usize = 0;
    var cusip_skipped: usize = 0;
    var failed: usize = 0;
    // Number of symbols the loop actually reached. Equals
    // `syms.len` unless a hard stop ended the batch early.
    var attempted: usize = 0;

    for (syms, 0..) |sym, i| {
        attempted += 1;

        // Skip CUSIPs: they are not fetched, only stubbed out.
        if (isCusipLike(sym)) {
            // Use the note of the first position carrying this CUSIP.
            var note: ?[]const u8 = null;
            for (positions) |pos| {
                if (std.mem.eql(u8, pos.symbol, sym)) {
                    if (pos.note) |n| {
                        note = n;
                    }
                    break;
                }
            }
            try out.print("# CUSIP {s}", .{sym});
            if (note) |n| try out.print(" ({s})", .{n});
            try out.print(" -- fill in manually\n", .{});
            try out.print("# symbol::{s},asset_class::TODO,geo::TODO\n\n", .{sym});
            cusip_skipped += 1;
            continue;
        }

        // Progress to stderr
        {
            var msg_buf: [128]u8 = undefined;
            const msg = std.fmt.bufPrint(&msg_buf, "  [{d}/{d}] {s}...\n", .{ i + 1, syms.len, sym }) catch "  ...\n";
            cli.stderrPrint(io, msg);
        }

        const result = svc.getClassification(sym, opts) catch |err| {
            if (err == zfin.DataError.NotFound) {
                // Neither Wikidata nor EDGAR knows this symbol --
                // fill in by hand.
                try out.print("# {s} -- no Wikidata or EDGAR entry\n", .{sym});
                try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
                manual_todo += 1;
                continue;
            }

            const action = reportFetchError(io, sym, err);
            try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
            try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
            failed += 1;

            switch (action) {
                .soft_skip => continue,
                .hard_stop => {
                    // Every remaining symbol will hit the same
                    // condition (no API key / auth fail / rate
                    // limit). Stop the batch with a clear note so
                    // the user knows how many were skipped.
                    var rem_buf: [256]u8 = undefined;
                    const remaining = syms.len - i - 1;
                    const rem_msg = std.fmt.bufPrint(
                        &rem_buf,
                        "Stopping enrichment: {d} symbol(s) not yet fetched. Rerun once the issue is resolved.\n",
                        .{remaining},
                    ) catch "Stopping enrichment.\n";
                    cli.stderrPrint(io, rem_msg);
                    break;
                },
            }
        };
        // Runs at the end of each iteration, releasing this
        // symbol's record before the next fetch.
        defer result.deinit();

        // NOTE(review): assumes a successful result always holds at
        // least one record — confirm against `getClassification`.
        const c = result.data[0];

        if (c.is_etf) {
            try emitEtfRows(svc, allocator, sym, c, opts, out);
            try out.print("\n", .{});
        } else {
            const market_cap = estimateMarketCap(svc, sym, c.cik, opts);
            var sector_buf: [64]u8 = undefined;
            const derived = deriveMetadata(c, market_cap, &sector_buf);
            if (c.name) |name| {
                try out.print("# {s}\n", .{name});
            }
            try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
                sym, derived.sector, derived.geo, derived.asset_class,
            });
        }

        switch (kindFromSource(c.source)) {
            .wikidata => wikidata_hits += 1,
            .edgar_fallback => edgar_fallback += 1,
            .none => manual_todo += 1, // shouldn't happen for a successful return
        }
    }

    // Summary. Every attempted symbol contributes to exactly one
    // bucket, so the buckets sum to `attempted`. `failed` counts
    // symbols whose fetch errored with something other than
    // `NotFound`; symbols rescued by the EDGAR fallback inside the
    // service land in `edgar_fallback` (the file has a usable line
    // for them).
    const filled = wikidata_hits + edgar_fallback;
    try out.print("# ---\n", .{});
    try out.print("# Enriched {d} symbols: {d} fully classified ({d} from Wikidata, {d} from EDGAR fallback), {d} need manual fill-in, {d} CUSIP-skipped, {d} unrecoverable failures\n", .{
        attempted, filled, wikidata_hits, edgar_fallback, manual_todo, cusip_skipped, failed,
    });
    if (attempted < syms.len) {
        // Only after a hard stop: make the truncation visible in the
        // generated file itself, not just on stderr.
        try out.print("# Stopped early: {d} symbol(s) not attempted\n", .{syms.len - attempted});
    }
    try out.print("# Review and edit this file, then save as metadata.srf\n", .{});
}

// ── Tests ────────────────────────────────────────────────────

test "parseArgs: accepts a symbol argument" {
    const testing = std.testing;
    // Only `io` is assigned on the context; the remaining fields are left undefined.
    var ctx: framework.RunCtx = undefined;
    ctx.io = testing.io;

    const argv = [_][]const u8{"AAPL"};
    const parsed = try parseArgs(&ctx, &argv);
    const symbol = parsed.symbol orelse return error.MissingSymbol;
    try testing.expectEqualStrings("AAPL", symbol);
}

test "parseArgs: no argument means portfolio mode" {
    var ctx: framework.RunCtx = undefined;
    ctx.io = std.testing.io;

    const no_args = [_][]const u8{};
    const parsed = try parseArgs(&ctx, &no_args);
    // A null symbol is how ParsedArgs signals portfolio mode.
    if (parsed.symbol != null) return error.TestUnexpectedResult;
}

test "parseArgs: extra args error" {
    var ctx: framework.RunCtx = undefined;
    ctx.io = std.testing.io;

    // Two positional arguments: one more than the command accepts.
    const too_many = [_][]const u8{ "AAPL", "extra" };
    const outcome = parseArgs(&ctx, &too_many);
    try std.testing.expectError(error.UnexpectedArg, outcome);
}

test "deriveMetadata: US large cap stock" {
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .country = "US",
        .sector = "technology",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // Lower-case sector in, title-cased sector out; 3 trillion lands in Large Cap.
    const got = deriveMetadata(record, 3_000_000_000_000, &scratch);
    try testing.expectEqualStrings("Technology", got.sector);
    try testing.expectEqualStrings("US", got.geo);
    try testing.expectEqualStrings("US Large Cap", got.asset_class);
}

test "deriveMetadata: US small cap stock" {
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "TINY",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // 500 million is expected to bucket as Small Cap.
    const got = deriveMetadata(record, 500_000_000, &scratch);
    try std.testing.expectEqualStrings("US Small Cap", got.asset_class);
}

test "deriveMetadata: US mid cap stock" {
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "MID",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // 5 billion is expected to bucket as Mid Cap.
    const got = deriveMetadata(record, 5_000_000_000, &scratch);
    try std.testing.expectEqualStrings("US Mid Cap", got.asset_class);
}

test "deriveMetadata: ETF sets asset_class to ETF regardless of size" {
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "VTI",
        .name = "Vanguard Total Stock Market ETF",
        .is_etf = true,
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // Even a 1 trillion market cap must not turn an ETF into a size bucket.
    const got = deriveMetadata(record, 1_000_000_000_000, &scratch);
    try std.testing.expectEqualStrings("ETF", got.asset_class);
}

test "deriveMetadata: international stock falls back to geo bucket" {
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "TM",
        .country = "JP",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // For a JP stock both geo and asset_class are expected to be the same bucket.
    const got = deriveMetadata(record, 200_000_000_000, &scratch);
    try testing.expectEqualStrings("International Developed", got.geo);
    try testing.expectEqualStrings("International Developed", got.asset_class);
}

test "deriveMetadata: emerging-market stock geo bucket" {
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "BABA",
        .country = "CN",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // For a CN stock both geo and asset_class are expected to be the same bucket.
    const got = deriveMetadata(record, 200_000_000_000, &scratch);
    try testing.expectEqualStrings("Emerging Markets", got.geo);
    try testing.expectEqualStrings("Emerging Markets", got.asset_class);
}

test "deriveMetadata: missing market cap defaults US to Large Cap" {
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "UNK",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    // A null market cap means no size information is available.
    const got = deriveMetadata(record, null, &scratch);
    try std.testing.expectEqualStrings("US Large Cap", got.asset_class);
}

test "deriveMetadata: unknown country -> Unknown geo" {
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "WEIRD",
        .source = "wikidata",
        .as_of = "2026-05-29",
        .country = null,
    };

    // With neither country nor market cap, both fields are expected to read "Unknown".
    const got = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings("Unknown", got.geo);
    try testing.expectEqualStrings("Unknown", got.asset_class);
}

test "deriveMetadata: ETF gets sector::TODO regardless of Wikidata sector field" {
    // An ETF spans multiple sectors by definition. Wikidata
    // occasionally hangs an industry on an ETF entity anyway;
    // that value is replaced with TODO so the user supplies
    // their own sector breakdown instead of inheriting a stray
    // industry.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "VTI",
        .is_etf = true,
        .sector = "stale industry value",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    const got = deriveMetadata(record, null, &scratch);
    try testing.expectEqualStrings("TODO", got.sector);
    try testing.expectEqualStrings("ETF", got.asset_class);
}

test "deriveMetadata: missing sector -> TODO (not 'Unknown')" {
    // SOXX-style case: Wikidata had an entity but supplied no
    // industry, country, or instance-of statements, so the
    // sector is null. The output is TODO, which tells the user
    // to fill it in by hand; "Unknown" could be mistaken for a
    // real taxonomy bucket.
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "SPARSE",
        .sector = null,
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    const got = deriveMetadata(record, null, &scratch);
    try std.testing.expectEqualStrings("TODO", got.sector);
}

test "deriveMetadata: stock with sector preserved (canonical sector pass-through)" {
    // Sectors are canonicalized by the Wikidata parser before
    // deriveMetadata sees them; deriveMetadata only title-cases.
    // A canonical, already title-cased string must therefore
    // come back unchanged.
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "MSFT",
        .sector = "Technology",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    const got = deriveMetadata(record, 3_000_000_000_000, &scratch);
    try std.testing.expectEqualStrings("Technology", got.sector);
}

test "deriveMetadata: asset_class == 'Mutual Fund' short-circuits before size buckets" {
    // A record already labelled "Mutual Fund" is trusted as-is:
    // the size-bucket derivation must be skipped even though a
    // market cap is supplied.
    const testing = std.testing;
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "VFORX",
        .asset_class = "Mutual Fund",
        .sector = "Financial Services",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    const got = deriveMetadata(record, 50_000_000_000, &scratch);
    try testing.expectEqualStrings("Mutual Fund", got.asset_class);
    // The sector is still title-cased rather than forced to
    // TODO, because a mutual fund is not an ETF.
    try testing.expectEqualStrings("Financial Services", got.sector);
}

test "deriveMetadata: asset_class == 'Mutual Fund' with no market cap" {
    // In practice mutual funds arrive with a null market cap
    // (there is no shares-outstanding XBRL tag for them). The
    // Mutual Fund branch must still win over the Large Cap
    // default.
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "FAGIX",
        .asset_class = "Mutual Fund",
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    const got = deriveMetadata(record, null, &scratch);
    try std.testing.expectEqualStrings("Mutual Fund", got.asset_class);
}

test "deriveMetadata: asset_class set but not 'Mutual Fund' -> falls through to size buckets" {
    // Guard against over-matching: "Mutual Fund" is the only
    // asset_class literal that short-circuits today. Any other
    // string must be ignored and the size buckets applied.
    var scratch: [64]u8 = undefined;
    const record = ClassificationRecord{
        .symbol = "AAPL",
        .sector = "Technology",
        .asset_class = "Open-End Fund", // hypothetical other value
        .country = "US",
        .source = "wikidata",
        .as_of = "2026-05-29",
    };

    const got = deriveMetadata(record, 3_000_000_000_000, &scratch);
    try std.testing.expectEqualStrings("US Large Cap", got.asset_class);
}

// ── reportFetchError ────────────────────────────────────────
//
// `reportFetchError` writes a user-facing diagnostic to stderr
// (no-op under `builtin.is_test`) and returns either
// `.hard_stop` (every subsequent symbol will hit the same
// condition; abort the batch) or `.soft_skip` (per-symbol; keep
// going). The tests verify the action classification per error
// variant — the stderr text isn't asserted because stderr is
// suppressed in test mode.

test "reportFetchError: NoApiKey -> hard_stop" {
    // A missing API key must abort the batch.
    try std.testing.expectEqual(
        FetchErrorAction.hard_stop,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.NoApiKey),
    );
}

test "reportFetchError: AuthError -> hard_stop" {
    // An authentication failure must abort the batch.
    try std.testing.expectEqual(
        FetchErrorAction.hard_stop,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.AuthError),
    );
}

test "reportFetchError: RateLimited -> hard_stop" {
    // Rate limiting must abort the batch.
    try std.testing.expectEqual(
        FetchErrorAction.hard_stop,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.RateLimited),
    );
}

test "reportFetchError: NotFound -> soft_skip" {
    // A not-found symbol is a per-symbol condition: skip it and continue.
    try std.testing.expectEqual(
        FetchErrorAction.soft_skip,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.NotFound),
    );
}

test "reportFetchError: TransientError -> soft_skip" {
    // A transient failure is a per-symbol condition: skip it and continue.
    try std.testing.expectEqual(
        FetchErrorAction.soft_skip,
        reportFetchError(std.testing.io, "AAPL", zfin.DataError.TransientError),
    );
}

test "reportFetchError: unknown error variant -> soft_skip (catch-all)" {
    // Errors without an explicit prong (a generic FetchFailed,
    // for example) end up in the `else` branch, which
    // soft-skips. That default is the safer one: an unexpected
    // error class should cost one symbol, not the whole batch.
    const io = std.testing.io;
    const verdict = reportFetchError(io, "AAPL", zfin.DataError.FetchFailed);
    try std.testing.expectEqual(FetchErrorAction.soft_skip, verdict);
}

test "reportFetchError: long symbol still classifies correctly (bufPrint fallback)" {
    // reportFetchError formats into a 256-byte msg_buf, so a
    // symbol close to that length takes the bufPrint-failed
    // fallback path. The returned action must be unaffected by
    // whether the message was truncated.
    const oversized_symbol = "X" ** 200;
    const verdict = reportFetchError(std.testing.io, oversized_symbol, zfin.DataError.NotFound);
    try std.testing.expectEqual(FetchErrorAction.soft_skip, verdict);
}

// ── formatProvenanceMessage ────────────────────────────────────

test "formatProvenanceMessage: wikidata -> 'classified via Wikidata' line" {
    const testing = std.testing;
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "AAPL", .wikidata, null) orelse return error.Format;

    // The line must mention the symbol and the source, and be newline-terminated.
    for ([_][]const u8{ "AAPL", "Wikidata" }) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
    try testing.expect(std.mem.endsWith(u8, line, "\n"));
}

test "formatProvenanceMessage: edgar_fallback -> 'classified via EDGAR fallback' line" {
    const testing = std.testing;
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "SOXX", .edgar_fallback, null) orelse return error.Format;

    // The line must mention both the symbol and the fallback source.
    for ([_][]const u8{ "SOXX", "EDGAR fallback" }) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}

test "formatProvenanceMessage: none with no error -> 'no Wikidata or EDGAR entry'" {
    const testing = std.testing;
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "MISSING", .none, null) orelse return error.Format;

    // Symbol, the "nothing found" wording, and the manual-fill hint must all appear.
    const needles = [_][]const u8{ "MISSING", "no Wikidata or EDGAR entry", "fill in by hand" };
    for (needles) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}

test "formatProvenanceMessage: none with error -> includes error name" {
    // If Wikidata errored and EDGAR had nothing either, the
    // upstream error name is surfaced in the message so the
    // user knows what to do (RateLimited → wait, then rerun).
    const testing = std.testing;
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "FOO", .none, error.RateLimited) orelse return error.Format;

    const needles = [_][]const u8{ "FOO", "RateLimited", "Wikidata errored" };
    for (needles) |needle| {
        try testing.expect(std.mem.indexOf(u8, line, needle) != null);
    }
}

test "formatProvenanceMessage: small buffer returns null (safety valve)" {
    // None of the message variants fits in 16 bytes. The
    // function must report that with null instead of crashing;
    // callers read null as "skip the breadcrumb".
    var tiny: [16]u8 = undefined;
    const rendered = formatProvenanceMessage(&tiny, "AAPL", .edgar_fallback, null);
    try std.testing.expect(rendered == null);
}

test "formatProvenanceMessage: messages have leading two-space indent" {
    // Keeps the breadcrumb aligned with enrich's other stderr
    // output: progress messages and fetch breadcrumbs all
    // start with "  ".
    var scratch: [256]u8 = undefined;
    const line = formatProvenanceMessage(&scratch, "X", .edgar_fallback, null) orelse return error.Format;
    const indented = std.mem.startsWith(u8, line, "  ");
    try std.testing.expect(indented);
}

// ── classifyForCounter ────────────────────────────────────────

test "classifyForCounter: wikidata -> wikidata_hit regardless of error arg" {
    // Same answer for both values of the error flag.
    for ([_]bool{ false, true }) |errored| {
        try std.testing.expectEqual(SummaryCounter.wikidata_hit, classifyForCounter(.wikidata, errored));
    }
}

test "classifyForCounter: edgar_fallback -> edgar_fallback regardless of wikidata error" {
    // Once EDGAR has rescued the symbol the file contains a
    // usable line, so the bucket is edgar_fallback no matter
    // whether Wikidata errored first.
    for ([_]bool{ true, false }) |errored| {
        try std.testing.expectEqual(SummaryCounter.edgar_fallback, classifyForCounter(.edgar_fallback, errored));
    }
}

test "classifyForCounter: none + wikidata errored -> failed (no data anywhere)" {
    // Genuine failure: the Wikidata request errored and EDGAR
    // has no row, so the file holds nothing usable for this
    // symbol. The user has to rerun or fill it in manually.
    const bucket = classifyForCounter(.none, true);
    try std.testing.expectEqual(SummaryCounter.failed, bucket);
}

test "classifyForCounter: none + wikidata succeeded but empty -> manual_todo" {
    // Wikidata answered but with nothing useful, and EDGAR has
    // no row. metadata.srf gets a TODO stub for the user to
    // complete. This is not `failed`: a retry would not help,
    // because Wikidata simply has no entry.
    const bucket = classifyForCounter(.none, false);
    try std.testing.expectEqual(SummaryCounter.manual_todo, bucket);
}

test "classifyForCounter: covers all (FallbackKind, bool) input combinations" {
    // Full truth table, one row per input pair, so that a
    // policy change cannot land without touching this test.
    // Row layout: { kind, wikidata-errored flag, expected bucket }.
    inline for (.{
        .{ .wikidata, false, SummaryCounter.wikidata_hit },
        .{ .wikidata, true, SummaryCounter.wikidata_hit },
        .{ .edgar_fallback, false, SummaryCounter.edgar_fallback },
        .{ .edgar_fallback, true, SummaryCounter.edgar_fallback },
        .{ .none, false, SummaryCounter.manual_todo },
        .{ .none, true, SummaryCounter.failed },
    }) |row| {
        try std.testing.expectEqual(row[2], classifyForCounter(row[0], row[1]));
    }
}

// ── hasDominantEquitySector ──────────────────────────────────

test "hasDominantEquitySector: single 99% Equity / Corporate -> true" {
    // One overwhelming equity row plus a sliver of cash-like holdings.
    const holdings = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.7 },
        .{ .description = "Short-Term Investment Vehicle / Registered Fund", .pct = 0.19 },
    };
    const dominant = hasDominantEquitySector(&holdings);
    try std.testing.expect(dominant);
}

test "hasDominantEquitySector: 95% threshold is inclusive" {
    // Exactly 95.0 must already count as dominant.
    const holdings = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 95.0 },
    };
    const dominant = hasDominantEquitySector(&holdings);
    try std.testing.expect(dominant);
}

test "hasDominantEquitySector: 94.99% does NOT trigger" {
    // Just below the threshold must not count as dominant.
    const holdings = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 94.99 },
    };
    const dominant = hasDominantEquitySector(&holdings);
    try std.testing.expect(!dominant);
}

test "hasDominantEquitySector: multi-asset fund (FAGIX-shape) -> false" {
    // FAGIX-shaped mix: roughly 22% Equity / Corporate next to
    // debt and loan sleeves, far short of the 95% threshold.
    const holdings = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 47.69 },
        .{ .description = "Equity / Corporate", .pct = 22.49 },
        .{ .description = "Loan / Corporate", .pct = 9.99 },
    };
    const dominant = hasDominantEquitySector(&holdings);
    try std.testing.expect(!dominant);
}

test "hasDominantEquitySector: pure-debt fund -> false" {
    // VBTLX-shaped: every row is Debt / *, with no
    // Equity / Corporate row present at all.
    const holdings = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 50.0 },
        .{ .description = "Debt / US Treasury", .pct = 30.0 },
    };
    const dominant = hasDominantEquitySector(&holdings);
    try std.testing.expect(!dominant);
}

test "hasDominantEquitySector: null and empty -> false" {
    const testing = std.testing;
    const no_rows = [_]FundSector{};
    // Absent data and zero rows must both answer false.
    try testing.expect(!hasDominantEquitySector(null));
    try testing.expect(!hasDominantEquitySector(&no_rows));
}

test "emitFundLines: null sectors -> single TODO line" {
    var storage: [256]u8 = undefined;
    var writer = std.Io.Writer.fixed(&storage);

    // No sector data: exactly one placeholder line is expected.
    try emitFundLines("VTI", "ETF", null, null, null, &writer);

    const expected = "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}

test "emitFundLines: populated sectors -> one line per sector with pct" {
    const testing = std.testing;
    var storage: [512]u8 = undefined;
    var writer = std.Io.Writer.fixed(&storage);
    const holdings = [_]FundSector{
        .{ .description = "Debt / Corporate", .pct = 47.69 },
        .{ .description = "Equity / Corporate", .pct = 22.49 },
    };
    try emitFundLines("FAGIX", "Fund", &holdings, null, null, &writer);

    const emitted = writer.buffered();
    // Each sector must appear as its own row carrying its percentage.
    const expected_rows = [_][]const u8{
        "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69",
        "symbol::FAGIX,sector::Equity / Corporate,geo::US,asset_class::Fund,pct:num:22.49",
    };
    for (expected_rows) |row| {
        try testing.expect(std.mem.indexOf(u8, emitted, row) != null);
    }
    // With real sector data there must be no TODO placeholder.
    try testing.expect(std.mem.indexOf(u8, emitted, "TODO") == null);
}

test "emitFundLines: empty slice -> single TODO line (treats empty as null)" {
    var storage: [256]u8 = undefined;
    var writer = std.Io.Writer.fixed(&storage);
    const no_rows = [_]FundSector{};

    // A zero-length sector list must produce the same placeholder as null.
    try emitFundLines("VTI", "ETF", &no_rows, null, null, &writer);

    const expected = "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}

test "emitFundLines: negative pct values render correctly" {
    // Short positions and derivatives show up in real NPORT-P
    // data as negative percentages; the sign has to survive
    // formatting.
    const testing = std.testing;
    var storage: [512]u8 = undefined;
    var writer = std.Io.Writer.fixed(&storage);
    const holdings = [_]FundSector{
        .{ .description = "Repurchase Agreement / Other", .pct = -29.72 },
        .{ .description = "Derivative-FX / Other", .pct = -0.84 },
    };
    try emitFundLines("PTY", "Fund", &holdings, null, null, &writer);

    const emitted = writer.buffered();
    for ([_][]const u8{ "pct:num:-29.72", "pct:num:-0.84" }) |needle| {
        try testing.expect(std.mem.indexOf(u8, emitted, needle) != null);
    }
}

test "emitFundLines: ETF asset_class flows through" {
    var storage: [512]u8 = undefined;
    var writer = std.Io.Writer.fixed(&storage);
    const holdings = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.86 },
    };

    // The asset_class argument must be copied verbatim into the row.
    try emitFundLines("SOXX", "ETF", &holdings, null, null, &writer);

    const expected = "symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}

test "freeFundSectors: frees slice + each description, no leak" {
    // Build a heap-owned slice shaped like the output of
    // `loadFundSectors`, hand it to `freeFundSectors`, and let
    // `std.testing.allocator` flag anything left behind.
    const gpa = std.testing.allocator;
    var builder: std.ArrayList(FundSector) = .empty;
    errdefer builder.deinit(gpa);

    const first = try gpa.dupe(u8, "Debt / Corporate");
    errdefer gpa.free(first);
    try builder.append(gpa, .{ .description = first, .pct = 47.69 });

    const second = try gpa.dupe(u8, "Equity / Corporate");
    errdefer gpa.free(second);
    try builder.append(gpa, .{ .description = second, .pct = 22.49 });

    const owned = try builder.toOwnedSlice(gpa);
    // Nothing to assert explicitly: a leak makes testing.allocator fail the test.
    freeFundSectors(gpa, owned);
}

test "freeFundSectors: empty slice is a no-op" {
    const gpa = std.testing.allocator;
    // Zero-length allocation: there are no descriptions to release.
    const nothing = try gpa.alloc(FundSector, 0);
    freeFundSectors(gpa, nothing);
}

test "freeFundEtfData: frees both name and sectors without leak" {
    // Construct the same shape `loadFundEtfData` produces, then
    // free via the paired free function. testing.allocator
    // catches any leak.
    const alloc = std.testing.allocator;
    const name = try alloc.dupe(u8, "Vanguard Total Bond Market Index Fund");
    // Release the name if any later setup step fails; on the
    // success path `freeFundEtfData` takes care of it.
    errdefer alloc.free(name);

    var list: std.ArrayList(FundSector) = .empty;
    errdefer {
        for (list.items) |s| alloc.free(s.description);
        list.deinit(alloc);
    }
    {
        // Until `append` succeeds the description is not reachable
        // through `list.items`, so it needs its own errdefer. The
        // inner scope ends that errdefer once the list owns the
        // string, so a later failure cannot free it twice.
        const desc = try alloc.dupe(u8, "Debt / Corporate");
        errdefer alloc.free(desc);
        try list.append(alloc, .{ .description = desc, .pct = 50.0 });
    }
    const sectors = try list.toOwnedSlice(alloc);

    freeFundEtfData(alloc, .{ .series_name = name, .sectors = sectors });
}

test "freeFundEtfData: handles null series_name (only sectors freed)" {
    // Only the sectors slice is populated; `series_name` is null,
    // so `freeFundEtfData` has just the sectors to release.
    const alloc = std.testing.allocator;
    var list: std.ArrayList(FundSector) = .empty;
    errdefer {
        for (list.items) |s| alloc.free(s.description);
        list.deinit(alloc);
    }
    {
        // Until `append` succeeds the description is not reachable
        // through `list.items`, so it needs its own errdefer. The
        // inner scope ends that errdefer once the list owns the
        // string, so a later failure cannot free it twice.
        const desc = try alloc.dupe(u8, "Equity / Corporate");
        errdefer alloc.free(desc);
        try list.append(alloc, .{ .description = desc, .pct = 100.0 });
    }
    const sectors = try list.toOwnedSlice(alloc);

    freeFundEtfData(alloc, .{ .series_name = null, .sectors = sectors });
}

test "freeFundEtfData: handles null sectors (only series_name freed)" {
    const gpa = std.testing.allocator;
    // Only the name is heap-owned here; sectors stay null.
    const owned_name = try gpa.dupe(u8, "Some Fund");
    freeFundEtfData(gpa, .{ .sectors = null, .series_name = owned_name });
}

test "freeFundEtfData: both null is a no-op" {
    // Nothing was allocated, so there is nothing to release.
    freeFundEtfData(std.testing.allocator, .{ .sectors = null, .series_name = null });
}

// ── sortSymbolsAlphabetically ────────────────────────────────

test "sortSymbolsAlphabetically: shuffled input -> alphabetical output" {
    var symbols = [_][]const u8{ "QQQ", "AAPL", "VTI", "BND", "SPY" };
    sortSymbolsAlphabetically(&symbols);

    // Compare position by position against the sorted order.
    const expected = [_][]const u8{ "AAPL", "BND", "QQQ", "SPY", "VTI" };
    for (expected, symbols) |want, got| {
        try std.testing.expectEqualStrings(want, got);
    }
}

test "sortSymbolsAlphabetically: already-sorted input is stable" {
    var symbols = [_][]const u8{ "AAPL", "BND", "VTI" };
    sortSymbolsAlphabetically(&symbols);

    // Sorted input must come back in the same order.
    const expected = [_][]const u8{ "AAPL", "BND", "VTI" };
    for (expected, symbols) |want, got| {
        try std.testing.expectEqualStrings(want, got);
    }
}

test "sortSymbolsAlphabetically: empty slice is a no-op" {
    var none = [_][]const u8{};
    // Sorting zero elements must neither crash nor change the length.
    sortSymbolsAlphabetically(&none);
    try std.testing.expect(none.len == 0);
}

test "sortSymbolsAlphabetically: single element is unchanged" {
    var only = [_][]const u8{"AAPL"};
    sortSymbolsAlphabetically(&only);
    // The lone element must still be in place.
    const first = only[0];
    try std.testing.expectEqualStrings("AAPL", first);
}

test "sortSymbolsAlphabetically: case-sensitive ordering (uppercase < lowercase)" {
    // Symbols are expected to arrive uppercased already
    // (portfolio.srf canonicalizes them, and single-symbol mode
    // relies on the framework's `uppercase_first_arg`). This
    // pins the comparator as byte-lexicographic so the result
    // is predictable should mixed case ever slip through.
    var symbols = [_][]const u8{ "aapl", "AAPL", "BND" };
    sortSymbolsAlphabetically(&symbols);

    // Uppercase bytes are numerically smaller than lowercase
    // ones, so "aapl" ends up last.
    const expected = [_][]const u8{ "AAPL", "BND", "aapl" };
    for (expected, symbols) |want, got| {
        try std.testing.expectEqualStrings(want, got);
    }
}

test "sortSymbolsAlphabetically: numbers and digits sort before letters" {
    // Inputs may include CUSIPs (9-character alphanumeric) and
    // share classes such as "BRK.B". Byte-lexicographic order
    // places digit-leading entries ahead of letter-leading
    // ones, which groups the numbered entries at the top as a
    // user would expect.
    var symbols = [_][]const u8{ "AAPL", "02315N600", "BRK.B" };
    sortSymbolsAlphabetically(&symbols);

    const expected = [_][]const u8{ "02315N600", "AAPL", "BRK.B" };
    for (expected, symbols) |want, got| {
        try std.testing.expectEqualStrings(want, got);
    }
}

test "sortSymbolsAlphabetically: duplicate symbols stay together" {
    // stockSymbols is meant to deduplicate before this point.
    // Should a duplicate get through anyway, the copies must
    // end up next to each other; the sort must not crash or
    // scramble the rest.
    var symbols = [_][]const u8{ "VTI", "AAPL", "VTI", "BND" };
    sortSymbolsAlphabetically(&symbols);

    const expected = [_][]const u8{ "AAPL", "BND", "VTI", "VTI" };
    for (expected, symbols) |want, got| {
        try std.testing.expectEqualStrings(want, got);
    }
}