diff --git a/src/providers/Edgar.zig b/src/providers/Edgar.zig
index b344476..4a949b6 100644
--- a/src/providers/Edgar.zig
+++ b/src/providers/Edgar.zig
@@ -155,6 +155,8 @@ const RateLimiter = @import("../net/RateLimiter.zig");
 const fmt = @import("../format.zig");
 const xml = @import("xml.zig");
 
+const log = std.log.scoped(.edgar);
+
 const tickers_funds_url = "https://www.sec.gov/files/company_tickers_mf.json";
 const tickers_companies_url = "https://www.sec.gov/files/company_tickers.json";
 const search_url_prefix = "https://efts.sec.gov/LATEST/search-index?";
@@ -336,63 +338,97 @@ pub fn fetchEtfMetrics(
     symbol: []const u8,
     top_n_holdings: usize,
 ) !EtfMetricsResult {
-    // MF/ETF map first - authoritative for symbols filed under a
-    // series. Series-keyed full-text search; CIK fallback would
-    // yield arbitrary other series under the same trust.
-    if (mf_ticker_map.get(symbol)) |entry| {
-        const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
-            return .not_a_fund;
-        };
-        defer allocator.free(filing_url);
-        const m = try self.fetchAndParseNportP(
-            io,
-            allocator,
-            entry.toGeneric(),
-            filing_url,
-            symbol,
-            top_n_holdings,
-        );
-        return .{ .full = m };
-    }
+    const mf_entry = mf_ticker_map.get(symbol);
+    const co_entry = stock_ticker_map.get(symbol);
 
-    // Stock map: probe the submissions feed (one extra HTTP per
-    // unique CIK) to classify the entity. Branches:
-    // - fund_shaped + has NPORT-P -> full holdings (SPY)
-    // - fund_shaped + no NPORT-P -> profile-only (SLVO ETN issuer)
-    // - trust_shaped -> profile-only (GLD commodity)
-    // - operating -> not-a-fund (AAPL, MSFT)
-    if (stock_ticker_map.get(symbol)) |entry| {
-        var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
-        defer sub.deinit(allocator);
-
-        const class = classifyByEntityType(&sub);
-        switch (class) {
-            .operating => return .not_a_fund,
-            .fund_shaped => {
-                if (sub.latest_nport_p_url) |url| {
-                    const m = try self.fetchAndParseNportP(
-                        io,
-                        allocator,
-                        entry.toGeneric(),
-                        url,
-                        symbol,
-                        top_n_holdings,
-                    );
-                    return .{ .full = m };
-                }
-                const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
-                return .{ .profile_only = profile };
-            },
-            .trust_shaped => {
-                // Skip the NPORT-P probe - by definition these
-                // don't file one. Saves an HTTP roundtrip.
-                const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
-                return .{ .profile_only = profile };
-            },
+    // Ticker-recycling guard: when a symbol is in BOTH ticker maps,
+    // classify the company side up front. SEC's
+    // `company_tickers_mf.json` is slow to drop a ticker after it's
+    // reassigned, so a stale fund-series mapping can collide with an
+    // operating company that now owns the ticker (e.g. SPCX: SpaceX's
+    // 2026 IPO took over the defunct Tuttle "SPAC and New Issue ETF"
+    // ticker). The probe is best-effort - on failure we leave
+    // `co_class` null and `resolveMapPrecedence` trusts the MF map, so
+    // a flaky company-feed fetch can't regress a both-map symbol.
+    // Out-of-memory is the exception: it is not a feed problem, so it
+    // propagates instead of being logged and ignored.
+    var co_sub: ?SubmissionsSummary = null;
+    defer if (co_sub) |*s| s.deinit(allocator);
+    var co_class: ?EntityClass = null;
+    if (mf_entry != null and co_entry != null) {
+        if (self.fetchSubmissionsFeed(allocator, co_entry.?.cik)) |sub| {
+            co_sub = sub;
+            co_class = classifyByEntityType(&co_sub.?);
+        } else |err| switch (err) {
+            error.OutOfMemory => return err,
+            else => log.warn("{s}: recycled-ticker company probe failed, trusting MF map: {s}", .{ symbol, @errorName(err) }),
         }
     }
 
-    return .not_in_edgar;
+    switch (resolveMapPrecedence(mf_entry != null, co_entry != null, co_class)) {
+        // A ticker now owned by an operating company: do not report the
+        // stale fund series as a fund.
+        .recycled_not_a_fund => return .not_a_fund,
+
+        // MF/ETF map - authoritative for symbols filed under a series.
+        // Series-keyed full-text search; a CIK fallback would yield
+        // arbitrary other series under the same trust.
+        .mf_series => {
+            const entry = mf_entry.?;
+            const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
+                return .not_a_fund;
+            };
+            defer allocator.free(filing_url);
+            const m = try self.fetchAndParseNportP(
+                io,
+                allocator,
+                entry.toGeneric(),
+                filing_url,
+                symbol,
+                top_n_holdings,
+            );
+            return .{ .full = m };
+        },
+
+        // Stock map only: probe the submissions feed (one extra HTTP
+        // per unique CIK) to classify the entity. Branches:
+        // - fund_shaped + has NPORT-P -> full holdings (SPY)
+        // - fund_shaped + no NPORT-P -> profile-only (SLVO ETN issuer)
+        // - trust_shaped -> profile-only (GLD commodity)
+        // - operating -> not-a-fund (AAPL, MSFT)
+        .probe_company => {
+            const entry = co_entry.?;
+            var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
+            defer sub.deinit(allocator);
+
+            switch (classifyByEntityType(&sub)) {
+                .operating => return .not_a_fund,
+                .fund_shaped => {
+                    if (sub.latest_nport_p_url) |url| {
+                        const m = try self.fetchAndParseNportP(
+                            io,
+                            allocator,
+                            entry.toGeneric(),
+                            url,
+                            symbol,
+                            top_n_holdings,
+                        );
+                        return .{ .full = m };
+                    }
+                    const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
+                    return .{ .profile_only = profile };
+                },
+                .trust_shaped => {
+                    // Skip the NPORT-P probe - by definition these
+                    // don't file one. Saves an HTTP roundtrip.
+                    const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
+                    return .{ .profile_only = profile };
+                },
+            }
+        },
+
+        .not_in_edgar => return .not_in_edgar,
+    }
 }
 
 /// Download and parse a NPORT-P primary_doc.xml at `filing_url`.
@@ -1221,6 +1257,73 @@ fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const
     );
 }
 
+/// Coarse classification of an EDGAR entity from its submissions feed,
+/// produced by `classifyByEntityType`.
+const EntityClass = enum { fund_shaped, trust_shaped, operating };
+
+/// Which path `fetchEtfMetrics` should take for a symbol, given its
+/// ticker-map membership.
+const MapResolution = enum {
+    /// Use the MF/ETF series-keyed path (symbol filed under a series).
+    mf_series,
+    /// Probe the company submissions feed to classify (symbol present
+    /// only in the company map).
+    probe_company,
+    /// A ticker present in both maps that now belongs to an operating
+    /// company (recycled ticker): treat as not-a-fund.
+    recycled_not_a_fund,
+    /// Symbol is in neither ticker map.
+    not_in_edgar,
+};
+
+/// Pure precedence decision for `fetchEtfMetrics`. `in_mf` / `in_co`
+/// are whether the symbol appears in the MF/ETF and company ticker
+/// maps; `co_class` is the company side's classification, which is
+/// only consulted (and only needs to be computed) when the symbol is
+/// in BOTH maps.
+///
+/// The load-bearing case is a both-maps collision. SEC's
+/// `company_tickers_mf.json` lags ticker reassignments, so a stale
+/// fund-series mapping can coexist with an operating company that now
+/// owns the ticker (e.g. SPCX: SpaceX's 2026 IPO over the defunct
+/// Tuttle "SPAC and New Issue ETF"). When the company side is an
+/// operating company the ticker belongs to it now, so the stale series
+/// must not be reported as a fund. A fund/trust-shaped company side
+/// keeps the more precise series path. A null `co_class` (probe failed
+/// or not performed) falls back to the MF path - the pre-guard
+/// behavior.
+fn resolveMapPrecedence(in_mf: bool, in_co: bool, co_class: ?EntityClass) MapResolution {
+    if (in_mf and in_co) {
+        if (co_class) |c| return switch (c) {
+            .operating => .recycled_not_a_fund,
+            .fund_shaped, .trust_shaped => .mf_series,
+        };
+        return .mf_series;
+    }
+    if (in_mf) return .mf_series;
+    if (in_co) return .probe_company;
+    return .not_in_edgar;
+}
+
+test "resolveMapPrecedence: ticker-map membership decides the path" {
+    const T = std.testing;
+    // Single-map cases ignore co_class.
+    try T.expectEqual(MapResolution.mf_series, resolveMapPrecedence(true, false, null));
+    try T.expectEqual(MapResolution.probe_company, resolveMapPrecedence(false, true, null));
+    try T.expectEqual(MapResolution.not_in_edgar, resolveMapPrecedence(false, false, null));
+
+    // Both maps: an operating company side means the ticker was
+    // recycled (SPCX/SpaceX) -> not a fund.
+    try T.expectEqual(MapResolution.recycled_not_a_fund, resolveMapPrecedence(true, true, .operating));
+    // Both maps but the company side is itself a fund/trust: the
+    // series-keyed MF path remains the more precise source.
+    try T.expectEqual(MapResolution.mf_series, resolveMapPrecedence(true, true, .fund_shaped));
+    try T.expectEqual(MapResolution.mf_series, resolveMapPrecedence(true, true, .trust_shaped));
+    // Both maps but the probe failed (null): trust the MF map, matching
+    // the pre-guard behavior.
+    try T.expectEqual(MapResolution.mf_series, resolveMapPrecedence(true, true, null));
+}
+
 /// Classify a CIK based on its submissions-feed metadata. Decides
 /// whether the symbol is a registered fund (probe NPORT-P), a
 /// trust/ETN-style instrument (profile-only), or a plain operating
@@ -1258,11 +1361,7 @@ fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const
 /// that distribute rental income, not registered investment
 /// companies. They get bucketed under `operating` - Wikidata is
 /// the right source for them.
-fn classifyByEntityType(sub: *const SubmissionsSummary) enum {
-    fund_shaped,
-    trust_shaped,
-    operating,
-} {
+fn classifyByEntityType(sub: *const SubmissionsSummary) EntityClass {
     // Rule 1: NPORT-P presence is the strongest fund signal.
     if (sub.latest_nport_p_url != null) return .fund_shaped;
 
@@ -1364,6 +1463,16 @@ test "classifyByEntityType buckets real-world entities" {
         s.sic_description = try T.allocator.dupe(u8, "Real Estate Investment Trusts");
         try T.expectEqual(.operating, classifyByEntityType(&s));
     }
+    // SPCX/SpaceX after its 2026 IPO: operating company, no NPORT-P,
+    // aerospace SIC. Must be `operating` so the ticker-recycling guard
+    // can override the stale "SPAC and New Issue ETF" fund-map entry.
+    {
+        var s: SubmissionsSummary = .{};
+        defer s.deinit(T.allocator);
+        s.entity_type = try T.allocator.dupe(u8, "operating");
+        s.sic_description = try T.allocator.dupe(u8, "Guided Missiles & Space Vehicles & Parts");
+        try T.expectEqual(.operating, classifyByEntityType(&s));
+    }
 }
 
 /// Result kind for `fetchEtfMetrics`. The caller - see `main.zig` -