add edgar guard for recycled ticker symbols (SPCX IPO reused ticker from an ETF)

This commit is contained in:
Emil Lerch 2026-06-27 09:25:44 -07:00
parent 2bd49af8f3
commit 3abce454dc
Signed by: lobo
GPG key ID: A7B62D657EF764F8

View file

@ -155,6 +155,8 @@ const RateLimiter = @import("../net/RateLimiter.zig");
const fmt = @import("../format.zig");
const xml = @import("xml.zig");
const log = std.log.scoped(.edgar);
const tickers_funds_url = "https://www.sec.gov/files/company_tickers_mf.json";
const tickers_companies_url = "https://www.sec.gov/files/company_tickers.json";
const search_url_prefix = "https://efts.sec.gov/LATEST/search-index?";
@ -336,63 +338,94 @@ pub fn fetchEtfMetrics(
symbol: []const u8,
top_n_holdings: usize,
) !EtfMetricsResult {
// MF/ETF map first - authoritative for symbols filed under a
// series. Series-keyed full-text search; CIK fallback would
// yield arbitrary other series under the same trust.
if (mf_ticker_map.get(symbol)) |entry| {
const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
return .not_a_fund;
};
defer allocator.free(filing_url);
const m = try self.fetchAndParseNportP(
io,
allocator,
entry.toGeneric(),
filing_url,
symbol,
top_n_holdings,
);
return .{ .full = m };
}
const mf_entry = mf_ticker_map.get(symbol);
const co_entry = stock_ticker_map.get(symbol);
// Stock map: probe the submissions feed (one extra HTTP per
// unique CIK) to classify the entity. Branches:
// - fund_shaped + has NPORT-P -> full holdings (SPY)
// - fund_shaped + no NPORT-P -> profile-only (SLVO ETN issuer)
// - trust_shaped -> profile-only (GLD commodity)
// - operating -> not-a-fund (AAPL, MSFT)
if (stock_ticker_map.get(symbol)) |entry| {
var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
defer sub.deinit(allocator);
const class = classifyByEntityType(&sub);
switch (class) {
.operating => return .not_a_fund,
.fund_shaped => {
if (sub.latest_nport_p_url) |url| {
const m = try self.fetchAndParseNportP(
io,
allocator,
entry.toGeneric(),
url,
symbol,
top_n_holdings,
);
return .{ .full = m };
}
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
return .{ .profile_only = profile };
},
.trust_shaped => {
// Skip the NPORT-P probe - by definition these
// don't file one. Saves an HTTP roundtrip.
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
return .{ .profile_only = profile };
},
// Ticker-recycling guard: when a symbol is in BOTH ticker maps,
// classify the company side up front. SEC's
// `company_tickers_mf.json` is slow to drop a ticker after it's
// reassigned, so a stale fund-series mapping can collide with an
// operating company that now owns the ticker (e.g. SPCX: SpaceX's
// 2026 IPO took over the defunct Tuttle "SPAC and New Issue ETF"
// ticker). The probe is best-effort - on failure we leave
// `co_class` null and `resolveMapPrecedence` trusts the MF map, so
// a flaky company-feed fetch can't regress a both-map symbol.
var co_sub: ?SubmissionsSummary = null;
defer if (co_sub) |*s| s.deinit(allocator);
var co_class: ?EntityClass = null;
if (mf_entry != null and co_entry != null) {
if (self.fetchSubmissionsFeed(allocator, co_entry.?.cik)) |sub| {
co_sub = sub;
co_class = classifyByEntityType(&co_sub.?);
} else |err| {
log.warn("{s}: recycled-ticker company probe failed, trusting MF map: {s}", .{ symbol, @errorName(err) });
}
}
return .not_in_edgar;
switch (resolveMapPrecedence(mf_entry != null, co_entry != null, co_class)) {
// A ticker now owned by an operating company: do not report the
// stale fund series as a fund.
.recycled_not_a_fund => return .not_a_fund,
// MF/ETF map - authoritative for symbols filed under a series.
// Series-keyed full-text search; a CIK fallback would yield
// arbitrary other series under the same trust.
.mf_series => {
const entry = mf_entry.?;
const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
return .not_a_fund;
};
defer allocator.free(filing_url);
const m = try self.fetchAndParseNportP(
io,
allocator,
entry.toGeneric(),
filing_url,
symbol,
top_n_holdings,
);
return .{ .full = m };
},
// Stock map only: probe the submissions feed (one extra HTTP
// per unique CIK) to classify the entity. Branches:
// - fund_shaped + has NPORT-P -> full holdings (SPY)
// - fund_shaped + no NPORT-P -> profile-only (SLVO ETN issuer)
// - trust_shaped -> profile-only (GLD commodity)
// - operating -> not-a-fund (AAPL, MSFT)
.probe_company => {
const entry = co_entry.?;
var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
defer sub.deinit(allocator);
switch (classifyByEntityType(&sub)) {
.operating => return .not_a_fund,
.fund_shaped => {
if (sub.latest_nport_p_url) |url| {
const m = try self.fetchAndParseNportP(
io,
allocator,
entry.toGeneric(),
url,
symbol,
top_n_holdings,
);
return .{ .full = m };
}
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
return .{ .profile_only = profile };
},
.trust_shaped => {
// Skip the NPORT-P probe - by definition these
// don't file one. Saves an HTTP roundtrip.
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
return .{ .profile_only = profile };
},
}
},
.not_in_edgar => return .not_in_edgar,
}
}
/// Download and parse a NPORT-P primary_doc.xml at `filing_url`.
@ -1221,6 +1254,73 @@ fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const
);
}
/// Broad bucket an EDGAR entity falls into, as decided by
/// `classifyByEntityType` from the entity's submissions feed.
const EntityClass = enum {
    /// Looks like a registered fund. `fetchEtfMetrics` reports full
    /// holdings when an NPORT-P filing is available and profile-only
    /// metrics otherwise.
    fund_shaped,
    /// Trust-style instrument (e.g. a commodity trust). Reported as
    /// profile-only; the NPORT-P probe is skipped for these.
    trust_shaped,
    /// Plain operating company: reported as not-a-fund.
    operating,
};
/// Routing decision for `fetchEtfMetrics`: which lookup path a symbol
/// takes, derived from the ticker maps it appears in. Produced by
/// `resolveMapPrecedence`.
const MapResolution = enum {
    /// Follow the MF/ETF path, keyed by the fund series the symbol is
    /// filed under.
    mf_series,
    /// The symbol appears only in the company map: fetch the company
    /// submissions feed and classify the entity from it.
    probe_company,
    /// The symbol appears in both maps, but the company side is an
    /// operating company - a recycled ticker. Report it as not-a-fund.
    recycled_not_a_fund,
    /// The symbol appears in neither ticker map.
    not_in_edgar,
};
/// Decide which lookup path `fetchEtfMetrics` takes for a symbol. Pure:
/// no I/O and no allocation, so it can be unit-tested in isolation.
///
/// - `in_mf`: the symbol appears in the MF/ETF ticker map.
/// - `in_co`: the symbol appears in the company ticker map.
/// - `co_class`: classification of the company side. It is read only
///   when the symbol is in BOTH maps, so callers need not compute it
///   otherwise.
///
/// The case that matters is a both-maps collision. SEC's
/// `company_tickers_mf.json` lags behind ticker reassignments, so a
/// stale fund-series entry can sit next to an operating company that
/// now owns the ticker (e.g. SPCX: SpaceX's 2026 IPO took the ticker of
/// the defunct Tuttle "SPAC and New Issue ETF"). Outcomes:
///
/// - company side is `.operating`: the ticker belongs to the company
///   now, so the stale series must not be reported as a fund.
/// - company side is fund- or trust-shaped: keep the more precise
///   series-keyed MF path.
/// - `co_class` is null (probe failed or was not run): fall back to the
///   MF path, which is the behavior from before this guard existed.
fn resolveMapPrecedence(in_mf: bool, in_co: bool, co_class: ?EntityClass) MapResolution {
    // Not in the MF map: only company-map membership matters.
    if (!in_mf) return if (in_co) MapResolution.probe_company else MapResolution.not_in_edgar;

    // In the MF map only: the series path is unambiguous.
    if (!in_co) return .mf_series;

    // In both maps. With no company classification, trust the MF map.
    const company_side = co_class orelse return .mf_series;
    return switch (company_side) {
        .operating => .recycled_not_a_fund,
        .fund_shaped, .trust_shaped => .mf_series,
    };
}
test "resolveMapPrecedence: ticker-map membership decides the path" {
    const Case = struct {
        in_mf: bool,
        in_co: bool,
        co_class: ?EntityClass,
        want: MapResolution,
    };

    const cases = [_]Case{
        // Symbol in at most one map: co_class plays no part.
        .{ .in_mf = true, .in_co = false, .co_class = null, .want = .mf_series },
        .{ .in_mf = false, .in_co = true, .co_class = null, .want = .probe_company },
        .{ .in_mf = false, .in_co = false, .co_class = null, .want = .not_in_edgar },
        // Both maps with an operating company side: the ticker was
        // recycled (SPCX/SpaceX), so it is not a fund.
        .{ .in_mf = true, .in_co = true, .co_class = .operating, .want = .recycled_not_a_fund },
        // Both maps with a fund- or trust-shaped company side: the
        // series-keyed MF path stays the more precise source.
        .{ .in_mf = true, .in_co = true, .co_class = .fund_shaped, .want = .mf_series },
        .{ .in_mf = true, .in_co = true, .co_class = .trust_shaped, .want = .mf_series },
        // Both maps but the company probe failed (null): trust the MF
        // map, as before the guard existed.
        .{ .in_mf = true, .in_co = true, .co_class = null, .want = .mf_series },
    };

    for (cases) |case| {
        const got = resolveMapPrecedence(case.in_mf, case.in_co, case.co_class);
        try std.testing.expectEqual(case.want, got);
    }
}
/// Classify a CIK based on its submissions-feed metadata. Decides
/// whether the symbol is a registered fund (probe NPORT-P), a
/// trust/ETN-style instrument (profile-only), or a plain operating
@ -1258,11 +1358,7 @@ fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const
/// that distribute rental income, not registered investment
/// companies. They get bucketed under `operating` - Wikidata is
/// the right source for them.
fn classifyByEntityType(sub: *const SubmissionsSummary) enum {
fund_shaped,
trust_shaped,
operating,
} {
fn classifyByEntityType(sub: *const SubmissionsSummary) EntityClass {
// Rule 1: NPORT-P presence is the strongest fund signal.
if (sub.latest_nport_p_url != null) return .fund_shaped;
@ -1364,6 +1460,16 @@ test "classifyByEntityType buckets real-world entities" {
s.sic_description = try T.allocator.dupe(u8, "Real Estate Investment Trusts");
try T.expectEqual(.operating, classifyByEntityType(&s));
}
// SPCX/SpaceX after its 2026 IPO: operating company, no NPORT-P,
// aerospace SIC. Must be `operating` so the ticker-recycling guard
// can override the stale "SPAC and New Issue ETF" fund-map entry.
{
var s: SubmissionsSummary = .{};
defer s.deinit(T.allocator);
s.entity_type = try T.allocator.dupe(u8, "operating");
s.sic_description = try T.allocator.dupe(u8, "Guided Missiles & Space Vehicles & Parts");
try T.expectEqual(.operating, classifyByEntityType(&s));
}
}
/// Result kind for `fetchEtfMetrics`. The caller - see `main.zig` -