add edgar guard for recycled ticker symbols (SPCX IPO reused ticker from an ETF)
This commit is contained in:
parent
2bd49af8f3
commit
3abce454dc
1 changed files with 164 additions and 58 deletions
|
|
@ -155,6 +155,8 @@ const RateLimiter = @import("../net/RateLimiter.zig");
|
|||
const fmt = @import("../format.zig");
|
||||
const xml = @import("xml.zig");
|
||||
|
||||
const log = std.log.scoped(.edgar);
|
||||
|
||||
const tickers_funds_url = "https://www.sec.gov/files/company_tickers_mf.json";
|
||||
const tickers_companies_url = "https://www.sec.gov/files/company_tickers.json";
|
||||
const search_url_prefix = "https://efts.sec.gov/LATEST/search-index?";
|
||||
|
|
@ -336,63 +338,94 @@ pub fn fetchEtfMetrics(
|
|||
symbol: []const u8,
|
||||
top_n_holdings: usize,
|
||||
) !EtfMetricsResult {
|
||||
// MF/ETF map first - authoritative for symbols filed under a
|
||||
// series. Series-keyed full-text search; CIK fallback would
|
||||
// yield arbitrary other series under the same trust.
|
||||
if (mf_ticker_map.get(symbol)) |entry| {
|
||||
const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
|
||||
return .not_a_fund;
|
||||
};
|
||||
defer allocator.free(filing_url);
|
||||
const m = try self.fetchAndParseNportP(
|
||||
io,
|
||||
allocator,
|
||||
entry.toGeneric(),
|
||||
filing_url,
|
||||
symbol,
|
||||
top_n_holdings,
|
||||
);
|
||||
return .{ .full = m };
|
||||
}
|
||||
const mf_entry = mf_ticker_map.get(symbol);
|
||||
const co_entry = stock_ticker_map.get(symbol);
|
||||
|
||||
// Stock map: probe the submissions feed (one extra HTTP per
|
||||
// unique CIK) to classify the entity. Branches:
|
||||
// - fund_shaped + has NPORT-P -> full holdings (SPY)
|
||||
// - fund_shaped + no NPORT-P -> profile-only (SLVO ETN issuer)
|
||||
// - trust_shaped -> profile-only (GLD commodity)
|
||||
// - operating -> not-a-fund (AAPL, MSFT)
|
||||
if (stock_ticker_map.get(symbol)) |entry| {
|
||||
var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
|
||||
defer sub.deinit(allocator);
|
||||
|
||||
const class = classifyByEntityType(&sub);
|
||||
switch (class) {
|
||||
.operating => return .not_a_fund,
|
||||
.fund_shaped => {
|
||||
if (sub.latest_nport_p_url) |url| {
|
||||
const m = try self.fetchAndParseNportP(
|
||||
io,
|
||||
allocator,
|
||||
entry.toGeneric(),
|
||||
url,
|
||||
symbol,
|
||||
top_n_holdings,
|
||||
);
|
||||
return .{ .full = m };
|
||||
}
|
||||
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
|
||||
return .{ .profile_only = profile };
|
||||
},
|
||||
.trust_shaped => {
|
||||
// Skip the NPORT-P probe - by definition these
|
||||
// don't file one. Saves an HTTP roundtrip.
|
||||
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
|
||||
return .{ .profile_only = profile };
|
||||
},
|
||||
// Ticker-recycling guard: when a symbol is in BOTH ticker maps,
|
||||
// classify the company side up front. SEC's
|
||||
// `company_tickers_mf.json` is slow to drop a ticker after it's
|
||||
// reassigned, so a stale fund-series mapping can collide with an
|
||||
// operating company that now owns the ticker (e.g. SPCX: SpaceX's
|
||||
// 2026 IPO took over the defunct Tuttle "SPAC and New Issue ETF"
|
||||
// ticker). The probe is best-effort - on failure we leave
|
||||
// `co_class` null and `resolveMapPrecedence` trusts the MF map, so
|
||||
// a flaky company-feed fetch can't regress a both-map symbol.
|
||||
var co_sub: ?SubmissionsSummary = null;
|
||||
defer if (co_sub) |*s| s.deinit(allocator);
|
||||
var co_class: ?EntityClass = null;
|
||||
if (mf_entry != null and co_entry != null) {
|
||||
if (self.fetchSubmissionsFeed(allocator, co_entry.?.cik)) |sub| {
|
||||
co_sub = sub;
|
||||
co_class = classifyByEntityType(&co_sub.?);
|
||||
} else |err| {
|
||||
log.warn("{s}: recycled-ticker company probe failed, trusting MF map: {s}", .{ symbol, @errorName(err) });
|
||||
}
|
||||
}
|
||||
|
||||
return .not_in_edgar;
|
||||
switch (resolveMapPrecedence(mf_entry != null, co_entry != null, co_class)) {
|
||||
// A ticker now owned by an operating company: do not report the
|
||||
// stale fund series as a fund.
|
||||
.recycled_not_a_fund => return .not_a_fund,
|
||||
|
||||
// MF/ETF map - authoritative for symbols filed under a series.
|
||||
// Series-keyed full-text search; a CIK fallback would yield
|
||||
// arbitrary other series under the same trust.
|
||||
.mf_series => {
|
||||
const entry = mf_entry.?;
|
||||
const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
|
||||
return .not_a_fund;
|
||||
};
|
||||
defer allocator.free(filing_url);
|
||||
const m = try self.fetchAndParseNportP(
|
||||
io,
|
||||
allocator,
|
||||
entry.toGeneric(),
|
||||
filing_url,
|
||||
symbol,
|
||||
top_n_holdings,
|
||||
);
|
||||
return .{ .full = m };
|
||||
},
|
||||
|
||||
// Stock map only: probe the submissions feed (one extra HTTP
|
||||
// per unique CIK) to classify the entity. Branches:
|
||||
// - fund_shaped + has NPORT-P -> full holdings (SPY)
|
||||
// - fund_shaped + no NPORT-P -> profile-only (SLVO ETN issuer)
|
||||
// - trust_shaped -> profile-only (GLD commodity)
|
||||
// - operating -> not-a-fund (AAPL, MSFT)
|
||||
.probe_company => {
|
||||
const entry = co_entry.?;
|
||||
var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
|
||||
defer sub.deinit(allocator);
|
||||
|
||||
switch (classifyByEntityType(&sub)) {
|
||||
.operating => return .not_a_fund,
|
||||
.fund_shaped => {
|
||||
if (sub.latest_nport_p_url) |url| {
|
||||
const m = try self.fetchAndParseNportP(
|
||||
io,
|
||||
allocator,
|
||||
entry.toGeneric(),
|
||||
url,
|
||||
symbol,
|
||||
top_n_holdings,
|
||||
);
|
||||
return .{ .full = m };
|
||||
}
|
||||
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
|
||||
return .{ .profile_only = profile };
|
||||
},
|
||||
.trust_shaped => {
|
||||
// Skip the NPORT-P probe - by definition these
|
||||
// don't file one. Saves an HTTP roundtrip.
|
||||
const profile = try buildProfileOnlyMetrics(io, allocator, entry.toGeneric(), &sub, symbol);
|
||||
return .{ .profile_only = profile };
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
.not_in_edgar => return .not_in_edgar,
|
||||
}
|
||||
}
|
||||
|
||||
/// Download and parse a NPORT-P primary_doc.xml at `filing_url`.
|
||||
|
|
@ -1221,6 +1254,73 @@ fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const
|
|||
);
|
||||
}
|
||||
|
||||
/// Bucket that `classifyByEntityType` assigns to an EDGAR entity after
/// inspecting its submissions feed.
const EntityClass = enum {
    /// Registered-fund shape: full holdings when an NPORT-P filing is
    /// available, profile-only otherwise.
    fund_shaped,
    /// Trust/ETN-style instrument: profile-only, no NPORT-P probe.
    trust_shaped,
    /// Plain operating company: reported as not-a-fund.
    operating,
};
|
||||
|
||||
/// Outcome of `resolveMapPrecedence`: the branch `fetchEtfMetrics`
/// follows for a symbol, chosen from which ticker maps contain it.
const MapResolution = enum {
    /// Follow the series-keyed MF/ETF path; the symbol is filed under
    /// a fund series.
    mf_series,
    /// Symbol is only in the company map: fetch the company
    /// submissions feed and classify the entity from it.
    probe_company,
    /// Symbol is in both maps, but the company side is an operating
    /// company, i.e. the ticker was recycled. Report not-a-fund.
    recycled_not_a_fund,
    /// Neither ticker map knows the symbol.
    not_in_edgar,
};
|
||||
|
||||
/// Decide which lookup path `fetchEtfMetrics` takes for a symbol. Pure
/// function: no I/O, no allocation.
///
/// - `in_mf`: the symbol is present in the MF/ETF ticker map.
/// - `in_co`: the symbol is present in the company ticker map.
/// - `co_class`: classification of the company-side entity. It is read
///   only when both flags are true, so callers need not compute it
///   otherwise. Null means the probe failed or was never run.
///
/// The case that matters is a symbol found in both maps. SEC's
/// `company_tickers_mf.json` is slow to reflect ticker reassignments,
/// so a stale fund-series entry can sit alongside the operating company
/// that now owns the ticker (e.g. SPCX: SpaceX's 2026 IPO reused the
/// ticker of the defunct Tuttle "SPAC and New Issue ETF"). An operating
/// company side therefore wins and yields `.recycled_not_a_fund`. A
/// fund- or trust-shaped company side keeps the more precise series
/// path, and so does a null `co_class`, which preserves the behavior
/// from before this guard existed.
fn resolveMapPrecedence(in_mf: bool, in_co: bool, co_class: ?EntityClass) MapResolution {
    // No MF/ETF entry: company-map membership alone decides.
    if (!in_mf) return if (in_co) .probe_company else .not_in_edgar;

    // MF/ETF entry with no company-side collision.
    if (!in_co) return .mf_series;

    // Present in both maps. Without a classification, trust the MF map.
    const company_class = co_class orelse return .mf_series;
    return switch (company_class) {
        .operating => .recycled_not_a_fund,
        .fund_shaped, .trust_shaped => .mf_series,
    };
}
|
||||
|
||||
test "resolveMapPrecedence: ticker-map membership decides the path" {
    const Case = struct {
        in_mf: bool,
        in_co: bool,
        co_class: ?EntityClass,
        want: MapResolution,
    };

    const cases = [_]Case{
        // Symbol in at most one map: co_class plays no part.
        .{ .in_mf = true, .in_co = false, .co_class = null, .want = .mf_series },
        .{ .in_mf = false, .in_co = true, .co_class = null, .want = .probe_company },
        .{ .in_mf = false, .in_co = false, .co_class = null, .want = .not_in_edgar },

        // In both maps with an operating company side: the ticker was
        // recycled (SPCX/SpaceX), so it is not a fund.
        .{ .in_mf = true, .in_co = true, .co_class = .operating, .want = .recycled_not_a_fund },

        // In both maps with a fund/trust company side: the series-keyed
        // MF path stays the more precise source.
        .{ .in_mf = true, .in_co = true, .co_class = .fund_shaped, .want = .mf_series },
        .{ .in_mf = true, .in_co = true, .co_class = .trust_shaped, .want = .mf_series },

        // In both maps but the probe failed (null): fall back to the MF
        // map, as before the guard existed.
        .{ .in_mf = true, .in_co = true, .co_class = null, .want = .mf_series },
    };

    for (cases) |case| {
        const got = resolveMapPrecedence(case.in_mf, case.in_co, case.co_class);
        try std.testing.expectEqual(case.want, got);
    }
}
|
||||
|
||||
/// Classify a CIK based on its submissions-feed metadata. Decides
|
||||
/// whether the symbol is a registered fund (probe NPORT-P), a
|
||||
/// trust/ETN-style instrument (profile-only), or a plain operating
|
||||
|
|
@ -1258,11 +1358,7 @@ fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const
|
|||
/// that distribute rental income, not registered investment
|
||||
/// companies. They get bucketed under `operating` - Wikidata is
|
||||
/// the right source for them.
|
||||
fn classifyByEntityType(sub: *const SubmissionsSummary) enum {
|
||||
fund_shaped,
|
||||
trust_shaped,
|
||||
operating,
|
||||
} {
|
||||
fn classifyByEntityType(sub: *const SubmissionsSummary) EntityClass {
|
||||
// Rule 1: NPORT-P presence is the strongest fund signal.
|
||||
if (sub.latest_nport_p_url != null) return .fund_shaped;
|
||||
|
||||
|
|
@ -1364,6 +1460,16 @@ test "classifyByEntityType buckets real-world entities" {
|
|||
s.sic_description = try T.allocator.dupe(u8, "Real Estate Investment Trusts");
|
||||
try T.expectEqual(.operating, classifyByEntityType(&s));
|
||||
}
|
||||
// SPCX/SpaceX after its 2026 IPO: operating company, no NPORT-P,
|
||||
// aerospace SIC. Must be `operating` so the ticker-recycling guard
|
||||
// can override the stale "SPAC and New Issue ETF" fund-map entry.
|
||||
{
|
||||
var s: SubmissionsSummary = .{};
|
||||
defer s.deinit(T.allocator);
|
||||
s.entity_type = try T.allocator.dupe(u8, "operating");
|
||||
s.sic_description = try T.allocator.dupe(u8, "Guided Missiles & Space Vehicles & Parts");
|
||||
try T.expectEqual(.operating, classifyByEntityType(&s));
|
||||
}
|
||||
}
|
||||
|
||||
/// Result kind for `fetchEtfMetrics`. The caller - see `main.zig` -
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue