diff --git a/README.md b/README.md
index b75c2fe..b312632 100644
--- a/README.md
+++ b/README.md
@@ -922,6 +922,20 @@ zig build run -- <args> # build and run
 
 The compiled binary is at `zig-out/bin/zfin`.
 
+## Vendored code
+
+A small amount of third-party source is vendored directly into the
+tree (rather than added as a Zig package dependency) where the
+upstream is small, stable, and not packaged for `build.zig.zon`:
+
+|          File           |                                                                                  Source                                                                                                 |                                         Purpose                                           |
+|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------|
+| `src/providers/xml.zig` | [Snektron/vulkan-zig](https://github.com/Snektron/vulkan-zig/blob/797ae8af88e84753af9640266de61a985b76b580/generator/xml.zig), via [aws-zig](https://github.com/elerch/aws-sdk-for-zig) | XML DOM parser used by the EDGAR provider for NPORT-P primary documents. |
+
+Each vendored file carries a `// VENDORED - see README.md` header
+identifying its upstream source. When updating, copy the new
+upstream verbatim and re-add the header.
+
 ## License
 
 MIT
diff --git a/build.zig.zon b/build.zig.zon
index a711d4d..b4b08cb 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -13,8 +13,8 @@
             .hash = "z2d-0.11.0-j5P_HtLzDwBGyQt49DrT0v4BuVqI_SRs6CXsuj7eBVhR",
         },
         .srf = .{
-            .url = "git+https://git.lerch.org/lobo/srf.git?ref=master#512eab0db082f1679af4de77b1f1713409766fcf",
-            .hash = "srf-0.0.0-qZj57-7CAQBdAFgdiSB2bE5Socq8QNId8PFzynVQbSUN",
+            .url = "git+https://git.lerch.org/lobo/srf#12b755660e96ed65c645975110214fcc9c66ca4d",
+            .hash = "srf-0.0.0-qZj5743KAQAykeIHzFJdRDwgAA-Yy1RLaj0Lw4W5Rphx",
         },
     },
     .paths = .{
diff --git a/src/Config.zig b/src/Config.zig
index 0f580b9..3250650 100644
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -38,6 +38,11 @@ fmp_key: ?[]const u8 = null,
 alphavantage_key: ?[]const u8 = null,
 tiingo_key: ?[]const u8 = null,
 openfigi_key: ?[]const u8 = null,
+/// User contact email used as the User-Agent / From header for
+/// open-data providers that require politeness identification
+/// (Wikidata SPARQL, EDGAR). No API-key authentication semantics —
+/// just identifies the operator. Sourced from `ZFIN_USER_EMAIL`.
+user_email: ?[]const u8 = null,
 /// URL of a zfin-server instance for lazy cache sync (e.g. "https://zfin.lerch.org")
 server_url: ?[]const u8 = null,
 cache_dir: []const u8,
@@ -92,6 +97,7 @@ pub fn fromEnv(io: std.Io, allocator: std.mem.Allocator, environ_map: *const std
     self.alphavantage_key = self.resolve("ALPHAVANTAGE_API_KEY");
     self.tiingo_key = self.resolve("TIINGO_API_KEY");
     self.openfigi_key = self.resolve("OPENFIGI_API_KEY");
+    self.user_email = self.resolve("ZFIN_USER_EMAIL");
     self.server_url = self.resolve("ZFIN_SERVER");
 
     const env_cache = self.resolve("ZFIN_CACHE_DIR");
diff --git a/src/cache/store.zig b/src/cache/store.zig
index 5ec4159..2c91041 100644
--- a/src/cache/store.zig
+++ b/src/cache/store.zig
@@ -63,6 +63,15 @@ pub const Ttl = struct {
     /// Refreshes on quarterly filing cadence; 30-day TTL gives a
     /// fortnightly margin around each fiscal-quarter boundary.
     pub const entity_facts: i64 = 30 * s_per_day;
+
+    /// EDGAR ticker-map indexes (`company_tickers.json` and the MF
+    /// equivalent). SEC updates these daily upstream, but the
+    /// ticker→CIK mapping is extremely stable (changes are rare
+    /// rename events). 30-day TTL with jitter keeps the load
+    /// reasonable while still picking up new listings within a
+    /// month.
+    pub const tickers_funds: i64 = 30 * s_per_day;
+    pub const tickers_companies: i64 = 30 * s_per_day;
 };
 
 /// Cache TTL specification with optional per-key expiration jitter.
@@ -175,6 +184,15 @@ pub const DataType = enum {
     /// symbol-keyed, so a single dual-class issuer (BRK.A / BRK.B)
     /// has one shared facts file.
     entity_facts,
+    /// EDGAR's `company_tickers_mf.json` index, cached at
+    /// `<cache_dir>/_edgar/tickers_funds.srf`. Single-record file
+    /// (one MutualFundTickerMapBlob) under a synthetic `_edgar` key.
+    /// Updated daily upstream; refreshes monthly with jitter.
+    tickers_funds,
+    /// EDGAR's `company_tickers.json` index, cached at
+    /// `<cache_dir>/_edgar/tickers_companies.srf`. Same shape as
+    /// `tickers_funds`.
+    tickers_companies,
 
     pub fn fileName(self: DataType) []const u8 {
         return switch (self) {
@@ -189,6 +207,8 @@ pub const DataType = enum {
             .classification => "classification.srf",
             .etf_metrics => "etf_metrics.srf",
             .entity_facts => "entity_facts.srf",
+            .tickers_funds => "tickers_funds.srf",
+            .tickers_companies => "tickers_companies.srf",
         };
     }
 
@@ -202,6 +222,8 @@ pub const DataType = enum {
             .classification => Ttl.classification,
             .etf_metrics => Ttl.etf_metrics,
             .entity_facts => Ttl.entity_facts,
+            .tickers_funds => Ttl.tickers_funds,
+            .tickers_companies => Ttl.tickers_companies,
             .candles_daily, .candles_meta, .meta => 0,
         };
     }
@@ -2331,6 +2353,9 @@ test "TTL constants are reasonable" {
     try std.testing.expectEqual(@as(i64, 90 * std.time.s_per_day), Ttl.classification);
     try std.testing.expectEqual(@as(i64, 90 * std.time.s_per_day), Ttl.etf_metrics);
     try std.testing.expectEqual(@as(i64, 30 * std.time.s_per_day), Ttl.entity_facts);
+    // EDGAR ticker-map indexes refresh monthly with jitter.
+    try std.testing.expectEqual(@as(i64, 30 * std.time.s_per_day), Ttl.tickers_funds);
+    try std.testing.expectEqual(@as(i64, 30 * std.time.s_per_day), Ttl.tickers_companies);
 }
 
 test "DataType.ttl returns correct values" {
@@ -2342,6 +2367,8 @@ test "DataType.ttl returns correct values" {
     try std.testing.expectEqual(Ttl.classification, DataType.classification.ttl());
     try std.testing.expectEqual(Ttl.etf_metrics, DataType.etf_metrics.ttl());
     try std.testing.expectEqual(Ttl.entity_facts, DataType.entity_facts.ttl());
+    try std.testing.expectEqual(Ttl.tickers_funds, DataType.tickers_funds.ttl());
+    try std.testing.expectEqual(Ttl.tickers_companies, DataType.tickers_companies.ttl());
 
     // These types have no TTL (0 = managed elsewhere)
     try std.testing.expectEqual(@as(i64, 0), DataType.candles_daily.ttl());
@@ -2361,6 +2388,8 @@ test "DataType.fileName returns correct file names" {
     try std.testing.expectEqualStrings("classification.srf", DataType.classification.fileName());
     try std.testing.expectEqualStrings("etf_metrics.srf", DataType.etf_metrics.fileName());
     try std.testing.expectEqualStrings("entity_facts.srf", DataType.entity_facts.fileName());
+    try std.testing.expectEqualStrings("tickers_funds.srf", DataType.tickers_funds.fileName());
+    try std.testing.expectEqualStrings("tickers_companies.srf", DataType.tickers_companies.fileName());
 }
 
 test "negative_cache_content format" {
diff --git a/src/main.zig b/src/main.zig
index 966b9f1..2b3df0f 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -721,4 +721,10 @@ test "looksLikeUnquotedGlob: empty arg returns false" {
 
 test {
     std.testing.refAllDecls(@This());
+    // Wikidata and EDGAR providers aren't yet imported via
+    // `service.zig`; pull them in here for test discovery in the
+    // meantime. Drop these once the providers are wired through
+    // the data service.
+    _ = @import("providers/Wikidata.zig");
+    _ = @import("providers/Edgar.zig");
 }
diff --git a/src/providers/Edgar.zig b/src/providers/Edgar.zig
new file mode 100644
index 0000000..f0aa602
--- /dev/null
+++ b/src/providers/Edgar.zig
@@ -0,0 +1,1826 @@
+//! EDGAR provider — SEC's electronic filing system as a data source.
+//!
+//! ## What this provider does
+//!
+//! Given a stock or fund symbol, EDGAR can answer:
+//!
+//!   * "What's this fund made of?" — the latest portfolio holdings,
+//!     sector breakdown, and net assets, parsed from the fund's most
+//!     recent NPORT-P filing.
+//!   * "How many shares does this company have outstanding?" — read
+//!     from XBRL-tagged fields on the company's most recent 10-K /
+//!     10-Q / 40-F cover page. Combined with a price quote (from
+//!     elsewhere) this gives market cap.
+//!   * "Where in EDGAR does this symbol live?" — symbol → CIK
+//!     lookup via SEC's two ticker-map indexes.
+//!
+//! ## Workflow when a caller asks about one symbol
+//!
+//! Symbols don't carry CIKs, so the first step is always a
+//! ticker-map lookup. From there the path forks:
+//!
+//!   AAPL (operating company)
+//!     1. Look up "AAPL" in the company ticker map → CIK 320193.
+//!     2. Fetch the submissions feed for CIK 320193 → entityType
+//!        "operating", no NPORT-P. Classify as `not_a_fund`.
+//!     3. (Optional) fetch shares-outstanding from the XBRL
+//!        companyconcept endpoint for use in market cap math.
+//!
+//!   VTI (mutual-fund-trust ETF)
+//!     1. Look up "VTI" in the mutual-fund ticker map → CIK 36405,
+//!        seriesId S000002848.
+//!     2. Run the EDGAR full-text search for that seriesId, filtered
+//!        to NPORT-P. Get the URL of the most recent filing.
+//!     3. Download the NPORT-P primary_doc.xml. Parse profile,
+//!        sectors, holdings.
+//!
+//!   SPY (unit-investment-trust ETF)
+//!     1. Not in mutual-fund ticker map. Look up "SPY" in the
+//!        company ticker map → CIK 884394.
+//!     2. Fetch the submissions feed → entityType "other", has a
+//!        NPORT-P at trust-CIK level (UITs don't have a seriesId).
+//!     3. Download that NPORT-P. Parse like a fund.
+//!
+//!   GLD (commodity trust)
+//!     1. Not in mutual-fund ticker map. Look up "GLD" in the
+//!        company ticker map → CIK 1222333.
+//!     2. Submissions feed → entityType "operating", SIC describes a
+//!        commodity trust. No NPORT-P. Return profile-only metrics
+//!        (the trust exists but has no portfolio to disclose).
+//!
+//! ## Glossary
+//!
+//!   CIK         Central Index Key. SEC's primary identifier for a
+//!               filer. 10-digit zero-padded number; we normalize to
+//!               that shape at the boundary so all callers can
+//!               assume it.
+//!   NPORT-P     Form NPORT-P (public). Quarterly portfolio
+//!               disclosure filed by registered investment companies
+//!               (mutual funds, most ETFs). Contains every position,
+//!               aggregated valuation, and asset/issuer classifiers.
+//!   10-K        Annual report filed by US-domiciled operating
+//!               companies. Cover page carries shares-outstanding.
+//!   10-Q        Quarterly equivalent of 10-K.
+//!   40-F        Annual report filed by Canadian companies that
+//!               participate in the SEC's MJDS regime. Same XBRL
+//!               cover-page fields as 10-K — the dei taxonomy
+//!               handles both. Barrick Mining, Shopify, etc.
+//!   20-F        Annual report filed by other foreign private
+//!               issuers (BP, Toyota, Sony, ...). Covers the same
+//!               financial-statement ground as 10-K but the SEC
+//!               doesn't require dei-tagged shares-outstanding here,
+//!               so the XBRL companyconcept endpoint returns 404 for
+//!               many of them. Caller treats this as "shares unknown."
+//!   XBRL        Structured-data tagging for SEC filings. Makes
+//!               specific fields (revenue, shares outstanding, etc.)
+//!               machine-readable across forms.
+//!   dei         Document and Entity Information — XBRL taxonomy for
+//!               cover-page metadata (entity name, registrant info,
+//!               shares outstanding). Cross-form, cross-jurisdiction.
+//!   us-gaap     XBRL taxonomy for US GAAP financial concepts.
+//!               Carries fallback shares-outstanding for dual-class
+//!               issuers (GOOGL, META) that don't tag the dei field.
+//!   UIT         Unit Investment Trust. A specific kind of fund
+//!               structure (e.g. SPY) that files NPORT-P at the
+//!               trust-CIK level rather than under a
+//!               series-of-trust seriesId like mutual funds do.
+//!   SIC         Standard Industrial Classification. Four-digit
+//!               industry code on the submissions feed; we use it to
+//!               distinguish commodity trusts (SIC 6221) from
+//!               operating companies (most other codes).
+//!
+//! ## SEC endpoints used
+//!
+//!   1. https://www.sec.gov/files/company_tickers_mf.json
+//!      Mutual fund and ETF ticker map: (ticker → CIK, seriesId,
+//!      classId). One file, ~3 MB.
+//!
+//!   2. https://www.sec.gov/files/company_tickers.json
+//!      Stocks and unit-investment-trust ETFs: (ticker → CIK,
+//!      title). One file, ~5 MB.
+//!
+//!   3. https://efts.sec.gov/LATEST/search-index?q=<seriesId>&forms=NPORT-P
+//!      Full-text search for NPORT-P filings referencing
+//!      `seriesId`. Necessary because the submissions feed only
+//!      lists at trust-CIK level — a trust hosting hundreds of
+//!      series would otherwise force us to download every NPORT-P
+//!      to find the one we want.
+//!
+//!   4. https://data.sec.gov/submissions/CIK{cik:0>10}.json
+//!      Per-CIK submissions feed. Carries entityType,
+//!      sicDescription, ticker list, and the most-recent NPORT-P URL
+//!      for UIT-style ETFs that lack a seriesId.
+//!
+//!   5. https://www.sec.gov/Archives/edgar/data/<CIK>/<ACC>/primary_doc.xml
+//!      The actual NPORT-P document. XML, ~50-100 MB depending on
+//!      fund size.
+//!
+//!   6. https://data.sec.gov/api/xbrl/companyconcept/CIK{cik:0>10}/{taxonomy}/{Concept}.json
+//!      XBRL companyconcept endpoint. Used for shares-outstanding
+//!      via `dei:EntityCommonStockSharesOutstanding` (single-class
+//!      issuers) with fallback to `us-gaap:CommonStockSharesOutstanding`
+//!      (dual-class issuers like GOOGL, META).
+//!
+//! ## Politeness
+//!
+//! SEC requires a descriptive User-Agent + From: header on every
+//! request, populated from `Config.user_email` (env
+//! `ZFIN_USER_EMAIL`). The provider takes the email as a non-null
+//! constructor argument; callers must surface a clear error if the
+//! env var is missing rather than letting requests go out
+//! un-identified. SEC's documented ceiling is 10 req/s per IP; we
+//! throttle at 8 req/s via a `RateLimiter`, leaving a 20% margin
+//! against timing jitter and retry bursts. A per-symbol loop over a
+//! typical portfolio reaches this ceiling quickly without it.
+//!
+//! ## Caching
+//!
+//! `Edgar` carries no cache state of its own. Every method does HTTP
+//! + parse and returns a typed result; the `DataService` layer
+//! writes the parsed results to the user-facing cache files
+//! (`classification.srf`, `etf_metrics.srf`, `entity_facts.srf`)
+//! and reads them back on subsequent calls.
+//!
+//! Ticker maps (`company_tickers*.json`) are the only upstream
+//! documents we cache verbatim through `Store` — typed
+//! `MutualFundTickerMapBlob` / `CompanyTickerMapBlob` records under a
+//! synthetic `_edgar` key — because each is a single whole-index
+//! file (updated daily upstream, refreshed here on a 30-day TTL)
+//! rather than a per-symbol resource. Everything else gets parsed
+//! into typed records and written to the user-facing per-symbol or
+//! per-CIK cache files.
+
+const std = @import("std");
+const http = @import("../net/http.zig");
+const RateLimiter = @import("../net/RateLimiter.zig");
+const fmt = @import("../format.zig");
+const xml = @import("xml.zig");
+
+const tickers_funds_url = "https://www.sec.gov/files/company_tickers_mf.json";
+const tickers_companies_url = "https://www.sec.gov/files/company_tickers.json";
+const search_url_prefix = "https://efts.sec.gov/LATEST/search-index?";
+
+// ── Edgar provider state ─────────────────────────────────────────
+//
+// File-as-struct: the file's top-level fields and methods together
+// form the `Edgar` provider. Callers do
+// `const Edgar = @import("providers/Edgar.zig");` followed by
+// `var ed = Edgar.init(...);` and `ed.fetchMutualFundTickerMap(...)`
+// etc.
+
+client: http.Client,
+/// Contact email for the User-Agent + From headers SEC requires on
+/// every request. Sourced from `Config.user_email`. Required, not
+/// optional: callers must surface a clear missing-config error
+/// before constructing this provider rather than letting requests
+/// go out un-identified.
+user_email: []const u8,
+/// Token-bucket throttle keeping us under SEC's 10 req/s ceiling.
+/// Sized at 8 req/s to leave a 20% margin against timing jitter and
+/// any retry bursts. Per-symbol fetch loops over a portfolio reach
+/// this ceiling quickly without it.
+rate_limiter: RateLimiter,
+allocator: std.mem.Allocator,
+
+const Edgar = @This();
+
+/// Construct an `Edgar` provider.
+///
+/// `user_email` is stored as given (the slice is not copied), so it
+/// must stay valid for as long as the provider is used. The rate
+/// limiter is set up with `8` and `std.time.ns_per_s`, matching the
+/// 8 req/s budget described on the `rate_limiter` field.
+pub fn init(io: std.Io, allocator: std.mem.Allocator, user_email: []const u8) Edgar {
+    const provider: Edgar = .{
+        .client = http.Client.init(io, allocator),
+        .user_email = user_email,
+        .rate_limiter = RateLimiter.init(io, 8, std.time.ns_per_s),
+        .allocator = allocator,
+    };
+    return provider;
+}
+
+/// Tear down the provider by deinitializing its HTTP client. The
+/// other fields (`user_email`, `rate_limiter`, `allocator`) are not
+/// touched here.
+pub fn deinit(self: *Edgar) void {
+    self.client.deinit();
+}
+
+/// Issue a GET to `url` with the identification headers SEC expects.
+///
+/// A rate-limit token is acquired before anything else. The
+/// User-Agent is formatted as `zfin/0.1 (<email>)` into a 256-byte
+/// stack buffer; if it does not fit, `error.UserEmailTooLong` is
+/// returned. `From` carries the bare email and `Accept-Encoding` is
+/// pinned to `identity`.
+fn httpGet(self: *Edgar, url: []const u8) !http.Response {
+    // Token first: the acquire happens even if formatting fails below.
+    self.rate_limiter.acquire();
+
+    var agent_storage: [256]u8 = undefined;
+    const agent = std.fmt.bufPrint(&agent_storage, "zfin/0.1 ({s})", .{self.user_email}) catch {
+        return error.UserEmailTooLong;
+    };
+
+    const request_headers = [_]std.http.Header{
+        .{ .name = "User-Agent", .value = agent },
+        .{ .name = "From", .value = self.user_email },
+        .{ .name = "Accept-Encoding", .value = "identity" },
+    };
+    return self.client.request(.GET, url, null, &request_headers);
+}
+
+/// Download SEC's mutual-fund/ETF ticker map
+/// (`company_tickers_mf.json`) and parse it with `parseTickerMap`.
+/// Each ticker maps to a CIK + seriesId + classId. The returned map
+/// is allocated with `allocator`; caching is left to the
+/// DataService.
+pub fn fetchMutualFundTickerMap(self: *Edgar, allocator: std.mem.Allocator) !TickerMap {
+    var response = try self.httpGet(tickers_funds_url);
+    defer response.deinit();
+
+    const body = response.body;
+    return parseTickerMap(allocator, body);
+}
+
+/// Download SEC's `company_tickers.json` and parse it with
+/// `parseStockTickerMap`. Despite the filename, this index is where
+/// anything that does not file under a series-of-trust shape is
+/// looked up: operating companies, unit-investment-trust ETFs such
+/// as SPY, and trusts such as GLD. The returned map is allocated
+/// with `allocator`.
+pub fn fetchCompanyTickerMap(self: *Edgar, allocator: std.mem.Allocator) !TickerMap {
+    var response = try self.httpGet(tickers_companies_url);
+    defer response.deinit();
+
+    const body = response.body;
+    return parseStockTickerMap(allocator, body);
+}
+
+/// Look up the newest NPORT-P filing that references `series_id`
+/// via EDGAR full-text search. The series id is sent as a quoted
+/// phrase (`%22…%22`) with the form filter `NPORT-P`. Yields null
+/// when the search has no matching filing; otherwise the caller
+/// owns the returned URL.
+pub fn findLatestNportP(
+    self: *Edgar,
+    allocator: std.mem.Allocator,
+    series_id: []const u8,
+) !?[]u8 {
+    const search_url = try std.fmt.allocPrint(
+        allocator,
+        "{s}q=%22{s}%22&forms=NPORT-P",
+        .{ search_url_prefix, series_id },
+    );
+    defer allocator.free(search_url);
+
+    var response = try self.httpGet(search_url);
+    defer response.deinit();
+
+    const body = response.body;
+    return parseLatestNportPFromSearch(allocator, body);
+}
+
+/// Find the most recent NPORT-P filing for a CIK. Used for UIT-style
+/// ETFs (SPY, etc.) that file at the trust-CIK level rather than a
+/// series. Returns null if the CIK has no NPORT-P. Caller owns the
+/// returned URL and frees it with `allocator`.
+pub fn findLatestNportPByCik(
+    self: *Edgar,
+    allocator: std.mem.Allocator,
+    cik: []const u8,
+) !?[]u8 {
+    var sub = try self.fetchSubmissionsFeed(allocator, cik);
+    // The summary is only needed for its URL; release everything
+    // else it holds instead of dropping it on the floor.
+    defer sub.deinit(allocator);
+
+    // Detach the URL before the deferred `deinit` runs so ownership
+    // moves to the caller and the summary no longer references it.
+    const url = sub.latest_nport_p_url;
+    sub.latest_nport_p_url = null;
+    return url;
+}
+
+/// Download `https://data.sec.gov/submissions/CIK<cik>.json` and
+/// hand the body to `parseSubmissionsFeed`. `cik` is left-padded
+/// with zeros to ten characters when building the URL, so both
+/// padded and unpadded inputs produce the same path.
+pub fn fetchSubmissionsFeed(
+    self: *Edgar,
+    allocator: std.mem.Allocator,
+    cik: []const u8,
+) !SubmissionsSummary {
+    const feed_url = try std.fmt.allocPrint(
+        allocator,
+        "https://data.sec.gov/submissions/CIK{s:0>10}.json",
+        .{cik},
+    );
+    defer allocator.free(feed_url);
+
+    var response = try self.httpGet(feed_url);
+    defer response.deinit();
+
+    const body = response.body;
+    return parseSubmissionsFeed(allocator, body, cik);
+}
+
+/// Resolve the latest shares-outstanding figure for `cik`.
+///
+/// Tries `dei:EntityCommonStockSharesOutstanding` first
+/// (single-class issuers); if that yields nothing, falls back to
+/// `us-gaap:CommonStockSharesOutstanding` (dual-class issuers such
+/// as GOOGL, META). Null means neither concept produced a value,
+/// e.g. a 404 for 20-F-only filers.
+pub fn fetchSharesOutstanding(
+    self: *Edgar,
+    allocator: std.mem.Allocator,
+    cik: []const u8,
+) !?SharesOutstanding {
+    const from_dei = try self.fetchSharesConcept(allocator, cik, "dei", "EntityCommonStockSharesOutstanding");
+    if (from_dei != null) return from_dei;
+
+    // Only reached when the dei lookup came back empty.
+    return self.fetchSharesConcept(allocator, cik, "us-gaap", "CommonStockSharesOutstanding");
+}
+
+/// Fetch one XBRL companyconcept document
+/// (`…/companyconcept/CIK<cik>/<taxonomy>/<concept>.json`) and
+/// parse it with `parseSharesOutstanding`. `cik` is zero-padded to
+/// ten characters in the URL. An `error.NotFound` from the HTTP
+/// layer is mapped to null; every other error propagates.
+fn fetchSharesConcept(
+    self: *Edgar,
+    allocator: std.mem.Allocator,
+    cik: []const u8,
+    taxonomy: []const u8,
+    concept: []const u8,
+) !?SharesOutstanding {
+    const concept_url = try std.fmt.allocPrint(
+        allocator,
+        "https://data.sec.gov/api/xbrl/companyconcept/CIK{s:0>10}/{s}/{s}.json",
+        .{ cik, taxonomy, concept },
+    );
+    defer allocator.free(concept_url);
+
+    var response = self.httpGet(concept_url) catch |fetch_err| {
+        // A missing concept is an expected outcome, not a failure.
+        if (fetch_err == error.NotFound) return null;
+        return fetch_err;
+    };
+    defer response.deinit();
+
+    const body = response.body;
+    return parseSharesOutstanding(allocator, body);
+}
+
+/// Fetch and parse N-PORT-P metrics for one ETF/MF ticker. The
+/// return value describes what was found (full holdings,
+/// profile-only, or not-a-fund). `top_n_holdings` caps holdings
+/// emitted (sorted by pctVal descending).
+///
+/// Lookup order: `mf_ticker_map` first, then `stock_ticker_map`.
+/// Result variants produced here:
+///   - `.full`         — an NPORT-P was downloaded and parsed.
+///   - `.profile_only` — entity classified as fund- or trust-shaped
+///                       but no NPORT-P was used.
+///   - `.not_a_fund`   — operating company, or a mutual-fund-map hit
+///                       whose series search returned no filing.
+///   - `.not_in_edgar` — `symbol` is in neither ticker map.
+///
+/// Any error from the HTTP or parse helpers propagates to the
+/// caller.
+pub fn fetchEtfMetrics(
+    self: *Edgar,
+    io: std.Io,
+    allocator: std.mem.Allocator,
+    mf_ticker_map: *const TickerMap,
+    stock_ticker_map: *const TickerMap,
+    symbol: []const u8,
+    top_n_holdings: usize,
+) !EtfMetricsResult {
+    // MF/ETF map first — authoritative for symbols filed under a
+    // series. Series-keyed full-text search; CIK fallback would
+    // yield arbitrary other series under the same trust.
+    if (mf_ticker_map.map.get(symbol)) |entry| {
+        // NOTE(review): `.?` assumes every entry in the mutual-fund
+        // map carries a series id. `parseTickerMap` always sets one,
+        // but a map built any other way would trip this unwrap.
+        const filing_url = (try self.findLatestNportP(allocator, entry.series_id.?)) orelse {
+            return .not_a_fund;
+        };
+        // The search result URL is owned here and only needed for
+        // the download below.
+        defer allocator.free(filing_url);
+        const m = try self.fetchAndParseNportP(io, allocator, &entry, filing_url, symbol, top_n_holdings);
+        return .{ .full = m };
+    }
+
+    // Stock map: probe the submissions feed (one extra HTTP per
+    // unique CIK) to classify the entity. Branches:
+    //   - fund_shaped  + has NPORT-P → full holdings (SPY)
+    //   - fund_shaped  + no NPORT-P  → profile-only (SLVO ETN issuer)
+    //   - trust_shaped               → profile-only (GLD commodity)
+    //   - operating                  → not-a-fund (AAPL, MSFT)
+    if (stock_ticker_map.map.get(symbol)) |entry| {
+        var sub = try self.fetchSubmissionsFeed(allocator, entry.cik);
+        // `sub` stays alive for every branch below and is released
+        // on the way out, including its NPORT-P URL.
+        defer sub.deinit(allocator);
+
+        const class = classifyByEntityType(&sub);
+        switch (class) {
+            .operating => return .not_a_fund,
+            .fund_shaped => {
+                if (sub.latest_nport_p_url) |url| {
+                    const m = try self.fetchAndParseNportP(io, allocator, &entry, url, symbol, top_n_holdings);
+                    return .{ .full = m };
+                }
+                const profile = try buildProfileOnlyMetrics(io, allocator, &entry, &sub, symbol);
+                return .{ .profile_only = profile };
+            },
+            .trust_shaped => {
+                // Skip the NPORT-P probe — by definition these
+                // don't file one. Saves an HTTP roundtrip.
+                const profile = try buildProfileOnlyMetrics(io, allocator, &entry, &sub, symbol);
+                return .{ .profile_only = profile };
+            },
+        }
+    }
+
+    // Symbol is in neither ticker map.
+    return .not_in_edgar;
+}
+
+/// Download the NPORT-P primary_doc.xml at `filing_url` and parse it
+/// with `parseNportP`. Shared by the MF and UIT paths in
+/// `fetchEtfMetrics`. Only the parsed `EtfMetrics` survives: the
+/// response (and its XML body) is released before returning, and
+/// nothing is cached inside the provider, so every call downloads
+/// again.
+fn fetchAndParseNportP(
+    self: *Edgar,
+    io: std.Io,
+    allocator: std.mem.Allocator,
+    entry: *const TickerEntry,
+    filing_url: []const u8,
+    symbol: []const u8,
+    top_n_holdings: usize,
+) !EtfMetrics {
+    var response = try self.httpGet(filing_url);
+    defer response.deinit();
+
+    const xml_bytes = response.body;
+    return parseNportP(io, allocator, xml_bytes, symbol, entry, top_n_holdings);
+}
+
+// ── Free types and helpers (no `self`) ───────────────────────────
+
+/// One sector bucket of a fund's portfolio. Both strings are owned
+/// by the containing `EtfMetrics` and released by its `deinit`.
+pub const SectorWeight = struct {
+    code: []const u8, // owned; raw NPORT-P code, e.g. "EC/CORP"
+    description: []const u8, // owned; human-readable, e.g. "Equity / Corporate"
+    // Weight of this bucket within the portfolio.
+    // NOTE(review): scale (0-100 vs 0-1) is set by the parser — confirm there.
+    pct_of_portfolio: f64,
+};
+
+/// One portfolio position from a fund filing. `name` is always
+/// present; the identifier fields are optional. All strings are
+/// owned by the containing `EtfMetrics` and released by its
+/// `deinit`.
+pub const Holding = struct {
+    name: []const u8, // owned
+    ticker: ?[]const u8 = null, // owned; present for some equity holdings
+    cusip: ?[]const u8 = null, // owned
+    lei: ?[]const u8 = null, // owned; ISO 17442 Legal Entity Identifier
+    country: ?[]const u8 = null, // owned; ISO-3166 alpha-2 from <invCountry>
+    // Weight of this position within the portfolio.
+    // NOTE(review): scale (0-100 vs 0-1) is set by the parser — confirm there.
+    pct_of_portfolio: f64,
+};
+
+/// Parsed fund metrics for one symbol: identifiers, optional
+/// profile fields, and the holdings / sector slices. Every string
+/// and both slices are owned by this value; call `deinit` with the
+/// allocator that produced them.
+pub const EtfMetrics = struct {
+    symbol: []const u8, // owned
+    series_name: ?[]const u8 = null, // owned
+    cik: []const u8, // owned
+    /// Null for unit-investment-trust ETFs (SPY, etc.) that file
+    /// NPORT-P at the trust-CIK level without a series identifier.
+    series_id: ?[]const u8 = null, // owned
+    net_assets: ?f64 = null,
+    period_end: ?[]const u8 = null, // owned
+    as_of: []const u8, // owned (date scraper ran)
+    holdings: []Holding, // owned
+    sectors: []SectorWeight, // owned
+
+    /// Free every owned string, then the `holdings` and `sectors`
+    /// slices themselves. The value must not be used afterwards.
+    pub fn deinit(self: *EtfMetrics, allocator: std.mem.Allocator) void {
+        // Scalar string fields; optional ones only when present.
+        allocator.free(self.symbol);
+        if (self.series_name) |name| allocator.free(name);
+        allocator.free(self.cik);
+        if (self.series_id) |id| allocator.free(id);
+        if (self.period_end) |end| allocator.free(end);
+        allocator.free(self.as_of);
+
+        // Per-holding strings, then the backing slice.
+        for (self.holdings) |holding| {
+            allocator.free(holding.name);
+            const optional_strings = [_]?[]const u8{
+                holding.ticker,
+                holding.cusip,
+                holding.lei,
+                holding.country,
+            };
+            for (optional_strings) |maybe_text| {
+                if (maybe_text) |text| allocator.free(text);
+            }
+        }
+        allocator.free(self.holdings);
+
+        // Per-sector strings, then the backing slice.
+        for (self.sectors) |sector| {
+            allocator.free(sector.code);
+            allocator.free(sector.description);
+        }
+        allocator.free(self.sectors);
+    }
+};
+
+/// The EDGAR identifiers recorded for one symbol in a `TickerMap`.
+/// All strings are owned by the map and released by
+/// `TickerMap.deinit`.
+pub const TickerEntry = struct {
+    cik: []const u8, // owned
+    /// Series identifier — present for ETFs/MFs filing as a series of a
+    /// trust (sourced from `company_tickers_mf.json`). Null for stocks
+    /// and unit-investment-trust ETFs (sourced from `company_tickers.json`),
+    /// which file at the trust-CIK level without a series.
+    series_id: ?[]const u8 = null, // owned
+    // Share-class identifier; `parseTickerMap` fills it from the
+    // mutual-fund map alongside `series_id`.
+    class_id: ?[]const u8 = null, // owned
+    /// Trust / company name from the ticker map. Useful as a friendly
+    /// label for symbols where Wikidata didn't surface anything.
+    title: ?[]const u8 = null, // owned
+};
+
+/// Symbol → `TickerEntry` lookup table. The map owns its keys and
+/// every string inside each entry; all of them were allocated with
+/// `allocator` and are released by `deinit`.
+pub const TickerMap = struct {
+    map: std.StringHashMap(TickerEntry),
+    allocator: std.mem.Allocator,
+
+    /// Free every key and entry string, then the map's own storage.
+    pub fn deinit(self: *TickerMap) void {
+        const gpa = self.allocator;
+
+        var entries = self.map.iterator();
+        while (entries.next()) |kv| {
+            const value = kv.value_ptr.*;
+            gpa.free(kv.key_ptr.*);
+            gpa.free(value.cik);
+            if (value.series_id) |series| gpa.free(series);
+            if (value.class_id) |class| gpa.free(class);
+            if (value.title) |title| gpa.free(title);
+        }
+
+        self.map.deinit();
+    }
+};
+
+/// Cache shape for the SEC's `company_tickers_mf.json` document.
+/// Held under a synthetic `_edgar` key in the typed `Store` (one
+/// record per cache file), which gives us:
+///   - `#!expires=` freshness via TtlSpec → DataType.tickers_funds
+///   - Atomic write + temp-file-rename via Store.writeRaw
+///   - SRF length-prefix encoding handles the JSON body's commas /
+///     newlines / `::` without escaping
+///
+/// The provider deserializes from a fresh-fetched HTTP response; the
+/// DataService writes to cache, reads back as `MutualFundTickerMapBlob`,
+/// and parses the `.json` field via `parseTickerMap`. The blob is
+/// the on-disk shape; `TickerMap` is the in-memory shape.
+pub const MutualFundTickerMapBlob = struct {
+    // Raw JSON text of the ticker-map document, stored unparsed.
+    json: []const u8, // owned (post-process duped in cache reads)
+};
+
+/// Cache shape for the SEC's `company_tickers.json` document.
+/// Same structure as `MutualFundTickerMapBlob`; the two are distinct
+/// types because `Store.dataTypeFor(T)` keys on Zig type, not on a
+/// string argument. The `.json` field is in the shape
+/// `parseStockTickerMap` expects.
+pub const CompanyTickerMapBlob = struct {
+    // Raw JSON text of the ticker-map document, stored unparsed.
+    json: []const u8, // owned (post-process duped in cache reads)
+};
+
+/// Parse the SEC's `company_tickers_mf.json` shape into a TickerMap.
+/// Exposed publicly so cache-hit paths in DataService can call this
+/// directly on bytes loaded from `Store`.
+///
+/// Expected shape: a top-level object whose `data` member is an
+/// array of rows, each row an array of at least four values:
+/// `[cik (integer), seriesId, classId, symbol]`. Rows that do not
+/// match (wrong types, too short, negative CIK) are skipped rather
+/// than failing the whole parse. When several rows share a symbol,
+/// the first one wins.
+///
+/// Returns `error.InvalidTickerMap` when the top level is not an
+/// object or `data` is missing / not an array; JSON syntax errors
+/// and allocation failures propagate. The returned map and all of
+/// its strings are allocated with `allocator`; release them with
+/// `TickerMap.deinit`.
+pub fn parseTickerMap(allocator: std.mem.Allocator, json_bytes: []const u8) !TickerMap {
+    var out: TickerMap = .{
+        .map = .init(allocator),
+        .allocator = allocator,
+    };
+    errdefer out.deinit();
+
+    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{});
+    defer parsed.deinit();
+
+    const root = switch (parsed.value) {
+        .object => |o| o,
+        else => return error.InvalidTickerMap,
+    };
+    const data_array = switch (root.get("data") orelse return error.InvalidTickerMap) {
+        .array => |a| a.items,
+        else => return error.InvalidTickerMap,
+    };
+
+    for (data_array) |row| {
+        const fields = switch (row) {
+            .array => |a| a.items,
+            else => continue,
+        };
+        if (fields.len < 4) continue;
+
+        // JSON integers arrive as i64. A negative value can never be
+        // a CIK, and an unchecked `@intCast` to u64 would be illegal
+        // behaviour on such input, so use a checked cast and skip
+        // the row instead.
+        const cik_n = switch (fields[0]) {
+            .integer => |n| std.math.cast(u64, n) orelse continue,
+            else => continue,
+        };
+        const series_id = switch (fields[1]) {
+            .string => |s| s,
+            else => continue,
+        };
+        const class_id = switch (fields[2]) {
+            .string => |s| s,
+            else => continue,
+        };
+        const symbol = switch (fields[3]) {
+            .string => |s| s,
+            else => continue,
+        };
+
+        // Multiple class IDs share a ticker — take the first.
+        // A more sophisticated rule (prefer lowest-cost class)
+        // would need expense-ratio data this provider doesn't
+        // currently load. Checking before allocating avoids four
+        // throwaway allocations per duplicate row.
+        if (out.map.contains(symbol)) continue;
+
+        // CIKs are normalized to 10-digit zero-padded strings at
+        // every boundary. Wikidata's P5531 uses this convention, so
+        // downstream merge logic can join on the same key shape.
+        // EDGAR ticker-map JSON delivers them as bare integers, so
+        // we pad here. The value is formatted as u64 because signed
+        // `{d:0>10}` reserves a slot for the sign character and
+        // produces "0000+36405".
+        const cik_str = try std.fmt.allocPrint(allocator, "{d:0>10}", .{cik_n});
+        errdefer allocator.free(cik_str);
+        const symbol_owned = try allocator.dupe(u8, symbol);
+        errdefer allocator.free(symbol_owned);
+        const series_owned = try allocator.dupe(u8, series_id);
+        errdefer allocator.free(series_owned);
+        const class_owned = try allocator.dupe(u8, class_id);
+        errdefer allocator.free(class_owned);
+
+        // The key was confirmed absent above. On success the map
+        // owns all four strings; on failure the errdefers above
+        // release them before `out.deinit()` runs.
+        try out.map.putNoClobber(symbol_owned, .{
+            .cik = cik_str,
+            .series_id = series_owned,
+            .class_id = class_owned,
+            .title = null,
+        });
+    }
+    return out;
+}
+
+/// Parser for the stocks-and-UITs `company_tickers.json` shape, which
+/// is keyed by integer-string indices rather than the array-of-arrays
+/// shape used by `company_tickers_mf.json`. Each entry has
+/// `cik_str`, `ticker`, `title`.
+///
+/// A non-object root yields `error.InvalidTickerMap`. Entries that are
+/// not objects, lack an integer `cik_str` or a string `ticker`, or
+/// carry a negative CIK are skipped. `title` is optional. When a
+/// ticker repeats, the first entry wins. Every string stored in the
+/// result is allocated with `allocator`; on error the partially built
+/// map is released here.
+pub fn parseStockTickerMap(allocator: std.mem.Allocator, json_bytes: []const u8) !TickerMap {
+    var out: TickerMap = .{
+        .map = .init(allocator),
+        .allocator = allocator,
+    };
+    errdefer out.deinit();
+
+    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{});
+    defer parsed.deinit();
+
+    const root = switch (parsed.value) {
+        .object => |o| o,
+        else => return error.InvalidTickerMap,
+    };
+
+    var it = root.iterator();
+    while (it.next()) |entry| {
+        const obj = switch (entry.value_ptr.*) {
+            .object => |o| o,
+            else => continue,
+        };
+        const cik_n = switch (obj.get("cik_str") orelse continue) {
+            .integer => |n| n,
+            else => continue,
+        };
+        // A negative CIK is malformed input. Use a checked cast and
+        // skip the entry: `@intCast` on an out-of-range value is
+        // illegal behavior rather than a recoverable error.
+        const cik_u = std.math.cast(u64, cik_n) orelse continue;
+        const symbol = switch (obj.get("ticker") orelse continue) {
+            .string => |s| s,
+            else => continue,
+        };
+        const title = if (obj.get("title")) |v| switch (v) {
+            .string => |s| s,
+            else => null,
+        } else null;
+
+        // CIKs are normalized to 10-digit zero-padded strings at
+        // every boundary. Wikidata's P5531 uses this convention, so
+        // downstream merge logic can join on the same key shape.
+        // EDGAR ticker-map JSON delivers them as bare integers, so
+        // we pad here. The value is formatted as u64 because signed
+        // `{d:0>10}` reserves a slot for the sign character and
+        // produces "0000+36405".
+        const cik_str = try std.fmt.allocPrint(allocator, "{d:0>10}", .{cik_u});
+        errdefer allocator.free(cik_str);
+        const symbol_owned = try allocator.dupe(u8, symbol);
+        errdefer allocator.free(symbol_owned);
+        const title_owned = if (title) |t| try allocator.dupe(u8, t) else null;
+        errdefer if (title_owned) |t| allocator.free(t);
+
+        const gop = try out.map.getOrPut(symbol_owned);
+        if (gop.found_existing) {
+            // Duplicate ticker: keep the entry already stored and
+            // drop the strings built for this one.
+            allocator.free(symbol_owned);
+            allocator.free(cik_str);
+            if (title_owned) |t| allocator.free(t);
+            continue;
+        }
+        // Nothing below can fail, so ownership of the strings passes
+        // to the map entry here.
+        gop.value_ptr.* = .{
+            .cik = cik_str,
+            .series_id = null,
+            .class_id = null,
+            .title = title_owned,
+        };
+    }
+    return out;
+}
+
+/// Condensed view of one CIK's `submissions/CIK*.json` feed. It keeps
+/// only the four fields callers branch on (entity name, entity type,
+/// SIC description, latest NPORT-P URL) so the full JSON never has to
+/// be re-parsed. Every non-null string is owned by this struct and was
+/// allocated with the caller's allocator; release them with `deinit`.
+pub const SubmissionsSummary = struct {
+    entity_name: ?[]u8 = null,
+    entity_type: ?[]u8 = null,
+    sic_description: ?[]u8 = null,
+    /// URL of the most recent NPORT-P primary_doc.xml, when one exists.
+    latest_nport_p_url: ?[]u8 = null,
+
+    /// Free every string that is set; unset fields are ignored.
+    pub fn deinit(self: *SubmissionsSummary, allocator: std.mem.Allocator) void {
+        const owned = [_]?[]u8{
+            self.entity_name,
+            self.entity_type,
+            self.sic_description,
+            self.latest_nport_p_url,
+        };
+        for (owned) |maybe_bytes| {
+            if (maybe_bytes) |bytes| allocator.free(bytes);
+        }
+    }
+};
+
+/// Reduce a `submissions/CIK*.json` payload to a `SubmissionsSummary`.
+///
+/// Copies the root-level `name`, `entityType`, and (when non-empty)
+/// `sicDescription` strings, then asks `findNportPUrlInSubmissions`
+/// for the latest NPORT-P URL using `cik`. A root that is not a JSON
+/// object produces an all-null summary instead of an error. On failure
+/// any strings copied so far are released before the error propagates.
+fn parseSubmissionsFeed(
+    allocator: std.mem.Allocator,
+    json_bytes: []const u8,
+    cik: []const u8,
+) !SubmissionsSummary {
+    var summary: SubmissionsSummary = .{};
+    errdefer summary.deinit(allocator);
+
+    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{});
+    defer parsed.deinit();
+
+    if (parsed.value != .object) return summary;
+    const root = parsed.value.object;
+
+    if (root.get("name")) |node| {
+        if (node == .string) summary.entity_name = try allocator.dupe(u8, node.string);
+    }
+    if (root.get("entityType")) |node| {
+        if (node == .string) summary.entity_type = try allocator.dupe(u8, node.string);
+    }
+    if (root.get("sicDescription")) |node| {
+        // An empty description is treated the same as a missing one.
+        if (node == .string and node.string.len > 0) {
+            summary.sic_description = try allocator.dupe(u8, node.string);
+        }
+    }
+
+    summary.latest_nport_p_url = try findNportPUrlInSubmissions(allocator, root, cik);
+    return summary;
+}
+
+/// Share count taken from the EDGAR XBRL companyconcept endpoint,
+/// using the `dei:EntityCommonStockSharesOutstanding` concept. The
+/// SEC's Document and Entity Information taxonomy requires that
+/// concept on the cover page of 10-K, 10-Q, 40-F, and similar forms.
+///
+/// `dei` is chosen over `us-gaap:CommonStockSharesOutstanding` because
+/// it also covers Canadian 40-F filers (e.g. Barrick Mining), which do
+/// not file under us-gaap. EU 20-F filers (e.g. BP) remain uncovered —
+/// they report in pure ifrs-full with no dei tagging — so callers have
+/// to tolerate a `null` result.
+///
+/// Fields:
+///   * `value` — share count from the most recent reporting period.
+///   * `period_end` — the `end` date the count is as-of, `YYYY-MM-DD`.
+///   * `form` — SEC form name (`10-K`, `10-Q`, `40-F`, etc.) that
+///     supplied the number; helps reason about staleness ("a 10-Q is
+///     3 months stale, a 40-F is 12 months stale").
+pub const SharesOutstanding = struct {
+    value: u64,
+    period_end: []u8, // owned
+    form: []u8, // owned
+
+    /// Release both owned strings.
+    pub fn deinit(self: *SharesOutstanding, allocator: std.mem.Allocator) void {
+        for ([_][]u8{ self.period_end, self.form }) |owned| {
+            allocator.free(owned);
+        }
+    }
+};
+
+/// One shares-outstanding row per symbol, shaped for SRF emission. It
+/// combines the CIK-level `SharesOutstanding` fetch result with the
+/// caller-provided `symbol` and `as_of`, so every emitted row carries
+/// the provenance downstream merge logic needs.
+///
+/// `source` deliberately has no default: provenance is always written
+/// (the project's source-pure invariant says every row in a shared
+/// classification file must identify the source that produced it).
+pub const SharesRecord = struct {
+    symbol: []u8, // owned
+    shares_outstanding: u64,
+    period_end: []u8, // owned, YYYY-MM-DD
+    form: ?[]u8 = null, // owned (e.g. "10-Q", "40-F")
+    cik: []u8, // owned
+    as_of: []u8, // owned (date scraper ran)
+    source: []const u8, // no default
+
+    /// Release every owned string. `source` is not freed.
+    pub fn deinit(self: *SharesRecord, allocator: std.mem.Allocator) void {
+        const required = [_][]u8{ self.symbol, self.period_end, self.cik, self.as_of };
+        for (required) |owned| allocator.free(owned);
+        if (self.form) |form_name| allocator.free(form_name);
+    }
+};
+
+/// Row shape emitted to SRF for the `profile` variant of an ETF
+/// metrics record; there is one per fund. It is separate from the
+/// internal `EtfMetrics` struct, which keeps a whole fund's data
+/// (profile + N sectors + M holdings) in nested arrays to make
+/// parsing convenient.
+pub const EtfProfileRecord = struct {
+    symbol: []u8, // owned
+    series_name: ?[]u8 = null, // owned
+    cik: []u8, // owned
+    series_id: ?[]u8 = null, // owned
+    net_assets: ?f64 = null,
+    period_end: ?[]u8 = null, // owned, YYYY-MM-DD
+    as_of: []u8, // owned
+    source: []const u8, // no default
+
+    /// Release every owned string. `source` is not freed.
+    pub fn deinit(self: *EtfProfileRecord, allocator: std.mem.Allocator) void {
+        for ([_][]u8{ self.symbol, self.cik, self.as_of }) |owned| {
+            allocator.free(owned);
+        }
+        for ([_]?[]u8{ self.series_name, self.series_id, self.period_end }) |maybe_owned| {
+            if (maybe_owned) |owned| allocator.free(owned);
+        }
+    }
+};
+
+/// Row shape emitted to SRF for the `sector` variant: one row for each
+/// (assetCat, issuerCat) pair inside a fund. `code` holds the raw
+/// NPORT-P abbreviation and `description` its human-readable
+/// translation per `sectorDescription`.
+pub const EtfSectorRecord = struct {
+    symbol: []u8, // owned
+    code: []u8, // owned, e.g. "EC/CORP"
+    description: []u8, // owned, e.g. "Equity / Corporate"
+    pct_of_portfolio: f64,
+    as_of: []u8, // owned
+    source: []const u8, // no default
+
+    /// Release every owned string. `source` is not freed.
+    pub fn deinit(self: *EtfSectorRecord, allocator: std.mem.Allocator) void {
+        const owned_strings = [_][]u8{ self.symbol, self.code, self.description, self.as_of };
+        for (owned_strings) |owned| allocator.free(owned);
+    }
+};
+
+/// Row shape emitted to SRF for the `holding` variant: one row for
+/// each top-N holding kept from NPORT-P. All available identifiers are
+/// carried along so downstream display can prefer ticker > cusip > lei
+/// without fetching again.
+pub const EtfHoldingRecord = struct {
+    symbol: []u8, // owned; the FUND's symbol
+    name: []u8, // owned; holding's company / instrument name
+    ticker: ?[]u8 = null, // owned
+    cusip: ?[]u8 = null, // owned
+    lei: ?[]u8 = null, // owned
+    country: ?[]u8 = null, // owned, ISO-3166 alpha-2
+    pct_of_portfolio: f64,
+    as_of: []u8, // owned
+    source: []const u8, // no default
+
+    /// Release every owned string. `source` is not freed.
+    pub fn deinit(self: *EtfHoldingRecord, allocator: std.mem.Allocator) void {
+        for ([_][]u8{ self.symbol, self.name }) |owned| {
+            allocator.free(owned);
+        }
+        for ([_]?[]u8{ self.ticker, self.cusip, self.lei, self.country }) |maybe_owned| {
+            if (maybe_owned) |owned| allocator.free(owned);
+        }
+        allocator.free(self.as_of);
+    }
+};
+
+/// Tagged union over the three row kinds of `etf_metrics.srf`. SRF's
+/// default `type` discriminator already fits, so no `srf_tag_field`
+/// override is declared. Builders (in `main.zig`) assemble the slice
+/// per fund as one `.profile`, then N `.sector`, then M `.holding`
+/// variants.
+pub const EtfMetricRecord = union(enum) {
+    profile: EtfProfileRecord,
+    sector: EtfSectorRecord,
+    holding: EtfHoldingRecord,
+
+    /// Forward to the active variant's own `deinit`.
+    pub fn deinit(self: *EtfMetricRecord, allocator: std.mem.Allocator) void {
+        switch (self.*) {
+            inline else => |*record| record.deinit(allocator),
+        }
+    }
+};
+
+/// Decompose one fund's internal `EtfMetrics` struct into the SRF-
+/// emit-shaped union slice. Appends one `.profile` variant then N
+/// `.sector` variants then M `.holding` variants to `out`. All
+/// strings on the resulting union values are freshly duped so the
+/// caller can deinit `metrics` independently. Caller owns the
+/// appended union values and must deinit them.
+///
+/// On error, records appended before the failure stay in `out` (still
+/// owned by the caller); the strings of the record being built when
+/// the failure happened are released here, so nothing leaks.
+pub fn appendEtfMetricRecords(
+    allocator: std.mem.Allocator,
+    out: *std.ArrayList(EtfMetricRecord),
+    metrics: EtfMetrics,
+) !void {
+    // Reserve every slot up front. After this, storing a finished
+    // record cannot fail, so its freshly duped strings can never be
+    // stranded by a failed append.
+    try out.ensureUnusedCapacity(allocator, 1 + metrics.sectors.len + metrics.holdings.len);
+
+    {
+        const symbol = try allocator.dupe(u8, metrics.symbol);
+        errdefer allocator.free(symbol);
+        const series_name: ?[]u8 = if (metrics.series_name) |s| try allocator.dupe(u8, s) else null;
+        errdefer if (series_name) |s| allocator.free(s);
+        const cik = try allocator.dupe(u8, metrics.cik);
+        errdefer allocator.free(cik);
+        const series_id: ?[]u8 = if (metrics.series_id) |s| try allocator.dupe(u8, s) else null;
+        errdefer if (series_id) |s| allocator.free(s);
+        const period_end: ?[]u8 = if (metrics.period_end) |s| try allocator.dupe(u8, s) else null;
+        errdefer if (period_end) |s| allocator.free(s);
+        const as_of = try allocator.dupe(u8, metrics.as_of);
+
+        out.appendAssumeCapacity(.{ .profile = .{
+            .symbol = symbol,
+            .series_name = series_name,
+            .cik = cik,
+            .series_id = series_id,
+            .net_assets = metrics.net_assets,
+            .period_end = period_end,
+            .as_of = as_of,
+            .source = "edgar",
+        } });
+    }
+
+    for (metrics.sectors) |s| {
+        const symbol = try allocator.dupe(u8, metrics.symbol);
+        errdefer allocator.free(symbol);
+        const code = try allocator.dupe(u8, s.code);
+        errdefer allocator.free(code);
+        const description = try allocator.dupe(u8, s.description);
+        errdefer allocator.free(description);
+        const as_of = try allocator.dupe(u8, metrics.as_of);
+
+        out.appendAssumeCapacity(.{ .sector = .{
+            .symbol = symbol,
+            .code = code,
+            .description = description,
+            .pct_of_portfolio = s.pct_of_portfolio,
+            .as_of = as_of,
+            .source = "edgar",
+        } });
+    }
+
+    for (metrics.holdings) |h| {
+        const symbol = try allocator.dupe(u8, metrics.symbol);
+        errdefer allocator.free(symbol);
+        const name = try allocator.dupe(u8, h.name);
+        errdefer allocator.free(name);
+        const ticker: ?[]u8 = if (h.ticker) |t| try allocator.dupe(u8, t) else null;
+        errdefer if (ticker) |t| allocator.free(t);
+        const cusip: ?[]u8 = if (h.cusip) |c| try allocator.dupe(u8, c) else null;
+        errdefer if (cusip) |c| allocator.free(c);
+        const lei: ?[]u8 = if (h.lei) |l| try allocator.dupe(u8, l) else null;
+        errdefer if (lei) |l| allocator.free(l);
+        const country: ?[]u8 = if (h.country) |c| try allocator.dupe(u8, c) else null;
+        errdefer if (country) |c| allocator.free(c);
+        const as_of = try allocator.dupe(u8, metrics.as_of);
+
+        out.appendAssumeCapacity(.{ .holding = .{
+            .symbol = symbol,
+            .name = name,
+            .ticker = ticker,
+            .cusip = cusip,
+            .lei = lei,
+            .country = country,
+            .pct_of_portfolio = h.pct_of_portfolio,
+            .as_of = as_of,
+            .source = "edgar",
+        } });
+    }
+}
+
+/// Extract the most recent share count from a companyconcept JSON
+/// payload.
+///
+/// Reads the root `units` object, takes the first unit whose value is
+/// a non-empty array, and picks the row with the lexicographically
+/// greatest `end` string. Returns `null` when the payload does not
+/// have that shape, when no row has a string `end`, or when the
+/// chosen row's `val` is missing, negative, non-numeric, or not
+/// representable as `u64`. A missing or non-string `form` becomes an
+/// empty string. The strings in the result are allocated with
+/// `allocator` and owned by the caller.
+fn parseSharesOutstanding(
+    allocator: std.mem.Allocator,
+    json_bytes: []const u8,
+) !?SharesOutstanding {
+    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{});
+    defer parsed.deinit();
+
+    const root = switch (parsed.value) {
+        .object => |o| o,
+        else => return null,
+    };
+    const units = switch (root.get("units") orelse return null) {
+        .object => |o| o,
+        else => return null,
+    };
+    // The unit key is "shares". Defensive: take the first units
+    // entry whose array has at least one row.
+    var rows: []std.json.Value = &.{};
+    var unit_it = units.iterator();
+    while (unit_it.next()) |entry| {
+        switch (entry.value_ptr.*) {
+            .array => |a| if (a.items.len > 0) {
+                rows = a.items;
+                break;
+            },
+            else => continue,
+        }
+    }
+    if (rows.len == 0) return null;
+
+    // Pick the row with the latest `end` date. EDGAR usually returns
+    // them in chronological order but don't rely on that.
+    var best_idx: usize = 0;
+    var best_end: []const u8 = "";
+    for (rows, 0..) |row, i| {
+        const obj = switch (row) {
+            .object => |o| o,
+            else => continue,
+        };
+        const end = switch (obj.get("end") orelse continue) {
+            .string => |s| s,
+            else => continue,
+        };
+        if (std.mem.order(u8, end, best_end) == .gt) {
+            best_end = end;
+            best_idx = i;
+        }
+    }
+    if (best_end.len == 0) return null;
+
+    const obj = switch (rows[best_idx]) {
+        .object => |o| o,
+        else => return null,
+    };
+    const val_node = obj.get("val") orelse return null;
+    const val: u64 = switch (val_node) {
+        // Checked cast: a negative count yields null.
+        .integer => |n| std.math.cast(u64, n) orelse return null,
+        .float => |f| blk: {
+            // NaN, infinities, negatives, and anything at or above
+            // 2^64 have no u64 representation; `@intFromFloat` on
+            // them is illegal behavior, so reject them first.
+            const two_pow_64: f64 = 18446744073709551616.0;
+            if (!std.math.isFinite(f) or f < 0 or f >= two_pow_64) return null;
+            break :blk @as(u64, @intFromFloat(f));
+        },
+        else => return null,
+    };
+    const form_str: []const u8 = switch (obj.get("form") orelse .null) {
+        .string => |s| s,
+        else => "",
+    };
+
+    // Copy out of the parsed tree before it is torn down. The errdefer
+    // keeps `period_end` from leaking if the second copy fails.
+    const period_end = try allocator.dupe(u8, best_end);
+    errdefer allocator.free(period_end);
+    const form = try allocator.dupe(u8, form_str);
+
+    return .{
+        .value = val,
+        .period_end = period_end,
+        .form = form,
+    };
+}
+
+/// Build the `primary_doc.xml` URL of the most recently filed NPORT-P
+/// listed under `filings.recent` of a parsed submissions feed.
+///
+/// `form`, `accessionNumber`, and `filingDate` are read as parallel
+/// arrays. A row is a candidate only when its form is exactly
+/// "NPORT-P", the same index exists in both other arrays, and its
+/// filing date is a string; the candidate with the lexicographically
+/// greatest date wins. Returns `null` when any of those members is
+/// missing or mistyped, when no row qualifies, or when the winning
+/// row's accession number is not a string.
+///
+/// `cik` goes into the URL path with leading zeros removed (at least
+/// one character is kept), and the accession number with its dashes
+/// removed. The returned slice is allocated with `allocator` and
+/// owned by the caller.
+fn findNportPUrlInSubmissions(
+    allocator: std.mem.Allocator,
+    root: std.json.ObjectMap,
+    cik: []const u8,
+) !?[]u8 {
+    const filings = switch (root.get("filings") orelse return null) {
+        .object => |o| o,
+        else => return null,
+    };
+    const recent = switch (filings.get("recent") orelse return null) {
+        .object => |o| o,
+        else => return null,
+    };
+    const forms = switch (recent.get("form") orelse return null) {
+        .array => |a| a.items,
+        else => return null,
+    };
+    const accessions = switch (recent.get("accessionNumber") orelse return null) {
+        .array => |a| a.items,
+        else => return null,
+    };
+    const dates = switch (recent.get("filingDate") orelse return null) {
+        .array => |a| a.items,
+        else => return null,
+    };
+
+    var best_idx: ?usize = null;
+    var best_date: []const u8 = "";
+    for (forms, 0..) |form, i| {
+        const fname = switch (form) {
+            .string => |s| s,
+            else => continue,
+        };
+        if (!std.mem.eql(u8, fname, "NPORT-P")) continue;
+        // The arrays are expected to be the same length, but the
+        // input is external: skip any row that lacks a date or an
+        // accession number so `accessions[idx]` below stays in bounds.
+        if (i >= dates.len or i >= accessions.len) continue;
+        const fd = switch (dates[i]) {
+            .string => |s| s,
+            else => continue,
+        };
+        if (std.mem.order(u8, fd, best_date) == .gt) {
+            best_date = fd;
+            best_idx = i;
+        }
+    }
+    const idx = best_idx orelse return null;
+
+    const acc = switch (accessions[idx]) {
+        .string => |s| s,
+        else => return null,
+    };
+
+    // Strip leading zeros but always keep at least one character.
+    var cik_no_zeros = cik;
+    while (cik_no_zeros.len > 1 and cik_no_zeros[0] == '0') cik_no_zeros = cik_no_zeros[1..];
+
+    // Copy the accession number without its dashes.
+    var adsh_buf: std.ArrayList(u8) = .empty;
+    defer adsh_buf.deinit(allocator);
+    for (acc) |c| if (c != '-') try adsh_buf.append(allocator, c);
+
+    return try std.fmt.allocPrint(
+        allocator,
+        "https://www.sec.gov/Archives/edgar/data/{s}/{s}/primary_doc.xml",
+        .{ cik_no_zeros, adsh_buf.items },
+    );
+}
+
+/// Pull the newest filing's URL out of an EDGAR full-text search
+/// response. `findLatestNportP` (the series-keyed search) uses it; it
+/// lives in its own function so further search calls can share the
+/// parser.
+///
+/// The hit with the lexicographically greatest `_source.file_date` is
+/// chosen; if no hit has a usable date, the first hit is used. Returns
+/// `null` when the response lacks the `hits.hits` array, the array is
+/// empty, or the chosen hit has no string `adsh` or no string first
+/// entry in `ciks`. The returned slice is allocated with `allocator`
+/// and owned by the caller.
+fn parseLatestNportPFromSearch(allocator: std.mem.Allocator, json_bytes: []const u8) !?[]u8 {
+    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{});
+    defer parsed.deinit();
+
+    if (parsed.value != .object) return null;
+    const outer_hits = parsed.value.object.get("hits") orelse return null;
+    if (outer_hits != .object) return null;
+    const inner_hits = outer_hits.object.get("hits") orelse return null;
+    if (inner_hits != .array) return null;
+    const hits = inner_hits.array.items;
+    if (hits.len == 0) return null;
+
+    // Scan for the greatest file_date; index 0 stays selected when no
+    // hit carries one.
+    var newest: usize = 0;
+    var newest_date: []const u8 = "";
+    for (hits, 0..) |hit, i| {
+        if (hit != .object) continue;
+        const source = hit.object.get("_source") orelse continue;
+        if (source != .object) continue;
+        const file_date = source.object.get("file_date") orelse continue;
+        if (file_date != .string) continue;
+        if (std.mem.order(u8, file_date.string, newest_date) != .gt) continue;
+        newest_date = file_date.string;
+        newest = i;
+    }
+
+    const chosen = hits[newest];
+    if (chosen != .object) return null;
+    const chosen_source = chosen.object.get("_source") orelse return null;
+    if (chosen_source != .object) return null;
+    const fields = chosen_source.object;
+
+    const adsh_node = fields.get("adsh") orelse return null;
+    if (adsh_node != .string) return null;
+    const ciks_node = fields.get("ciks") orelse return null;
+    if (ciks_node != .array) return null;
+    if (ciks_node.array.items.len == 0) return null;
+    const first_cik = ciks_node.array.items[0];
+    if (first_cik != .string) return null;
+
+    // Drop leading zeros, keeping the final character even if it is '0'.
+    const padded = first_cik.string;
+    const start = std.mem.indexOfNone(u8, padded, "0") orelse (padded.len -| 1);
+    const cik_trimmed = padded[start..];
+
+    // Re-join the dash-separated pieces of the accession number.
+    var adsh_digits: std.ArrayList(u8) = .empty;
+    defer adsh_digits.deinit(allocator);
+    var pieces = std.mem.tokenizeScalar(u8, adsh_node.string, '-');
+    while (pieces.next()) |piece| try adsh_digits.appendSlice(allocator, piece);
+
+    return try std.fmt.allocPrint(
+        allocator,
+        "https://www.sec.gov/Archives/edgar/data/{s}/{s}/primary_doc.xml",
+        .{ cik_trimmed, adsh_digits.items },
+    );
+}
+
+/// Classify a CIK based on its submissions-feed metadata. Decides
+/// whether the symbol is a registered fund (probe NPORT-P), a
+/// trust/ETN-style instrument (profile-only), or a plain operating
+/// company (skip).
+///
+/// Decision rules — kept in one place because they're load-bearing
+/// for what `EtfMetricsResult` variant `fetchEtfMetrics` returns.
+/// Rules are based on observation across ~100 real symbols:
+///
+///   1. Has NPORT-P filing                    → fund_shaped.
+///      The presence of a NPORT-P is the unambiguous signal that
+///      the entity is a registered investment company. Catches all
+///      ETFs and mutual funds regardless of entityType / SIC.
+///
+///   2. entityType == "other" AND SIC indicates
+///      a securities issuer or commodity dealer → trust_shaped.
+///      Catches ETN issuers (Credit Suisse AG → SLVO), commodity
+///      brokers (some smaller commodity trusts), without a NPORT-P.
+///      Does NOT catch foreign issuers like BP/Barrick (entityType
+///      "other" but SIC is industry-specific, not securities-related).
+///      Does NOT catch REITs either: a SIC containing "Real Estate
+///      Investment Trust" is excluded before the hints are checked.
+///
+///   3. entityType == "operating" AND SIC contains
+///      "Commodity"                           → trust_shaped.
+///      Catches commodity grantor trusts (GLD, SLV, IAU, GBTC).
+///      `entityType` is "operating" for these despite their
+///      trust-like nature — SEC classifies them as commodity-
+///      contracts brokers because they hold physical commodities.
+///
+///   4. otherwise                             → operating.
+///      Plain operating companies (AAPL, NFLX, BRK.B, BP, etc.).
+///      No fund records emitted; Wikidata covers their classification.
+///
+/// Note: REITs (e.g. Realty Income, O) are `operating` + SIC
+/// "Real Estate Investment Trusts". They are operating companies
+/// that distribute rental income, not registered investment
+/// companies. They get bucketed under `operating` — Wikidata is
+/// the right source for them.
+fn classifyByEntityType(sub: *const SubmissionsSummary) enum {
+    fund_shaped,
+    trust_shaped,
+    operating,
+} {
+    // Rule 1: NPORT-P presence is the strongest fund signal.
+    if (sub.latest_nport_p_url != null) return .fund_shaped;
+
+    const et = sub.entity_type orelse return .operating;
+    const sic_opt = sub.sic_description;
+
+    // Rule 2: securities issuers (ETN sponsor banks).
+    if (std.mem.eql(u8, et, "other")) {
+        if (sic_opt) |sic| {
+            // "Real Estate Investment Trusts" contains the
+            // "Investment Trust" hint as a substring, so a plain
+            // substring scan would bucket REITs as trusts. Rule them
+            // out explicitly before scanning.
+            if (std.mem.indexOf(u8, sic, "Real Estate Investment Trust") != null) {
+                return .operating;
+            }
+            const securities_hints = [_][]const u8{
+                "Security Brokers", // "Security Brokers, Dealers..."
+                "Commodity Contracts",
+                "Investment Trust", // REITs already excluded above
+            };
+            for (securities_hints) |h| {
+                if (std.mem.indexOf(u8, sic, h) != null) return .trust_shaped;
+            }
+        }
+        return .operating;
+    }
+
+    // Rule 3: commodity grantor trusts classified as "operating".
+    if (std.mem.eql(u8, et, "operating")) {
+        if (sic_opt) |sic| {
+            if (std.mem.indexOf(u8, sic, "Commodity") != null) {
+                return .trust_shaped;
+            }
+        }
+    }
+    return .operating;
+}
+
+test "classifyByEntityType buckets real-world entities" {
+    // Builds a summary from the given fields, classifies it, and
+    // compares against the expected bucket. The summary's strings are
+    // freed on the way out, so the testing allocator also checks leaks.
+    const expectBucket = struct {
+        fn check(
+            expected: anytype,
+            entity_type: []const u8,
+            sic_description: ?[]const u8,
+            nport_p_url: ?[]const u8,
+        ) !void {
+            const gpa = std.testing.allocator;
+            var summary: SubmissionsSummary = .{};
+            defer summary.deinit(gpa);
+            summary.entity_type = try gpa.dupe(u8, entity_type);
+            if (sic_description) |sic| summary.sic_description = try gpa.dupe(u8, sic);
+            if (nport_p_url) |url| summary.latest_nport_p_url = try gpa.dupe(u8, url);
+            try std.testing.expectEqual(expected, classifyByEntityType(&summary));
+        }
+    }.check;
+
+    // SPY: NPORT-P present → fund_shaped (regardless of other fields).
+    try expectBucket(.fund_shaped, "other", null, "https://example/primary_doc.xml");
+
+    // SLVO/GLDI/USOI issuer (Credit Suisse AG): no NPORT-P, "other"
+    // entityType, SIC = "Security Brokers..." → trust_shaped.
+    try expectBucket(.trust_shaped, "other", "Security Brokers, Dealers & Flotation Companies", null);
+
+    // BP plc: foreign issuer, "other" entityType, SIC = industry.
+    // Should be `operating`, not trust_shaped.
+    try expectBucket(.operating, "other", "Petroleum Refining", null);
+
+    // Barrick: same shape as BP.
+    try expectBucket(.operating, "other", "Gold and Silver Ores", null);
+
+    // GLD: "operating" entityType but SIC is commodity-contracts.
+    try expectBucket(.trust_shaped, "operating", "Commodity Contracts Brokers & Dealers", null);
+
+    // AAPL.
+    try expectBucket(.operating, "operating", "Electronic Computers", null);
+
+    // NFLX.
+    try expectBucket(.operating, "operating", "Services-Video Tape Rental", null);
+
+    // Realty Income (O): REIT, operating company.
+    try expectBucket(.operating, "operating", "Real Estate Investment Trusts", null);
+}
+
+/// Outcome of `fetchEtfMetrics`. The caller (see `main.zig`) tells a
+/// full holdings record apart from a profile-only one so that its log
+/// output and coverage statistics are accurate.
+pub const EtfMetricsResult = union(enum) {
+    /// Complete NPORT-P parse, including holdings and sectors.
+    full: EtfMetrics,
+    /// Only submissions-feed metadata is available. Applies to
+    /// unit-investment trusts that file 10-K rather than NPORT-P
+    /// (commodity trusts like GLD, some grantor trusts).
+    profile_only: EtfMetrics,
+    /// The symbol appears in the stock-ticker map but belongs to a
+    /// plain operating company (AAPL, MSFT, …), not a fund. Caller
+    /// should skip.
+    not_a_fund,
+    /// The symbol is absent from both ticker maps. Caller should skip.
+    not_in_edgar,
+};
+
+/// Construct an EtfMetrics record from submissions-feed metadata
+/// alone, with no holdings or sectors. Used for trust entities (e.g.
+/// commodity trusts) that lack a NPORT-P filing but for which we
+/// still want to surface name + CIK in `etf_metrics.srf`.
+///
+/// `as_of` is today's date as returned by `fmt.todayDate(io)`. The
+/// name comes from `sub.entity_name`, falling back to `entry.title`.
+/// All strings in the result are fresh copies made with `allocator`;
+/// if any copy fails, the ones already made are released before the
+/// error is returned.
+fn buildProfileOnlyMetrics(
+    io: std.Io,
+    allocator: std.mem.Allocator,
+    entry: *const TickerEntry,
+    sub: *const SubmissionsSummary,
+    symbol: []const u8,
+) !EtfMetrics {
+    var as_of_buf: [10]u8 = undefined;
+    const today_date = fmt.todayDate(io);
+    const as_of = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today_date});
+
+    // Prefer the submissions-feed name (canonical, "SPDR GOLD TRUST")
+    // over the company_tickers.json title (less authoritative).
+    const name_src: ?[]const u8 = sub.entity_name orelse entry.title;
+    const series_name: ?[]u8 = if (name_src) |n| try allocator.dupe(u8, n) else null;
+    errdefer if (series_name) |s| allocator.free(s);
+
+    // Each copy is guarded so a later allocation failure cannot leak
+    // the earlier ones. The last copy needs no guard: nothing after it
+    // can fail.
+    const symbol_owned = try allocator.dupe(u8, symbol);
+    errdefer allocator.free(symbol_owned);
+    const cik_owned = try allocator.dupe(u8, entry.cik);
+    errdefer allocator.free(cik_owned);
+    const as_of_owned = try allocator.dupe(u8, as_of);
+
+    return .{
+        .symbol = symbol_owned,
+        .series_name = series_name,
+        .cik = cik_owned,
+        .series_id = null,
+        .net_assets = null,
+        .period_end = null,
+        .as_of = as_of_owned,
+        .holdings = &.{},
+        .sectors = &.{},
+    };
+}
+
+/// Parse N-PORT-P bytes into an EtfMetrics struct. Heavy XML — we use
+/// the vendored `xml.zig` DOM parser.
+fn parseNportP(
+    io: std.Io,
+    allocator: std.mem.Allocator,
+    xml_bytes: []const u8,
+    symbol: []const u8,
+    entry: *const TickerEntry,
+    top_n_holdings: usize,
+) !EtfMetrics {
+    var as_of_buf: [10]u8 = undefined;
+    const today_date = fmt.todayDate(io);
+    const as_of = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today_date});
+
+    var doc = try xml.parse(allocator, xml_bytes);
+    defer doc.deinit();
+    const root = doc.root;
+
+    // Walk: edgarSubmission > formData > genInfo and fundInfo.
+    const form_data = (try root.findChildByTag("formData")) orelse return error.MissingFormData;
+    const gen_info = try form_data.findChildByTag("genInfo");
+    const fund_info = try form_data.findChildByTag("fundInfo");
+    const invst_or_secs = try form_data.findChildByTag("invstOrSecs");
+
+    var series_name: ?[]const u8 = null;
+    var period_end: ?[]const u8 = null;
+    if (gen_info) |gi| {
+        if (try gi.findChildByTag("seriesName")) |e| {
+            if (e.children.items.len > 0) {
+                if (e.children.items[0] == .CharData) {
+                    const sn = e.children.items[0].CharData;
+                    // Single-series trusts (SPY, IVV, …) write
+                    // "N/A" here — drop it so we fall through to the
+                    // ticker-map title below.
+                    if (!std.mem.eql(u8, sn, "N/A") and sn.len > 0) {
+                        series_name = try allocator.dupe(u8, sn);
+                    }
+                }
+            }
+        }
+    }
+    // Fall back to the ticker-map title (e.g. "SPDR S&P 500 ETF Trust"
+    // for SPY) if NPORT-P didn't supply a useful series name. The
+    // title comes from `company_tickers.json` for stock-map entries.
+    if (series_name == null) {
+        if (entry.title) |t| {
+            series_name = try allocator.dupe(u8, t);
+        }
+    }
+    if (gen_info) |gi| {
+        if (try gi.findChildByTag("repPdEnd")) |e| {
+            if (e.children.items.len > 0) {
+                if (e.children.items[0] == .CharData) {
+                    period_end = try allocator.dupe(u8, e.children.items[0].CharData);
+                }
+            }
+        }
+    }
+
+    var net_assets: ?f64 = null;
+    if (fund_info) |fi| {
+        if (try fi.findChildByTag("netAssets")) |e| {
+            if (e.children.items.len > 0) {
+                if (e.children.items[0] == .CharData) {
+                    net_assets = std.fmt.parseFloat(f64, e.children.items[0].CharData) catch null;
+                }
+            }
+        }
+    }
+
+    // Holdings + sector breakdown.
+    var holdings_list: std.ArrayList(Holding) = .empty;
+    errdefer {
+        for (holdings_list.items) |h| {
+            allocator.free(h.name);
+            if (h.ticker) |t| allocator.free(t);
+            if (h.cusip) |c| allocator.free(c);
+            if (h.lei) |l| allocator.free(l);
+            if (h.country) |c| allocator.free(c);
+        }
+        holdings_list.deinit(allocator);
+    }
+
+    // Sector aggregation: assetCat × issuerCat → cumulative weight
+    var sector_map: std.StringHashMap(f64) = .init(allocator);
+    defer {
+        var it = sector_map.iterator();
+        while (it.next()) |entry2| allocator.free(entry2.key_ptr.*);
+        sector_map.deinit();
+    }
+
+    if (invst_or_secs) |secs| {
+        for (secs.children.items) |child| {
+            if (child != .Element) continue;
+            const sec = child.Element;
+            if (!std.mem.eql(u8, sec.tag, "invstOrSec")) continue;
+
+            const name_text = elementText(sec, "name") orelse continue;
+            const pct_text = elementText(sec, "pctVal") orelse continue;
+            const pct = std.fmt.parseFloat(f64, pct_text) catch continue;
+
+            try holdings_list.append(allocator, .{
+                .name = try allocator.dupe(u8, name_text),
+                .ticker = if (elementAttrValue(sec, "identifiers", "ticker")) |t| try allocator.dupe(u8, t) else null,
+                .cusip = if (elementText(sec, "cusip")) |c| try allocator.dupe(u8, c) else null,
+                .lei = if (elementTextOptional(sec, "lei")) |l| try allocator.dupe(u8, l) else null,
+                .country = if (elementTextOptional(sec, "invCountry")) |c| try allocator.dupe(u8, c) else null,
+                .pct_of_portfolio = pct,
+            });
+
+            // Aggregate by (assetCat, issuerCat).
+            const asset_cat = elementText(sec, "assetCat") orelse "?";
+            const issuer_cat = elementText(sec, "issuerCat") orelse "?";
+            const key = try std.fmt.allocPrint(allocator, "{s}/{s}", .{ asset_cat, issuer_cat });
+            const gop = try sector_map.getOrPut(key);
+            if (gop.found_existing) {
+                allocator.free(key);
+                gop.value_ptr.* += pct;
+            } else {
+                gop.value_ptr.* = pct;
+            }
+        }
+    }
+
+    // Top N holdings by pct_of_portfolio.
+    const all_holdings = try holdings_list.toOwnedSlice(allocator);
+    std.mem.sort(Holding, all_holdings, {}, struct {
+        fn gt(_: void, a: Holding, b: Holding) bool {
+            return a.pct_of_portfolio > b.pct_of_portfolio;
+        }
+    }.gt);
+
+    const keep = @min(all_holdings.len, top_n_holdings);
+    const top = try allocator.alloc(Holding, keep);
+    for (all_holdings[0..keep], 0..) |h, i| top[i] = h;
+    // Free the rest.
+    for (all_holdings[keep..]) |h| {
+        allocator.free(h.name);
+        if (h.ticker) |t| allocator.free(t);
+        if (h.cusip) |c| allocator.free(c);
+        if (h.lei) |l| allocator.free(l);
+        if (h.country) |c| allocator.free(c);
+    }
+    allocator.free(all_holdings);
+
+    // Sector list.
+    var sectors_list: std.ArrayList(SectorWeight) = .empty;
+    errdefer sectors_list.deinit(allocator);
+    var s_it = sector_map.iterator();
+    while (s_it.next()) |s_entry| {
+        const code = s_entry.key_ptr.*;
+        try sectors_list.append(allocator, .{
+            .code = try allocator.dupe(u8, code),
+            .description = try allocator.dupe(u8, sectorDescription(code)),
+            .pct_of_portfolio = s_entry.value_ptr.*,
+        });
+    }
+    const sectors = try sectors_list.toOwnedSlice(allocator);
+    std.mem.sort(SectorWeight, sectors, {}, struct {
+        fn gt(_: void, a: SectorWeight, b: SectorWeight) bool {
+            return a.pct_of_portfolio > b.pct_of_portfolio;
+        }
+    }.gt);
+
+    return .{
+        .symbol = try allocator.dupe(u8, symbol),
+        .series_name = series_name,
+        .cik = try allocator.dupe(u8, entry.cik),
+        .series_id = if (entry.series_id) |sid| try allocator.dupe(u8, sid) else null,
+        .net_assets = net_assets,
+        .period_end = period_end,
+        .as_of = try allocator.dupe(u8, as_of),
+        .holdings = top,
+        .sectors = sectors,
+    };
+}
+
+/// Translation table for NPORT-P `assetCat/issuerCat` codes. The
+/// values are the SEC's own form-instruction abbreviations; the
+/// descriptions are condensed-but-accurate human readings used to
+/// populate `SectorWeight.description`.
+///
+/// Coverage targets the codes observed across a representative
+/// real-world portfolio (~32 distinct codes seen across stock /
+/// bond / blended ETFs). Unrecognized codes round-trip raw (the
+/// lookup falls back to the code itself) so unknowns surface for
+/// table extension rather than silently corrupting downstream
+/// classification.
+///
+/// AssetCat values per SEC form instructions:
+///   EC       Equity (common)        DE       Derivative — Equity
+///   EP       Equity Preferred       DFE      Derivative — Foreign Exchange
+///   DBT      Debt                   DIR      Derivative — Interest Rate
+///   ABS-MBS  Asset-Backed Mortgage  DCR      Derivative — Credit
+///   ABS-O    Asset-Backed Other     LON      Loan
+///   ABS-CBDO Asset-Backed CBO/CDO   STIV     Short-Term Investment Vehicle
+///   RA       Repurchase Agreement   ?        Other / Unknown
+///
+/// IssuerCat values per SEC form instructions:
+///   CORP   Corporate              MUN    Municipal
+///   UST    US Treasury            NUSS   Non-US Sovereign
+///   USGA   US Government Agency   RF     Registered Fund
+///   USGSE  US Government-Sponsored Enterprise
+///   PF     Private Fund           ?      Other / Unknown
+const sector_code_descriptions = [_]struct {
+    code: []const u8,
+    description: []const u8,
+}{
+    // Equity (common)
+    .{ .code = "EC/CORP", .description = "Equity / Corporate" },
+    .{ .code = "EC/RF", .description = "Equity / Registered Fund" },
+    .{ .code = "EC/NUSS", .description = "Equity / Non-US Sovereign" },
+    .{ .code = "EC/?", .description = "Equity / Other" },
+
+    // Equity preferred
+    .{ .code = "EP/CORP", .description = "Equity Preferred / Corporate" },
+    .{ .code = "EP/NUSS", .description = "Equity Preferred / Non-US Sovereign" },
+
+    // Debt
+    .{ .code = "DBT/UST", .description = "Debt / US Treasury" },
+    .{ .code = "DBT/USGA", .description = "Debt / US Gov Agency" },
+    .{ .code = "DBT/USGSE", .description = "Debt / US GSE" },
+    .{ .code = "DBT/CORP", .description = "Debt / Corporate" },
+    .{ .code = "DBT/MUN", .description = "Debt / Municipal" },
+    .{ .code = "DBT/NUSS", .description = "Debt / Non-US Sovereign" },
+    .{ .code = "DBT/?", .description = "Debt / Other" },
+
+    // Asset-backed
+    .{ .code = "ABS-MBS/USGSE", .description = "Asset-Backed / US GSE Mortgage" },
+    .{ .code = "ABS-MBS/USGA", .description = "Asset-Backed / US Gov Agency Mortgage" },
+    .{ .code = "ABS-MBS/CORP", .description = "Asset-Backed / Corporate Mortgage" },
+    .{ .code = "ABS-O/CORP", .description = "Asset-Backed Other / Corporate" },
+    .{ .code = "ABS-CBDO/CORP", .description = "Asset-Backed CBO/CDO / Corporate" },
+
+    // Repurchase agreements
+    .{ .code = "RA/CORP", .description = "Repurchase Agreement / Corporate" },
+    .{ .code = "RA/?", .description = "Repurchase Agreement / Other" },
+
+    // Loans
+    .{ .code = "LON/CORP", .description = "Loan / Corporate" },
+
+    // Short-term investment vehicles
+    .{ .code = "STIV/CORP", .description = "Short-Term Investment Vehicle / Corporate" },
+    .{ .code = "STIV/RF", .description = "Short-Term Investment Vehicle / Registered Fund" },
+    .{ .code = "STIV/PF", .description = "Short-Term Investment Vehicle / Private Fund" },
+
+    // Derivatives (DE = equity, DFE = foreign exchange)
+    .{ .code = "DE/CORP", .description = "Derivative / Corporate" },
+    .{ .code = "DE/?", .description = "Derivative / Other" },
+    .{ .code = "DFE/CORP", .description = "Derivative-FX / Corporate" },
+    .{ .code = "DFE/?", .description = "Derivative-FX / Other" },
+
+    // Derivatives (DIR = interest rate, DCR = credit)
+    .{ .code = "DIR/?", .description = "Derivative-Interest Rate / Other" },
+    .{ .code = "DCR/?", .description = "Derivative-Credit / Other" },
+
+    // Catch-all unknowns. We translate "?/X" to a more readable
+    // shape but preserve the structure (issuer is known even if
+    // asset class isn't).
+    .{ .code = "?/CORP", .description = "Other / Corporate" },
+    .{ .code = "?/?", .description = "Other / Other" },
+};
+
+/// Translate an NPORT-P `assetCat/issuerCat` code into its readable
+/// description. A code missing from the table is handed back as-is,
+/// which lets callers `dupe` the result unconditionally.
+pub fn sectorDescription(code: []const u8) []const u8 {
+    const hit = for (sector_code_descriptions) |row| {
+        if (std.mem.eql(u8, row.code, code)) break row.description;
+    } else code;
+    return hit;
+}
+
+/// First-node text of `parent`'s `tag` child; null if missing, empty, or not CharData.
+fn elementText(parent: *xml.Element, tag: []const u8) ?[]const u8 {
+    const node = (parent.findChildByTag(tag) catch return null) orelse return null;
+    const kids = node.children.items;
+    return if (kids.len > 0 and kids[0] == .CharData) kids[0].CharData else null;
+}
+
+/// Return the `value` attribute of the `tag` child element. When
+/// `parent_tag` is non-null the lookup first descends into that
+/// child of `parent`; with `null` it searches `parent` directly.
+/// Yields null as soon as a lookup step fails. NPORT-P holding
+/// records need this because the ticker lives in an attribute on a
+/// nested element (`<identifiers><ticker value="AGX"/></identifiers>`).
+fn elementAttrValue(parent: *xml.Element, parent_tag: ?[]const u8, tag: []const u8) ?[]const u8 {
+    var scope: *xml.Element = parent;
+    if (parent_tag) |outer| {
+        scope = (parent.findChildByTag(outer) catch return null) orelse return null;
+    }
+    const target = (scope.findChildByTag(tag) catch return null) orelse return null;
+    return target.getAttribute("value");
+}
+
+/// Variant of `elementText` that reports NPORT-P placeholder values
+/// as null. Two placeholders are recognised: the literal `"N/A"`
+/// (written in place of a missing LEI for issuers that have none)
+/// and the empty string (e.g. `<invCountry/>` on holdings whose
+/// country can't be determined). Any other text is returned
+/// unchanged.
+fn elementTextOptional(parent: *xml.Element, tag: []const u8) ?[]const u8 {
+    const raw = elementText(parent, tag) orelse return null;
+    const is_placeholder = raw.len == 0 or std.mem.eql(u8, raw, "N/A");
+    return if (is_placeholder) null else raw;
+}
+
+test "parseTickerMap parses fixture rows" {
+    const fixture =
+        \\{"fields":["cik","seriesId","classId","symbol"],"data":[
+        \\  [36405,"S000002848","C000007808","VTI"],
+        \\  [1100663,"S000004362","C000012092","AGG"]
+        \\]}
+    ;
+    const allocator = std.testing.allocator; // testing allocator reports leaks
+    var map = try parseTickerMap(allocator, fixture);
+    defer map.deinit();
+
+    const vti = map.map.get("VTI") orelse return error.TestFailed;
+    try std.testing.expectEqualStrings("0000036405", vti.cik); // numeric 36405 comes back zero-padded to 10 digits
+    try std.testing.expectEqualStrings("S000002848", vti.series_id orelse return error.TestFailed);
+
+    const agg = map.map.get("AGG") orelse return error.TestFailed;
+    try std.testing.expectEqualStrings("0001100663", agg.cik); // 1100663, zero-padded to 10 digits
+}
+
+test "parseStockTickerMap parses fixture" {
+    const fixture =
+        \\{
+        \\  "0":{"cik_str":78462,"ticker":"SPY","title":"SPDR S&P 500 ETF Trust"},
+        \\  "1":{"cik_str":1222333,"ticker":"GLD","title":"SPDR GOLD TRUST"}
+        \\}
+    ;
+    const allocator = std.testing.allocator; // testing allocator reports leaks
+    var map = try parseStockTickerMap(allocator, fixture);
+    defer map.deinit();
+
+    const spy = map.map.get("SPY") orelse return error.TestFailed;
+    try std.testing.expectEqualStrings("0000078462", spy.cik); // cik_str 78462, zero-padded to 10 digits
+    try std.testing.expect(spy.series_id == null); // the fixture rows carry no series id
+    try std.testing.expectEqualStrings("SPDR S&P 500 ETF Trust", spy.title orelse return error.TestFailed);
+
+    const gld = map.map.get("GLD") orelse return error.TestFailed;
+    try std.testing.expectEqualStrings("0001222333", gld.cik); // cik_str 1222333, zero-padded to 10 digits
+}
+
+test "sectorDescription translates known codes and round-trips unknown" {
+    // Codes present in the table map to their description.
+    try std.testing.expectEqualStrings("Equity / Corporate", sectorDescription("EC/CORP"));
+    try std.testing.expectEqualStrings("Debt / US Treasury", sectorDescription("DBT/UST"));
+    try std.testing.expectEqualStrings("Asset-Backed / US GSE Mortgage", sectorDescription("ABS-MBS/USGSE"));
+
+    // Further table entries: municipal debt, fund cash vehicle, repo, and "?/?".
+    try std.testing.expectEqualStrings("Debt / Municipal", sectorDescription("DBT/MUN"));
+    try std.testing.expectEqualStrings("Short-Term Investment Vehicle / Registered Fund", sectorDescription("STIV/RF"));
+    try std.testing.expectEqualStrings("Repurchase Agreement / Corporate", sectorDescription("RA/CORP"));
+    try std.testing.expectEqualStrings("Other / Other", sectorDescription("?/?"));
+
+    // Codes absent from the table — the empty string included — come
+    // back unchanged, so gaps surface instead of being mistranslated.
+    try std.testing.expectEqualStrings("MADE/UPCODE", sectorDescription("MADE/UPCODE"));
+    try std.testing.expectEqualStrings("", sectorDescription(""));
+}
+
+test "parseNportP holdings: ticker/lei/country populated when present" {
+    const allocator = std.testing.allocator;
+    // Minimal NPORT-P fixture covering the holding-identifier shapes.
+    // Two holdings: the first carries every identifier, the second is
+    // bare-bones with the "N/A" LEI sentinel and an empty invCountry.
+    const xml_fixture =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<edgarSubmission>
+        \\  <formData>
+        \\    <invstOrSecs>
+        \\      <invstOrSec>
+        \\        <name>Argan Inc</name>
+        \\        <lei>529900E4KZWBV9KGBS83</lei>
+        \\        <cusip>04010E109</cusip>
+        \\        <identifiers>
+        \\          <isin value="US04010E1091"/>
+        \\          <ticker value="AGX"/>
+        \\        </identifiers>
+        \\        <pctVal>4.89</pctVal>
+        \\        <assetCat>EC</assetCat>
+        \\        <issuerCat>CORP</issuerCat>
+        \\        <invCountry>US</invCountry>
+        \\      </invstOrSec>
+        \\      <invstOrSec>
+        \\        <name>Mystery Bond</name>
+        \\        <lei>N/A</lei>
+        \\        <cusip>000000000</cusip>
+        \\        <pctVal>0.50</pctVal>
+        \\        <assetCat>DBT</assetCat>
+        \\        <issuerCat>CORP</issuerCat>
+        \\        <invCountry></invCountry>
+        \\      </invstOrSec>
+        \\    </invstOrSecs>
+        \\  </formData>
+        \\</edgarSubmission>
+    ;
+    const entry = TickerEntry{
+        .cik = "0000000000",
+        .series_id = null,
+        .class_id = null,
+        .title = null,
+    };
+    var metrics = try parseNportP(std.testing.io, allocator, xml_fixture, "TEST", &entry, 10); // NOTE(review): last arg is presumably the top-N cap; 10 keeps both holdings
+    defer metrics.deinit(allocator);
+
+    try std.testing.expectEqual(@as(usize, 2), metrics.holdings.len);
+
+    // Holdings are sorted by pct descending — Argan (4.89) precedes Mystery Bond (0.50).
+    const argan = metrics.holdings[0];
+    try std.testing.expectEqualStrings("Argan Inc", argan.name);
+    try std.testing.expectEqualStrings("AGX", argan.ticker orelse return error.TickerMissing);
+    try std.testing.expectEqualStrings("04010E109", argan.cusip orelse return error.CusipMissing);
+    try std.testing.expectEqualStrings("529900E4KZWBV9KGBS83", argan.lei orelse return error.LeiMissing);
+    try std.testing.expectEqualStrings("US", argan.country orelse return error.CountryMissing);
+
+    // Mystery Bond: no <identifiers>/<ticker>, "N/A" lei, empty <invCountry> — all three must be null.
+    const mystery = metrics.holdings[1];
+    try std.testing.expectEqualStrings("Mystery Bond", mystery.name);
+    try std.testing.expect(mystery.ticker == null);
+    try std.testing.expect(mystery.lei == null);
+    try std.testing.expect(mystery.country == null);
+    try std.testing.expectEqualStrings("000000000", mystery.cusip orelse return error.CusipMissing);
+}
+
+test "appendEtfMetricRecords decomposes one fund into profile + sectors + holdings" {
+    const allocator = std.testing.allocator;
+
+    // Build a minimal EtfMetrics by hand. Every string is heap-owned so
+    // that `metrics.deinit` below can free it, as for `parseNportP` output.
+    const sectors = try allocator.alloc(SectorWeight, 2);
+    sectors[0] = .{
+        .code = try allocator.dupe(u8, "EC/CORP"),
+        .description = try allocator.dupe(u8, "Equity / Corporate"),
+        .pct_of_portfolio = 98.5,
+    };
+    sectors[1] = .{
+        .code = try allocator.dupe(u8, "STIV/CORP"),
+        .description = try allocator.dupe(u8, "Short-Term Investment Vehicle / Corporate"),
+        .pct_of_portfolio = 1.5,
+    };
+    const holdings = try allocator.alloc(Holding, 3);
+    holdings[0] = .{
+        .name = try allocator.dupe(u8, "Apple Inc"),
+        .ticker = try allocator.dupe(u8, "AAPL"),
+        .cusip = try allocator.dupe(u8, "037833100"),
+        .lei = null,
+        .country = try allocator.dupe(u8, "US"),
+        .pct_of_portfolio = 7.0,
+    };
+    holdings[1] = .{
+        .name = try allocator.dupe(u8, "Microsoft Corp"),
+        .ticker = try allocator.dupe(u8, "MSFT"),
+        .cusip = try allocator.dupe(u8, "594918104"),
+        .lei = null,
+        .country = try allocator.dupe(u8, "US"),
+        .pct_of_portfolio = 6.0,
+    };
+    holdings[2] = .{
+        .name = try allocator.dupe(u8, "NVIDIA Corp"),
+        .ticker = try allocator.dupe(u8, "NVDA"),
+        .cusip = try allocator.dupe(u8, "67066G104"),
+        .lei = null,
+        .country = try allocator.dupe(u8, "US"),
+        .pct_of_portfolio = 5.0,
+    };
+    var metrics = EtfMetrics{
+        .symbol = try allocator.dupe(u8, "VTI"),
+        .series_name = try allocator.dupe(u8, "VANGUARD TOTAL STOCK MARKET INDEX FUND"),
+        .cik = try allocator.dupe(u8, "0000036405"),
+        .series_id = try allocator.dupe(u8, "S000002848"),
+        .net_assets = 2_000_000_000_000.0,
+        .period_end = try allocator.dupe(u8, "2025-12-31"),
+        .as_of = try allocator.dupe(u8, "2026-05-25"),
+        .holdings = holdings,
+        .sectors = sectors,
+    };
+    defer metrics.deinit(allocator);
+
+    var out: std.ArrayList(EtfMetricRecord) = .empty;
+    defer {
+        for (out.items) |*r| r.deinit(allocator);
+        out.deinit(allocator);
+    }
+
+    try appendEtfMetricRecords(allocator, &out, metrics);
+
+    // One profile record, then one per sector (2) and per holding (3): 6 in total.
+    try std.testing.expectEqual(@as(usize, 6), out.items.len);
+
+    // Index 0 is the fund profile.
+    try std.testing.expect(out.items[0] == .profile);
+    try std.testing.expectEqualStrings("VTI", out.items[0].profile.symbol);
+    try std.testing.expectEqualStrings("0000036405", out.items[0].profile.cik);
+    try std.testing.expectEqualStrings("S000002848", out.items[0].profile.series_id orelse return error.SeriesIdMissing);
+
+    // Indices 1-2 are the sectors, in input order.
+    try std.testing.expect(out.items[1] == .sector);
+    try std.testing.expectEqualStrings("EC/CORP", out.items[1].sector.code);
+    try std.testing.expectEqualStrings("Equity / Corporate", out.items[1].sector.description);
+    try std.testing.expect(out.items[2] == .sector);
+
+    // Indices 3-5 are the holdings, in input order.
+    try std.testing.expect(out.items[3] == .holding);
+    try std.testing.expect(out.items[4] == .holding);
+    try std.testing.expect(out.items[5] == .holding);
+    try std.testing.expectEqualStrings("AAPL", out.items[3].holding.ticker orelse return error.TickerMissing);
+    try std.testing.expectEqualStrings("VTI", out.items[3].holding.symbol); // the owning fund's symbol, not the holding's ticker
+}
diff --git a/src/providers/Wikidata.zig b/src/providers/Wikidata.zig
new file mode 100644
index 0000000..e8eb3f0
--- /dev/null
+++ b/src/providers/Wikidata.zig
@@ -0,0 +1,618 @@
+//! Wikidata SPARQL classification provider.
+//!
+//! ## What this provider does
+//!
+//! Given a stock symbol, Wikidata can answer:
+//!
+//!   * "What kind of entity is this?" — name, industry, sector,
+//!     country of incorporation, inception date, instance-of
+//!     classification (operating company / mutual fund / ETF / …).
+//!   * "Does this match the SEC's CIK?" — Wikidata's P5531 already
+//!     stores the 10-digit zero-padded CIK matching SEC's convention.
+//!
+//! ## Workflow
+//!
+//! `fetch(symbols)` runs ONE batched SPARQL query that returns
+//! per-ticker rows. The query is keyed on the US-listing (NYSE /
+//! Nasdaq / NYSE Arca / OTC Markets) of each ticker — without that
+//! filter, common US tickers silently resolve to whichever
+//! foreign-exchange company happens to share the symbol (`MRK` →
+//! Merck KGaA on Frankfurt; `PG` → People's Garment on SET; etc.).
+//!
+//! The provider is stateless. Caching belongs to the data service,
+//! which writes per-symbol `classification.srf` files after this
+//! provider returns and reads them back on subsequent calls.
+//!
+//! ## Glossary
+//!
+//!   SPARQL    Query language for RDF-shaped data. Wikidata's
+//!             primary read API.
+//!   P-number  Property identifier in Wikidata (P249 = ticker symbol,
+//!             P414 = stock exchange, P31 = instance of, ...).
+//!   Q-number  Entity identifier in Wikidata (Q40244 = ETF as a
+//!             concept, Q13677 = NYSE the entity, Q312 = Apple Inc.
+//!             the entity).
+//!   wdt:Pxxx  Truthy/direct property statement — the simple shape.
+//!   p:Pxxx    Reified property statement — lets a statement carry
+//!             qualifiers (e.g. ticker symbol AS A QUALIFIER on the
+//!             stock-exchange statement, rather than as a direct
+//!             property of the company).
+//!   ps:Pxxx   "Statement value" predicate — within a reified
+//!             statement, points to the statement's main value.
+//!   pq:Pxxx   "Qualifier" predicate — within a reified statement,
+//!             points to a qualifier on that statement.
+//!
+//! Why the reified statement matters here: Wikidata stores tickers
+//! as P249 qualifiers on a P414 (stock exchange) statement, NOT as
+//! a direct `wdt:P249` property. Querying naively returns zero rows
+//! for nearly every US-listed equity.
+
+const std = @import("std");
+const http = @import("../net/http.zig");
+const fmt = @import("../format.zig");
+
+const sparql_endpoint = "https://query.wikidata.org/sparql";
+
+/// One symbol's classification, assembled from a Wikidata SPARQL
+/// response. Optional fields stay null when Wikidata supplies no
+/// value for the property; `source` has no default, so provenance
+/// is always set explicitly (the project's source-pure invariant).
+pub const ClassificationRecord = struct {
+    symbol: []const u8, // owned
+    name: ?[]const u8 = null, // owned
+    sector: ?[]const u8 = null, // owned
+    industry: ?[]const u8 = null, // owned
+    /// ISO-3166 alpha-2 country code (e.g. "US", "GB", "DE").
+    country: ?[]const u8 = null, // owned
+    asset_class: ?[]const u8 = null, // owned
+    is_etf: bool = false,
+    /// YYYY-MM-DD; trimmed from Wikidata's ISO-8601 date.
+    inception_date: ?[]const u8 = null, // owned
+    /// SEC CIK as a digit string, read from Wikidata's P5531.
+    /// Wikidata already zero-pads it to 10 digits, which matches
+    /// the project-wide CIK normalization convention.
+    cik: ?[]const u8 = null, // owned
+    /// Date (YYYY-MM-DD) on which this provider ran — NOT the date
+    /// Wikidata last updated the underlying entity.
+    as_of: []const u8, // owned
+    source: []const u8, // no default — provenance always emitted
+
+    /// Release every string marked "owned" above using `allocator`.
+    /// `source` is not freed here.
+    pub fn deinit(self: *ClassificationRecord, allocator: std.mem.Allocator) void {
+        const optional_strings = [_]?[]const u8{
+            self.name,        self.sector,         self.industry, self.country,
+            self.asset_class, self.inception_date, self.cik,
+        };
+        allocator.free(self.symbol);
+        for (optional_strings) |maybe| if (maybe) |s| allocator.free(s);
+        allocator.free(self.as_of);
+    }
+};
+
+/// Geo-bucket labels returned by the country → geo lookup. Exposed
+/// as named constants (instead of string literals repeated in the
+/// map) so callers can refer to a bucket without risking a typo and
+/// the taxonomy's wording can be changed in a single place.
+pub const geo = struct {
+    pub const us = "US";
+    pub const developed = "International Developed";
+    pub const emerging = "Emerging Markets";
+    pub const unknown = "Unknown";
+};
+
+/// Wikidata Q-IDs we test against `instance of` (P31) to classify
+/// ETF-shaped securities. Curated, not exhaustive.
+const etf_q_ids = [_][]const u8{
+    "Q40244", // exchange-traded fund
+    "Q4118901", // exchange-traded bond fund
+    "Q104638128", // ETF tracking specific index
+};
+const mutual_fund_q_ids = [_][]const u8{ // same idea, for mutual-fund-shaped securities
+    "Q1752230", // mutual fund
+    "Q11644608", // open-end fund
+};
+
+/// US stock exchanges accepted by the SPARQL exchange filter. Entries
+/// carry the `wd:` prefix because they are written verbatim into the
+/// query's `VALUES ?exchange` list. Keeps out same-ticker foreign listings.
+///
+/// Q-IDs:
+///   Q13677     New York Stock Exchange (NYSE)
+///   Q82059     Nasdaq
+///   Q4527260   NYSE Arca
+///   Q1666011   OTC Markets Group / Pink Sheets
+const us_exchanges = [_][]const u8{
+    "wd:Q13677",
+    "wd:Q82059",
+    "wd:Q4527260",
+    "wd:Q1666011",
+};
+
+/// Country-code-to-geo-bucket lookup. The SPARQL query reads the
+/// ISO-3166 alpha-2 code via P17 → P297; keys map to the geo taxonomy
+/// (`geo.us` / `geo.developed` / `geo.emerging`); misses → `geo.unknown`.
+///
+/// Split loosely follows MSCI: South Korea (FTSE-developed) and Taiwan
+/// are bucketed emerging, Israel developed, and Canada is folded into
+/// International Developed (override in `metadata.srf` for a separate
+/// Canada bucket). NOTE(review): GR/IS/LU sit in developed and RU/AR/VN
+/// in emerging — confirm against the current MSCI country lists.
+const country_to_geo = std.StaticStringMap([]const u8).initComptime(.{
+    // United States
+    .{ "US", geo.us },
+    // Alpha-3 fallback for entries that use the longer form.
+    .{ "USA", geo.us },
+
+    // International Developed — Europe ex-CIS
+    .{ "GB", geo.developed },
+    .{ "DE", geo.developed },
+    .{ "FR", geo.developed },
+    .{ "NL", geo.developed },
+    .{ "CH", geo.developed },
+    .{ "SE", geo.developed },
+    .{ "DK", geo.developed },
+    .{ "NO", geo.developed },
+    .{ "FI", geo.developed },
+    .{ "IT", geo.developed },
+    .{ "ES", geo.developed },
+    .{ "BE", geo.developed },
+    .{ "AT", geo.developed },
+    .{ "IE", geo.developed },
+    .{ "LU", geo.developed },
+    .{ "PT", geo.developed },
+    .{ "GR", geo.developed },
+    .{ "IS", geo.developed },
+
+    // International Developed — Asia-Pacific + Israel + Canada
+    .{ "JP", geo.developed },
+    .{ "AU", geo.developed },
+    .{ "NZ", geo.developed },
+    .{ "SG", geo.developed },
+    .{ "HK", geo.developed },
+    .{ "IL", geo.developed },
+    .{ "CA", geo.developed },
+
+    // Emerging Markets (MSCI)
+    .{ "CN", geo.emerging },
+    .{ "TW", geo.emerging },
+    .{ "KR", geo.emerging },
+    .{ "IN", geo.emerging },
+    .{ "BR", geo.emerging },
+    .{ "MX", geo.emerging },
+    .{ "RU", geo.emerging },
+    .{ "TR", geo.emerging },
+    .{ "ZA", geo.emerging },
+    .{ "TH", geo.emerging },
+    .{ "MY", geo.emerging },
+    .{ "ID", geo.emerging },
+    .{ "PH", geo.emerging },
+    .{ "VN", geo.emerging },
+    .{ "AR", geo.emerging },
+    .{ "CL", geo.emerging },
+    .{ "CO", geo.emerging },
+    .{ "PE", geo.emerging },
+    .{ "EG", geo.emerging },
+});
+
+/// Translate an ISO-3166 alpha-2 country code into a geo bucket.
+/// Anything that cannot be resolved — null, the empty string, or a
+/// code missing from `country_to_geo` — becomes `geo.unknown`, which
+/// the user can override in `metadata.srf`.
+pub fn geoFor(iso2: ?[]const u8) []const u8 {
+    const code = iso2 orelse "";
+    return if (code.len == 0) geo.unknown else (country_to_geo.get(code) orelse geo.unknown);
+}
+
+// ── Wikidata provider state (file-as-struct) ─────────────────────
+//
+// Callers do `const wikidata = @import("providers/Wikidata.zig");`
+// followed by `var wd = wikidata.init(...);` and `wd.fetch(...)`.
+
+client: http.Client,
+allocator: std.mem.Allocator,
+io: std.Io,
+/// Contact email for User-Agent / From headers, sourced from
+/// `Config.user_email`. Required; callers must surface a clear
+/// missing-config error before constructing this provider.
+user_email: []const u8,
+
+const Wikidata = @This();
+
+/// Construct a provider. The HTTP client is created from `io` and
+/// `allocator`; `user_email` is stored exactly as passed (the slice
+/// is not copied), so it has to stay valid while the provider is in
+/// use. Pair every `init` with a `deinit`.
+pub fn init(io: std.Io, allocator: std.mem.Allocator, user_email: []const u8) Wikidata {
+    return Wikidata{
+        .io = io,
+        .allocator = allocator,
+        .user_email = user_email,
+        .client = http.Client.init(io, allocator),
+    };
+}
+
+pub fn deinit(self: *Wikidata) void {
+    self.client.deinit(); // releases the HTTP client; no other field is freed here
+}
+
+/// Classify `symbols` with one batched SPARQL round-trip. Scratch
+/// buffers use `self.allocator`; the parsed records come from
+/// `result_allocator`. Caller owns the returned slice and each record.
+pub fn fetch(
+    self: *Wikidata,
+    result_allocator: std.mem.Allocator,
+    symbols: []const []const u8,
+) ![]ClassificationRecord {
+    if (symbols.len == 0) return &.{}; // nothing to ask — skip the network call
+
+    const scratch = self.allocator;
+    const sparql = try buildQuery(scratch, symbols);
+    defer scratch.free(sparql);
+    const body = try self.postSparql(sparql);
+    defer scratch.free(body);
+
+    return parse(self.io, result_allocator, body, symbols);
+}
+
+/// POST `query` to `sparql_endpoint` as a form-encoded body and return
+/// a copy of the response body made with `self.allocator` (caller
+/// frees). Yields `error.UserEmailTooLong` if the User-Agent overflows.
+fn postSparql(self: *Wikidata, query: []const u8) ![]u8 {
+    var form_buf: std.Io.Writer.Allocating = .init(self.allocator);
+    defer form_buf.deinit();
+    try form_buf.writer.writeAll("query=");
+    // `Component.formatEscaped` percent-encodes everything outside
+    // RFC 3986's unreserved set — exactly the contract for the
+    // `application/x-www-form-urlencoded` body we're building.
+    try (std.Uri.Component{ .raw = query }).formatEscaped(&form_buf.writer);
+
+    var ua_buf: [256]u8 = undefined; // User-Agent and From carry the contact email; Wikidata recommends descriptive User-Agents
+    const ua = std.fmt.bufPrint(&ua_buf, "zfin/0.1 ({s})", .{self.user_email}) catch return error.UserEmailTooLong;
+
+    const headers = [_]std.http.Header{
+        .{ .name = "User-Agent", .value = ua },
+        .{ .name = "Accept", .value = "application/sparql-results+json" },
+        .{ .name = "Content-Type", .value = "application/x-www-form-urlencoded" },
+        .{ .name = "From", .value = self.user_email },
+    };
+
+    var resp = try self.client.request(.POST, sparql_endpoint, form_buf.written(), &headers);
+    defer resp.deinit();
+    return self.allocator.dupe(u8, resp.body);
+}
+
+/// Build the batched SPARQL query for a slice of ticker symbols.
+/// Caller owns the returned bytes. Symbols are interpolated as
+/// `VALUES ?ticker { "AAPL" "MSFT" ... }`; `"`, `\`, LF and CR are
+/// backslash-escaped so an odd symbol cannot break or inject into
+/// the query.
+///
+/// Wikidata stores tickers as `P249` qualifiers on a `P414` (stock
+/// exchange) statement, so a naive `?security wdt:P249 ?ticker`
+/// matches almost no US-listed equity. The query walks `p:P414` →
+/// `ps:P414` / `pq:P249` instead and restricts `?exchange` to
+/// `us_exchanges` to avoid collisions with foreign listings.
+fn buildQuery(allocator: std.mem.Allocator, symbols: []const []const u8) ![]u8 {
+    var aw: std.Io.Writer.Allocating = .init(allocator);
+    defer aw.deinit();
+
+    try aw.writer.writeAll(
+        \\SELECT ?ticker ?security ?securityLabel ?industryLabel ?countryCode ?inception ?cik ?instance WHERE {
+        \\  VALUES ?ticker {
+    );
+    for (symbols) |s| {
+        try aw.writer.writeAll(" \"");
+        for (s) |ch| switch (ch) {
+            '"', '\\' => try aw.writer.print("\\{c}", .{ch}),
+            '\n' => try aw.writer.writeAll("\\n"),
+            '\r' => try aw.writer.writeAll("\\r"),
+            else => try aw.writer.writeByte(ch),
+        };
+        try aw.writer.writeByte('"');
+    }
+    try aw.writer.writeAll(" }\n  VALUES ?exchange {");
+    for (us_exchanges) |x| try aw.writer.print(" {s}", .{x});
+    try aw.writer.writeAll(" }\n");
+    try aw.writer.writeAll(
+        \\  ?security p:P414 ?exchstmt .
+        \\  ?exchstmt ps:P414 ?exchange .
+        \\  ?exchstmt pq:P249 ?ticker .
+        \\  OPTIONAL { ?security wdt:P452 ?industry . }
+        \\  OPTIONAL { ?security wdt:P17 ?country . ?country wdt:P297 ?countryCode . }
+        \\  OPTIONAL { ?security wdt:P571 ?inception . }
+        \\  OPTIONAL { ?security wdt:P5531 ?cik . }
+        \\  OPTIONAL { ?security wdt:P31 ?instance . }
+        \\  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
+        \\}
+    );
+    return aw.toOwnedSlice();
+}
+
+/// Parse the SPARQL JSON response into `ClassificationRecord` values.
+/// Multiple bindings for the same ticker (e.g. multiple `instance of`
+/// values) get merged into one record — first-non-null wins.
+///
+/// Bindings whose ticker does not match an entry of `expected_symbols`
+/// (ASCII case-insensitive) are skipped. JSON that fails to parse or
+/// lacks the `results.bindings` array yields an empty slice rather
+/// than an error; allocation failure is returned as an error.
+///
+/// Caller owns the returned slice and every record in it: `deinit`
+/// each record with `allocator`, then free the slice.
+fn parse(
+    io: std.Io,
+    allocator: std.mem.Allocator,
+    json_bytes: []const u8,
+    expected_symbols: []const []const u8,
+) ![]ClassificationRecord {
+    const today = fmt.todayDate(io);
+    var as_of_buf: [10]u8 = undefined;
+    const as_of = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today});
+
+    // Malformed input is a soft failure (empty result), but running
+    // out of memory is not evidence of bad JSON and must propagate.
+    const parsed = std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{}) catch |err| switch (err) {
+        error.OutOfMemory => return err,
+        else => return &.{},
+    };
+    defer parsed.deinit();
+
+    const root = switch (parsed.value) {
+        .object => |o| o,
+        else => return &.{},
+    };
+    const results = switch (root.get("results") orelse return &.{}) {
+        .object => |o| o,
+        else => return &.{},
+    };
+    const bindings = switch (results.get("bindings") orelse return &.{}) {
+        .array => |a| a.items,
+        else => return &.{},
+    };
+
+    // Map symbol → record; merge multiple bindings. The map owns both
+    // its keys and its values until they are drained at the end.
+    var by_symbol: std.StringHashMap(ClassificationRecord) = .init(allocator);
+    defer {
+        // Runs on every exit. On success the map was emptied by the
+        // drain below, so only the table itself is released here; on
+        // error this frees every key and record built so far.
+        var cleanup_it = by_symbol.iterator();
+        while (cleanup_it.next()) |entry| {
+            allocator.free(entry.key_ptr.*);
+            entry.value_ptr.deinit(allocator);
+        }
+        by_symbol.deinit();
+    }
+
+    for (bindings) |b| {
+        const obj = switch (b) {
+            .object => |o| o,
+            else => continue,
+        };
+        const ticker = sparqlValue(obj, "ticker") orelse continue;
+
+        // Verify ticker is one we asked for. Wikidata can return
+        // surprising matches (foreign exchanges); skip those.
+        var matched = false;
+        for (expected_symbols) |s| {
+            if (std.ascii.eqlIgnoreCase(s, ticker)) {
+                matched = true;
+                break;
+            }
+        }
+        if (!matched) continue;
+
+        const slot = try by_symbol.getOrPut(ticker);
+        if (!slot.found_existing) {
+            // The fresh entry's key still borrows `ticker` from the
+            // JSON tree and its value is uninitialised. If any copy
+            // below fails, drop the entry again so the cleanup `defer`
+            // never sees a half-built record.
+            errdefer {
+                _ = by_symbol.remove(ticker);
+            }
+            const key_copy = try allocator.dupe(u8, ticker);
+            errdefer allocator.free(key_copy);
+            const symbol_copy = try allocator.dupe(u8, ticker);
+            errdefer allocator.free(symbol_copy);
+            const as_of_copy = try allocator.dupe(u8, as_of);
+
+            slot.key_ptr.* = key_copy;
+            slot.value_ptr.* = .{
+                .symbol = symbol_copy,
+                .as_of = as_of_copy,
+                .source = "wikidata",
+            };
+        }
+        const rec = slot.value_ptr;
+
+        if (rec.name == null) {
+            if (sparqlValue(obj, "securityLabel")) |label| {
+                rec.name = try allocator.dupe(u8, label);
+            }
+        }
+        if (rec.industry == null) {
+            if (sparqlValue(obj, "industryLabel")) |ind| {
+                // The industry label is stored in both fields.
+                rec.industry = try allocator.dupe(u8, ind);
+                rec.sector = try allocator.dupe(u8, ind);
+            }
+        }
+        if (rec.country == null) {
+            if (sparqlValue(obj, "countryCode")) |c| {
+                rec.country = try allocator.dupe(u8, c);
+            }
+        }
+        if (rec.inception_date == null) {
+            if (sparqlValue(obj, "inception")) |d| {
+                // Keep only the first ten bytes of the timestamp
+                // (presumably the `YYYY-MM-DD` part — confirm format).
+                if (d.len >= 10) {
+                    rec.inception_date = try allocator.dupe(u8, d[0..10]);
+                }
+            }
+        }
+        if (rec.cik == null) {
+            if (sparqlValue(obj, "cik")) |c| {
+                rec.cik = try allocator.dupe(u8, c);
+            }
+        }
+        if (sparqlValue(obj, "instance")) |inst_iri| {
+            // The "instance" value is a Q-ID URI like
+            // "http://www.wikidata.org/entity/Q40244". Extract the
+            // Q-ID suffix and test against our known sets.
+            const last_slash = std.mem.lastIndexOfScalar(u8, inst_iri, '/');
+            const q_id = if (last_slash) |i| inst_iri[i + 1 ..] else inst_iri;
+            for (etf_q_ids) |target| {
+                if (std.mem.eql(u8, q_id, target)) {
+                    rec.is_etf = true;
+                    if (rec.asset_class == null) {
+                        rec.asset_class = try allocator.dupe(u8, "ETF (uncategorized)");
+                    }
+                    break;
+                }
+            }
+            for (mutual_fund_q_ids) |target| {
+                if (std.mem.eql(u8, q_id, target)) {
+                    // NOTE(review): mutual funds also set `is_etf`;
+                    // confirm this flag is meant to cover all funds
+                    // and is not a copy-paste of the ETF branch.
+                    rec.is_etf = true;
+                    if (rec.asset_class == null) {
+                        rec.asset_class = try allocator.dupe(u8, "Mutual Fund (uncategorized)");
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
+    // Drain the map into an owned slice. Records move to the caller;
+    // keys are only needed for merging and are freed here. The map is
+    // then cleared so the cleanup `defer` does not free anything twice.
+    const out = try allocator.alloc(ClassificationRecord, by_symbol.count());
+    var idx: usize = 0;
+    var drain_it = by_symbol.iterator();
+    while (drain_it.next()) |entry| : (idx += 1) {
+        out[idx] = entry.value_ptr.*;
+        allocator.free(entry.key_ptr.*);
+    }
+    by_symbol.clearRetainingCapacity();
+    return out;
+}
+
+/// Look up `field` in a SPARQL JSON binding object and return the
+/// string held in that member's `"value"` entry. Yields null when
+/// the field is missing, is not an object, has no `"value"`, or the
+/// value is not a JSON string. The result borrows from `obj`.
+fn sparqlValue(obj: std.json.ObjectMap, field: []const u8) ?[]const u8 {
+    const binding = obj.get(field) orelse return null;
+    if (binding != .object) return null;
+
+    const inner = binding.object.get("value") orelse return null;
+    return if (inner == .string) inner.string else null;
+}
+
+// ── Tests ────────────────────────────────────────────────────────
+
+test "buildQuery includes all symbols and required SELECT vars" {
+    const allocator = std.testing.allocator;
+    const syms = [_][]const u8{ "AAPL", "VTI" };
+    const query = try buildQuery(allocator, &syms);
+    defer allocator.free(query);
+
+    // Every fragment below must appear somewhere in the query text.
+    const required = [_][]const u8{
+        "\"AAPL\"",
+        "\"VTI\"",
+        "p:P414",
+        "pq:P249",
+        "wdt:P452",
+        "wdt:P17",
+        // US-exchange filter must be present — without it, US tickers
+        // collide with foreign exchanges (MRK→Merck KGaA, PG→People's
+        // Garment, etc.). See `us_exchanges` doc-block.
+        "wd:Q13677", // NYSE
+        "wd:Q82059", // Nasdaq
+        "ps:P414 ?exchange",
+    };
+    for (required) |fragment| {
+        try std.testing.expect(std.mem.indexOf(u8, query, fragment) != null);
+    }
+}
+
+test "parse: AAPL fixture round-trips name + industry + country" {
+    const fixture =
+        \\{
+        \\  "head": {"vars": ["ticker", "security", "securityLabel", "industryLabel", "countryCode", "inception", "cik", "instance"]},
+        \\  "results": {
+        \\    "bindings": [
+        \\      {
+        \\        "ticker": {"type": "literal", "value": "AAPL"},
+        \\        "security": {"type": "uri", "value": "http://www.wikidata.org/entity/Q312"},
+        \\        "securityLabel": {"type": "literal", "value": "Apple Inc."},
+        \\        "industryLabel": {"type": "literal", "value": "consumer electronics"},
+        \\        "countryCode": {"type": "literal", "value": "US"},
+        \\        "instance": {"type": "uri", "value": "http://www.wikidata.org/entity/Q4830453"}
+        \\      }
+        \\    ]
+        \\  }
+        \\}
+    ;
+
+    const allocator = std.testing.allocator;
+    const requested = [_][]const u8{"AAPL"};
+    const recs = try parse(std.testing.io, allocator, fixture, &requested);
+    // The test owns the records and the slice that holds them.
+    defer {
+        for (recs) |rec| {
+            var owned = rec;
+            owned.deinit(allocator);
+        }
+        allocator.free(recs);
+    }
+
+    try std.testing.expectEqual(@as(usize, 1), recs.len);
+    const apple = recs[0];
+    try std.testing.expectEqualStrings("AAPL", apple.symbol);
+    try std.testing.expectEqualStrings("Apple Inc.", apple.name.?);
+    // The industry label is expected in both `industry` and `sector`.
+    try std.testing.expectEqualStrings("consumer electronics", apple.industry.?);
+    try std.testing.expectEqualStrings("consumer electronics", apple.sector.?);
+    try std.testing.expectEqualStrings("US", apple.country.?);
+    try std.testing.expect(!apple.is_etf);
+}
+
+test "parse: ETF fixture sets is_etf=true and asset_class" {
+    const fixture =
+        \\{
+        \\  "head": {"vars": ["ticker", "security", "securityLabel", "instance"]},
+        \\  "results": {
+        \\    "bindings": [
+        \\      {
+        \\        "ticker": {"type": "literal", "value": "VTI"},
+        \\        "security": {"type": "uri", "value": "http://www.wikidata.org/entity/Q1809462"},
+        \\        "securityLabel": {"type": "literal", "value": "Vanguard Total Stock Market ETF"},
+        \\        "instance": {"type": "uri", "value": "http://www.wikidata.org/entity/Q40244"}
+        \\      }
+        \\    ]
+        \\  }
+        \\}
+    ;
+
+    const allocator = std.testing.allocator;
+    const requested = [_][]const u8{"VTI"};
+    const recs = try parse(std.testing.io, allocator, fixture, &requested);
+    // The test owns the records and the slice that holds them.
+    defer {
+        for (recs) |rec| {
+            var owned = rec;
+            owned.deinit(allocator);
+        }
+        allocator.free(recs);
+    }
+
+    try std.testing.expectEqual(@as(usize, 1), recs.len);
+    const fund = recs[0];
+    try std.testing.expect(fund.is_etf);
+    try std.testing.expectEqualStrings("ETF (uncategorized)", fund.asset_class.?);
+}
+
+test "parse: bindings for symbols not requested are dropped" {
+    const fixture =
+        \\{
+        \\  "head": {"vars": ["ticker", "security", "securityLabel"]},
+        \\  "results": {
+        \\    "bindings": [
+        \\      {"ticker": {"type": "literal", "value": "WRONG"},
+        \\       "security": {"type": "uri", "value": "http://example/Q1"},
+        \\       "securityLabel": {"type": "literal", "value": "Wrong Co"}}
+        \\    ]
+        \\  }
+        \\}
+    ;
+
+    const allocator = std.testing.allocator;
+    // Only AAPL is requested, so the "WRONG" binding must be ignored.
+    const requested = [_][]const u8{"AAPL"};
+    const recs = try parse(std.testing.io, allocator, fixture, &requested);
+    defer allocator.free(recs);
+
+    try std.testing.expectEqual(@as(usize, 0), recs.len);
+}
+
+test "geoFor maps known ISO-3166 codes to bucket" {
+    // Each country code is paired with the bucket it must map to.
+    const cases = [_]struct { code: []const u8, bucket: []const u8 }{
+        .{ .code = "US", .bucket = geo.us },
+        .{ .code = "USA", .bucket = geo.us },
+        .{ .code = "GB", .bucket = geo.developed },
+        .{ .code = "DE", .bucket = geo.developed },
+        .{ .code = "CA", .bucket = geo.developed },
+        .{ .code = "IL", .bucket = geo.developed },
+        .{ .code = "CN", .bucket = geo.emerging },
+        .{ .code = "TW", .bucket = geo.emerging },
+        .{ .code = "KR", .bucket = geo.emerging },
+    };
+    for (cases) |case| {
+        try std.testing.expectEqualStrings(case.bucket, geoFor(case.code));
+    }
+}
+
+test "geoFor returns Unknown for null/empty/unmapped" {
+    // null, empty, and two codes with no mapping ("ZZ" is an
+    // unassigned ISO-2 code).
+    const inputs = [_]?[]const u8{ null, "", "ZZ", "XX" };
+    for (inputs) |input| {
+        try std.testing.expectEqualStrings(geo.unknown, geoFor(input));
+    }
+}
diff --git a/src/providers/xml.zig b/src/providers/xml.zig
new file mode 100644
index 0000000..a2eca82
--- /dev/null
+++ b/src/providers/xml.zig
@@ -0,0 +1,713 @@
+// VENDORED - see README.md.
+// File sourced from:
+// https://github.com/Snektron/vulkan-zig/blob/797ae8af88e84753af9640266de61a985b76b580/generator/xml.zig
+// via ~/shared/aws-zig/src/xml.zig
+const std = @import("std");
+const mem = std.mem;
+const testing = std.testing;
+const Allocator = mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
+const ArrayList = std.ArrayList;
+
+/// A single attribute of an element: its name and its value. The
+/// parser stores the value with XML entities already unescaped.
+pub const Attribute = struct {
+    name: []const u8,
+    value: []const u8,
+};
+
+/// One child item inside an element: a run of character data, a
+/// comment's text (without the `<!--`/`-->` delimiters), or a nested
+/// element.
+pub const Content = union(enum) {
+    CharData: []const u8,
+    Comment: []const u8,
+    Element: *Element,
+};
+
+/// An XML element: tag name, attributes, ordered child content, and
+/// a link to the following sibling element as set by the parser.
+pub const Element = struct {
+    pub const AttributeList = ArrayList(*Attribute);
+    pub const ContentList = ArrayList(Content);
+
+    tag: []const u8,
+    attributes: AttributeList,
+    children: ContentList,
+    next_sibling: ?*Element = null,
+    allocator: std.mem.Allocator,
+
+    /// Create an element with no attributes and no children that
+    /// remembers `alloc` for later list growth.
+    fn init(tag: []const u8, alloc: Allocator) Element {
+        return .{
+            .tag = tag,
+            .attributes = .empty,
+            .children = .empty,
+            .allocator = alloc,
+        };
+    }
+
+    /// Return the value of the first attribute named `attrib_name`,
+    /// or null if there is none.
+    pub fn getAttribute(self: *Element, attrib_name: []const u8) ?[]const u8 {
+        for (self.attributes.items) |attr| {
+            if (!mem.eql(u8, attr.name, attrib_name)) continue;
+            return attr.value;
+        }
+        return null;
+    }
+
+    /// Return the text of the first child element tagged `child_tag`,
+    /// provided that child holds exactly one content item and it is
+    /// character data. Any other shape (or a lookup error) gives null.
+    pub fn getCharData(self: *Element, child_tag: []const u8) ?[]const u8 {
+        const found = self.findChildByTag(child_tag) catch return null;
+        const child = found orelse return null;
+        if (child.children.items.len != 1) return null;
+
+        const only = child.children.items[0];
+        return if (only == .CharData) only.CharData else null;
+    }
+
+    /// Iterate over every child content item, in document order.
+    pub fn iterator(self: *Element) ChildIterator {
+        return .{ .items = self.children.items, .i = 0 };
+    }
+
+    /// Iterate over child elements only, skipping text and comments.
+    pub fn elements(self: *Element) ChildElementIterator {
+        return .{ .inner = self.iterator() };
+    }
+
+    /// Return the first child element whose tag matches `tag`.
+    pub fn findChildByTag(self: *Element, tag: []const u8) !?*Element {
+        var matches = self.findChildrenByTag(tag);
+        return try matches.next();
+    }
+
+    /// Iterate over child elements whose tag matches `tag`, using the
+    /// iterator's default exact-match predicate.
+    pub fn findChildrenByTag(self: *Element, tag: []const u8) FindChildrenByTagIterator {
+        return .{ .inner = self.elements(), .tag = tag };
+    }
+
+    pub const ChildIterator = struct {
+        items: []Content,
+        i: usize,
+
+        /// Return a pointer to the next content item, or null at the end.
+        pub fn next(self: *ChildIterator) ?*Content {
+            if (self.i >= self.items.len) return null;
+
+            const item = &self.items[self.i];
+            self.i += 1;
+            return item;
+        }
+    };
+
+    pub const ChildElementIterator = struct {
+        inner: ChildIterator,
+
+        /// Return the next child that is an element, or null at the end.
+        pub fn next(self: *ChildElementIterator) ?*Element {
+            while (self.inner.next()) |child| {
+                switch (child.*) {
+                    .Element => |elem| return elem,
+                    else => {},
+                }
+            }
+            return null;
+        }
+    };
+
+    /// Default tag predicate: byte-for-byte equality; options unused.
+    fn strictEqual(a: []const u8, b: []const u8, _: PredicateOptions) !bool {
+        return mem.eql(u8, a, b);
+    }
+    pub const FindChildrenByTagIterator = struct {
+        inner: ChildElementIterator,
+        tag: []const u8,
+        predicate: *const fn (a: []const u8, b: []const u8, options: PredicateOptions) anyerror!bool = strictEqual,
+        predicate_options: PredicateOptions = .{},
+
+        /// Return the next child element accepted by `predicate`
+        /// (called as `predicate(child.tag, self.tag, options)`), or
+        /// null at the end. Predicate errors are propagated.
+        pub fn next(self: *FindChildrenByTagIterator) !?*Element {
+            while (self.inner.next()) |child| {
+                const accepted = try self.predicate(child.tag, self.tag, self.predicate_options);
+                if (accepted) return child;
+            }
+            return null;
+        }
+    };
+};
+
+/// Options passed through to a `FindChildrenByTagIterator` predicate.
+/// The default `strictEqual` predicate ignores them.
+pub const PredicateOptions = struct {
+    // Optional allocator made available to a custom predicate.
+    allocator: ?std.mem.Allocator = null,
+};
+/// Contents of a parsed `<?xml ...?>` declaration. `version` is
+/// mandatory; `encoding` and `standalone` are null when absent.
+pub const XmlDecl = struct {
+    version: []const u8,
+    encoding: ?[]const u8,
+    // true for standalone="yes", false for standalone="no".
+    standalone: ?bool,
+};
+
+/// A parsed document. All nodes are allocated from `arena`, so
+/// `deinit` releases the whole tree at once.
+pub const Document = struct {
+    arena: ArenaAllocator,
+    xml_decl: ?*XmlDecl,
+    root: *Element,
+
+    /// Free every allocation made while parsing this document.
+    pub fn deinit(self: Document) void {
+        // `self` is received by value and is therefore const; the
+        // arena needs a mutable copy to be torn down.
+        var owned_arena = self.arena;
+        owned_arena.deinit();
+    }
+};
+
+/// Cursor over the source text. `offset` is the index of the next
+/// unread byte; `line` and `column` are zero-based counters updated
+/// as bytes are consumed.
+const ParseContext = struct {
+    source: []const u8,
+    offset: usize,
+    line: usize,
+    column: usize,
+
+    /// Start a cursor at the beginning of `source`.
+    fn init(source: []const u8) ParseContext {
+        return .{ .source = source, .offset = 0, .line = 0, .column = 0 };
+    }
+
+    /// Return the next byte without consuming it, or null at the end.
+    fn peek(self: *ParseContext) ?u8 {
+        if (self.offset >= self.source.len) return null;
+        return self.source[self.offset];
+    }
+
+    /// Consume and return the next byte; `error.UnexpectedEof` at the end.
+    fn consume(self: *ParseContext) !u8 {
+        if (self.offset >= self.source.len) return error.UnexpectedEof;
+        return self.consumeNoEof();
+    }
+
+    /// Consume and return the next byte. The caller must already know
+    /// the input is not exhausted.
+    fn consumeNoEof(self: *ParseContext) u8 {
+        std.debug.assert(self.offset < self.source.len);
+        const byte = self.source[self.offset];
+        self.offset += 1;
+
+        switch (byte) {
+            '\n' => {
+                self.line += 1;
+                self.column = 0;
+            },
+            else => self.column += 1,
+        }
+
+        return byte;
+    }
+
+    /// Consume `char` if it is next; report whether it was consumed.
+    fn eat(self: *ParseContext, char: u8) bool {
+        self.expect(char) catch return false;
+        return true;
+    }
+
+    /// Consume `expected`, or fail without consuming anything.
+    fn expect(self: *ParseContext, expected: u8) !void {
+        const actual = self.peek() orelse return error.UnexpectedEof;
+        if (actual != expected) return error.UnexpectedCharacter;
+        _ = self.consumeNoEof();
+    }
+
+    /// Consume `text` if it is next; report whether it was consumed.
+    fn eatStr(self: *ParseContext, text: []const u8) bool {
+        self.expectStr(text) catch return false;
+        return true;
+    }
+
+    /// Consume `text`, or fail without consuming anything. Too little
+    /// remaining input is `UnexpectedEof`; a mismatch is
+    /// `UnexpectedCharacter`.
+    fn expectStr(self: *ParseContext, text: []const u8) !void {
+        if (self.source.len < self.offset + text.len) return error.UnexpectedEof;
+        if (!mem.startsWith(u8, self.source[self.offset..], text)) return error.UnexpectedCharacter;
+
+        // Step byte by byte so line/column tracking stays correct.
+        for (text) |_| {
+            _ = self.consumeNoEof();
+        }
+    }
+
+    /// Skip spaces, tabs, newlines and carriage returns; report
+    /// whether at least one was skipped.
+    fn eatWs(self: *ParseContext) bool {
+        var saw_ws = false;
+
+        while (self.peek()) |ch| : (saw_ws = true) {
+            if (ch != ' ' and ch != '\t' and ch != '\n' and ch != '\r') break;
+            _ = self.consumeNoEof();
+        }
+
+        return saw_ws;
+    }
+
+    /// Require at least one whitespace byte at the cursor.
+    fn expectWs(self: *ParseContext) !void {
+        if (!self.eatWs()) return error.UnexpectedCharacter;
+    }
+
+    /// Return the source line containing the cursor, without its
+    /// trailing newline.
+    fn currentLine(self: ParseContext) []const u8 {
+        const consumed = self.source[0..self.offset];
+        const begin = if (mem.lastIndexOfScalar(u8, consumed, '\n')) |prev_nl| prev_nl + 1 else 0;
+        const end = mem.indexOfScalarPos(u8, self.source, self.offset, '\n') orelse self.source.len;
+        return self.source[begin..end];
+    }
+};
+
+// Exercises the cursor primitives in sequence on one input; each
+// assertion depends on the position left by the previous call.
+test "ParseContext" {
+    {
+        var ctx = ParseContext.init("I like pythons");
+        try testing.expectEqual(@as(?u8, 'I'), ctx.peek());
+        try testing.expectEqual(@as(u8, 'I'), ctx.consumeNoEof());
+        try testing.expectEqual(@as(?u8, ' '), ctx.peek());
+        try testing.expectEqual(@as(u8, ' '), try ctx.consume());
+
+        // A failed `eat` must leave the cursor where it was.
+        try testing.expect(ctx.eat('l'));
+        try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
+        try testing.expectEqual(false, ctx.eat('a'));
+        try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
+
+        // Same for a failed `expect`.
+        try ctx.expect('i');
+        try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
+        try testing.expectError(error.UnexpectedCharacter, ctx.expect('a'));
+        try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
+
+        try testing.expect(ctx.eatStr("ke"));
+        try testing.expectEqual(@as(?u8, ' '), ctx.peek());
+
+        try testing.expect(ctx.eatWs());
+        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
+        try testing.expectEqual(false, ctx.eatWs());
+        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
+
+        // A string longer than the remaining input is rejected.
+        try testing.expectEqual(false, ctx.eatStr("aaaaaaaaa"));
+        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
+
+        try testing.expectError(error.UnexpectedEof, ctx.expectStr("aaaaaaaaa"));
+        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
+        try testing.expectError(error.UnexpectedCharacter, ctx.expectStr("pytn"));
+        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
+        try ctx.expectStr("python");
+        try testing.expectEqual(@as(?u8, 's'), ctx.peek());
+    }
+
+    // Empty input: every read reports end of input.
+    {
+        var ctx = ParseContext.init("");
+        try testing.expectEqual(ctx.peek(), null);
+        try testing.expectError(error.UnexpectedEof, ctx.consume());
+        try testing.expectEqual(ctx.eat('p'), false);
+        try testing.expectError(error.UnexpectedEof, ctx.expect('p'));
+    }
+}
+
+/// Explicit error set for the parser. `parseContent` declares it as
+/// its return error set, which the mutually recursive
+/// `parseContent` / `tryParseElement` pair relies on.
+pub const ParseError = error{
+    IllegalCharacter,
+    UnexpectedEof,
+    UnexpectedCharacter,
+    UnclosedValue,
+    UnclosedComment,
+    InvalidName,
+    InvalidEntity,
+    InvalidStandaloneValue,
+    NonMatchingClosingTag,
+    InvalidDocument,
+    OutOfMemory,
+};
+
+/// Parse `source` into a `Document`. The document owns an arena
+/// backed by `backing_allocator`; release it with `Document.deinit`.
+pub fn parse(backing_allocator: Allocator, source: []const u8) !Document {
+    var cursor = ParseContext.init(source);
+    const doc = try parseDocument(&cursor, backing_allocator);
+    return doc;
+}
+
+/// Parse a whole document: optional leading comments, optional XML
+/// declaration, exactly one root element, optional trailing comments.
+/// Anything left over after that is `error.InvalidDocument`.
+fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
+    var doc: Document = .{
+        .arena = ArenaAllocator.init(backing_allocator),
+        .xml_decl = null,
+        // SAFETY: assigned below from `tryParseElement` before `doc`
+        // is returned to the caller. If parsing fails, the error is
+        // propagated and the caller never sees a half-initialized doc.
+        .root = undefined,
+    };
+
+    // On any failure below, release everything allocated so far.
+    errdefer doc.deinit();
+
+    const arena_alloc = doc.arena.allocator();
+
+    try trySkipComments(ctx, arena_alloc);
+
+    doc.xml_decl = try tryParseProlog(ctx, arena_alloc);
+    _ = ctx.eatWs();
+    try trySkipComments(ctx, arena_alloc);
+
+    if (try tryParseElement(ctx, arena_alloc, null)) |root_element| {
+        doc.root = root_element;
+    } else {
+        return error.InvalidDocument;
+    }
+    _ = ctx.eatWs();
+    try trySkipComments(ctx, arena_alloc);
+
+    // Only whitespace and comments may follow the root element.
+    if (ctx.peek()) |_| return error.InvalidDocument;
+
+    return doc;
+}
+
+/// Parse a quoted attribute value (single or double quotes) and
+/// return an unescaped copy allocated with `alloc`. A missing closing
+/// quote is `error.UnclosedValue`.
+fn parseAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
+    const quote = try ctx.consume();
+    switch (quote) {
+        '"', '\'' => {},
+        else => return error.UnexpectedCharacter,
+    }
+
+    const begin = ctx.offset;
+
+    // Consume up to and including the matching closing quote.
+    while ((ctx.consume() catch return error.UnclosedValue) != quote) {}
+
+    // The closing quote was consumed, so step back over it.
+    const raw = ctx.source[begin .. ctx.offset - 1];
+    return try dupeAndUnescape(alloc, raw);
+}
+
+/// Parse `= "value"` with optional whitespace on either side of the
+/// equals sign, returning the unescaped value.
+fn parseEqAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
+    _ = ctx.eatWs();
+    try ctx.expect('=');
+    _ = ctx.eatWs();
+
+    const value = try parseAttrValue(ctx, alloc);
+    return value;
+}
+
+/// Parse a name at the cursor and return it as a slice of the source
+/// (no copy). An empty name is `error.InvalidName`.
+fn parseNameNoDupe(ctx: *ParseContext) ![]const u8 {
+    // XML's spec on names is very long, so to make this easier
+    // we just take any character that is not special and not whitespace
+    const terminators = " \t\n\r&\"'<>?=/";
+    const begin = ctx.offset;
+
+    while (ctx.peek()) |ch| {
+        if (mem.indexOfScalar(u8, terminators, ch) != null) break;
+        _ = ctx.consumeNoEof();
+    }
+
+    if (ctx.offset == begin) return error.InvalidName;
+
+    return ctx.source[begin..ctx.offset];
+}
+
+/// Consume character data up to the next `<` (or end of input) and
+/// return an unescaped copy; null if there was none.
+fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
+    const begin = ctx.offset;
+
+    while (ctx.peek()) |ch| {
+        if (ch == '<') break;
+        _ = ctx.consumeNoEof();
+    }
+
+    if (ctx.offset == begin) return null;
+
+    return try dupeAndUnescape(alloc, ctx.source[begin..ctx.offset]);
+}
+
+/// Parse one content item inside an element, trying character data,
+/// then a comment, then a nested element. If none applies the result
+/// is `error.UnexpectedCharacter`.
+fn parseContent(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) ParseError!Content {
+    if (try tryParseCharData(ctx, alloc)) |cd| return .{ .CharData = cd };
+    if (try tryParseComment(ctx, alloc)) |comment| return .{ .Comment = comment };
+    if (try tryParseElement(ctx, alloc, parent)) |elem| return .{ .Element = elem };
+
+    return error.UnexpectedCharacter;
+}
+
+/// Parse one `name = "value"` attribute. Returns null when no name
+/// starts at the cursor; once a name is found, a malformed remainder
+/// is an error.
+fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
+    const name = parseNameNoDupe(ctx) catch return null;
+    // Optional whitespace, `=`, optional whitespace, quoted value.
+    const value = try parseEqAttrValue(ctx, alloc);
+
+    const attr = try alloc.create(Attribute);
+    attr.* = .{
+        .name = try alloc.dupe(u8, name),
+        .value = value,
+    };
+    return attr;
+}
+
+/// Parse one element at the cursor, including attributes and, unless
+/// it is self-closing, its content and matching closing tag.
+///
+/// Returns null, with `ctx` fully restored, when the input does not
+/// start with `<` followed by a name. After that point malformed
+/// input is reported as an error.
+///
+/// When `parent` is given, the most recent element already in
+/// `parent.children` gets its `next_sibling` pointed at the new
+/// element. This applies to self-closing elements as well. The new
+/// element itself is appended to the parent by the caller.
+fn tryParseElement(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) !?*Element {
+    // Snapshot offset, line and column so a non-match leaves the
+    // cursor exactly as it was found.
+    const saved = ctx.*;
+    if (!ctx.eat('<')) return null;
+    const tag = parseNameNoDupe(ctx) catch {
+        ctx.* = saved;
+        return null;
+    };
+
+    const element = try alloc.create(Element);
+    element.* = Element.init(try alloc.dupe(u8, tag), alloc);
+
+    // Attributes must each be preceded by whitespace.
+    while (ctx.eatWs()) {
+        const attr = (try tryParseAttr(ctx, alloc)) orelse break;
+        try element.attributes.append(element.allocator, attr);
+    }
+
+    if (!ctx.eatStr("/>")) {
+        try ctx.expect('>');
+
+        // Content runs until the closing tag opener.
+        while (true) {
+            if (ctx.peek() == null) {
+                return error.UnexpectedEof;
+            } else if (ctx.eatStr("</")) {
+                break;
+            }
+
+            const content = try parseContent(ctx, alloc, element);
+            try element.children.append(element.allocator, content);
+        }
+
+        const closing_tag = try parseNameNoDupe(ctx);
+        if (!std.mem.eql(u8, tag, closing_tag)) {
+            return error.NonMatchingClosingTag;
+        }
+
+        _ = ctx.eatWs();
+        try ctx.expect('>');
+    }
+
+    // Link the previous sibling element (if any) to this one. The new
+    // element is not in `parent.children` yet, so the last element
+    // found there is its predecessor.
+    if (parent) |p| {
+        var i = p.children.items.len;
+        while (i > 0) {
+            i -= 1;
+            switch (p.children.items[i]) {
+                .Element => |prev| {
+                    prev.next_sibling = element;
+                    break;
+                },
+                else => {},
+            }
+        }
+    }
+
+    return element;
+}
+
+// Covers: rejection of a nameless tag, attributes with both quote
+// styles, simple text content, and mixed content ordering.
+test "tryParseElement" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    // `<` not followed by a name: no element, cursor stays on `<`.
+    {
+        var ctx = ParseContext.init("<= a='b'/>");
+        try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc, null));
+        try testing.expectEqual(@as(?u8, '<'), ctx.peek());
+    }
+
+    // Self-closing element with two attributes, in source order.
+    {
+        var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
+        const elem = try tryParseElement(&ctx, alloc, null);
+        try testing.expectEqualSlices(u8, elem.?.tag, "python");
+
+        const size_attr = elem.?.attributes.items[0];
+        try testing.expectEqualSlices(u8, size_attr.name, "size");
+        try testing.expectEqualSlices(u8, size_attr.value, "15");
+
+        const color_attr = elem.?.attributes.items[1];
+        try testing.expectEqualSlices(u8, color_attr.name, "color");
+        try testing.expectEqualSlices(u8, color_attr.value, "green");
+    }
+
+    // Element with a single text child.
+    {
+        var ctx = ParseContext.init("<python>test</python>");
+        const elem = try tryParseElement(&ctx, alloc, null);
+        try testing.expectEqualSlices(u8, elem.?.tag, "python");
+        try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
+    }
+
+    // Text, elements and a comment are kept in document order.
+    {
+        var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
+        const elem = try tryParseElement(&ctx, alloc, null);
+        try testing.expectEqualSlices(u8, elem.?.tag, "a");
+        try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
+        try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
+        try testing.expectEqualSlices(u8, elem.?.children.items[2].CharData, "d");
+        try testing.expectEqualSlices(u8, elem.?.children.items[3].Element.tag, "e");
+        try testing.expectEqualSlices(u8, elem.?.children.items[4].CharData, "f");
+        try testing.expectEqualSlices(u8, elem.?.children.items[5].Comment, "g");
+    }
+}
+
+/// Parse an optional `<?xml version=... [encoding=...] [standalone=...]?>`
+/// declaration at the cursor.
+///
+/// Returns null, with `ctx` fully restored, when the input does not
+/// start with `<?` followed by the exact name `xml` (a missing or
+/// different name is not an error here). Once `<?xml` has been seen,
+/// a malformed declaration is reported as an error.
+fn tryParseProlog(ctx: *ParseContext, alloc: Allocator) !?*XmlDecl {
+    // Snapshot offset, line and column so a non-match leaves the
+    // cursor exactly as it was found.
+    const saved = ctx.*;
+    if (!ctx.eatStr("<?")) return null;
+    const target = parseNameNoDupe(ctx) catch {
+        ctx.* = saved;
+        return null;
+    };
+    if (!mem.eql(u8, target, "xml")) {
+        ctx.* = saved;
+        return null;
+    }
+
+    const decl = try alloc.create(XmlDecl);
+    decl.encoding = null;
+    decl.standalone = null;
+
+    // Version info is mandatory
+    try ctx.expectWs();
+    try ctx.expectStr("version");
+    decl.version = try parseEqAttrValue(ctx, alloc);
+
+    if (ctx.eatWs()) {
+        // Optional encoding and standalone info
+        var require_ws = false;
+
+        if (ctx.eatStr("encoding")) {
+            decl.encoding = try parseEqAttrValue(ctx, alloc);
+            require_ws = true;
+        }
+
+        // After `encoding`, separating whitespace is required before
+        // `standalone`. Without `encoding`, the whitespace was already
+        // consumed above, so none can remain here.
+        if (require_ws == ctx.eatWs() and ctx.eatStr("standalone")) {
+            const standalone = try parseEqAttrValue(ctx, alloc);
+            if (std.mem.eql(u8, standalone, "yes")) {
+                decl.standalone = true;
+            } else if (std.mem.eql(u8, standalone, "no")) {
+                decl.standalone = false;
+            } else {
+                return error.InvalidStandaloneValue;
+            }
+        }
+
+        _ = ctx.eatWs();
+    }
+
+    try ctx.expectStr("?>");
+    return decl;
+}
+
+// Covers: a processing-instruction name other than `xml`, a
+// version-only declaration, and a full declaration with irregular
+// whitespace around `=`.
+test "tryParseProlog" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    // `xmla` is not `xml`: no declaration, cursor stays on `<`.
+    {
+        var ctx = ParseContext.init("<?xmla version='aa'?>");
+        try testing.expectEqual(@as(?*XmlDecl, null), try tryParseProlog(&ctx, alloc));
+        try testing.expectEqual(@as(?u8, '<'), ctx.peek());
+    }
+
+    // Only the mandatory version; optional fields stay null.
+    {
+        var ctx = ParseContext.init("<?xml version='aa'?>");
+        const decl = try tryParseProlog(&ctx, alloc);
+        try testing.expectEqualSlices(u8, "aa", decl.?.version);
+        try testing.expectEqual(@as(?[]const u8, null), decl.?.encoding);
+        try testing.expectEqual(@as(?bool, null), decl.?.standalone);
+    }
+
+    // All three fields, mixed quote styles and spacing.
+    {
+        var ctx = ParseContext.init("<?xml version=\"aa\" encoding = 'bbb' standalone   \t =   'yes'?>");
+        const decl = try tryParseProlog(&ctx, alloc);
+        try testing.expectEqualSlices(u8, "aa", decl.?.version);
+        try testing.expectEqualSlices(u8, "bbb", decl.?.encoding.?);
+        try testing.expectEqual(@as(?bool, true), decl.?.standalone.?);
+    }
+}
+
+/// Skip any run of comments at the cursor, along with the
+/// whitespace that follows each one.
+fn trySkipComments(ctx: *ParseContext, alloc: Allocator) !void {
+    while (true) {
+        const comment = try tryParseComment(ctx, alloc);
+        if (comment == null) return;
+        _ = ctx.eatWs();
+    }
+}
+
+/// Parse a `<!-- ... -->` comment at the cursor and return a copy of
+/// its text without the delimiters. Returns null if no comment starts
+/// here; a comment with no terminator is `error.UnclosedComment`.
+fn tryParseComment(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
+    if (!ctx.eatStr("<!--")) return null;
+
+    const begin = ctx.offset;
+    while (true) {
+        if (ctx.eatStr("-->")) break;
+        _ = ctx.consume() catch return error.UnclosedComment;
+    }
+
+    // The terminator has been consumed; exclude it from the text.
+    const body = ctx.source[begin .. ctx.offset - "-->".len];
+    return try alloc.dupe(u8, body);
+}
+
+/// Map one of the five predefined XML entity references (including
+/// the leading `&` and trailing `;`) to the character it stands for.
+/// Anything else is `error.InvalidEntity`.
+fn unescapeEntity(text: []const u8) !u8 {
+    const references = [_][]const u8{ "&lt;", "&gt;", "&amp;", "&apos;", "&quot;" };
+    const replacements = [_]u8{ '<', '>', '&', '\'', '"' };
+
+    for (references, replacements) |reference, replacement| {
+        if (std.mem.eql(u8, text, reference)) return replacement;
+    }
+
+    return error.InvalidEntity;
+}
+
+/// Copy `text`, replacing each predefined XML entity reference
+/// (`&lt;`, `&gt;`, `&amp;`, `&apos;`, `&quot;`) with the character it
+/// names. Caller owns the returned slice.
+///
+/// Returns `error.InvalidEntity` for a `&` with no closing `;` or an
+/// unrecognised reference; the scratch buffer is released on that
+/// path, so the function is safe with non-arena allocators.
+fn dupeAndUnescape(alloc: Allocator, text: []const u8) ![]const u8 {
+    // Unescaping never grows the text, so `text.len` is an upper bound.
+    const str = try alloc.alloc(u8, text.len);
+
+    var j: usize = 0;
+    {
+        // Scoped so the buffer is freed only if unescaping fails; the
+        // shrink step below does its own cleanup.
+        errdefer alloc.free(str);
+
+        var i: usize = 0;
+        while (i < text.len) : (j += 1) {
+            if (text[i] == '&') {
+                const entity_end = 1 + (mem.indexOfScalarPos(u8, text, i, ';') orelse return error.InvalidEntity);
+                str[j] = try unescapeEntity(text[i..entity_end]);
+                i = entity_end;
+            } else {
+                str[j] = text[i];
+                i += 1;
+            }
+        }
+    }
+
+    // Shrink in place when the allocator allows it; otherwise copy the
+    // used prefix into a right-sized allocation and drop the original.
+    if (!alloc.resize(str, j)) {
+        defer alloc.free(str);
+        return alloc.dupe(u8, str[0..j]) catch return error.OutOfMemory;
+    }
+    return str[0..j];
+}
+
+// Success cases use `testing.allocator` directly and free the result.
+// Error cases go through an arena, so any buffer still allocated when
+// the error is returned is reclaimed by `arena.deinit()`.
+test "dupeAndUnescape" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    const duped = try dupeAndUnescape(testing.allocator, "test");
+    defer testing.allocator.free(duped);
+    try testing.expectEqualSlices(u8, "test", duped);
+    // All five predefined entities, including one at the very end.
+    const duped2 = try dupeAndUnescape(testing.allocator, "a&lt;b&amp;c&gt;d&quot;e&apos;f&lt;");
+    defer testing.allocator.free(duped2);
+    try testing.expectEqualSlices(u8, "a<b&c>d\"e'f<", duped2);
+    // Unterminated or unknown references are rejected.
+    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&"));
+    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&&"));
+    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&test;"));
+    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&boa"));
+}
+
+// Comments before and after the root element must be skipped.
+test "Top level comments" {
+    var arena = ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+
+    const source = "<?xml version='aa'?><!--comment--><python color='green'/><!--another comment-->";
+    const doc = try parse(arena.allocator(), source);
+    try testing.expectEqualSlices(u8, "python", doc.root.tag);
+}
diff --git a/src/service.zig b/src/service.zig
index 98db5e2..ef7beba 100644
--- a/src/service.zig
+++ b/src/service.zig
@@ -1798,12 +1798,16 @@ pub const DataService = struct {
             .splits => "/splits",
             .etf_profile => return false, // not served
             .meta => return false,
-            // New variants wired into the endpoint mapping by
-            // Milestone 1 chunk 3 (DataService methods). For now
-            // they're not yet served; clients fall through to live
-            // provider fetch via getClassification / getEntityFacts /
-            // getEtfMetrics, which don't exist yet.
+            // Endpoint mapping for these will be wired when the
+            // corresponding `getClassification` / `getEntityFacts` /
+            // `getEtfMetrics` service methods land. Until then,
+            // server sync is a no-op for them.
             .classification, .etf_metrics, .entity_facts => return false,
+            // Provider-internal cache files (ticker-map indexes)
+            // are not served — clients fetch them directly from
+            // the SEC. The DataService caches the JSON via
+            // `Store` after fetching; the server has no role.
+            .tickers_funds, .tickers_companies => return false,
         };
 
         const full_url = std.fmt.allocPrint(self.allocator, "{s}/{s}{s}", .{ server_url, symbol, endpoint }) catch return false;