//! DataService -- unified data access layer for zfin. //! //! Encapsulates the "check cache -> fresh? return -> else fetch from provider -> cache -> return" //! pattern that was previously duplicated between CLI and TUI. Both frontends should use this //! as their sole data source. //! //! Provider selection is internal: each data type routes to the appropriate provider //! based on available API keys. Callers never need to know which provider was used. const std = @import("std"); const builtin = @import("builtin"); const log = std.log.scoped(.service); const Date = @import("Date.zig"); const Candle = @import("models/candle.zig").Candle; const Dividend = @import("models/dividend.zig").Dividend; const Split = @import("models/split.zig").Split; const OptionsChain = @import("models/option.zig").OptionsChain; const EarningsEvent = @import("models/earnings.zig").EarningsEvent; const Quote = @import("models/quote.zig").Quote; const EtfProfile = @import("models/etf_profile.zig").EtfProfile; const Holding = @import("models/etf_profile.zig").Holding; const SectorWeight = @import("models/etf_profile.zig").SectorWeight; const Config = @import("Config.zig"); const cache = @import("cache/store.zig"); const srf = @import("srf"); const analysis = @import("analytics/analysis.zig"); const transaction_log = @import("models/transaction_log.zig"); const TwelveData = @import("providers/twelvedata.zig").TwelveData; const Polygon = @import("providers/polygon.zig").Polygon; const Fmp = @import("providers/fmp.zig").Fmp; const Cboe = @import("providers/cboe.zig").Cboe; const OpenFigi = @import("providers/openfigi.zig"); const Yahoo = @import("providers/yahoo.zig").Yahoo; const Tiingo = @import("providers/tiingo.zig").Tiingo; const Wikidata = @import("providers/Wikidata.zig"); const Edgar = @import("providers/Edgar.zig"); const classification = @import("models/classification.zig"); const fmt = @import("format.zig"); const performance = @import("analytics/performance.zig"); const http = 
@import("net/http.zig");
const atomic = @import("atomic.zig");

// ── Wall-clock policy ────────────────────────────────────────
//
// `FetchResult.timestamp` records when a given fetch or cached-read
// completed. Each `std.Io.Timestamp.now(self.io, .real)` call in
// this file stamps one specific fetch — a single command invocation
// produces many fetches, each with its own real-time stamp. Threading
// `now_s` in from the caller would collapse all per-fetch timestamps to
// the command-entry time, which is not what callers want when they
// display "fetched 3s ago" for some symbols and "cached 2d ago" for
// others in the same command.

/// Errors surfaced by `DataService` operations.
pub const DataError = error{
    /// A required credential is missing. `getProvider` returns this
    /// when the provider's API key (or, for Wikidata / EDGAR, the
    /// contact email) is absent from `Config`.
    NoApiKey,
    /// The request could not be satisfied: the provider fetch failed,
    /// or the cache missed while `FetchOptions.skip_network` was set.
    FetchFailed,
    CacheError,
    ParseError,
    OutOfMemory,
    /// Transient provider failure (server error, connection issue).
    /// Caller should stop and retry later.
    TransientError,
    /// Provider auth failure (bad API key). Entire refresh should stop.
    AuthError,
    /// Provider returned a rate-limit response (e.g. SEC EDGAR's
    /// 10-req/sec ceiling, or a free-tier candle API's per-minute
    /// cap). Caller should stop the current batch and surface a
    /// "try again later" message; retrying immediately will just hit
    /// the same limit.
    RateLimited,
    /// Provider responded but doesn't have data for the requested
    /// symbol (404, "Error Message" body, or equivalent). Distinct
    /// from `FetchFailed` so callers (e.g. `enrich`) can tell the
    /// user "this symbol isn't in the provider's catalog; mark it
    /// manually" instead of an opaque "fetch failed."
    NotFound,
};

/// Per-call options controlling cache vs network behavior. Drives
/// the `--refresh-data` global flag's three modes:
///
/// - `--refresh-data=auto` → `.{}` (default; respect TTL, fetch on stale/miss).
/// - `--refresh-data=never` → `.{ .skip_network = true }` (offline mode;
///   return cached data even if stale, treat cache miss as unavailable).
/// - `--refresh-data=force` → `.{ .force_refresh = true }` (ignore cache TTL,
///   fetch fresh from provider).
///
/// `skip_network` and `force_refresh` represent contradictory intents.
/// The CLI flag cannot produce the combination — `RefreshPolicy` is a
/// 3-variant enum, so the user can never set both. But because the
/// underlying shape is two independent booleans, an internal caller
/// constructing `FetchOptions` directly *could* produce the
/// combination. When both are true, **`skip_network` wins**:
///
/// - The call returns cached data (fresh or stale, whatever's there).
/// - `force_refresh` has no effect — no network is touched.
///
/// This is the safe default: when in doubt, don't reach the network.
/// Internal callers that genuinely want fresh data should set
/// `force_refresh = true, skip_network = false`.
pub const FetchOptions = struct {
    /// Skip provider fetches and server sync. Returns cached data
    /// (even if stale) or null/empty on cache miss. Wins over
    /// `force_refresh` when both are set.
    skip_network: bool = false,
    /// Force a fresh fetch ignoring cache TTL. No-op when
    /// `skip_network` is also set.
    force_refresh: bool = false,
};

/// Decide whether a provider failure is permanent enough to merit a
/// negative-cache entry. Negative entries suppress retries until the
/// next manual `--refresh-data=force` / `cache clear`, so writing one is only
/// safe when we're confident more attempts won't succeed.
///
/// Today the only certain-permanent failure is `NotFound`: the symbol
/// just doesn't have data of this type at this provider. Everything
/// else (rate limit, network blip, server 5xx, auth, parse error) is
/// either transient or fixable; recording a negative entry would
/// silently suppress retries for hours/days.
///
/// Rate-limit (`error.RateLimited`) is excluded here because callers
/// handle it specially (single retry after backoff). Anything that
/// reaches this classifier and isn't `NotFound` returns false →
/// caller returns `FetchFailed` without poisoning the cache.
pub fn isPermanentProviderFailure(err: anyerror) bool {
    return switch (err) {
        error.NotFound => true,
        else => false,
    };
}

/// Result of a CUSIP-to-ticker lookup (provider-agnostic).
pub const CusipResult = OpenFigi.FigiResult;

/// Result of an EDGAR ticker-map fallback lookup. Returned by
/// `DataService.lookupEdgarFallback` so commands consume a
/// digested shape instead of pulling in `TickerMap` /
/// `MutualFundTickerEntry` / `CompanyTickerEntry` (those are
/// provider-internal).
///
/// `enrich` uses this to decide what metadata.srf line to emit
/// when Wikidata had no match for a symbol.
pub const EdgarLookup = union(enum) {
    /// Symbol matched the EDGAR mutual-fund / managed-fund map.
    /// Generic "Fund" label (the `tickers_funds.srf` file mixes
    /// mutual funds and series-of-trust ETFs; we can't tell
    /// which without digging into submissions metadata).
    managed_fund,
    /// Symbol matched the EDGAR company / UIT map. `title` is
    /// the entry's `title` (e.g. "SPDR S&P 500 ETF TRUST"),
    /// allocated by the service's allocator — caller frees with
    /// `freeEdgarLookup` when done. It is null when the entry has
    /// no title or the copy could not be allocated. The `is_etf`
    /// flag is set when the title contains "ETF" or "TRUST" —
    /// operating companies usually have Wikidata coverage and
    /// wouldn't reach this fallback, so a UIT-style hit is almost
    /// certainly an ETF.
    company_or_uit: struct { title: ?[]const u8, is_etf: bool },
    /// Symbol not in either EDGAR map.
    none,
};

/// Free any owned strings inside an `EdgarLookup`. Currently
/// only `.company_or_uit.title` is owned; `.managed_fund` and
/// `.none` are no-ops.
pub fn freeEdgarLookup(allocator: std.mem.Allocator, lookup: EdgarLookup) void {
    switch (lookup) {
        .managed_fund, .none => {},
        .company_or_uit => |hit| {
            const owned_title = hit.title orelse return;
            allocator.free(owned_title);
        },
    }
}

/// Look up `sym` in the supplied EDGAR ticker maps. Pure data
/// transform; no network or file I/O.
///
/// The mutual-fund map is consulted first; a hit there returns
/// `.managed_fund` without looking at the company map.
///
/// Both maps may be null (caller failed to load one or both).
/// A null map produces a `none` result for that pass.
///
/// On `.company_or_uit`, the returned `title` is duped from the
/// underlying entry using `allocator` so the caller can use it
/// after the maps are freed. Free with `freeEdgarLookup`.
///
/// `is_etf` is derived from the map entry's own title, not from the
/// copy, so an allocation failure while duplicating the title only
/// costs the caller the title text (`title == null`) — it never
/// flips the ETF classification.
fn lookupInTickerMaps(
    allocator: std.mem.Allocator,
    sym: []const u8,
    mf_map: ?*const Edgar.TickerMap(Edgar.MutualFundTickerEntry),
    co_map: ?*const Edgar.TickerMap(Edgar.CompanyTickerEntry),
) EdgarLookup {
    if (mf_map) |funds| {
        if (funds.get(sym) != null) return .managed_fund;
    }

    const companies = co_map orelse return .none;
    const entry = companies.get(sym) orelse return .none;

    // Classify from the borrowed title before attempting the copy.
    const source_title: []const u8 = entry.title orelse "";
    const is_etf = std.ascii.indexOfIgnoreCase(source_title, "ETF") != null or
        std.ascii.indexOfIgnoreCase(source_title, "TRUST") != null;

    // The return type carries no error, so a failed copy degrades to
    // "no title" — but say so instead of dropping it silently.
    const title_owned: ?[]const u8 = if (entry.title) |t|
        allocator.dupe(u8, t) catch |err| blk: {
            log.warn("{s}: could not copy EDGAR title: {s}", .{ sym, @errorName(err) });
            break :blk null;
        }
    else
        null;

    return .{ .company_or_uit = .{ .title = title_owned, .is_etf = is_etf } };
}

/// Indicates whether the returned data came from cache or was freshly fetched.
pub const Source = enum {
    cached,
    fetched,
};

/// In-memory payload shape for a fetched type `T`.
///
/// Almost everything is a slice of records (`[]Candle`, `[]Dividend`,
/// …) — the same shape the cache stores. `EtfProfile` is the lone
/// exception: `getEtfProfile` assembles a single struct from the
/// `etf_metrics` cache rather than returning a slice, so its payload
/// is the struct itself. The cache layer never stores `EtfProfile`
/// directly, which is why this single-struct knowledge lives here in
/// the fetch layer rather than in `Store.DataFor`.
fn PayloadFor(comptime T: type) type {
    if (T == EtfProfile) return EtfProfile;
    return []T;
}

/// Generic result type for all fetch operations: data payload + provenance metadata.
///
/// `data` is owned by `allocator` — call `result.deinit()` to release
/// it (both the outer slice/struct and any nested owned fields). This
/// replaces the earlier "caller frees with whatever allocator they
/// happen to have" pattern, which was error-prone when the caller's
/// allocator (e.g. an arena) differed from the service's allocator.
pub fn FetchResult(comptime T: type) type {
    return struct {
        data: PayloadFor(T),
        source: Source,
        timestamp: i64,
        /// Allocator that owns `data`. Populated by the service on
        /// every return path; callers use it via `deinit` rather than
        /// touching it directly.
        allocator: std.mem.Allocator,

        /// Free `data` and any nested owned fields.
        ///
        /// Dispatches at comptime, first match wins:
        /// - If `T` has a `freeSlice` helper (Dividend, OptionsChain),
        ///   call it — handles element deinit plus the outer slice.
        /// - Else if `data` is a slice (Candle, Split, EarningsEvent),
        ///   do a simple slice free.
        /// - Else if `T` has a `deinit` method (EtfProfile), call it
        ///   on the struct itself.
        /// - Otherwise there is nothing to release.
        pub fn deinit(self: @This()) void {
            const Payload = @TypeOf(self.data);
            if (@hasDecl(T, "freeSlice")) {
                T.freeSlice(self.allocator, self.data);
            } else if (@typeInfo(Payload) == .pointer) {
                self.allocator.free(self.data);
            } else if (@hasDecl(T, "deinit")) {
                self.data.deinit(self.allocator);
            }
        }
    };
}

// ── PostProcess callbacks ────────────────────────────────────
// `Store.read` parses with `parse_allocator = .{ .allocator = ... }`,
// so SRF dupes every owned string into the caller's allocator
// automatically. PostProcess callbacks remain only for non-trivial
// post-parse logic (e.g. recomputing derived fields). String duping
// is NOT a valid reason to add a postProcess.

/// Recompute surprise/surprise_percent from actual and estimate fields.
/// SRF only stores actual and estimate; surprise is derived.
///
/// Leaves the event untouched unless both `actual` and `estimate` are
/// present. `surprise_percent` is only written for a non-zero estimate
/// (avoids dividing by zero).
fn earningsPostProcess(ev: *EarningsEvent, _: std.mem.Allocator) anyerror!void {
    const actual = ev.actual orelse return;
    const estimate = ev.estimate orelse return;

    const surprise = actual - estimate;
    ev.surprise = surprise;
    if (estimate != 0) {
        ev.surprise_percent = (surprise / @abs(estimate)) * 100.0;
    }
}

pub const DataService = struct {
    /// Allocator used for all DataService-internal allocations.
    /// Callers must pass a thread-safe allocator; this field is the
    /// caller's allocator as-is, with no wrapper around it.
    ///
    /// Why thread-safety matters: `parallelServerSync` spawns worker
    /// threads that each allocate through `DataService` — HTTP client
    /// init, TLS cert bundle parsing, request/response buffers, and
    /// `Store.writeRaw` path joins. Unsynchronized concurrent allocs
    /// through a non-thread-safe allocator corrupt its internal state.
    /// Symptoms seen in the wild:
    ///
    ///     thread N panic: reached unreachable code
    ///     std/mem/Allocator.zig:147 grow
    ///     std/hash_map.zig:1296 addCertsFromFile
    ///     std/crypto/Certificate/Bundle.zig:206 request
    ///     std/http/Client.zig:1789 request
    ///     src/net/http.zig:43 syncFromServer
    ///
    /// and bare segfaults mid-heap on whatever pointer got scrambled
    /// that run.
    ///
    /// History: earlier versions wrapped the caller's allocator in a
    /// mutex-serialized allocator because the CLI's root
    /// `ArenaAllocator` (`src/main.zig`) was not thread-safe at the
    /// time. Per the Zig 0.16 migration note this comment carries
    /// forward, the Juicy-Main-provided `init.gpa` (DebugAllocator)
    /// is thread-safe by default when not single-threaded, and
    /// `ArenaAllocator` is thread-safe and lock-free — so we no
    /// longer wrap it ourselves.
    ///
    /// DO NOT add an "unwrap" method or pass a non-thread-safe
    /// allocator. The point is that internal callers don't need to
    /// know whether they're running under threads — the allocator
    /// itself guarantees safety.
allocator: std.mem.Allocator,
    io: std.Io,
    config: Config,

    // Lazily initialized providers (null until first use)
    td: ?TwelveData = null,
    pg: ?Polygon = null,
    fmp: ?Fmp = null,
    cboe: ?Cboe = null,
    yh: ?Yahoo = null,
    tg: ?Tiingo = null,
    wikidata: ?Wikidata = null,
    edgar: ?Edgar = null,

    /// Test-only guard: when true, any code path that would touch
    /// the network panics with a clear message. Used by offline-mode
    /// tests to verify that `FetchOptions.skip_network = true`
    /// genuinely doesn't reach the network. Default false; never
    /// set in production.
    panic_on_network_attempt: bool = false,

    /// Build a service with no providers instantiated yet; each provider
    /// is created on first use by `getProvider`. Outside of tests this
    /// also logs which optional API keys are missing.
    pub fn init(io: std.Io, allocator: std.mem.Allocator, config: Config) DataService {
        const service: DataService = .{
            .allocator = allocator,
            .io = io,
            .config = config,
        };
        // Missing-key warnings are noise under `zig build test` where
        // every test that spins up a DataService re-emits the whole
        // block. Real users always see them at CLI/TUI startup.
        if (!builtin.is_test) service.logMissingKeys();
        return service;
    }

    /// Log warnings for missing API keys so users know which features are unavailable.
    fn logMissingKeys(self: DataService) void {
        const cfg = self.config;
        const no_tiingo = cfg.tiingo_key == null;

        // Primary candle provider
        if (no_tiingo) {
            log.warn("TIINGO_API_KEY not set — candle data will fall back to TwelveData/Yahoo", .{});
        }
        // Dividend/split data
        if (cfg.polygon_key == null) {
            log.warn("POLYGON_API_KEY not set — dividend and split data unavailable", .{});
        }
        // Earnings data
        if (cfg.fmp_key == null) {
            log.warn("FMP_API_KEY not set — earnings data unavailable", .{});
        }
        // ETF profiles + portfolio enrichment now go through public
        // SEC EDGAR + Wikidata. Both require a contact email in
        // outbound User-Agents (SEC's policy).
        if (cfg.user_email == null) {
            log.warn("ZFIN_USER_EMAIL not set — ETF profiles + enrichment unavailable", .{});
        }
        // Candle fallback
        if (no_tiingo and cfg.twelvedata_key == null) {
            log.warn("TWELVEDATA_API_KEY not set — no candle fallback if Yahoo fails", .{});
        }
        // CUSIP lookups
        if (cfg.openfigi_key == null) {
            log.info("OPENFIGI_API_KEY not set — CUSIP lookups will use anonymous rate limits", .{});
        }
    }

    /// Release every provider that was instantiated. Providers that
    /// were never used are still null and are skipped.
    pub fn deinit(self: *DataService) void {
        inline for (.{ "td", "pg", "fmp", "cboe", "yh", "tg", "wikidata", "edgar" }) |provider_field| {
            if (@field(self, provider_field)) |*provider| provider.deinit();
        }
    }

    // ── Provider accessor ──────────────────────────────────────────

    /// Return the lazily-created provider of type `T`, constructing it
    /// on first use.
    ///
    /// Construction depends on the provider kind:
    /// - `Cboe` / `Yahoo`: no credential needed.
    /// - `Wikidata` / `Edgar`: need `config.user_email`.
    /// - everything else: needs `config.<lowercased type name>_key`.
    ///
    /// Returns `DataError.NoApiKey` when the required credential is
    /// missing from the config.
    fn getProvider(self: *DataService, comptime T: type) DataError!*T {
        const field_name = comptime providerField(T);
        const slot = &@field(self, field_name);
        if (slot.*) |*existing| return existing;

        if (T == Cboe or T == Yahoo) {
            // CBOE and Yahoo have no API key
            slot.* = T.init(self.io, self.allocator);
        } else if (T == Wikidata or T == Edgar) {
            // Open-data providers identified by contact email rather
            // than an API key. The email goes in User-Agent + From
            // headers per each provider's politeness contract.
            const email = self.config.user_email orelse return DataError.NoApiKey;
            slot.* = T.init(self.io, self.allocator, email);
        } else {
            // All we're doing here is lower casing the type name, then
            // appending _key to it, so Tiingo -> tiingo_key
            const config_key = comptime blk: {
                const full = @typeName(T);
                // Strip the namespace: keep what follows the last '.'.
                var start: usize = 0;
                for (full, 0..) |c, i| {
                    if (c == '.') start = i + 1;
                }
                const short = full[start..];
                const suffix = "_key";
                var buf: [short.len + suffix.len]u8 = undefined;
                _ = std.ascii.lowerString(buf[0..short.len], short);
                @memcpy(buf[short.len..][0..suffix.len], suffix);
                break :blk buf[0 .. short.len + suffix.len];
            };
            const key = @field(self.config, config_key) orelse return DataError.NoApiKey;
            slot.* = T.init(self.io, self.allocator, key);
        }
        return &slot.*.?;
    }

    /// Comptime lookup of the `DataService` field whose type is `?T`.
    /// Compile error when `T` is not one of the provider field types.
    fn providerField(comptime T: type) []const u8 {
        inline for (std.meta.fields(DataService)) |field| {
            if (field.type == ?T) return field.name;
        }
        @compileError("unknown provider type");
    }

    // ── Cache helper ─────────────────────────────────────────────

    /// Build a cache store handle rooted at `config.cache_dir`.
    fn store(self: *DataService) cache.Store {
        return cache.Store.init(self.io, self.allocator, self.config.cache_dir);
    }

    /// Generic fetch-or-cache for simple data types (dividends, splits, options).
    /// Checks cache first; on miss, fetches from the appropriate provider,
    /// writes to cache, and returns. On permanent fetch failure, writes a negative
    /// cache entry. Rate limit failures are retried once.
    ///
    /// `opts.skip_network = true` → returns cached data even if stale,
    /// returns FetchFailed on cache miss without touching the network.
    /// `opts.force_refresh = true` → treats cache as stale and fetches.
    ///
    /// Errors: `OutOfMemory` when the provider fetch runs out of memory
    /// (propagated as-is rather than disguised as a fetch failure);
    /// `FetchFailed` for every other provider failure and for an
    /// offline cache miss.
    fn fetchCached(
        self: *DataService,
        comptime T: type,
        symbol: []const u8,
        comptime postProcess: ?*const fn (*T, std.mem.Allocator) anyerror!void,
        opts: FetchOptions,
    ) DataError!FetchResult(T) {
        var s = self.store();
        const data_type = comptime cache.Store.dataTypeFor(T);
        const type_name = @tagName(data_type);

        // Force-refresh skips the fresh-cache early return; falls
        // through to provider fetch. Skip-network does the opposite:
        // returns cached even if stale, never touches the network.
        if (!opts.force_refresh) {
            if (s.read(self.allocator, T, symbol, postProcess, .fresh_only)) |hit| {
                log.debug("{s}: {s} fresh in local cache", .{ symbol, type_name });
                return .{
                    .data = hit.data,
                    .source = .cached,
                    .timestamp = hit.timestamp,
                    .allocator = self.allocator,
                };
            }
        }

        if (opts.skip_network) {
            // Offline mode: return whatever's cached, even if stale.
            // Cache miss is FetchFailed (not a network error).
            const hit = s.read(self.allocator, T, symbol, postProcess, .any) orelse
                return DataError.FetchFailed;
            log.info("{s}: {s} stale-cached returned (skip_network)", .{ symbol, type_name });
            return .{
                .data = hit.data,
                .source = .cached,
                .timestamp = hit.timestamp,
                .allocator = self.allocator,
            };
        }

        // Try server sync before hitting providers (skipped on force_refresh).
        if (!opts.force_refresh and self.syncFromServer(symbol, data_type)) {
            if (s.read(self.allocator, T, symbol, postProcess, .fresh_only)) |hit| {
                log.debug("{s}: {s} synced from server and fresh", .{ symbol, type_name });
                return .{
                    .data = hit.data,
                    .source = .cached,
                    .timestamp = hit.timestamp,
                    .allocator = self.allocator,
                };
            }
            log.debug("{s}: {s} synced from server but stale, falling through to provider", .{ symbol, type_name });
        }

        log.debug("{s}: fetching {s} from provider", .{ symbol, type_name });
        self.assertNetworkAllowed("fetchCached fetchFromProvider");

        const fetched = self.fetchFromProvider(T, symbol) catch |err| recovered: {
            // Allocation failure is not a provider problem; report it
            // as what it is.
            if (err == error.OutOfMemory) return DataError.OutOfMemory;

            if (err != error.RateLimited) {
                // Only NotFound (provider says "this symbol genuinely has
                // no data of this type") gets a negative-cache entry.
                // Transient failures (network, 5xx, auth misconfig, parse
                // error) propagate as FetchFailed without poisoning the
                // cache, so the next call retries naturally.
                if (isPermanentProviderFailure(err)) {
                    s.writeNegative(symbol, data_type);
                }
                return DataError.FetchFailed;
            }

            // Rate limited: wait and retry once.
            self.rateLimitBackoff();
            break :recovered self.fetchFromProvider(T, symbol) catch |retry_err| {
                if (retry_err == error.OutOfMemory) return DataError.OutOfMemory;
                return DataError.FetchFailed;
            };
        };

        s.writeWithSource(T, symbol, fetched, data_type.ttl(), sourceHintFor(T));
        return .{
            .data = fetched,
            .source = .fetched,
            .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(),
            .allocator = self.allocator,
        };
    }

    /// Map the model type fetched via `fetchCached` back to the
    /// provider it came from, so the merge primitive's `info(cache)`
    /// log lines can attribute new entries / field upgrades to a
    /// named source. Returns null for types where the source name
    /// isn't useful (the merge primitive only consults this for
    /// Dividend and Split).
    fn sourceHintFor(comptime T: type) ?[]const u8 {
        return switch (T) {
            Dividend, Split => "polygon",
            else => null,
        };
    }

    /// Dispatch a fetch to the correct provider based on model type.
    /// Fails with whatever `getProvider` or the provider call returns;
    /// an unsupported `T` is a compile error.
    fn fetchFromProvider(self: *DataService, comptime T: type, symbol: []const u8) !cache.Store.DataFor(T) {
        return switch (T) {
            Dividend => {
                // Polygon is the primary source: it carries
                // forward-looking declared dividends (e.g. ARCC's
                // 2026-06-15 ex_date), which Tiingo's price-series
                // response does not. Tiingo opportunistically
                // supplements the cache via `populateAllFromTiingo`
                // when candle fetches happen — that path uses the
                // sorted-union write semantics in
                // `cache.Store.writeMerged`, so Polygon's entries
                // and Tiingo's entries coexist in `dividends.srf`
                // without overwriting each other.
                const polygon = try self.getProvider(Polygon);
                return polygon.fetchDividends(self.allocator, symbol, null, null);
            },
            Split => {
                // Same rationale as Dividend above. Polygon also
                // carries forward-looking split announcements that
                // Tiingo's price-series doesn't surface.
                const polygon = try self.getProvider(Polygon);
                return polygon.fetchSplits(self.allocator, symbol);
            },
            OptionsChain => {
                const options_provider = try self.getProvider(Cboe);
                return options_provider.fetchOptionsChain(self.allocator, symbol);
            },
            else => @compileError("unsupported type for fetchFromProvider"),
        };
    }

    /// Fetch candles, dividends, and splits from Tiingo in a single
    /// HTTP call and write all three caches. Returns the triple so
    /// the caller can use the data without re-reading from disk.
    ///
    /// This is the orchestrated "cold cache" path. `getCandles`
    /// (cold-cache full fetch) calls this so a single Tiingo HTTP
    /// request populates `candles_daily.srf`, `candles_meta.srf`,
    /// `dividends.srf`, and `splits.srf` together. Tiingo's
    /// per-row `divCash` and `splitFactor` make this almost free.
    ///
    /// For dividends and splits the writes go through
    /// `writeWithSource` with `"tiingo"` as the source hint. The
    /// underlying `writeMerged` primitive merges Tiingo's view
    /// into whatever's already on disk (typically Polygon-sourced
    /// records), preserving forward-looking entries Polygon
    /// uniquely carries. New entries trigger an `info(cache)` log
    /// line attributing the discovery to Tiingo — useful when
    /// Tiingo surfaces a corporate action Polygon missed (the
    /// canonical case is SPYM's 2017-10-16 4:1 split).
    ///
    /// `from` is fixed at 2000-01-01 to cover any 10Y trailing-return
    /// window even when `--as-of` back-dates the reference to the
    /// earliest imported portfolio data (currently 2014). The extra
    /// few years of pre-2004 candles cost ~150 KB per symbol on disk
    /// and a one-time bandwidth bump on cold-cache fetch, both
    /// trivial. Also gives a comfortable buffer for older corporate
    /// actions (e.g. SPYM's 2017-10-16 split, deep-history reverse
    /// splits on legacy tickers).
fn populateAllFromTiingo(self: *DataService, symbol: []const u8) !@import("providers/tiingo.zig").CandleAndCorporateActions {
        const tiingo = try self.getProvider(Tiingo);
        const range_end = fmt.todayDate(self.io);
        const range_start = Date.fromYmd(2000, 1, 1);
        const bundle = try tiingo.fetchCandlesAndCorporateActions(self.allocator, symbol, range_start, range_end);

        var s = self.store();

        // Candles + meta — `cacheCandles` writes both candles_daily.srf
        // and candles_meta.srf in one shot (last_close, last_date,
        // provider, fail_count=0). An empty candle list is not cached.
        const has_candles = bundle.candles.len > 0;
        if (has_candles) s.cacheCandles(symbol, bundle.candles, .tiingo, 0);

        // Dividends and splits use the merge write path so Tiingo's
        // view supplements rather than replaces existing (typically
        // Polygon-sourced) records. New entries are logged with
        // "tiingo" attribution.
        const source_hint = "tiingo";
        s.writeWithSource(Dividend, symbol, bundle.dividends, cache.DataType.dividends.ttl(), source_hint);
        s.writeWithSource(Split, symbol, bundle.splits, cache.DataType.splits.ttl(), source_hint);

        return bundle;
    }

    /// Invalidate cached data for a symbol so the next get* call forces a fresh fetch.
    /// Clearing `.candles_daily` also clears the matching `.candles_meta`.
    pub fn invalidate(self: *DataService, symbol: []const u8, data_type: cache.DataType) void {
        var s = self.store();
        s.clearData(symbol, data_type);
        switch (data_type) {
            // Also clear candle metadata when invalidating candle data
            .candles_daily => s.clearData(symbol, .candles_meta),
            else => {},
        }
    }

    // ── Public data methods ──────────────────────────────────────

    /// Fetch candles from providers with error classification.
    ///
    /// Error handling:
    /// - ServerError/RequestFailed from Tiingo → TransientError (stop refresh, retry later)
    /// - RateLimited from Tiingo → back off and retry (up to two retries), then TransientError
    /// - Unauthorized from Tiingo → AuthError (config problem, stop refresh)
    /// - NotFound/ParseError/InvalidResponse from Tiingo → try Yahoo (symbol-level issue)
    /// - Every attempted provider failed → FetchFailed
    ///
    /// The `preferred` param controls incremental fetch consistency: use the same
    /// provider that sourced the existing cache data.
fn fetchCandlesFromProviders(
        self: *DataService,
        symbol: []const u8,
        from: Date,
        to: Date,
        preferred: cache.Store.CandleProvider,
    ) (DataError || error{NotFound})!struct { candles: []Candle, provider: cache.Store.CandleProvider } {
        // If preferred is Yahoo (degraded symbol), try Yahoo first
        if (preferred == .yahoo) {
            if (self.getProvider(Yahoo)) |yh| {
                if (yh.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                    log.debug("{s}: candles from Yahoo (preferred)", .{symbol});
                    return .{ .candles = candles, .provider = .yahoo };
                } else |err| {
                    log.warn("{s}: Yahoo (preferred) failed: {s}", .{ symbol, @errorName(err) });
                }
            } else |_| {
                // Same message the fallback branch below uses; previously
                // this case was dropped without a trace.
                log.warn("{s}: Yahoo provider not available", .{symbol});
            }
        }

        // Primary: Tiingo. The labeled block lets each "give up on
        // Tiingo, move on to Yahoo" case exit with a plain `break`.
        tiingo: {
            const tg = self.getProvider(Tiingo) catch {
                log.warn("{s}: Tiingo provider not available (no API key?)", .{symbol});
                break :tiingo;
            };

            const first_err = if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                log.debug("{s}: candles from Tiingo", .{symbol});
                return .{ .candles = candles, .provider = .tiingo };
            } else |e| e;
            log.warn("{s}: Tiingo failed: {s}", .{ symbol, @errorName(first_err) });

            if (first_err == error.Unauthorized) {
                log.err("{s}: Tiingo auth failed — check TIINGO_API_KEY", .{symbol});
                return DataError.AuthError;
            }

            if (first_err == error.RateLimited) {
                // Rate limited: back off and retry — this is expected, not a failure
                log.info("{s}: Tiingo rate limited, backing off", .{symbol});
                self.rateLimitBackoff();

                const retry_err = if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                    log.debug("{s}: candles from Tiingo (after rate limit backoff)", .{symbol});
                    return .{ .candles = candles, .provider = .tiingo };
                } else |e| e;
                log.warn("{s}: Tiingo retry after backoff failed: {s}", .{ symbol, @errorName(retry_err) });

                if (retry_err == error.RateLimited) {
                    // Still rate limited after backoff — one more try
                    self.rateLimitBackoff();
                    if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                        log.debug("{s}: candles from Tiingo (after second backoff)", .{symbol});
                        return .{ .candles = candles, .provider = .tiingo };
                    } else |final_err| {
                        // Record why the last attempt failed before
                        // giving up; it used to vanish silently.
                        log.warn("{s}: Tiingo second retry after backoff failed: {s}", .{ symbol, @errorName(final_err) });
                    }
                }
                // Exhausted rate limit retries — treat as transient
                return DataError.TransientError;
            }

            if (isTransientError(first_err)) {
                // Server error or connection failure — stop, don't fall back
                return DataError.TransientError;
            }

            // NotFound, ParseError, InvalidResponse — symbol-level issue, try Yahoo
            log.info("{s}: Tiingo does not have this symbol, trying Yahoo", .{symbol});
        }

        // Fallback: Yahoo (symbol not on Tiingo). Skipped when Yahoo was
        // already tried as the preferred provider above.
        if (preferred != .yahoo) {
            if (self.getProvider(Yahoo)) |yh| {
                if (yh.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                    log.info("{s}: candles from Yahoo (Tiingo fallback)", .{symbol});
                    return .{ .candles = candles, .provider = .yahoo };
                } else |err| {
                    log.warn("{s}: Yahoo fallback also failed: {s}", .{ symbol, @errorName(err) });
                }
            } else |_| {
                log.warn("{s}: Yahoo provider not available", .{symbol});
            }
        }

        return DataError.FetchFailed;
    }

    /// Classify whether a provider error is transient (provider is down).
    /// ServerError = HTTP 5xx, RequestFailed = connection/network failure.
    /// Note: RateLimited and Unauthorized are handled separately.
    fn isTransientError(err: anyerror) bool {
        return switch (err) {
            error.ServerError, error.RequestFailed => true,
            else => false,
        };
    }

    /// Centralized "are we about to touch the network?" gate. Tests
    /// set `panic_on_network_attempt` to assert that offline-mode
    /// paths never reach this site. Production callers always pass.
    /// `context` names the call site and is included in the panic
    /// message.
    inline fn assertNetworkAllowed(self: *DataService, context: []const u8) void {
        if (!self.panic_on_network_attempt) return;
        std.debug.panic("network attempted in offline-mode test: {s}", .{context});
    }

    /// Fetch daily candles for a symbol (10+ years for trailing returns).
    /// Checks cache first; fetches from Tiingo (primary) or Yahoo (fallback) if stale/missing.
/// Uses incremental updates: when the cache is stale, only fetches
/// candles newer than the last cached date rather than re-fetching
/// the entire history.
///
/// `opts.skip_network = true` → returns cached data even if stale,
/// returns FetchFailed on cache miss without touching the network.
/// `opts.force_refresh = true` → treats cache as stale and fetches.
///
/// Errors: `FetchFailed` (no data obtainable, or offline with no usable
/// cache) and `TransientError` (provider down or rate-limited; the
/// per-symbol `fail_count` in the candle meta is incremented).
pub fn getCandles(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Candle) {
    var s = self.store();
    const today = fmt.todayDate(self.io);

    // Check candle metadata for freshness (tiny file, no candle deserialization)
    const meta_result = s.readCandleMeta(symbol);
    if (meta_result) |mr| {
        const m = mr.meta;

        // Offline mode: return cached data without touching the
        // network. Cache miss / TwelveData-only cache is treated
        // as unavailable.
        if (opts.skip_network) {
            if (m.provider == .twelvedata) {
                log.debug("{s}: skip_network and only TwelveData cached — treating as unavailable", .{symbol});
                return DataError.FetchFailed;
            }
            if (s.read(self.allocator, Candle, symbol, null, .any)) |r| {
                if (!s.isCandleMetaFresh(symbol)) {
                    log.info("{s}: candles stale-cached returned (skip_network)", .{symbol});
                }
                return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
            }
            return DataError.FetchFailed;
        }

        // If cached data is from TwelveData (deprecated for candles due to
        // unreliable adj_close), skip cache and fall through to full re-fetch.
        if (m.provider == .twelvedata) {
            log.debug("{s}: cached candles from TwelveData — forcing full re-fetch", .{symbol});
        } else if (!opts.force_refresh and s.isCandleMetaFresh(symbol)) {
            // Fresh — deserialize candles and return. If the read fails
            // we drop out of this chain and continue to the
            // "no usable cache" path at the bottom of the function.
            log.debug("{s}: candles fresh in local cache", .{symbol});
            if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
        } else {
            // Stale — try server sync before incremental fetch.
            // (Force-refresh skips server sync too: the user explicitly
            // asked for fresh provider data.)
            if (!opts.force_refresh and self.syncCandlesFromServer(symbol)) {
                if (s.isCandleMetaFresh(symbol)) {
                    log.debug("{s}: candles synced from server and fresh", .{symbol});
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                        return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                }
                log.debug("{s}: candles synced from server but stale, falling through to incremental fetch", .{symbol});
            }

            // Stale — try incremental update using last_date from meta.
            // NOTE: `m` was captured before the server sync above, so
            // `last_date` here is the pre-sync value.
            const fetch_from = m.last_date.addDays(1);

            // If the day after the last cached candle is today or later
            // (i.e. the last candle is from yesterday or newer), there is
            // nothing to request: just refresh the TTL (meta only).
            // NOTE(review): this also skips fetching today's candle when
            // the cache ends yesterday — presumably intentional; confirm.
            if (!fetch_from.lessThan(today)) {
                s.updateCandleMeta(symbol, m.last_close, m.last_date, m.provider, m.fail_count);
                if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                    return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
            } else {
                // Incremental fetch from day after last cached candle
                self.assertNetworkAllowed("getCandles incremental fetchCandlesFromProviders");
                const result = self.fetchCandlesFromProviders(symbol, fetch_from, today, m.provider) catch |err| {
                    if (err == DataError.TransientError) {
                        // Increment fail_count for this symbol
                        const new_fail_count = m.fail_count +| 1; // saturating add
                        log.warn("{s}: transient failure (fail_count now {d})", .{ symbol, new_fail_count });
                        s.updateCandleMeta(symbol, m.last_close, m.last_date, m.provider, new_fail_count);
                        // If degraded (fail_count >= 3), return stale data rather than failing
                        if (new_fail_count >= 3) {
                            log.warn("{s}: degraded after {d} consecutive failures, returning stale data", .{ symbol, new_fail_count });
                            if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                                return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
                        }
                        return DataError.TransientError;
                    }
                    // Non-transient failure — return stale data if available
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                        return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
                    return DataError.FetchFailed;
                };
                const new_candles = result.candles;

                if (new_candles.len == 0) {
                    // No new candles (weekend/holiday) — refresh TTL, reset fail_count
                    self.allocator.free(new_candles);
                    s.updateCandleMeta(symbol, m.last_close, m.last_date, result.provider, 0);
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                        return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                } else {
                    // Append new candles to existing file + update meta, reset fail_count
                    s.appendCandles(symbol, new_candles, result.provider, 0);
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |r| {
                        // The merged history replaces the delta we fetched.
                        self.allocator.free(new_candles);
                        return .{ .data = r.data, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                    }
                    // Re-read failed: hand back only the newly fetched
                    // candles (ownership moves to the result).
                    return .{ .data = new_candles, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                }
            }
        }
    }

    // Offline mode + no usable cache — give up.
    if (opts.skip_network) {
        log.debug("{s}: skip_network and no cached candles — unavailable", .{symbol});
        return DataError.FetchFailed;
    }

    // No usable cache — try server sync first (skipped on force_refresh).
    if (!opts.force_refresh and self.syncCandlesFromServer(symbol)) {
        if (s.isCandleMetaFresh(symbol)) {
            log.debug("{s}: candles synced from server and fresh (no prior cache)", .{symbol});
            if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
                return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
        }
        log.debug("{s}: candles synced from server but stale, falling through to full fetch", .{symbol});
    }

    // No usable cache — full fetch via the orchestrated Tiingo
    // helper, which writes candles + dividends + splits caches in
    // one shot from a single HTTP response. The fixed start date
    // (see `populateAllFromTiingo`) is 2000-01-01, deep enough to
    // cover a 10Y trailing-return window even when `--as-of`
    // back-dates the reference into 2014-era imported portfolio
    // history, plus a buffer for older corporate actions like
    // SPYM's 2017-10-16 split.
    log.debug("{s}: fetching full candle history from provider", .{symbol});
    self.assertNetworkAllowed("getCandles full populateAllFromTiingo");
    const triple = self.populateAllFromTiingo(symbol) catch |err| {
        if (err == error.RateLimited or err == error.ServerError or err == error.RequestFailed) {
            // Transient: increment fail_count on existing meta so
            // we know to back off if this keeps happening.
            if (meta_result) |mr| {
                const new_fail_count = mr.meta.fail_count +| 1;
                s.updateCandleMeta(symbol, mr.meta.last_close, mr.meta.last_date, mr.meta.provider, new_fail_count);
            }
            return DataError.TransientError;
        }
        // NotFound, ParseError, InvalidResponse, AuthError —
        // symbol genuinely has no candle data on Tiingo (the only
        // provider for historical candles since the 2026-05
        // audit). Negative-cache so we don't keep retrying.
        // NOTE(review): every error not matched above lands here, so
        // an auth failure, a missing API key or OutOfMemory would also
        // write a negative entry for this symbol — confirm intended.
        s.writeNegative(symbol, .candles_daily);
        return DataError.FetchFailed;
    };
    // populateAllFromTiingo writes all three caches itself; we
    // free the slices we don't return.
    defer Dividend.freeSlice(self.allocator, triple.dividends);
    defer self.allocator.free(triple.splits);
    return .{ .data = triple.candles, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}

/// Fetch dividend history for a symbol.
/// Thin wrapper: delegates to `fetchCached` for `Dividend` with no
/// post-processing hook; `opts` is passed through unchanged.
pub fn getDividends(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Dividend) {
    return self.fetchCached(Dividend, symbol, null, opts);
}

/// Fetch split history for a symbol.
/// Thin wrapper: delegates to `fetchCached` for `Split` with no
/// post-processing hook; `opts` is passed through unchanged.
pub fn getSplits(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Split) {
    return self.fetchCached(Split, symbol, null, opts);
}

/// Fetch options chain for a symbol (all expirations, no API key needed).
/// Thin wrapper: delegates to `fetchCached` for `OptionsChain` with no
/// post-processing hook; `opts` is passed through unchanged.
pub fn getOptions(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(OptionsChain) {
    return self.fetchCached(OptionsChain, symbol, null, opts);
}

/// Fetch earnings history for a symbol.
/// Checks cache first; fetches from FMP if stale/missing.
/// Smart refresh: even if cache is fresh, re-fetches when a past earnings
/// date has no actual results yet (i.e. results just came out).
///
/// `opts.skip_network = true` → returns cached data even if stale,
/// returns FetchFailed on cache miss without touching the network.
/// `opts.force_refresh = true` → treats cache as stale and fetches.
///
/// Symbols for which `isMutualFund` is true short-circuit to an empty
/// result without consulting cache or network.
pub fn getEarnings(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EarningsEvent) {
    // Mutual funds (5-letter tickers ending in X) don't have quarterly earnings.
    if (isMutualFund(symbol)) {
        return .{ .data = &.{}, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    var s = self.store();
    const today = fmt.todayDate(self.io);

    if (!opts.force_refresh) {
        if (s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .fresh_only)) |cached| {
            // Check if any past/today earnings event is still missing actual results.
            // If so, the announcement likely just happened — force a refresh.
            // (Suppressed when opts.skip_network — offline mode never refetches.)
            const needs_refresh = if (opts.skip_network) false else for (cached.data) |ev| {
                if (ev.actual == null and !today.lessThan(ev.date)) break true;
            } else false;

            if (!needs_refresh) {
                log.debug("{s}: earnings fresh in local cache", .{symbol});
                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
            }
            // Stale: free cached events and re-fetch below
            // NOTE(review): only the slice is freed here — assumes
            // EarningsEvent holds no heap-owned fields; confirm.
            self.allocator.free(cached.data);
        }
    }

    if (opts.skip_network) {
        // Offline mode: fall back to any cached entry (even stale) before giving up.
        if (s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .any)) |cached| {
            log.info("{s}: earnings stale-cached returned (skip_network)", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    // Try server sync before hitting FMP (skipped on force_refresh).
    if (!opts.force_refresh and self.syncFromServer(symbol, .earnings)) {
        if (s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .fresh_only)) |cached| {
            log.debug("{s}: earnings synced from server and fresh", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        log.debug("{s}: earnings synced from server but stale, falling through to provider", .{symbol});
    }

    log.debug("{s}: fetching earnings from provider", .{symbol});
    self.assertNetworkAllowed("getEarnings fmp.fetchEarnings");
    var fmp = try self.getProvider(Fmp);

    const fetched = fmp.fetchEarnings(self.allocator, symbol) catch |err| blk: {
        if (err == error.RateLimited) {
            // One retry after backing off; a second failure of any
            // kind is reported as FetchFailed.
            self.rateLimitBackoff();
            break :blk fmp.fetchEarnings(self.allocator, symbol) catch {
                return DataError.FetchFailed;
            };
        }
        if (isPermanentProviderFailure(err)) {
            s.writeNegative(symbol, .earnings);
        }
        return DataError.FetchFailed;
    };

    s.write(EarningsEvent, symbol, fetched, .{ .seconds = cache.Ttl.earnings });

    return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}

/// Fetch ETF profile for a symbol. Assembles a unified
/// `EtfProfile` view from the EDGAR `etf_metrics` cache (profile
/// + sectors + holdings) plus the Wikidata `classification`
/// cache (inception_date, fund name fallback). Both underlying
/// caches are managed by `getEtfMetrics` / `getClassification`;
/// this function does not maintain its own cache.
///
/// Several legacy fields that AlphaVantage used to populate
/// (`expense_ratio`, `dividend_yield`, `portfolio_turnover`,
/// `leveraged`) remain on `EtfProfile` but stay null here —
/// EDGAR NPORT-P doesn't carry them. They'll fill in once a
/// prospectus parser lands.
///
/// `opts.skip_network = true` and `opts.force_refresh = true`
/// are forwarded to `getEtfMetrics` (and to `getClassification`).
pub fn getEtfProfile(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EtfProfile) {
    // Primary source: EDGAR ETF metrics. Any error from
    // `getEtfMetrics` (e.g. NotFound for a symbol that isn't a fund
    // or isn't in EDGAR) is propagated to the caller unchanged.
    const metrics = try self.getEtfMetrics(symbol, opts);
    defer metrics.deinit();

    // Walk the EtfMetricRecord slice to extract profile + sectors
    // + holdings. The slice shape is "one .profile, then N
    // .sector, then M .holding" per `appendEtfMetricRecords`.
    //
    // Everything duplicated below is owned by this function until it
    // is moved into the returned `EtfProfile`; the errdefers release
    // it if any later allocation fails.
    var name: ?[]const u8 = null;
    errdefer if (name) |n| self.allocator.free(n);
    var net_assets: ?f64 = null;

    var sectors_buf: std.ArrayList(SectorWeight) = .empty;
    errdefer {
        for (sectors_buf.items) |s| self.allocator.free(s.name);
        sectors_buf.deinit(self.allocator);
    }
    var holdings_buf: std.ArrayList(Holding) = .empty;
    errdefer {
        for (holdings_buf.items) |h| {
            self.allocator.free(h.name);
            if (h.symbol) |s| self.allocator.free(s);
            if (h.cusip) |c| self.allocator.free(c);
        }
        holdings_buf.deinit(self.allocator);
    }

    for (metrics.data) |rec| switch (rec) {
        .profile => |p| {
            if (p.series_name) |sn| {
                // Dupe first, then replace: a second .profile record
                // (not expected) must not leak the earlier name.
                const dup = try self.allocator.dupe(u8, sn);
                if (name) |old| self.allocator.free(old);
                name = dup;
            }
            net_assets = p.net_assets;
        },
        .sector => |sec| {
            // Guard the dupe so it is released if `append` fails;
            // once appended, the buffer's errdefer owns it.
            const sector_name = try self.allocator.dupe(u8, sec.description);
            errdefer self.allocator.free(sector_name);
            try sectors_buf.append(self.allocator, .{
                .name = sector_name,
                // Source value is a percentage; stored as a fraction.
                .weight = sec.pct_of_portfolio / 100.0,
            });
        },
        .holding => |h| {
            const sym_dup: ?[]const u8 = if (h.ticker) |t| try self.allocator.dupe(u8, t) else null;
            errdefer if (sym_dup) |s| self.allocator.free(s);
            const cusip_dup: ?[]const u8 = if (h.cusip) |c| try self.allocator.dupe(u8, c) else null;
            errdefer if (cusip_dup) |c| self.allocator.free(c);
            const name_dup = try self.allocator.dupe(u8, h.name);
            errdefer self.allocator.free(name_dup);
            try holdings_buf.append(self.allocator, .{
                .symbol = sym_dup,
                .name = name_dup,
                // Source value is a percentage; stored as a fraction.
                .weight = h.pct_of_portfolio / 100.0,
                .cusip = cusip_dup,
            });
        },
    };

    // Wikidata classification provides inception_date and a
    // fallback name. Best-effort: if the fetch fails we
    // still return the EDGAR-only profile.
    var inception_date: ?Date = null;
    if (self.getClassification(symbol, opts)) |class_result| {
        defer class_result.deinit();
        for (class_result.data) |c| {
            if (c.inception_date) |idate_str| {
                // Unparseable dates are ignored (inception stays as-is).
                if (Date.parse(idate_str)) |d| inception_date = d else |_| {}
            }
            // Prefer Wikidata's name if EDGAR didn't provide one.
            if (name == null) {
                if (c.name) |n| name = try self.allocator.dupe(u8, n);
            }
        }
    } else |_| {}

    const sectors_count = sectors_buf.items.len;
    const holdings_count = holdings_buf.items.len;

    // Perform every remaining fallible step before assembling the
    // profile, each guarded by its own errdefer. `toOwnedSlice`
    // empties the list it is called on, so once a slice has been
    // detached the list-level errdefers above no longer cover it.
    const symbol_owned = try self.allocator.dupe(u8, symbol);
    errdefer self.allocator.free(symbol_owned);

    const sectors_owned: []SectorWeight = if (sectors_count > 0)
        try sectors_buf.toOwnedSlice(self.allocator)
    else
        &.{};
    errdefer {
        for (sectors_owned) |sw| self.allocator.free(sw.name);
        self.allocator.free(sectors_owned);
    }

    // Last fallible operation: if it fails, the holdings are still in
    // `holdings_buf` and are released by its errdefer.
    const holdings_owned: []Holding = if (holdings_count > 0)
        try holdings_buf.toOwnedSlice(self.allocator)
    else
        &.{};

    const profile: EtfProfile = .{
        .symbol = symbol_owned,
        .name = name,
        .net_assets = net_assets,
        .holdings = if (holdings_count > 0) holdings_owned else null,
        .total_holdings = if (holdings_count > 0) @intCast(holdings_count) else null,
        .sectors = if (sectors_count > 0) sectors_owned else null,
        .inception_date = inception_date,
    };

    // Release the lists that were not handed off via toOwnedSlice.
    if (holdings_count == 0) holdings_buf.deinit(self.allocator);
    if (sectors_count == 0) sectors_buf.deinit(self.allocator);

    // Source and timestamp are inherited from the etf_metrics result.
    return .{
        .data = profile,
        .source = metrics.source,
        .timestamp = metrics.timestamp,
        .allocator = self.allocator,
    };
}

// ── Wikidata + EDGAR providers ─────────────────────────────────

/// Fetch the Wikidata classification record for a single symbol
/// (name, sector, industry, country, inception date, CIK,
/// instance-of). Cache-first; on miss, runs a 1-symbol batched
/// SPARQL query.
///
/// `opts.skip_network = true` returns cached data even if stale,
/// `FetchFailed` on cache miss. `opts.force_refresh = true`
/// ignores the cache and re-fetches.
pub fn getClassification(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Wikidata.ClassificationRecord) {
    var s = self.store();

    // 1. Fresh local cache (bypassed on force_refresh).
    if (!opts.force_refresh) {
        if (s.read(self.allocator, Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: classification fresh in local cache", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    // 2. Offline mode: accept any cached entry, otherwise give up.
    if (opts.skip_network) {
        if (s.read(self.allocator, Wikidata.ClassificationRecord, symbol, null, .any)) |cached| {
            log.info("{s}: classification stale-cached returned (skip_network)", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    // 3. Try server sync before hitting Wikidata.
    if (!opts.force_refresh and self.syncFromServer(symbol, .classification)) {
        if (s.read(self.allocator, Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: classification synced from server", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    // 4. Provider fetch; the result is post-processed and cached by
    //    `finalizeClassification`, which takes ownership of the slice.
    log.debug("{s}: fetching classification from Wikidata", .{symbol});
    self.assertNetworkAllowed("getClassification wikidata.fetch");
    var wd = try self.getProvider(Wikidata);
    const symbols = [_][]const u8{symbol};
    const fetched = wd.fetch(self.allocator, &symbols) catch |err| {
        if (err == error.RateLimited) {
            // One retry after backing off. If the retry also fails we
            // fall through and report the original error.
            self.rateLimitBackoff();
            if (wd.fetch(self.allocator, &symbols)) |retried| {
                return self.finalizeClassification(symbol, retried, opts);
            } else |_| {}
        }
        log.warn("{s}: wikidata fetch failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };

    return self.finalizeClassification(symbol, fetched, opts);
}

/// Common post-Wikidata path: decide if the result is useful as
/// returned, otherwise consult EDGAR to fill in the gaps,
/// otherwise negative-cache. On success the cache is written and a
/// `FetchResult` is returned; when neither source has anything the
/// symbol is negative-cached and `DataError.NotFound` is returned.
///
/// Takes ownership of `wikidata_records` on every path, including
/// error returns. The slice is either returned as the result data,
/// freed and replaced by a synthesized slice, or freed (on failure,
/// or when the symbol is negative-cached).
fn finalizeClassification(
    self: *DataService,
    symbol: []const u8,
    wikidata_records: []Wikidata.ClassificationRecord,
    opts: FetchOptions,
) DataError!FetchResult(Wikidata.ClassificationRecord) {
    var s = self.store();
    const ttl = cache.DataType.classification.ttl();

    // Wikidata returned a useful row -> populate geo from
    // geoFor(country) and cache as-is.
    if (wikidata_records.len > 0 and wikidataLooksUseful(wikidata_records[0])) {
        self.populateGeo(&wikidata_records[0]) catch |err| {
            // We own the slice and nothing downstream will see it, so
            // release it before propagating the failure.
            Wikidata.ClassificationRecord.freeSlice(self.allocator, wikidata_records);
            return err;
        };
        s.write(Wikidata.ClassificationRecord, symbol, wikidata_records, ttl);
        return .{ .data = wikidata_records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    // Sparse or empty: try EDGAR fallback. `synthesizeClassification`
    // takes ownership of the wikidata slice (frees it, returns a
    // new one-element slice with the merged record). Returns
    // `error.NotFound` when even EDGAR has nothing.
    const merged = self.synthesizeClassification(symbol, wikidata_records, opts) catch |err| {
        if (err == error.NotFound) {
            s.writeNegative(symbol, .classification);
            return DataError.NotFound;
        }
        // Any other failure is reported as FetchFailed and is not
        // negative-cached, so a later call can try again.
        return DataError.FetchFailed;
    };
    s.write(Wikidata.ClassificationRecord, symbol, merged, ttl);
    return .{ .data = merged, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}

/// Populate `record.geo` from `geoFor(record.country)` when it
/// isn't already set. Best-effort: if duping the geo string
/// fails, leaves the field null and propagates the error so the
/// caller can decide whether to bail.
fn populateGeo(self: *DataService, record: *Wikidata.ClassificationRecord) !void {
    // An existing geo value is never overwritten.
    if (record.geo != null) return;

    // Without a country there is nothing to derive the geo from.
    if (record.country) |country_code| {
        const derived = classification.geoFor(country_code);
        const is_unknown = std.mem.eql(u8, derived, classification.geo.unknown);
        // The "unknown" bucket is not stored; the field stays null.
        if (!is_unknown) record.geo = try self.allocator.dupe(u8, derived);
    }
}

/// Decide whether a Wikidata classification record is rich enough
/// to be cached as-is, without the EDGAR fallback. The record
/// counts as "useful" when `is_etf` is true or any one of
/// `asset_class`, `country`, or `sector` is set. Sparse records
/// (e.g. SOXX getting only a `name` from Wikidata) return false
/// so the EDGAR ticker-map fallback can fill in `is_etf=true,
/// asset_class=ETF, country=US`.
fn wikidataLooksUseful(c: Wikidata.ClassificationRecord) bool {
    return c.is_etf or
        c.asset_class != null or
        c.country != null or
        c.sector != null;
}

/// Synthesize a `ClassificationRecord` for a symbol that
/// Wikidata couldn't classify usefully. Consults the EDGAR
/// ticker maps; if found, also fetches `getEtfMetrics` to
/// recover the NPORT-P series_name (more authoritative than
/// the company_tickers title). Title-keyword inference fills
/// in `sector` and `geo` when the name carries an unambiguous
/// keyword.
///
/// Takes ownership of `wikidata_records`: frees them at exit.
/// Wikidata's `name`/`industry`/`inception_date`/`cik` fields
/// are preserved into the synthesized record when present.
/// Returns `error.NotFound` when EDGAR has nothing either.
fn synthesizeClassification(
    self: *DataService,
    symbol: []const u8,
    wikidata_records: []Wikidata.ClassificationRecord,
    opts: FetchOptions,
) !cache.Store.DataFor(Wikidata.ClassificationRecord) {
    // Ownership of the input slice ends here regardless of outcome.
    // Every borrowed string taken from it below is duplicated before
    // this defer runs.
    defer Wikidata.ClassificationRecord.freeSlice(self.allocator, wikidata_records);

    const lookup = self.lookupEdgarFallback(symbol, opts);
    defer freeEdgarLookup(self.allocator, lookup);

    if (lookup == .none) return error.NotFound;

    // For ETF/fund hits, try to get the richer series_name from
    // NPORT-P. Cache hit is cheap; cache miss triggers an EDGAR
    // fetch but is bounded by EDGAR's rate limiter. If the call
    // fails (e.g. money-market funds with no NPORT-P), we fall
    // back to the ticker-map title.
    var etf_metrics_result: ?FetchResult(Edgar.EtfMetricRecord) = null;
    defer if (etf_metrics_result) |*r| r.deinit();
    etf_metrics_result = self.getEtfMetrics(symbol, opts) catch null;

    // Extract series_name and cik from the etf_metrics profile row.
    // Both slices borrow from `etf_metrics_result` and stay valid
    // until the defer above runs at function exit.
    var series_name: ?[]const u8 = null;
    var etf_cik: ?[]const u8 = null;
    if (etf_metrics_result) |r| {
        for (r.data) |rec| switch (rec) {
            .profile => |p| {
                if (p.series_name) |sn| series_name = sn;
                etf_cik = p.cik;
                break; // only the first profile row is consulted
            },
            else => {},
        };
    }

    // Pull whatever Wikidata's sparse record carried so we
    // don't lose data on the merge.
    const wd: ?Wikidata.ClassificationRecord = if (wikidata_records.len > 0) wikidata_records[0] else null;

    // Pick the best name source: NPORT-P series_name >
    // EDGAR ticker-map title > Wikidata name > nothing.
    //
    // We're on the EDGAR-fallback path because Wikidata's
    // record was sparse. For funds, Wikidata's `name` (when
    // present) is frequently the underlying INDEX rather than
    // the FUND itself -- e.g. SOXX's Wikidata `name` is "PHLX
    // Semiconductor Sector" but the fund is "iShares
    // Semiconductor ETF" per NPORT-P seriesName. Prefer the
    // fund-authoritative source so downstream comments and
    // labels show the fund name, not the index name.
    const ticker_title: ?[]const u8 = switch (lookup) {
        .company_or_uit => |c| c.title,
        else => null,
    };
    const best_name: ?[]const u8 = blk: {
        if (series_name) |n| break :blk n;
        if (ticker_title) |n| break :blk n;
        if (wd) |w| {
            if (w.name) |n| break :blk n;
        }
        break :blk null;
    };

    // Name source for title-keyword inference: prefer the
    // most-authoritative source for fund-style classification
    // even when Wikidata supplied a (different) name. Wikidata's
    // name for a fund is often less informative than NPORT-P's
    // seriesName (e.g. SOXX's Wikidata name is "PHLX
    // Semiconductor Sector" which is the index name, not the
    // fund name).
    const inference_name: ?[]const u8 = series_name orelse ticker_title orelse if (wd) |w| w.name else null;
    const inferred_sector = classification.inferSectorFromTitle(inference_name);
    const inferred_geo = classification.inferGeoFromTitle(inference_name);

    // `is_etf` here means "this is fund-shaped, emit multi-row
    // breakdown" -- true for ANY EDGAR-found symbol. The
    // `tickers_funds.srf` map mixes mutual funds and
    // series-of-trust ETFs alike. The `tickers_companies.srf`
    // map carries operating companies, closed-end funds, and
    // UITs; operating companies usually have Wikidata coverage
    // and wouldn't reach this fallback, so anything that
    // dropped here is also fund-shaped (e.g. PIMCO closed-end
    // funds whose title says "FUND" but not "ETF" or "TRUST").
    //
    // The ETF/TRUST keyword in the title still drives the
    // asset_class label below ("ETF" vs "Fund"), but the
    // fund-shaped routing decision applies regardless.
    const is_etf = true;
    const asset_class: []const u8 = switch (lookup) {
        .managed_fund => "Fund",
        .company_or_uit => |c| if (c.is_etf) "ETF" else "Fund",
        .none => unreachable, // ruled out by the early return above
    };

    // Country: prefer Wikidata's. Default to "US" for
    // EDGAR-found symbols (they're SEC filers).
    const country_str: []const u8 = if (wd) |w| (w.country orelse "US") else "US";

    // Sector: prefer Wikidata's existing sector (rare in this
    // sparse-fallback path), else fall back to inferred.
    const sector_str: ?[]const u8 = blk: {
        if (wd) |w| {
            if (w.sector) |sec| break :blk sec;
        }
        break :blk inferred_sector;
    };

    // CIK: prefer Wikidata's, fall back to NPORT-P's.
    const cik_str: ?[]const u8 = blk: {
        if (wd) |w| {
            if (w.cik) |c| break :blk c;
        }
        if (etf_cik) |c| break :blk c;
        break :blk null;
    };

    // Geo: prefer the Wikidata-derived geo (computed from
    // `geoFor(country)` against the country code), else use
    // title-keyword inference. Default to "US" when neither
    // is available -- EDGAR-found symbols are SEC filers.
    const geo_str: []const u8 = blk: {
        if (wd) |w| {
            if (w.country) |c| {
                const g = classification.geoFor(c);
                if (!std.mem.eql(u8, g, classification.geo.unknown)) break :blk g;
            }
        }
        if (inferred_geo) |g| break :blk g;
        break :blk classification.geo.us;
    };

    const today = fmt.todayDate(self.io);
    var as_of_buf: [10]u8 = undefined;
    const as_of_str = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today});

    // Allocate each owned field up front with its own errdefer
    // so a partial-build on OOM doesn't leak the earlier
    // successful dupes. The record is assembled only after every
    // allocation (including the result slice) has succeeded.
    const symbol_owned = try self.allocator.dupe(u8, symbol);
    errdefer self.allocator.free(symbol_owned);
    const name_owned: ?[]const u8 = if (best_name) |n| try self.allocator.dupe(u8, n) else null;
    errdefer if (name_owned) |s| self.allocator.free(s);
    const sector_owned: ?[]const u8 = if (sector_str) |s| try self.allocator.dupe(u8, s) else null;
    errdefer if (sector_owned) |s| self.allocator.free(s);
    const industry_owned: ?[]const u8 = if (wd) |w| (if (w.industry) |i| try self.allocator.dupe(u8, i) else null) else null;
    errdefer if (industry_owned) |s| self.allocator.free(s);
    const country_owned = try self.allocator.dupe(u8, country_str);
    errdefer self.allocator.free(country_owned);
    const geo_owned = try self.allocator.dupe(u8, geo_str);
    errdefer self.allocator.free(geo_owned);
    const asset_class_owned = try self.allocator.dupe(u8, asset_class);
    errdefer self.allocator.free(asset_class_owned);
    const inception_owned: ?[]const u8 = if (wd) |w| (if (w.inception_date) |i| try self.allocator.dupe(u8, i) else null) else null;
    errdefer if (inception_owned) |s| self.allocator.free(s);
    const cik_owned: ?[]const u8 = if (cik_str) |c| try self.allocator.dupe(u8, c) else null;
    errdefer if (cik_owned) |s| self.allocator.free(s);
    const as_of_owned = try self.allocator.dupe(u8, as_of_str);
    errdefer self.allocator.free(as_of_owned);
    const source_owned = try self.allocator.dupe(u8, "edgar_fallback");
    errdefer self.allocator.free(source_owned);

    const result = try self.allocator.alloc(Wikidata.ClassificationRecord, 1);
    result[0] = .{
        .symbol = symbol_owned,
        .name = name_owned,
        .sector = sector_owned,
        .industry = industry_owned,
        .country = country_owned,
        .geo = geo_owned,
        .asset_class = asset_class_owned,
        .is_etf = is_etf,
        .inception_date = inception_owned,
        .cik = cik_owned,
        .as_of = as_of_owned,
        .source = source_owned,
    };
    return result;
}

/// Fetch XBRL-derived entity facts for a CIK (currently
/// shares-outstanding; extensible to revenue / net income / EPS
/// as new variants are added to `Edgar.EntityFactRecord`).
///
/// CIK is the cache key — the data lives in an `entity_facts.srf`
/// file stored under that key (the path placeholders in the earlier
/// wording were lost; presumably `<cache_dir>/<cik>/entity_facts.srf`).
/// A single dual-class issuer (BRK.A / BRK.B) shares one
/// entity_facts file because both class symbols resolve to the
/// same CIK.
///
/// `opts.skip_network = true` returns cached data even if stale,
/// `FetchFailed` on cache miss. `opts.force_refresh = true` skips
/// the cache and the server sync. Returns `NotFound` (and writes a
/// negative cache entry) when EDGAR has no shares-outstanding fact
/// for the CIK; `OutOfMemory` if building the record fails.
pub fn getEntityFacts(self: *DataService, cik: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EntityFactRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts fresh in local cache", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .any)) |cached| {
            log.info("CIK {s}: entity_facts stale-cached returned (skip_network)", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    if (!opts.force_refresh and self.syncFromServer(cik, .entity_facts)) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts synced from server", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("CIK {s}: fetching entity facts from EDGAR", .{cik});
    self.assertNetworkAllowed("getEntityFacts edgar.fetchSharesOutstanding");
    var edgar = try self.getProvider(Edgar);

    const so_opt = edgar.fetchSharesOutstanding(self.allocator, cik) catch |err| {
        log.warn("CIK {s}: shares fetch failed: {s}", .{ cik, @errorName(err) });
        return DataError.FetchFailed;
    };

    if (so_opt) |so_in| {
        var so = so_in;
        defer so.deinit(self.allocator);

        const today = fmt.todayDate(self.io);
        var as_of_buf: [10]u8 = undefined;
        // [10]u8 always fits "YYYY-MM-DD" (10 chars exactly).
        const as_of = std.fmt.bufPrint(&as_of_buf, "{f}", .{today}) catch
            @panic("getEntityFacts: 10-byte buffer cannot hold YYYY-MM-DD — unreachable");

        // Allocate every owned field first, each with its own
        // errdefer, so an OOM part-way through releases the earlier
        // allocations instead of leaking them. The record is built
        // only after the result slice itself has been allocated.
        const form_dup: ?[]u8 = if (so.form.len > 0) try self.allocator.dupe(u8, so.form) else null;
        errdefer if (form_dup) |f| self.allocator.free(f);
        // The cache key is the CIK, so the symbol field is left empty.
        const symbol_owned = try self.allocator.dupe(u8, "");
        errdefer self.allocator.free(symbol_owned);
        const period_end_owned = try self.allocator.dupe(u8, so.period_end);
        errdefer self.allocator.free(period_end_owned);
        const cik_owned = try self.allocator.dupe(u8, cik);
        errdefer self.allocator.free(cik_owned);
        const as_of_owned = try self.allocator.dupe(u8, as_of);
        errdefer self.allocator.free(as_of_owned);

        const records = try self.allocator.alloc(Edgar.EntityFactRecord, 1);

        const shares_record = Edgar.SharesRecord{
            .symbol = symbol_owned,
            .shares_outstanding = so.value,
            .period_end = period_end_owned,
            .form = form_dup,
            .cik = cik_owned,
            .as_of = as_of_owned,
            // NOTE(review): static literal, unlike the other fields —
            // assumes the record's cleanup does not free `source`; confirm.
            .source = "edgar_xbrl",
        };
        records[0] = .{ .shares_outstanding = shares_record };

        s.write(Edgar.EntityFactRecord, cik, records, cache.DataType.entity_facts.ttl());
        return .{ .data = records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    // No shares-outstanding data for this CIK (e.g. 20-F-only
    // filers like BP, XBRL-light filers like META). Negative-
    // cache so we don't keep retrying.
    s.writeNegative(cik, .entity_facts);
    return DataError.NotFound;
}

/// Fetch ETF metrics (NPORT-P profile + sectors + holdings) for
/// a fund symbol. Cache-first via the symbol's `etf_metrics.srf`
/// cache entry.
///
/// On cache miss, looks up the symbol in the EDGAR ticker maps
/// (loaded on demand via `loadMutualFundTickerMap` /
/// `loadCompanyTickerMap`), then runs the full
/// `Edgar.fetchEtfMetrics` cascade.
pub fn getEtfMetrics(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EtfMetricRecord) {
    var s = self.store();

    // 1. Fresh local cache (bypassed by force_refresh).
    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: etf_metrics fresh in local cache", .{symbol});
            return .{
                .data = cached.data,
                .source = .cached,
                .timestamp = cached.timestamp,
                .allocator = self.allocator,
            };
        }
    }

    // 2. Offline mode: accept a stale entry, otherwise report failure
    //    without touching the network.
    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.EtfMetricRecord, symbol, null, .any)) |cached| {
            log.info("{s}: etf_metrics stale-cached returned (skip_network)", .{symbol});
            return .{
                .data = cached.data,
                .source = .cached,
                .timestamp = cached.timestamp,
                .allocator = self.allocator,
            };
        }
        return DataError.FetchFailed;
    }

    // 3. Server sync, then re-read the (now hopefully fresh) cache.
    if (!opts.force_refresh and self.syncFromServer(symbol, .etf_metrics)) {
        if (s.read(self.allocator, Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: etf_metrics synced from server", .{symbol});
            return .{
                .data = cached.data,
                .source = .cached,
                .timestamp = cached.timestamp,
                .allocator = self.allocator,
            };
        }
    }

    // 4. Provider fetch.
    log.debug("{s}: fetching ETF metrics from EDGAR", .{symbol});
    self.assertNetworkAllowed("getEtfMetrics edgar.fetchEtfMetrics");

    // Load the ticker maps. These are big (3-5 MB each). Each call
    // loads them through the cache-first loaders and releases them on
    // return (see the `defer`s below), so repeated calls pay a cache
    // read rather than a network fetch — unless `opts.force_refresh`
    // is set, in which case the loaders re-fetch as well.
    var mf_map = self.loadMutualFundTickerMap(opts) catch |err| {
        log.warn("failed to load mutual-fund ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer mf_map.deinit();
    var co_map = self.loadCompanyTickerMap(opts) catch |err| {
        log.warn("failed to load company ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer co_map.deinit();

    var edgar = try self.getProvider(Edgar);
    const result = edgar.fetchEtfMetrics(
        self.io,
        self.allocator,
        &mf_map,
        &co_map,
        symbol,
        20,
    ) catch |err| {
        log.warn("{s}: etf_metrics fetch failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };

    switch (result) {
        .full => |m_in| {
            var m = m_in;
            defer m.deinit(self.allocator);
            var records: std.ArrayList(Edgar.EtfMetricRecord) = .empty;
            // Only runs if building the record list fails part-way.
            errdefer {
                for (records.items) |*r| r.deinit(self.allocator);
                records.deinit(self.allocator);
            }
            try Edgar.appendEtfMetricRecords(self.allocator, &records, m);
            const owned = try records.toOwnedSlice(self.allocator);
            s.write(Edgar.EtfMetricRecord, symbol, owned, cache.DataType.etf_metrics.ttl());
            return .{ .data = owned, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
        },
        .profile_only => |m_in| {
            var m = m_in;
            defer m.deinit(self.allocator);
            var records: std.ArrayList(Edgar.EtfMetricRecord) = .empty;
            errdefer {
                for (records.items) |*r| r.deinit(self.allocator);
                records.deinit(self.allocator);
            }
            try Edgar.appendEtfMetricRecords(self.allocator, &records, m);
            const owned = try records.toOwnedSlice(self.allocator);
            s.write(Edgar.EtfMetricRecord, symbol, owned, cache.DataType.etf_metrics.ttl());
            return .{ .data = owned, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
        },
        .not_a_fund => {
            // Not a fund — write a negative entry to suppress
            // retries. The user can ask `getEntityFacts(cik)`
            // separately for stock-level facts.
            s.writeNegative(symbol, .etf_metrics);
            return DataError.NotFound;
        },
        .not_in_edgar => {
            // Symbol isn't in either ticker map. No EDGAR data
            // available; negative-cache.
            s.writeNegative(symbol, .etf_metrics);
            return DataError.NotFound;
        },
    }
}

/// Load the EDGAR mutual-fund ticker map. Reads `[]MutualFundTickerEntry`
/// from cache when fresh; otherwise fetches via the provider
/// and writes the parsed slice to cache. The returned
/// `TickerMap` takes ownership of the entries; caller frees via
/// a single `mf_map.deinit()`.
///
/// With `opts.skip_network` set the provider is never contacted:
/// a stale cached slice is used if one exists, otherwise the call
/// fails with `DataError.FetchFailed` (same offline contract as
/// `getEtfMetrics`).
///
/// Heavy: ~28k entries. Cheap on cache hit (fast SRF read);
/// expensive on miss (one HTTP round-trip + JSON parse).
/// Exposed publicly so commands like `enrich` can use the
/// ticker map as a fallback classifier when Wikidata returns
/// no rows for a symbol.
pub fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.MutualFundTickerEntry) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.MutualFundTickerEntry, "_edgar", null, .fresh_only)) |cached| {
            if (cached.data.len > 0) {
                return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, cached.data);
            }
            // An empty cached slice is useless as a lookup table; drop it
            // and fall through.
            Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
        }
    }

    // Offline mode: stale data beats no data, and a miss must not
    // reach the network.
    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.MutualFundTickerEntry, "_edgar", null, .any)) |cached| {
            if (cached.data.len > 0) {
                log.info("EDGAR mutual-fund ticker map: stale cache returned (skip_network)", .{});
                return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, cached.data);
            }
            Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
        }
        return DataError.FetchFailed;
    }

    log.debug("fetching EDGAR mutual-fund ticker map", .{});
    self.assertNetworkAllowed("loadMutualFundTickerMap edgar.fetchMutualFundTickerMap");

    var edgar = try self.getProvider(Edgar);
    // Fetch + parse via the provider (correct UA + From + Accept
    // + rate-limit token), cache the parsed slice, then build
    // the lookup map (which takes ownership of the slice).
    const entries = try edgar.fetchMutualFundTickerMap(self.allocator);
    s.write(Edgar.MutualFundTickerEntry, "_edgar", entries, cache.DataType.tickers_funds.ttl());
    return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, entries);
}

/// Load the EDGAR company ticker map (stocks + UITs). Same shape
/// as `loadMutualFundTickerMap` for the `CompanyTickerEntry`
/// type, including the `opts.skip_network` behavior (stale cache
/// or `DataError.FetchFailed`, never the network). See that
/// function's doc-comment for cost / use-case guidance.
pub fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.CompanyTickerEntry) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.CompanyTickerEntry, "_edgar", null, .fresh_only)) |cached| {
            if (cached.data.len > 0) {
                return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, cached.data);
            }
            Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
        }
    }

    // Offline mode: stale data beats no data, and a miss must not
    // reach the network.
    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.CompanyTickerEntry, "_edgar", null, .any)) |cached| {
            if (cached.data.len > 0) {
                log.info("EDGAR company ticker map: stale cache returned (skip_network)", .{});
                return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, cached.data);
            }
            Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
        }
        return DataError.FetchFailed;
    }

    log.debug("fetching EDGAR company ticker map", .{});
    self.assertNetworkAllowed("loadCompanyTickerMap edgar.fetchCompanyTickerMap");

    var edgar = try self.getProvider(Edgar);
    const entries = try edgar.fetchCompanyTickerMap(self.allocator);
    s.write(Edgar.CompanyTickerEntry, "_edgar", entries, cache.DataType.tickers_companies.ttl());
    return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, entries);
}

/// Look up a symbol in the EDGAR ticker maps. Used by the
/// `enrich` command as a fallback classifier when Wikidata
/// returns no rows for the symbol. Loads both maps (cache or
/// network), runs the lookup, frees the maps, returns the
/// digested `EdgarLookup` union.
///
/// Commands consume the union directly — they never see
/// `TickerMap` / `MutualFundTickerEntry` / `CompanyTickerEntry`
/// shapes. Provider details stay inside the service layer.
///
/// Caller owns the `title` string when the result is
/// `.company_or_uit{ .title = non-null }`. Free it with the
/// service's allocator (`self.allocator`, which this method
/// hands to `lookupInTickerMaps`).
pub fn lookupEdgarFallback(
    self: *DataService,
    sym: []const u8,
    opts: FetchOptions,
) EdgarLookup {
    // Either map may be unavailable (no cache, provider failure, OOM).
    // This function cannot return an error, so a failed load degrades to
    // "look up without that map" — but the cause is logged rather than
    // silently dropped.
    var mf_opt: ?Edgar.TickerMap(Edgar.MutualFundTickerEntry) = self.loadMutualFundTickerMap(opts) catch |err| blk: {
        log.debug("{s}: mutual-fund ticker map unavailable: {t}", .{ sym, err });
        break :blk null;
    };
    defer if (mf_opt) |*m| m.deinit();
    var co_opt: ?Edgar.TickerMap(Edgar.CompanyTickerEntry) = self.loadCompanyTickerMap(opts) catch |err| blk: {
        log.debug("{s}: company ticker map unavailable: {t}", .{ sym, err });
        break :blk null;
    };
    defer if (co_opt) |*m| m.deinit();

    return lookupInTickerMaps(
        self.allocator,
        sym,
        if (mf_opt) |*m| m else null,
        if (co_opt) |*m| m else null,
    );
}

// ──────────────────────────────────────────────────────────────

/// Fetch a real-time quote for a symbol.
/// Yahoo Finance is primary (free, no API key, no 15-min delay).
/// Falls back to TwelveData if Yahoo fails.
///
/// Quotes are never cached, so `opts.force_refresh` is a no-op
/// (every call goes to the provider). `opts.skip_network = true`
/// returns FetchFailed unconditionally — there's no cached price
/// to fall back to.
///
/// Errors: `DataError.FetchFailed` when offline or when the
/// TwelveData fallback fails; whatever `getProvider(TwelveData)`
/// returns when that provider cannot be obtained.
pub fn getQuote(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!Quote {
    if (opts.skip_network) {
        log.debug("{s}: skip_network — quote unavailable (never cached)", .{symbol});
        return DataError.FetchFailed;
    }
    self.assertNetworkAllowed("getQuote");

    // Primary: Yahoo Finance (free, real-time). Failures are logged
    // and fall through to the TwelveData fallback.
    if (self.getProvider(Yahoo)) |yh| {
        if (yh.fetchQuote(self.allocator, symbol)) |quote| {
            log.debug("{s}: quote from Yahoo", .{symbol});
            return quote;
        } else |err| {
            log.debug("{s}: Yahoo quote failed: {t}", .{ symbol, err });
        }
    } else |err| {
        log.debug("{s}: Yahoo provider unavailable: {t}", .{ symbol, err });
    }

    // Fallback: TwelveData (requires API key, may be 15-min delayed)
    var td = try self.getProvider(TwelveData);
    log.debug("{s}: quote fallback to TwelveData", .{symbol});
    return td.fetchQuote(self.allocator, symbol) catch |err| {
        log.debug("{s}: TwelveData quote failed: {t}", .{ symbol, err });
        return DataError.FetchFailed;
    };
}

/// Trailing returns for `symbol`, derived from its candles, splits and
/// dividends (each fetched through the service with `opts`).
///
/// Two end-date conventions are reported:
///   - as-of-date: end = latest close (Morningstar "Trailing Returns" page);
///   - month-end: end = last business day of the prior month
///     (Morningstar "Performance" page).
///
/// `*_price` columns are split-adjusted, NOT dividend-adjusted (matches the
/// "price return" numbers public sources like Yahoo's chart-bar / FMP / Barchart
/// publish). `*_total` columns include dividend reinvestment (matches Morningstar
/// "Trailing Returns" / Yahoo "Performance Overview" / Koyfin "Total Return").
/// See `tmp/multi-ticker-audit.md` for the cross-validation evidence.
///
/// The candle slice and (when fetched) the dividend slice are handed
/// back in the result. Fails with `DataError.FetchFailed` when the
/// candle fetch yields no rows.
pub fn getTrailingReturns(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!struct {
    asof_price: performance.TrailingReturns,
    asof_total: ?performance.TrailingReturns,
    me_price: performance.TrailingReturns,
    me_total: ?performance.TrailingReturns,
    candles: []Candle,
    dividends: ?[]Dividend,
    source: Source,
    timestamp: i64,
} {
    const candle_fetch = try self.getCandles(symbol, opts);
    const bars = candle_fetch.data;
    if (bars.len == 0) return DataError.FetchFailed;

    const today = fmt.todayDate(self.io);

    // Splits make raw `close` ratios meaningful across split
    // boundaries (e.g. NVDA 10:1 on 2024-06-10). A failed splits
    // fetch degrades to an empty slice, which is still correct for
    // tickers with no splits in the window (i.e. most of them).
    var split_fetch: ?FetchResult(Split) = null;
    defer if (split_fetch) |owned| owned.deinit();
    var split_rows: []const Split = &.{};
    if (self.getSplits(symbol, opts)) |sr| {
        split_fetch = sr;
        split_rows = sr.data;
    } else |_| {}

    // Price-only returns: as-of-date (end = last candle) and
    // month-end (end = last business day of prior month).
    const price_asof = performance.trailingReturnsPriceOnly(bars, split_rows);
    const price_me = performance.trailingReturnsPriceOnlyMonthEnd(bars, split_rows, today);

    // adj_close-based total return is the baseline. With Tiingo
    // (the default provider) adj_close is already dividend-adjusted,
    // so it is a reasonable total-return estimate even when explicit
    // dividend records are missing.
    const adj_asof = performance.trailingReturns(bars);
    const adj_me = performance.trailingReturnsMonthEnd(bars, today);

    // Start from the adj_close baseline; upgrade to the
    // dividend-reinvested figures when dividends can be fetched.
    var dividend_rows: ?[]Dividend = null;
    var total_asof: ?performance.TrailingReturns = adj_asof;
    var total_me: ?performance.TrailingReturns = adj_me;
    if (self.getDividends(symbol, opts)) |div_fetch| {
        dividend_rows = div_fetch.data;
        const reinvested_asof = performance.trailingReturnsWithDividends(bars, div_fetch.data);
        const reinvested_me = performance.trailingReturnsMonthEndWithDividends(bars, div_fetch.data, today);
        total_asof = performance.withDividendFallback(reinvested_asof, adj_asof);
        total_me = performance.withDividendFallback(reinvested_me, adj_me);
    } else |_| {}

    return .{
        .asof_price = price_asof,
        .asof_total = total_asof,
        .me_price = price_me,
        .me_total = total_me,
        .candles = bars,
        .dividends = dividend_rows,
        .source = candle_fetch.source,
        .timestamp = candle_fetch.timestamp,
    };
}

/// Report whether the cached candle metadata for `symbol` is still
/// fresh, without deserializing the candle history itself.
pub fn isCandleCacheFresh(self: *DataService, symbol: []const u8) bool {
    var candle_cache = self.store();
    const is_fresh = candle_cache.isCandleMetaFresh(symbol);
    return is_fresh;
}

/// Latest cached close for `symbol`, read without deserializing the
/// full candle history. Null when nothing is cached.
pub fn getCachedLastClose(self: *DataService, symbol: []const u8) ?f64 {
    var candle_cache = self.store();
    const last_close = candle_cache.readLastClose(symbol);
    return last_close;
}

/// Latest cached candle date for `symbol`, taken from the candle
/// metadata rather than the full history. Null when no metadata is
/// cached.
///
/// Pair with `isCandleCacheFresh` before trusting the date: a stale
/// entry can be days or weeks old — fine for diagnostics, wrong for
/// anything that needs "the current market date".
pub fn getCachedLastDate(self: *DataService, symbol: []const u8) ?Date {
    var candle_cache = self.store();
    if (candle_cache.readCandleMeta(symbol)) |record| {
        return record.meta.last_date;
    }
    return null;
}

/// Estimate wait time (in seconds) before a fetch for `data_type`
/// can proceed without blocking on its provider's rate limiter.
/// Returns 0 if a request can be made immediately, or if the
/// provider for this data type has no rate limiter. Returns null
/// if the relevant provider isn't instantiated yet (e.g., no API
/// key, or first call hasn't happened to lazy-init it).
///
/// The caller asks "how long until getX can proceed?" -- the
/// service maps data type to provider internally so the caller
/// doesn't have to know which provider serves which data.
pub fn estimateWaitSeconds(self: *DataService, data_type: cache.DataType) ?u64 {
    const wait_ns: u64 = switch (data_type) {
        // Polygon serves dividends and splits.
        .dividends, .splits => if (self.pg) |*pg_client| pg_client.rate_limiter.estimateWaitNs() else return null,
        // FMP serves earnings.
        .earnings => if (self.fmp) |*fmp_client| fmp_client.rate_limiter.estimateWaitNs() else return null,
        // Cboe serves options chains.
        .options => if (self.cboe) |*cboe_client| cboe_client.rate_limiter.estimateWaitNs() else return null,
        // EDGAR serves ETF metrics, entity facts and both ticker maps.
        .etf_metrics, .entity_facts, .tickers_funds, .tickers_companies => if (self.edgar) |*edgar_client| edgar_client.rate_limiter.estimateWaitNs() else return null,
        // Tiingo-served candles: 50/hour token bucket. When Tiingo
        // isn't instantiated (no key), candles fall back to keyless
        // Yahoo with no proactive limiter, so report 0 rather than
        // null. `candles_meta` shares Tiingo's budget.
        .candles_daily, .candles_meta => if (self.tg) |*tg_client| tg_client.rate_limiter.estimateWaitNs() else 0,
        // `meta` isn't fetched; Wikidata (classification) has no
        // published quota.
        .classification, .meta => 0,
    };

    // No wait at all stays 0; any non-zero wait is reported as at
    // least one whole second (sub-second waits would otherwise
    // truncate to 0).
    if (wait_ns == 0) return 0;
    return @max(1, wait_ns / std.time.ns_per_s);
}

/// Cache-only candle read (never touches the network); used by the
/// TUI for display. Null when nothing is cached or when the entry is
/// a negative cache (fetch_failed).
///
/// `allocator` owns the returned `FetchResult.data`. Pass an
/// arena for "lives until reload" use cases (TUI per-portfolio
/// data); pass a per-call arena for CLI batch commands.
pub fn getCachedCandles(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(Candle) {
    var s = self.store();
    if (s.isNegative(symbol, .candles_daily)) return null;
    if (s.read(allocator, Candle, symbol, null, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}

/// Cache-only dividend read (never touches the network). Null on a
/// cache miss. See `getCachedCandles` for the allocator contract.
pub fn getCachedDividends(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(Dividend) {
    var s = self.store();
    if (s.read(allocator, Dividend, symbol, null, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}

/// Cache-only earnings read (never touches the network); the cached
/// rows are passed through `earningsPostProcess` by the store read.
/// Null on a cache miss. See `getCachedCandles` for the allocator
/// contract.
pub fn getCachedEarnings(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(EarningsEvent) {
    var s = self.store();
    if (s.read(allocator, EarningsEvent, symbol, earningsPostProcess, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}

/// Cache-only options-chain read (never touches the network). Null
/// on a cache miss. See `getCachedCandles` for the allocator contract.
pub fn getCachedOptions(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(OptionsChain) {
    var s = self.store();
    if (s.read(allocator, OptionsChain, symbol, null, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}

// ── Portfolio price loading ──────────────────────────────────

/// Per-symbol status reported while prices are being loaded.
pub const SymbolStatus = enum {
    /// Price came from a fresh cache entry.
    cached,
    /// An API fetch is about to start (emitted before the network call).
    fetching,
    /// The API fetch succeeded.
    fetched,
    /// The API fetch failed, but a stale cached price was used.
    failed_used_stale,
    /// The API fetch failed and there is no cached price.
    failed,
};

/// Callback for progress reporting during price loading.
/// `context` is an opaque pointer to caller-owned state.
pub const ProgressCallback = struct {
    /// Caller-owned state, passed back untouched as `ctx`.
    context: *anyopaque,
    /// Invoked once per status change of a symbol.
    on_progress: *const fn (ctx: *anyopaque, index: usize, total: usize, symbol: []const u8, status: SymbolStatus) void,

    /// Forward one progress event to `on_progress` with the stored context.
    fn emit(self: ProgressCallback, index: usize, total: usize, symbol: []const u8, status: SymbolStatus) void {
        const notify = self.on_progress;
        notify(self.context, index, total, symbol, status);
    }
};

// ── Consolidated Price Loading (Parallel Server + Sequential Provider) ──

/// Knobs for `loadAllPrices`.
pub const LoadAllConfig = struct {
    force_refresh: bool = false,
    /// Skip provider fetches and server sync. Returns cached
    /// data (even if stale) and treats cache miss as failure.
    /// Drives `--refresh-data=never`.
    skip_network: bool = false,
    color: bool = true,

    /// Project this config onto the per-call `FetchOptions` shape,
    /// for paths that pass through to `getCandles`/`getDividends`/etc.
    pub fn fetchOptions(self: LoadAllConfig) FetchOptions {
        const options: FetchOptions = .{
            .skip_network = self.skip_network,
            .force_refresh = self.force_refresh,
        };
        return options;
    }
};

/// Outcome of a `loadAllPrices` run: the resolved prices plus
/// per-source counters.
pub const LoadAllResult = struct {
    prices: std.StringHashMap(f64),
    /// Symbols resolved from fresh local cache.
    cached_count: usize,
    /// Symbols synced from the server.
    server_synced_count: usize,
    /// Symbols fetched from providers (rate-limited APIs).
    provider_fetched_count: usize,
    /// Symbols that failed all sources but used stale cache.
    stale_count: usize,
    /// Symbols that failed completely (no data).
    failed_count: usize,
    /// Latest candle date seen.
    latest_date: ?Date,

    /// Release the prices hashmap. Call this unless ownership of
    /// `prices` has been transferred elsewhere.
    pub fn deinit(self: *LoadAllResult) void {
        self.prices.deinit();
    }
};

/// Progress callback for aggregate (parallel) progress reporting.
/// Called periodically during parallel operations with current counts.
pub const AggregateProgressCallback = struct {
    /// Caller-owned state, passed back untouched as `ctx`.
    context: *anyopaque,
    /// Invoked with the running totals for the current phase.
    on_progress: *const fn (ctx: *anyopaque, completed: usize, total: usize, phase: Phase) void,

    pub const Phase = enum {
        /// Checking local cache
        cache_check,
        /// Syncing from ZFIN_SERVER
        server_sync,
        /// Fetching from rate-limited providers
        provider_fetch,
        /// Done
        complete,
    };

    /// Forward one aggregate progress event to `on_progress` with the
    /// stored context.
    fn emit(self: AggregateProgressCallback, completed: usize, total: usize, phase: Phase) void {
        const notify = self.on_progress;
        notify(self.context, completed, total, phase);
    }
};

/// Shared counter for parallel progress tracking, backed by an
/// atomic `usize` accessed with monotonic ordering.
const AtomicCounter = struct {
    value: std.atomic.Value(usize) = .init(0),

    /// Add one and return the value the counter held *before* the add.
    fn increment(self: *AtomicCounter) usize {
        const previous = self.value.fetchAdd(1, .monotonic);
        return previous;
    }

    /// Current value of the counter.
    fn load(self: *const AtomicCounter) usize {
        const current = self.value.load(.monotonic);
        return current;
    }
};

/// One slot per symbol in the parallel server sync: the symbol and
/// whether its sync succeeded.
const ServerSyncResult = struct {
    symbol: []const u8,
    success: bool,
};

/// Load prices for portfolio and watchlist symbols with automatic parallelization.
///
/// When ZFIN_SERVER is configured:
/// 1. Check local cache (fast, parallel-safe)
/// 2. Parallel sync from server for cache misses
/// 3. Sequential provider fallback for server failures
///
/// When ZFIN_SERVER is not configured:
/// Falls back to sequential loading with per-symbol progress.
///
/// Progress is reported via `aggregate_progress` for parallel phases
/// and `symbol_progress` for sequential provider fallback.
pub fn loadAllPrices(
    self: *DataService,
    portfolio_syms: ?[]const []const u8,
    watch_syms: []const []const u8,
    config: LoadAllConfig,
    aggregate_progress: ?AggregateProgressCallback,
    symbol_progress: ?ProgressCallback,
) LoadAllResult {
    var result: LoadAllResult = .{
        .prices = std.StringHashMap(f64).init(self.allocator),
        .cached_count = 0,
        .server_synced_count = 0,
        .provider_fetched_count = 0,
        .stale_count = 0,
        .failed_count = 0,
        .latest_date = null,
    };

    // A missing portfolio list behaves like an empty one.
    const portfolio_list: []const []const u8 = portfolio_syms orelse &.{};
    const total_count = portfolio_list.len + watch_syms.len;
    if (total_count == 0) return result;

    // Combined symbol list: portfolio first, then watchlist.
    var all_symbols = std.ArrayList([]const u8).initCapacity(self.allocator, total_count) catch return result;
    defer all_symbols.deinit(self.allocator);
    for (portfolio_list) |sym| {
        all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append portfolio sym({s}): {t}", .{ sym, err });
    }
    for (watch_syms) |sym| {
        all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append watch sym({s}): {t}", .{ sym, err });
    }

    // force_refresh does NOT wipe the candle cache. It flows
    // through to getCandles (via config.fetchOptions()), which
    // ignores the TTL and does an incremental top-up — see the
    // `--refresh-data=force` contract. The Phase-1 fast path below
    // is skipped on force_refresh so every symbol is re-validated
    // against the provider. A full wipe + re-download from scratch
    // is reserved for `cache clear`.

    // Phase 1: local cache fast path. Symbols without a fresh cache
    // entry (or all symbols, on force_refresh) are queued for fetch.
    var needs_fetch: std.ArrayList([]const u8) = .empty;
    defer needs_fetch.deinit(self.allocator);

    if (aggregate_progress) |p| p.emit(0, total_count, .cache_check);

    for (all_symbols.items) |sym| {
        const use_cache = !config.force_refresh and self.isCandleCacheFresh(sym);
        if (!use_cache) {
            needs_fetch.append(self.allocator, sym) catch |err| log.warn("loadAllPrices needs_fetch append({s}): {t}", .{ sym, err });
            continue;
        }
        if (self.getCachedLastClose(sym)) |close| {
            result.prices.put(sym, close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
            self.updateLatestDate(&result, sym);
        }
        result.cached_count += 1;
    }

    if (aggregate_progress) |p| p.emit(result.cached_count, total_count, .cache_check);

    if (needs_fetch.items.len == 0) {
        if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
        return result;
    }

    // Offline mode: skip server sync and provider fetch entirely.
    // For symbols without a fresh cache, fall back to stale cache
    // before giving up.
    if (config.skip_network) {
        for (needs_fetch.items) |sym| {
            const stale_close = self.getCachedLastClose(sym) orelse {
                result.failed_count += 1;
                continue;
            };
            result.prices.put(sym, stale_close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
            self.updateLatestDate(&result, sym);
            result.stale_count += 1;
        }
        if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
        return result;
    }

    // Phase 2: server sync (parallel) when a server is configured;
    // otherwise every pending symbol goes straight to the providers.
    var server_failures: std.ArrayList([]const u8) = .empty;
    defer server_failures.deinit(self.allocator);

    const have_server = self.config.server_url != null;
    if (have_server) {
        self.parallelServerSync(
            needs_fetch.items,
            &result,
            &server_failures,
            aggregate_progress,
            total_count,
        );
    } else {
        for (needs_fetch.items) |sym| {
            server_failures.append(self.allocator, sym) catch |err| log.warn("loadAllPrices server_failures append({s}): {t}", .{ sym, err });
        }
    }

    // Phase 3: sequential provider fallback for whatever the server
    // could not supply.
    const provider_backlog = server_failures.items.len;
    if (provider_backlog > 0) {
        if (aggregate_progress) |p| p.emit(
            result.cached_count + result.server_synced_count,
            total_count,
            .provider_fetch,
        );

        self.sequentialProviderFetch(
            server_failures.items,
            &result,
            symbol_progress,
            total_count - provider_backlog, // offset for progress display
            config.fetchOptions(),
        );
    }

    if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
    return result;
}

/// Fetch live intraday quotes for `symbols` in parallel, returning
/// a map of symbol → live last price. Symbols whose quote fetch
/// fails (or that the provider can't price) are simply absent; the
/// caller falls back to the last cached close.
///
/// This is a pure live-price fetch: quotes are never cached, so it
/// neither reads nor writes the candle cache. It exists for the
/// TUI refresh key (`r`), whose job is "give me current prices,"
/// distinct from candle-history maintenance (TTL/startup) and from
/// `--refresh-data=force` (incremental candle top-up).
///
/// Unlike `getQuote` (single-symbol, Yahoo→TwelveData fallback),
/// this is Yahoo-only: Yahoo is keyless with no shared rate
/// limiter, so each worker can safely own its HTTP client.
/// TwelveData's shared rate limiter makes it unsafe to fan out, and
/// its fallback role isn't worth the complexity for a bulk refresh.
///
/// Concurrency mirrors `parallelServerSync`: one task per symbol in
/// a single `std.Io.Group`, each with its own `Yahoo` client (a
/// shared `std.http.Client` is not safe across threads — see
/// `tryOneSync`). Relies on a thread-safe `allocator`/`io`, the
/// same assumption the server-sync fan-out already makes.
///
/// The returned map's keys borrow `symbols`: keep `symbols` alive
/// while using the map, and `deinit()` the map when done.
pub fn loadLiveQuotes(self: *DataService, symbols: []const []const u8) std.StringHashMap(f64) {
    var prices = std.StringHashMap(f64).init(self.allocator);
    if (symbols.len == 0) return prices;

    self.assertNetworkAllowed("loadLiveQuotes");

    // One slot per symbol; each worker writes only its own slot.
    const QuoteSlot = struct { symbol: []const u8, price: ?f64 = null };
    const slots = self.allocator.alloc(QuoteSlot, symbols.len) catch return prices;
    defer self.allocator.free(slots);
    for (slots, 0..) |*slot, i| slot.* = .{ .symbol = symbols[i] };

    const worker = struct {
        fn run(io: std.Io, allocator: std.mem.Allocator, slot: *QuoteSlot) std.Io.Cancelable!void {
            try io.checkCancel();
            var yh = Yahoo.init(io, allocator);
            defer yh.deinit();
            // Quote borrows `symbol` and carries no owned memory,
            // so the f64 close is all we keep — nothing to free.
            slot.price = if (yh.fetchQuote(allocator, slot.symbol)) |q| q.close else |_| null;
        }
    };

    var group: std.Io.Group = .init;
    for (slots) |*slot| group.async(self.io, worker.run, .{ self.io, self.allocator, slot });
    group.await(self.io) catch |err| log.debug("loadLiveQuotes group await: {t}", .{err});

    // Collect on the calling thread, after every task has finished.
    for (slots) |slot| {
        if (slot.price) |p| prices.put(slot.symbol, p) catch |err| log.warn("loadLiveQuotes put({s}): {t}", .{ slot.symbol, err });
    }
    return prices;
}

/// Parallel server sync via `std.Io.Group`.
///
/// Concurrency shape: one task per symbol, spawned into a
/// single `Group`. The `std.Io` implementation owns
/// scheduling and concurrency limits (e.g. `Io.Threaded`
/// sizes its pool from CPU count); we don't second-guess it
/// with our own worker cap or work-stealing queue.
///
/// Each task hits `io.checkCancel()` before its sync, so a
/// cancelation request propagating through `Group.await`
/// stops pending work at task granularity.
///
/// Symbols that synced and are readable from cache are added to
/// `result` (price, latest date, `server_synced_count`); every
/// other symbol is appended to `failures`.
fn parallelServerSync(
    self: *DataService,
    symbols: []const []const u8,
    result: *LoadAllResult,
    failures: *std.ArrayList([]const u8),
    aggregate_progress: ?AggregateProgressCallback,
    total_count: usize,
) void {
    if (aggregate_progress) |p| p.emit(result.cached_count, total_count, .server_sync);

    // Shared state for tasks
    var completed = AtomicCounter{};

    const sync_results = self.allocator.alloc(ServerSyncResult, symbols.len) catch {
        // Allocation failed — fall back to marking all as failures
        for (symbols) |sym| failures.append(self.allocator, sym) catch |err| log.warn("parallelServerSync slots-alloc-fallback failures append({s}): {t}", .{ sym, err });
        return;
    };
    defer self.allocator.free(sync_results);

    // Initialize results
    for (sync_results, 0..) |*sr, i| {
        sr.* = .{ .symbol = symbols[i], .success = false };
    }

    const worker = struct {
        fn run(io: std.Io, svc: *DataService, slot: *ServerSyncResult, done: *AtomicCounter) std.Io.Cancelable!void {
            // Count the task as done on every exit path, including
            // cancelation, so the progress poll below can terminate.
            defer _ = done.increment();
            try io.checkCancel();
            slot.success = svc.syncCandlesFromServer(slot.symbol);
        }
    };

    // Spawn one task per symbol. Group.async requires an
    // eventual Group.await/cancel to release resources; the
    // single await below covers all paths.
    var group: std.Io.Group = .init;
    for (sync_results) |*sr| {
        group.async(self.io, worker.run, .{ self.io, self, sr, &completed });
    }

    // Progress reporting while the group runs
    if (aggregate_progress) |p| {
        while (completed.load() < symbols.len) {
            std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(50), .awake) catch |err| {
                log.debug("parallelServerSync progress-poll sleep interrupted: {t}", .{err});
                break;
            };
            p.emit(result.cached_count + completed.load(), total_count, .server_sync);
        }
    }

    // Wait for all tasks. On cancelation the unstarted tasks
    // exit at their checkCancel point; partial results (slots
    // that completed) are still processed below — they came
    // from successful cache writes.
    group.await(self.io) catch |err| {
        log.debug("parallelServerSync group await: {t}", .{err});
    };

    // Process results
    for (sync_results) |sr| {
        if (sr.success) {
            // Server sync succeeded — read from cache
            if (self.getCachedLastClose(sr.symbol)) |close| {
                result.prices.put(sr.symbol, close) catch |err| log.warn("syncFromServer cache-after-sync put({s}): {t}", .{ sr.symbol, err });
                self.updateLatestDate(result, sr.symbol);
                result.server_synced_count += 1;
            } else {
                // Sync said success but can't read cache — treat as failure
                failures.append(self.allocator, sr.symbol) catch |err| log.warn("syncFromServer success-but-no-cache failures append({s}): {t}", .{ sr.symbol, err });
            }
        } else {
            failures.append(self.allocator, sr.symbol) catch |err| log.warn("syncFromServer fail-result failures append({s}): {t}", .{ sr.symbol, err });
        }
    }
}

/// Sequential provider fetch for symbols that failed server sync.
///
/// For each symbol: emit `.fetching`, try `getCandles`, and on
/// success record the last close (and latest date) and bump
/// `provider_fetched_count`. On failure, fall back to the stale
/// cached close if one exists.
///
/// Counter contract (matches the `LoadAllResult` field docs and the
/// offline branch of `loadAllPrices`): a symbol that falls back to a
/// stale price counts toward `stale_count` only; `failed_count` is
/// reserved for symbols that end up with no price at all.
///
/// `index_offset` shifts the per-symbol progress index so it lines
/// up with symbols already resolved by earlier phases.
fn sequentialProviderFetch(
    self: *DataService,
    symbols: []const []const u8,
    result: *LoadAllResult,
    progress: ?ProgressCallback,
    index_offset: usize,
    opts: FetchOptions,
) void {
    const total = index_offset + symbols.len;

    for (symbols, 0..) |sym, i| {
        const display_idx = index_offset + i;

        // Notify: about to fetch
        if (progress) |p| p.emit(display_idx, total, sym, .fetching);

        // Try provider fetch
        if (self.getCandles(sym, opts)) |candle_result| {
            defer self.allocator.free(candle_result.data);
            if (candle_result.data.len > 0) {
                const last = candle_result.data[candle_result.data.len - 1];
                result.prices.put(sym, last.close) catch |err| log.warn("loadAllPrices candle-close put({s}): {t}", .{ sym, err });
                if (result.latest_date == null or last.date.days > result.latest_date.?.days) {
                    result.latest_date = last.date;
                }
            }
            result.provider_fetched_count += 1;
            if (progress) |p| p.emit(display_idx, total, sym, .fetched);
            continue;
        } else |_| {}

        // Provider failed — try stale cache. Stale-but-priced and
        // completely-failed are counted exclusively.
        if (self.getCachedLastClose(sym)) |close| {
            result.prices.put(sym, close) catch |err| log.warn("loadAllPrices stale-fallback put({s}): {t}", .{ sym, err });
            result.stale_count += 1;
            if (progress) |p| p.emit(display_idx, total, sym, .failed_used_stale);
        } else {
            result.failed_count += 1;
            if (progress) |p| p.emit(display_idx, total, sym, .failed);
        }
    }
}

/// Update latest_date in result from cached candle metadata.
/// No-op when `symbol` has no cached metadata or its last date is
/// not later than the one already recorded.
fn updateLatestDate(self: *DataService, result: *LoadAllResult, symbol: []const u8) void {
    var s = self.store();
    if (s.readCandleMeta(symbol)) |cm| {
        const d = cm.meta.last_date;
        if (result.latest_date == null or d.days > result.latest_date.?.days) {
            result.latest_date = d;
        }
    }
}

// ── CUSIP Resolution ──────────────────────────────────────────

/// Look up multiple CUSIPs in a single batch request via OpenFIGI.
/// Results array is parallel to the input cusips array (same length, same order).
/// Caller owns the returned slice and all strings within each CusipResult.
pub fn lookupCusips(self: *DataService, cusips: []const []const u8) DataError![]CusipResult {
    // Any OpenFIGI failure is reported as `DataError.FetchFailed`.
    const resolved = OpenFigi.lookupCusips(self.io, self.allocator, cusips, self.config.openfigi_key) catch return DataError.FetchFailed;
    return resolved;
}

/// One CUSIP-to-ticker record as stored in the cache file. Both
/// fields default to empty; readers skip records with either empty.
const CusipEntry = struct {
    cusip: []const u8 = "",
    ticker: []const u8 = "",
};

/// CUSIP->ticker lookup table loaded from `cusip_tickers.srf`.
///
/// Zero-copy: keys and values are slices into `backing` (the raw
/// file bytes parsed with `parse_allocator = .none`). Nothing is
/// duped per entry — the whole-file buffer IS the storage, and it
/// stays alive for the table's lifetime, released together with
/// the map table in `deinit`.
///
/// This is the L1 tier of CUSIP resolution: callers consult it
/// before reaching for the server or OpenFIGI.
pub const CusipTickerMap = struct {
    map: std.StringHashMap([]const u8),
    /// Raw bytes of `cusip_tickers.srf`; every map key and value
    /// points into this buffer. `&.{}` when the file was missing
    /// or unreadable (freeing a zero-length slice is a no-op).
    backing: []const u8,

    /// Ticker for `cusip`, or null when the CUSIP is not in the table.
    pub fn get(self: CusipTickerMap, cusip: []const u8) ?[]const u8 {
        const ticker = self.map.get(cusip);
        return ticker;
    }

    /// Whether `cusip` is present in the table.
    pub fn contains(self: CusipTickerMap, cusip: []const u8) bool {
        const present = self.map.contains(cusip);
        return present;
    }

    /// Number of distinct CUSIPs in the table.
    pub fn count(self: CusipTickerMap) u32 {
        const entries = self.map.count();
        return entries;
    }

    /// Release the map table and the backing buffer. Both were
    /// allocated with the map's allocator at load time, so it is
    /// reused here — the two lifetimes are bound together by
    /// construction, which is the whole point of the wrapper.
    pub fn deinit(self: *CusipTickerMap) void {
        // Grab the allocator before the map is torn down.
        const owner = self.map.allocator;
        self.map.deinit();
        owner.free(self.backing);
    }
};

/// Load the CUSIP->ticker cache file into a `CusipTickerMap`. The
/// returned table owns the file bytes; release it with
/// `CusipTickerMap.deinit`.
///
/// Missing file → empty table (the common first-run case). First
/// occurrence wins on duplicate CUSIPs, which tolerates the
/// historical double-append bug in cache files written before
/// `cacheCusipTicker` learned to dedup.
///
/// The on-disk format is CUSIP-keyed (`cusip::X,ticker::Y`); the
/// returned map is keyed the same way for O(1) forward lookup.
pub fn loadCusipTickerMap(self: *DataService, allocator: std.mem.Allocator) CusipTickerMap {
    // Start from an empty table; every early return below hands back
    // whatever has been loaded so far.
    var cusip_table: CusipTickerMap = .{
        .map = std.StringHashMap([]const u8).init(allocator),
        .backing = &.{},
    };

    const path = std.fs.path.join(allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return cusip_table;
    defer allocator.free(path);

    // From here the file bytes are the table's backing store: keys
    // and values are slices into them (parse_allocator = .none, so
    // the parser borrows rather than copies). Freed by
    // `CusipTickerMap.deinit`, never here — that's the lifetime
    // contract that lets us skip per-entry dupes entirely.
    cusip_table.backing = std.Io.Dir.cwd().readFileAlloc(self.io, path, allocator, .limited(4 * 1024 * 1024)) catch return cusip_table;

    var reader = std.Io.Reader.fixed(cusip_table.backing);
    var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return cusip_table;
    defer it.deinit();

    while (it.next() catch return cusip_table) |fields| {
        const entry = fields.to(CusipEntry, .{}) catch continue;
        const incomplete = entry.cusip.len == 0 or entry.ticker.len == 0;
        if (incomplete) continue;
        // First occurrence wins; getOrPut stores the borrowed
        // slices directly — they live in `backing`, no dupe.
        const gop = cusip_table.map.getOrPut(entry.cusip) catch continue;
        if (gop.found_existing) continue;
        gop.value_ptr.* = entry.ticker;
    }
    return cusip_table;
}

/// Append CUSIP->ticker mappings to `cusip_tickers.srf`, skipping
/// any whose CUSIP is already on disk and any duplicates within
/// `entries`. One read + one atomic write regardless of batch size.
///
/// Read-append-atomic-write (rather than open-for-append) so a
/// concurrent reader never sees a valid header plus a partial
/// trailing record — see `cache/store.zig appendRaw` for the same
/// pattern and rationale. `#!srfv1` directives are emitted only
/// when the file is being created.
///
/// Safety: a cache file that exists but could not be loaded (too
/// large for the load limit, permission error, transient I/O
/// failure) is never replaced — the append is skipped with a
/// warning instead, so one bad read cannot wipe accumulated
/// mappings. Best-effort throughout: allocation or write failures
/// drop the batch rather than erroring.
fn appendCusipEntries(self: *DataService, entries: []const CusipEntry) void {
    if (entries.len == 0) return;

    // One load gives us both the dedup set and the existing bytes
    // to concat (`backing`). Missing/empty file → empty map + empty
    // backing → directives emitted below.
    var existing_map = self.loadCusipTickerMap(self.allocator);
    defer existing_map.deinit();
    const existing = existing_map.backing;

    // Keep only entries new to the file and unique within the batch.
    var seen = std.StringHashMap(void).init(self.allocator);
    defer seen.deinit();
    var to_write: std.ArrayList(CusipEntry) = .empty;
    defer to_write.deinit(self.allocator);
    for (entries) |e| {
        if (e.cusip.len == 0 or e.ticker.len == 0) continue;
        if (existing_map.contains(e.cusip)) continue;
        const gop = seen.getOrPut(e.cusip) catch continue;
        if (gop.found_existing) continue;
        to_write.append(self.allocator, e) catch continue;
    }
    if (to_write.items.len == 0) return;

    const path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return;
    defer self.allocator.free(path);

    // An empty backing means "missing" OR "could not be read" — the
    // loader does not distinguish. Writing in the second case would
    // atomically replace the existing file with just this batch, so
    // confirm the file is genuinely absent/empty before creating it.
    if (existing.len == 0 and !self.cusipCacheAbsentOrEmpty(path)) {
        log.warn("cusip-cache {s}: existing file could not be loaded; skipping append to avoid overwriting it", .{path});
        return;
    }

    if (std.fs.path.dirnamePosix(path)) |dir| {
        std.Io.Dir.cwd().createDirPath(self.io, dir) catch |err|
            log.warn("cusip-cache createDirPath({s}): {t}", .{ dir, err });
    }

    const emit_directives = existing.len == 0;
    var aw: std.Io.Writer.Allocating = .init(self.allocator);
    defer aw.deinit();
    aw.writer.print("{f}", .{srf.fmt(CusipEntry, to_write.items, .{ .emit_directives = emit_directives })}) catch return;
    const encoded = aw.writer.buffered();
    if (encoded.len == 0) return;

    // Concat existing + new, then atomic-write. If the existing bytes
    // do not end in a newline (hand-edited or torn file), insert one
    // so the first appended record starts on its own line instead of
    // fusing with the last existing one.
    // NOTE(review): assumes records in this file are '\n'-terminated,
    // as in the one-record-per-line `cusip::X,ticker::Y` layout.
    const needs_newline = existing.len != 0 and existing[existing.len - 1] != '\n';
    const gap: usize = @intFromBool(needs_newline);
    const combined = self.allocator.alloc(u8, existing.len + gap + encoded.len) catch return;
    defer self.allocator.free(combined);
    @memcpy(combined[0..existing.len], existing);
    if (needs_newline) combined[existing.len] = '\n';
    @memcpy(combined[existing.len + gap ..], encoded);

    atomic.writeFileAtomic(self.io, self.allocator, path, combined) catch |err|
        log.warn("cusip-cache writeFileAtomic({s}): {t}", .{ path, err });
}

/// True when `path` is missing or zero-length, i.e. creating the
/// cache file from scratch cannot discard anything. Every other
/// outcome — the file holds data, or the read failed for a reason
/// other than "not found" — returns false.
fn cusipCacheAbsentOrEmpty(self: *DataService, path: []const u8) bool {
    // A tiny limit is enough: a non-empty file yields either a
    // non-zero length or a read error, and both mean "not empty".
    const probe = std.Io.Dir.cwd().readFileAlloc(self.io, path, self.allocator, .limited(64)) catch |err| switch (err) {
        error.FileNotFound => return true,
        else => return false,
    };
    defer self.allocator.free(probe);
    return probe.len == 0;
}

/// Append a single CUSIP->ticker mapping to the cache file
/// (dedup-aware). Thin wrapper over `appendCusipEntries`; the
/// `lookup` command's single-CUSIP path.
pub fn cacheCusipTicker(self: *DataService, cusip: []const u8, ticker: []const u8) void {
    self.appendCusipEntries(&.{.{ .cusip = cusip, .ticker = ticker }});
}

/// Resolve a set of CUSIPs to tickers via the three-tier cascade,
/// persisting newly-learned mappings to `cusip_tickers.srf` (union
/// policy: the local file accumulates everything it ever learns and
/// converges toward the shared server set).
///
/// Tiers, cheapest first:
///   L1 local `cusip_tickers.srf` (always; no network)
///   L2 server `GET /cusips` whole-file sync (if ZFIN_SERVER set)
///   L3 OpenFIGI batch lookup (whatever still misses)
///
/// `skip_network = true` restricts resolution to L1 (the local
/// cache) — for offline mode (`--refresh-data=never`). L2/L3 and
/// the persist-back are skipped entirely; cached CUSIPs still
/// resolve, uncached ones stay unresolved.
///
/// Best-effort: network failures degrade to "fewer entries
/// resolved" rather than erroring. The returned `CusipTickerMap` is
/// a zero-copy view over the (possibly just-rewritten) local file
/// and covers every CUSIP any tier could resolve. Callers resolve
/// forward-per-holding: look up each holding's CUSIP against it,
/// which sidesteps the "do I have every CUSIP for this ticker?"
/// completeness problem entirely.
///
/// Empty/duplicate CUSIPs in `cusips` are ignored. The caller owns
/// the returned map (`deinit`); pass a scratch allocator to scope
/// it to a single command invocation.
pub fn resolveCusips(self: *DataService, allocator: std.mem.Allocator, cusips: []const []const u8, skip_network: bool) CusipTickerMap {
    var local = self.loadCusipTickerMap(allocator);

    // Two ways to finish on L1 alone: offline mode, or a warm cache
    // that already answers every requested CUSIP. Neither needs an
    // arena, a request, or a rewrite of the file.
    if (skip_network) return local;
    if (!anyMissing(local, cusips)) return local;

    // Everything learned from the network tiers is copied into this
    // arena, so it outlives the response/result buffers that the
    // tiers release as they go.
    var arena_state = std.heap.ArenaAllocator.init(self.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    // cusip -> ticker for mappings not yet in the local file.
    var learned = std.StringHashMap([]const u8).init(arena);

    // L2: whole-file sync from the server. A missing `GET /cusips`
    // route just makes the fetch return null, so this tier is a
    // no-op until the server offers it and purely additive once it
    // does (the server is expected to serve the file through its
    // existing `handleStaticSrfFile` machinery, same shape as
    // `/_edgar/tickers_funds`).
    if (self.config.server_url) |server_url| {
        if (self.fetchServerCusips(server_url)) |body| {
            defer self.allocator.free(body);
            mergeCusipBody(arena, &learned, local, body);
        }
    }

    // L3: ask OpenFIGI about whatever is still unresolved.
    self.mintMissingViaOpenFigi(arena, &learned, local, cusips);

    const learned_count = learned.count();
    if (learned_count == 0) return local; // no tier taught us anything

    // Flatten the learned map into a batch for persistence. Capacity
    // is reserved first so filling the list cannot fail; if even that
    // reservation fails, give up on persisting and hand back the L1
    // view (some CUSIPs stay unresolved for this run).
    var batch: std.ArrayList(CusipEntry) = .empty;
    batch.ensureTotalCapacity(arena, learned_count) catch return local;
    var pairs = learned.iterator();
    while (pairs.next()) |pair| {
        batch.appendAssumeCapacity(.{ .cusip = pair.key_ptr.*, .ticker = pair.value_ptr.* });
    }

    // Write the union to disk, then reload so the caller receives a
    // single-buffer zero-copy table over the updated file.
    self.appendCusipEntries(batch.items);
    local.deinit();
    return self.loadCusipTickerMap(allocator);
}

/// Report whether `cusips` holds at least one non-empty CUSIP that
/// `map` has no entry for.
fn anyMissing(map: CusipTickerMap, cusips: []const []const u8) bool {
    for (cusips) |cusip| {
        if (cusip.len != 0 and !map.contains(cusip)) return true;
    }
    return false;
}

/// Fold a CUSIP->ticker SRF body (the shape `GET /cusips` serves)
/// into `out`. Records with an empty field, and CUSIPs already known
/// to `have` or `out`, are skipped; accepted strings are copied into
/// `arena`. Performs no I/O, which is what lets it be unit-tested
/// with fixture bytes while the live L2 route does not exist yet.
fn mergeCusipBody(arena: std.mem.Allocator, out: *std.StringHashMap([]const u8), have: CusipTickerMap, body: []const u8) void {
    var fixed = std.Io.Reader.fixed(body);
    var records = srf.iterator(&fixed, arena, .{ .parse_allocator = .none }) catch return;
    defer records.deinit();

    while (true) {
        // An iteration error abandons the rest of the body; end of
        // input leaves the loop normally.
        const fields = (records.next() catch return) orelse break;
        const entry = fields.to(CusipEntry, .{}) catch continue;

        const usable = entry.cusip.len != 0 and entry.ticker.len != 0;
        if (!usable) continue;

        const already_known = have.contains(entry.cusip) or out.contains(entry.cusip);
        if (already_known) continue;

        const key = arena.dupe(u8, entry.cusip) catch continue;
        const value = arena.dupe(u8, entry.ticker) catch continue;
        out.put(key, value) catch continue;
    }
}

/// L2 seam: fetch the whole CUSIP->ticker map from the server via
/// `GET {server}/cusips`. Returns the raw SRF body (caller frees
/// with `self.allocator`) or null on any failure. Best-effort: no
/// retry and no torn-body archival (this is a shared reference
/// file, not per-symbol cache) — a bad/absent response just
/// degrades to the OpenFIGI tier.
fn fetchServerCusips(self: *DataService, server_url: []const u8) ?[]u8 {
    const url = std.fmt.allocPrint(self.allocator, "{s}/cusips", .{server_url}) catch return null;
    defer self.allocator.free(url);

    var client = http.Client.init(self.io, self.allocator);
    defer client.deinit();

    var response = client.get(url) catch |err| {
        log.debug("cusips server sync failed: {s}", .{@errorName(err)});
        return null;
    };
    defer response.deinit();

    // Reject bodies that do not look like a complete SRF file rather
    // than merging a truncated response.
    if (!cache.Store.looksCompleteSrf(response.body)) {
        log.debug("cusips server response not complete SRF ({d} bytes) — ignoring", .{response.body.len});
        return null;
    }
    // The response buffer is released by the deferred deinit, so hand
    // the caller its own copy.
    return self.allocator.dupe(u8, response.body) catch null;
}

/// L3: resolve still-missing CUSIPs through OpenFIGI (batched 100
/// per request, the API's job limit), recording hits into `out`
/// (duped into `arena`). De-dups the lookup set against `have`,
/// `out`, and itself. Best-effort: a failed batch logs and is
/// skipped; remaining batches still run.
///
/// Hits are paired with their CUSIP by position, so a batch whose
/// result count differs from the request count is skipped as well:
/// positional pairing cannot be trusted there, and a wrong mapping
/// would be persisted to the cache file.
fn mintMissingViaOpenFigi(self: *DataService, arena: std.mem.Allocator, out: *std.StringHashMap([]const u8), have: CusipTickerMap, cusips: []const []const u8) void {
    // Build the de-duplicated list of CUSIPs no earlier tier resolved.
    var seen = std.StringHashMap(void).init(arena);
    var to_lookup: std.ArrayList([]const u8) = .empty;
    for (cusips) |c| {
        if (c.len == 0) continue;
        if (have.contains(c) or out.contains(c)) continue;
        const gop = seen.getOrPut(c) catch continue;
        if (gop.found_existing) continue;
        to_lookup.append(arena, c) catch continue;
    }
    if (to_lookup.items.len == 0) return;

    const batch_size = 100; // OpenFIGI accepts up to 100 jobs/request.
    var start: usize = 0;
    while (start < to_lookup.items.len) : (start += batch_size) {
        const end = @min(start + batch_size, to_lookup.items.len);
        const batch = to_lookup.items[start..end];

        const figi = self.lookupCusips(batch) catch |err| {
            log.warn("resolveCusips: OpenFIGI lookup of {d} CUSIP(s) failed: {s}", .{ batch.len, @errorName(err) });
            continue;
        };
        // Runs at the end of every iteration (including the `continue`
        // below): the results and their strings belong to
        // `self.allocator`, so hits are copied into `arena` first.
        defer {
            for (figi) |r| {
                if (r.ticker) |t| self.allocator.free(t);
                if (r.name) |n| self.allocator.free(n);
                if (r.security_type) |s| self.allocator.free(s);
            }
            self.allocator.free(figi);
        }

        // Results are expected to be parallel to `batch` (same length +
        // order). Verify rather than assume: indexing `batch` past its
        // end is out of bounds, and a short result could shift which
        // CUSIP each ticker belongs to.
        if (figi.len != batch.len) {
            log.warn("resolveCusips: OpenFIGI returned {d} result(s) for {d} CUSIP(s); skipping batch", .{ figi.len, batch.len });
            continue;
        }

        for (figi, batch) |r, cusip| {
            if (!r.found) continue;
            const ticker = r.ticker orelse continue;
            const kc = arena.dupe(u8, cusip) catch continue;
            const vc = arena.dupe(u8, ticker) catch continue;
            out.put(kc, vc) catch continue;
        }
    }
}

// ── Utility ──────────────────────────────────────────────────

/// Sleep before retrying after a rate limit error.
/// Uses the provider's rate limiter if available, otherwise a fixed 10s backoff.
fn rateLimitBackoff(self: *DataService) void {
    if (self.td) |*td| {
        td.rate_limiter.backoff();
    } else {
        // An interrupted sleep is only logged: the caller retries either way.
        std.Io.sleep(self.io, std.Io.Duration.fromSeconds(10), .awake) catch |err|
            log.debug("rate-limit backoff sleep interrupted: {t}", .{err});
    }
}

// ── Server sync ──────────────────────────────────────────────

/// Try to sync a cache file from the configured zfin-server.
/// Returns true if the file was successfully synced, false on any error.
/// Silently returns false if no server is configured.
///
/// Applies a single retry with a short delay when the first attempt
/// fails at the HTTP layer OR produces a torn body (integrity
/// mismatch / `looksCompleteSrf` rejection). Motivation: refreshes
/// fan out 20+ symbols across 8 parallel threads, and the tear
/// pattern we've observed so far looks transient per-connection.
/// One retry papers over single-packet hiccups without dramatically
/// extending refresh wall time.
/// If the retry fails too, the torn-body archive gains a second
/// `.bin`/`.meta` pair. Two captures from one refresh are the best
/// diagnostic we can produce: they show whether the body shape, the
/// byte offset, and the time delta repeat — questions a single
/// failure cannot answer.
fn syncFromServer(self: *DataService, symbol: []const u8, data_type: cache.DataType) bool {
    const server_url = self.config.server_url orelse return false;

    const endpoint = switch (data_type) {
        .candles_daily => "/candles",
        .candles_meta => "/candles_meta",
        .dividends => "/dividends",
        .earnings => "/earnings",
        .options => "/options",
        .splits => "/splits",
        .classification => "/classification",
        .etf_metrics => "/etf_metrics",
        .entity_facts => "/entity_facts",
        // Not synced from the server: `.meta`, and the provider-internal
        // ticker-map indexes, which clients fetch straight from the SEC
        // and cache through `Store`.
        .meta, .tickers_funds, .tickers_companies => return false,
    };

    const full_url = std.fmt.allocPrint(self.allocator, "{s}/{s}{s}", .{ server_url, symbol, endpoint }) catch return false;
    defer self.allocator.free(full_url);

    const max_attempts: u8 = 2;
    const retry_delay_ms: u64 = 250;

    for (0..max_attempts) |attempt| {
        // Only retries (every attempt after the first) wait first.
        if (attempt != 0) {
            log.debug(
                "{s}: retrying {s} server sync (attempt {d}/{d}) after {d}ms delay",
                .{ symbol, @tagName(data_type), attempt + 1, max_attempts, retry_delay_ms },
            );
            std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(retry_delay_ms), .awake) catch |err|
                log.debug("syncFromServer retry-delay sleep interrupted: {t}", .{err});
        }

        switch (self.tryOneSync(symbol, data_type, full_url)) {
            .ok => return true,
            // A torn body or a transport error: move on to the next
            // attempt, if any is left.
            .torn, .net_err => {},
        }
    }
    return false;
}

/// Outcome of a single server-sync attempt.
const SyncAttempt = enum {
    ok,
    torn,
    net_err,
};

/// Perform exactly one sync attempt against `full_url`. A torn body
/// is archived when detected, but retrying is left to the caller.
fn tryOneSync(self: *DataService, symbol: []const u8, data_type: cache.DataType, full_url: []const u8) SyncAttempt {
    const kind = @tagName(data_type);

    // Start/finish trace for this attempt. "started" is logged ahead
    // of any blocking call and "finished" on every way out, so an
    // attempt wedged inside `client.get` shows up as a started line
    // with no partner — that is how stuck symbols are identified.
    // Combine with the `http: stage=...` lines from `net/http.zig` to
    // find the transport stage that stalled.
    //
    // wall-clock required: elapsed time per attempt is what exposes
    // partial-success/stall patterns under parallel fan-out. The
    // `.awake` (monotonic) clock keeps clock skew from producing
    // negative durations.
    const t_start = std.Io.Timestamp.now(self.io, .awake).nanoseconds;
    log.debug("{s}: tryOneSync started ({s})", .{ symbol, kind });

    var client = http.Client.init(self.io, self.allocator);
    defer client.deinit();

    var response = client.get(full_url) catch |err| {
        const elapsed_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
        // Deliberately a warning, not debug: network-shaped failures
        // (`NoAddressReturned`, `ConnectionRefused`,
        // `TlsInitializationFailed`, ...) are what an operator must
        // see when sync stops working. At debug level a
        // DNS-truncation bug stayed invisible to everyone without
        // debug logging, and diagnosing it took hours.
        log.warn("{s}: server sync failed for {s}: {s} (elapsed_ms={d})", .{ symbol, kind, @errorName(err), elapsed_ms });
        log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, kind, elapsed_ms });
        return .net_err;
    };
    defer response.deinit();

    // Integrity gate. When the server sent an ETag in `"sha256:"`
    // form, the body's real sha256 is checked against it. That
    // catches mid-stream truncation which Zig's
    // std.http.Client.fetch accepts silently on the Content-Length
    // path (EndOfStream from a cut transport is treated as a normal
    // end). A mismatching body is archived with the advertised etag
    // so a post-mortem can compare promise and delivery. With no
    // ETag, or a non-sha256 one, control reaches the
    // `looksCompleteSrf` check below (backward-compatible with
    // pre-fix servers).
    switch (response.verifyIntegrity()) {
        .ok, .not_applicable => {},
        .mismatch => |m| {
            cache.Store.archiveTornBody(
                self.io,
                self.allocator,
                self.config.cache_dir,
                symbol,
                data_type,
                response.body,
                .{
                    .failure_reason = .etag_mismatch,
                    .http_status = @intFromEnum(response.status),
                    .server_url = full_url,
                    .server_etag = response.etag,
                },
            ) catch |err| {
                log.debug(
                    "{s}: failed to archive etag-mismatch {s} body: {s}",
                    .{ symbol, kind, @errorName(err) },
                );
            };
            log.debug(
                "{s}: {s} server response failed integrity check ({d} bytes, expected sha256={s}, actual={s}) — archived under _torn/, not writing to cache",
                .{ symbol, kind, response.body.len, m.expected_hex, m.actual_hex },
            );
            const elapsed_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
            log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, kind, elapsed_ms });
            return .torn;
        },
    }

    // Shape gate: the body has to look like a complete SRF file
    // before it may reach the cache. HTTP truncation (TCP reset,
    // Content-Length mismatch, a proxy flushing a partial response,
    // ...) would otherwise be written atomically and surface on the
    // next read as the classic parse error:
    //   error(srf): custom parse of value YYYY-MM failed : InvalidDateFormat
    //
    // A rejected body is archived, with context, under
    // `{cache_dir}/_torn/` so a recurrence leaves evidence for
    // root-cause analysis. Logging stays at debug level on purpose —
    // routine rejections are not meant to be noisy in production;
    // the `.meta` sidecar on disk is the durable signal.
    if (!cache.Store.looksCompleteSrf(response.body)) {
        cache.Store.archiveTornBody(
            self.io,
            self.allocator,
            self.config.cache_dir,
            symbol,
            data_type,
            response.body,
            .{
                .failure_reason = .looks_complete_srf_failed,
                .http_status = @intFromEnum(response.status),
                .server_url = full_url,
                .server_etag = response.etag,
            },
        ) catch |err| {
            log.debug(
                "{s}: failed to archive torn {s} body: {s}",
                .{ symbol, kind, @errorName(err) },
            );
        };
        log.debug(
            "{s}: rejecting torn {s} server response ({d} bytes) — archived under _torn/, not writing to cache",
            .{ symbol, kind, response.body.len },
        );
        const elapsed_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
        log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, kind, elapsed_ms });
        return .torn;
    }

    // Both gates passed: persist the body into the local cache. A
    // failed write is reported as `.net_err`, like a transport error.
    var local_store = self.store();
    local_store.writeRaw(symbol, data_type, response.body) catch |err| {
        log.debug("{s}: failed to write synced {s} to cache: {s}", .{ symbol, kind, @errorName(err) });
        const elapsed_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
        log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, kind, elapsed_ms });
        return .net_err;
    };

    log.debug("{s}: synced {s} from server ({d} bytes)", .{ symbol, kind, response.body.len });
    const elapsed_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
    log.debug("{s}: tryOneSync finished ({s}) result=ok elapsed_ms={d}", .{ symbol, kind, elapsed_ms });
    return .ok;
}

/// Sync both candle files (daily data and its meta) from the server.
/// Both syncs are always attempted; the result is true only when
/// both succeeded.
fn syncCandlesFromServer(self: *DataService, symbol: []const u8) bool {
    const daily_ok = self.syncFromServer(symbol, .candles_daily);
    const meta_ok = self.syncFromServer(symbol, .candles_meta);
    if (!daily_ok) return false;
    return meta_ok;
}

/// Heuristic for mutual-fund tickers: exactly five characters with a
/// trailing `X` (e.g. FDSCX, VSTCX, FAGIX). Such funds have no
/// quarterly earnings, so the earnings fetch is skipped instead of
/// round-tripping to the provider for an empty response.
fn isMutualFund(symbol: []const u8) bool {
    if (symbol.len != 5) return false;
    return symbol[symbol.len - 1] == 'X';
}

// ── User config files ─────────────────────────────────────────

/// Read `accounts.srf` from the directory that contains
/// `portfolio_path` and parse it with `allocator`.
/// Returns null when the file is missing, unreadable, or fails to parse.
/// The caller owns the returned AccountMap and must call deinit().
pub fn loadAccountMap(self: *DataService, allocator: std.mem.Allocator, portfolio_path: []const u8) ?analysis.AccountMap {
    // Directory prefix of the portfolio path, trailing separator
    // included; empty when the path has no separator at all.
    const dir_prefix = blk: {
        const last_sep = std.mem.lastIndexOfScalar(u8, portfolio_path, std.fs.path.sep) orelse break :blk portfolio_path[0..0];
        break :blk portfolio_path[0 .. last_sep + 1];
    };

    const accounts_path = std.fmt.allocPrint(self.allocator, "{s}accounts.srf", .{dir_prefix}) catch return null;
    defer self.allocator.free(accounts_path);

    // The path and the file bytes are scratch (service allocator);
    // only the parsed result uses the caller's allocator.
    const bytes = std.Io.Dir.cwd().readFileAlloc(self.io, accounts_path, self.allocator, .limited(1024 * 1024)) catch return null;
    defer self.allocator.free(bytes);

    return analysis.parseAccountsFile(allocator, bytes) catch null;
}

/// Load and parse `transaction_log.srf` from the same directory as
/// the given portfolio path.
Returns null if the file doesn't /// exist or can't be parsed — the contributions pipeline falls /// back to the pre-transaction-log behavior (no transfer netting) /// when null is returned. /// /// Caller owns the returned `TransactionLog` and must call /// `deinit()`. pub fn loadTransferLog(self: *DataService, portfolio_path: []const u8) ?transaction_log.TransactionLog { const dir_end = if (std.mem.lastIndexOfScalar(u8, portfolio_path, std.fs.path.sep)) |idx| idx + 1 else 0; const path = std.fmt.allocPrint(self.allocator, "{s}transaction_log.srf", .{portfolio_path[0..dir_end]}) catch return null; defer self.allocator.free(path); const data = std.Io.Dir.cwd().readFileAlloc(self.io, path, self.allocator, .limited(1024 * 1024)) catch return null; defer self.allocator.free(data); return transaction_log.parseTransactionLogFile(self.allocator, data) catch null; } }; // ── Tests ───────────────────────────────────────────────────────── test "isPermanentProviderFailure: NotFound is permanent" { try std.testing.expect(isPermanentProviderFailure(error.NotFound)); } test "isPermanentProviderFailure: RequestFailed is transient" { try std.testing.expect(!isPermanentProviderFailure(error.RequestFailed)); } test "isPermanentProviderFailure: ServerError is transient" { try std.testing.expect(!isPermanentProviderFailure(error.ServerError)); } test "isPermanentProviderFailure: Unauthorized is transient" { // Auth misconfigs are user-fixable (set the API key); not a reason // to permanently suppress retries. try std.testing.expect(!isPermanentProviderFailure(error.Unauthorized)); } test "isPermanentProviderFailure: InvalidResponse is transient" { // Parse errors are usually a provider format change or one-off // garbage response — retrying later is fine. 
try std.testing.expect(!isPermanentProviderFailure(error.InvalidResponse)); } test "isPermanentProviderFailure: PaymentRequired is transient" { // FMP marks plan-locked symbols with HTTP 402; user can upgrade // their plan or rotate providers, so don't poison the cache. try std.testing.expect(!isPermanentProviderFailure(error.PaymentRequired)); } test "isPermanentProviderFailure: RateLimited is transient" { // Rate-limit is the textbook transient case; the caller already // handles it specially with backoff + retry. try std.testing.expect(!isPermanentProviderFailure(error.RateLimited)); } test "isMutualFund identifies mutual funds" { // Standard mutual fund tickers (5 letters ending in X) try std.testing.expect(DataService.isMutualFund("FDSCX")); try std.testing.expect(DataService.isMutualFund("VSTCX")); try std.testing.expect(DataService.isMutualFund("FAGIX")); try std.testing.expect(DataService.isMutualFund("VFINX")); // Not mutual funds try std.testing.expect(!DataService.isMutualFund("AAPL")); try std.testing.expect(!DataService.isMutualFund("VTI")); try std.testing.expect(!DataService.isMutualFund("SPY")); try std.testing.expect(!DataService.isMutualFund("GOOGL")); try std.testing.expect(!DataService.isMutualFund("")); // empty try std.testing.expect(!DataService.isMutualFund("X")); // too short try std.testing.expect(!DataService.isMutualFund("FDSCA")); // 5 letters but not ending in X try std.testing.expect(!DataService.isMutualFund("FDSCXA")); // 6 letters ending in A } test "DataService init/deinit lifecycle" { const allocator = std.testing.allocator; const config = Config{ .cache_dir = "/tmp/zfin-test-cache", }; var svc = DataService.init(std.testing.io, allocator, config); defer svc.deinit(); // Should be able to access config try std.testing.expectEqualStrings("/tmp/zfin-test-cache", svc.config.cache_dir); // Providers should be null (lazy init) try std.testing.expect(svc.td == null); try std.testing.expect(svc.pg == null); try 
std.testing.expect(svc.fmp == null); try std.testing.expect(svc.yh == null); try std.testing.expect(svc.tg == null); } test "DataService store helper creates valid store" { const allocator = std.testing.allocator; const config = Config{ .cache_dir = "/tmp/zfin-test-cache", }; var svc = DataService.init(std.testing.io, allocator, config); defer svc.deinit(); const s = svc.store(); try std.testing.expectEqualStrings("/tmp/zfin-test-cache", s.cache_dir); } test "DataService getProvider returns NoApiKey without key" { const allocator = std.testing.allocator; const config = Config{ .cache_dir = "/tmp/zfin-test-cache", // No API keys set }; var svc = DataService.init(std.testing.io, allocator, config); defer svc.deinit(); // TwelveData requires API key const td_result = svc.getProvider(TwelveData); try std.testing.expectError(DataError.NoApiKey, td_result); // Polygon requires API key const pg_result = svc.getProvider(Polygon); try std.testing.expectError(DataError.NoApiKey, pg_result); // Yahoo doesn't require API key const yh_result = svc.getProvider(Yahoo); try std.testing.expect(yh_result != error.NoApiKey); } test "DataService getProvider initializes provider with key" { const allocator = std.testing.allocator; const config = Config{ .cache_dir = "/tmp/zfin-test-cache", .tiingo_key = "test-tiingo-key", }; var svc = DataService.init(std.testing.io, allocator, config); defer svc.deinit(); // First call initializes const tg1 = try svc.getProvider(Tiingo); try std.testing.expect(svc.tg != null); // Second call returns same instance const tg2 = try svc.getProvider(Tiingo); try std.testing.expect(tg1 == tg2); } test "DataService LoadAllResult default values" { const allocator = std.testing.allocator; var result = DataService.LoadAllResult{ .prices = std.StringHashMap(f64).init(allocator), .cached_count = 0, .server_synced_count = 0, .provider_fetched_count = 0, .stale_count = 0, .failed_count = 0, .latest_date = null, }; defer result.deinit(); try 
std.testing.expectEqual(@as(usize, 0), result.prices.count()); } test "FetchResult type construction" { // Verify FetchResult works for different types const candle_result = FetchResult(Candle){ .data = &.{}, .source = .cached, .timestamp = 0, .allocator = std.testing.allocator, }; try std.testing.expect(candle_result.source == .cached); const div_result = FetchResult(Dividend){ .data = &.{}, .source = .fetched, .timestamp = 12345, .allocator = std.testing.allocator, }; try std.testing.expect(div_result.source == .fetched); try std.testing.expectEqual(@as(i64, 12345), div_result.timestamp); } test "FetchOptions default is fully permissive" { // Default-init should allow normal fetch behavior. const opts: FetchOptions = .{}; try std.testing.expect(!opts.skip_network); try std.testing.expect(!opts.force_refresh); } test "LoadAllConfig.fetchOptions maps fields through" { const cfg = DataService.LoadAllConfig{ .force_refresh = true, .skip_network = false, }; const opts = cfg.fetchOptions(); try std.testing.expect(opts.force_refresh); try std.testing.expect(!opts.skip_network); const cfg2 = DataService.LoadAllConfig{ .skip_network = true, }; const opts2 = cfg2.fetchOptions(); try std.testing.expect(opts2.skip_network); try std.testing.expect(!opts2.force_refresh); } test "getCandles offline mode returns cached data without network" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); // Construct a service with a cache pre-populated with candle data. const config = Config{ .cache_dir = dir_path }; var svc = DataService.init(io, allocator, config); defer svc.deinit(); // Pre-populate cache via the Store API. 
var store = svc.store(); var candles = [_]Candle{ .{ .date = Date.fromYmd(2026, 5, 19), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 }, .{ .date = Date.fromYmd(2026, 5, 20), .open = 104, .high = 106, .low = 103, .close = 105, .adj_close = 105, .volume = 1100 }, }; store.cacheCandles("TEST", candles[0..], .tiingo, 0); // Set the test guard: any network call would panic. We expect // the offline-mode path NOT to touch the network. svc.panic_on_network_attempt = true; const result = try svc.getCandles("TEST", .{ .skip_network = true }); defer result.deinit(); try std.testing.expectEqual(@as(usize, 2), result.data.len); try std.testing.expect(result.data[0].date.eql(Date.fromYmd(2026, 5, 19))); try std.testing.expect(result.data[1].date.eql(Date.fromYmd(2026, 5, 20))); try std.testing.expectEqual(Source.cached, result.source); } test "getCandles offline mode with no cache returns FetchFailed" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); const config = Config{ .cache_dir = dir_path }; var svc = DataService.init(io, allocator, config); defer svc.deinit(); // Network guard is on. With no cache and skip_network=true, // we must return FetchFailed without panicking. 
svc.panic_on_network_attempt = true; const err = svc.getCandles("NEVERHEARDOFIT", .{ .skip_network = true }); try std.testing.expectError(DataError.FetchFailed, err); } test "fetchCached offline mode returns stale-cached data" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); const config = Config{ .cache_dir = dir_path }; var svc = DataService.init(io, allocator, config); defer svc.deinit(); // Pre-populate dividend cache with a TTL in the past (stale). var store = svc.store(); var divs = [_]Dividend{ .{ .ex_date = Date.fromYmd(2026, 3, 15), .amount = 0.50, .type = .regular }, }; // Manually set TTL to 1 second (long since expired) by writing // through writeWithSource with a tiny TTL. store.writeWithSource(Dividend, "TEST", divs[0..], .{ .seconds = -1_000_000 }, "test"); svc.panic_on_network_attempt = true; // Even though the cache is stale, skip_network must return it // rather than touching the network. const result = try svc.getDividends("TEST", .{ .skip_network = true }); defer result.deinit(); try std.testing.expectEqual(@as(usize, 1), result.data.len); try std.testing.expectEqual(Source.cached, result.source); } test "getQuote offline mode returns FetchFailed (quotes never cached)" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); const config = Config{ .cache_dir = dir_path }; var svc = DataService.init(io, allocator, config); defer svc.deinit(); svc.panic_on_network_attempt = true; // Quotes have no cache to fall back to in offline mode. 
const err = svc.getQuote("AAPL", .{ .skip_network = true });
    try std.testing.expectError(DataError.FetchFailed, err);
}

test "loadAllPrices offline mode skips network and returns cached" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    var store = svc.store();
    // "FRESH": a symbol with a candle in the cache.
    var fresh_candles = [_]Candle{
        .{ .date = Date.fromYmd(2026, 5, 20), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
    };
    store.cacheCandles("FRESH", fresh_candles[0..], .tiingo, 0);
    // "MISSING": a symbol with no cache at all, so nothing is seeded for it.

    svc.panic_on_network_attempt = true;

    const symbols = [_][]const u8{ "FRESH", "MISSING" };
    var result = svc.loadAllPrices(symbols[0..], &.{}, .{ .skip_network = true }, null, null);
    defer result.prices.deinit();

    // FRESH resolves from the cache.
    try testing.expect(result.prices.contains("FRESH"));
    try testing.expectEqual(@as(f64, 104), result.prices.get("FRESH").?);
    // MISSING must be absent from the prices map.
    try testing.expect(!result.prices.contains("MISSING"));
    // failed_count accounts for MISSING.
    try testing.expectEqual(@as(usize, 1), result.failed_count);
}

test "loadAllPrices force_refresh tops up without wiping the candle cache" {
    // Regression: force_refresh must mean "ignore TTL + incremental
    // top-up", NOT "delete the cache and re-download from scratch".
    // The old behavior invalidated (deleted) candles_daily before the
    // fetch, which forced a full network re-download. With the cache
    // already covering through today, force_refresh must serve from
    // the surviving cache and touch no network.
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    var store = svc.store();
    // Dated far in the future so getCandles' "last cached date is
    // today-or-later" branch fires deterministically regardless of the
    // test clock — an incremental fetch would have nothing to pull and
    // never reaches the network.
    var candles = [_]Candle{
        .{ .date = Date.fromYmd(2099, 12, 31), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
    };
    store.cacheCandles("HELD", candles[0..], .tiingo, 0);

    // From here on any provider/network attempt panics. Had force_refresh
    // wiped the cache (old behavior), getCandles would fall through to a
    // full re-fetch and trip this guard.
    svc.panic_on_network_attempt = true;

    const symbols = [_][]const u8{"HELD"};
    var result = svc.loadAllPrices(symbols[0..], &.{}, .{ .force_refresh = true }, null, null);
    defer result.prices.deinit();

    // Served from the (un-wiped) cache.
    try testing.expect(result.prices.contains("HELD"));
    try testing.expectEqual(@as(f64, 104), result.prices.get("HELD").?);
    // The candle cache survived the force-refresh.
    try testing.expect(svc.getCachedLastClose("HELD") != null);
}

test "getClassification: skip_network with no cache returns FetchFailed" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();
    svc.panic_on_network_attempt = true;

    // Nothing cached and the network is off limits: the lookup must fail.
    try testing.expectError(
        DataError.FetchFailed,
        svc.getClassification("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}

test "getClassification: cache hit returns cached data without network" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();
    // Pre-populate the classification cache.
    var s = svc.store();
    var records = [_]Wikidata.ClassificationRecord{.{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .country = "US",
        .as_of = "2026-05-25",
        .source = "wikidata",
    }};
    s.write(Wikidata.ClassificationRecord, "AAPL", records[0..], .{ .seconds = cache.Ttl.classification });
    // Network guard on — must return from cache without touching network.
svc.panic_on_network_attempt = true;
    const result = try svc.getClassification("AAPL", .{});
    defer result.deinit();
    try std.testing.expectEqual(@as(usize, 1), result.data.len);
    try std.testing.expectEqualStrings("AAPL", result.data[0].symbol);
    try std.testing.expectEqualStrings("Apple Inc.", result.data[0].name.?);
    try std.testing.expectEqual(Source.cached, result.source);
}

test "populateGeo: country US -> geo US" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    // All string fields are heap copies so record.deinit can free them.
    var record: Wikidata.ClassificationRecord = .{
        .symbol = try allocator.dupe(u8, "TEST"),
        .country = try allocator.dupe(u8, "US"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };
    defer record.deinit(allocator);

    try svc.populateGeo(&record);

    try testing.expect(record.geo != null);
    try testing.expectEqualStrings("US", record.geo.?);
}

test "populateGeo: country GB -> geo International Developed" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    var record: Wikidata.ClassificationRecord = .{
        .symbol = try allocator.dupe(u8, "TEST"),
        .country = try allocator.dupe(u8, "GB"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };
    defer record.deinit(allocator);

    try svc.populateGeo(&record);

    try testing.expect(record.geo != null);
    try testing.expectEqualStrings("International Developed", record.geo.?);
}

test "populateGeo: null country -> noop" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    // No .country here: geo is expected to stay null.
    var record: Wikidata.ClassificationRecord = .{
        .symbol = try allocator.dupe(u8, "TEST"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };
    defer record.deinit(allocator);

    try svc.populateGeo(&record);

    try testing.expectEqual(@as(?[]const u8, null), record.geo);
}

test "populateGeo: existing geo not overwritten" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    // geo is pre-set; the country alone would otherwise map to "US".
    var record: Wikidata.ClassificationRecord = .{
        .symbol = try allocator.dupe(u8, "TEST"),
        .country = try allocator.dupe(u8, "US"),
        .geo = try allocator.dupe(u8, "Already Set"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };
    defer record.deinit(allocator);

    try svc.populateGeo(&record);

    try testing.expectEqualStrings("Already Set", record.geo.?);
}

test "getClassification: sparse Wikidata + EDGAR managed_fund hit produces merged record" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();
    // Seed both EDGAR ticker map caches with at least one entry
    // each so the
// synthesizeClassification path doesn't try to
    // fetch them (the load helpers treat empty cached slices as
    // "miss" and fall through to a network fetch).
    var s = svc.store();
    var mf_entries = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "FAGIX",
        .cik = "0000275309",
    }};
    s.write(Edgar.MutualFundTickerEntry, "_edgar", mf_entries[0..], cache.DataType.tickers_funds.ttl());
    var co_entries = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    s.write(Edgar.CompanyTickerEntry, "_edgar", co_entries[0..], cache.DataType.tickers_companies.ttl());
    // Seed an etf_metrics negative cache so getEtfMetrics doesn't
    // try to fetch from the network.
    s.writeNegative("FAGIX", .etf_metrics);
    // Sparse Wikidata records (length 1, only name set -- not useful).
    var sparse = try allocator.alloc(Wikidata.ClassificationRecord, 1);
    sparse[0] = .{
        .symbol = try allocator.dupe(u8, "FAGIX"),
        .name = try allocator.dupe(u8, "Test Fund"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };
    // Drive directly through synthesizeClassification (skip the
    // Wikidata fetch). It takes ownership of `sparse`.
    svc.panic_on_network_attempt = true; // any provider call -> panic
    const merged = try svc.synthesizeClassification("FAGIX", sparse, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(allocator, merged);
    try std.testing.expectEqual(@as(usize, 1), merged.len);
    const c = merged[0];
    try std.testing.expectEqualStrings("FAGIX", c.symbol);
    try std.testing.expect(c.is_etf);
    try std.testing.expectEqualStrings("Fund", c.asset_class.?);
    try std.testing.expectEqualStrings("US", c.country.?);
    try std.testing.expectEqualStrings("US", c.geo.?);
    try std.testing.expectEqualStrings("edgar_fallback", c.source);
    // Wikidata's name preserved on merge.
    try std.testing.expectEqualStrings("Test Fund", c.name.?);
}

test "synthesizeClassification: no EDGAR hit returns NotFound" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    // Both ticker maps get a throwaway entry: the EDGAR lookup then
    // returns .none for our test symbol without trying to fetch the
    // maps from the network.
    var s = svc.store();
    var mf_entries = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY1",
        .cik = "0000000001",
    }};
    s.write(Edgar.MutualFundTickerEntry, "_edgar", mf_entries[0..], cache.DataType.tickers_funds.ttl());
    var co_entries = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "DUMMY2",
        .cik = "0000000002",
    }};
    s.write(Edgar.CompanyTickerEntry, "_edgar", co_entries[0..], cache.DataType.tickers_companies.ttl());

    var sparse = try allocator.alloc(Wikidata.ClassificationRecord, 1);
    sparse[0] = .{
        .symbol = try allocator.dupe(u8, "NEVERHEARDOFIT"),
        .name = try allocator.dupe(u8, "ghost"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };

    svc.panic_on_network_attempt = true;
    try testing.expectError(
        error.NotFound,
        svc.synthesizeClassification("NEVERHEARDOFIT", sparse, .{ .skip_network = true }),
    );
}

test "synthesizeClassification: company_or_uit without ETF/TRUST keyword still routes to multi-row" {
    // PTY shape: closed-end fund whose company_tickers title is
    // "PIMCO CORPORATE & INCOME OPPORTUNITY FUND" -- no "ETF" or
    // "TRUST" in the title, so lookupInTickerMaps returns
    // .company_or_uit{is_etf=false}. But it's still fund-shaped
    // and should produce multi-row metadata in enrich.
    //
    // The downstream signal for "fund-like, emit multi-row" is
    // ClassificationRecord.is_etf.
// Set it to true for any
    // EDGAR-found .company_or_uit hit (even when the title
    // doesn't carry the ETF/TRUST keyword), so PTY-shape
    // closed-end funds get the same treatment as ETFs.
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();
    var s = svc.store();
    // Throwaway MF entry so the MF lookup returns null.
    var mf_entries = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    s.write(Edgar.MutualFundTickerEntry, "_edgar", mf_entries[0..], cache.DataType.tickers_funds.ttl());
    // PTY in the company map with NO ETF/TRUST in title.
    var co_entries = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "PTY",
        .cik = "0001202604",
        .title = "PIMCO CORPORATE & INCOME OPPORTUNITY FUND",
    }};
    s.write(Edgar.CompanyTickerEntry, "_edgar", co_entries[0..], cache.DataType.tickers_companies.ttl());
    s.writeNegative("PTY", .etf_metrics);
    var sparse = try allocator.alloc(Wikidata.ClassificationRecord, 1);
    sparse[0] = .{
        .symbol = try allocator.dupe(u8, "PTY"),
        .name = try allocator.dupe(u8, "PIMCO Corporate & Income Opportunity Fund"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };
    svc.panic_on_network_attempt = true;
    const merged = try svc.synthesizeClassification("PTY", sparse, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(allocator, merged);
    try std.testing.expectEqual(@as(usize, 1), merged.len);
    const c = merged[0];
    // is_etf MUST be true so enrich routes through emitEtfRows
    // (multi-row sleeve breakdown). The asset_class stays "Fund"
    // because no ETF/TRUST keyword in title.
    try std.testing.expect(c.is_etf);
    try std.testing.expectEqualStrings("Fund", c.asset_class.?);
}

test "synthesizeClassification: NPORT-P series_name beats Wikidata's index name for funds" {
    // SOXX shape: Wikidata returns the underlying INDEX name
    // ("PHLX Semiconductor Sector") which is technically what the
    // ticker symbol is for, but downstream consumers want the
    // FUND name ("iShares Semiconductor ETF") that NPORT-P
    // carries. Series_name is more authoritative
    // for the fund itself.
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();

    var s = svc.store();
    var mf_entries = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    s.write(Edgar.MutualFundTickerEntry, "_edgar", mf_entries[0..], cache.DataType.tickers_funds.ttl());
    var co_entries = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "SOXX",
        .cik = "0001100663",
        .title = "iShares Trust",
    }};
    s.write(Edgar.CompanyTickerEntry, "_edgar", co_entries[0..], cache.DataType.tickers_companies.ttl());

    // etf_metrics is seeded with a profile row that carries the
    // NPORT-P seriesName.
    var etf_records = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = try allocator.dupe(u8, "SOXX"),
            .series_name = try allocator.dupe(u8, "iShares Semiconductor ETF"),
            .cik = try allocator.dupe(u8, "0001100663"),
            .as_of = try allocator.dupe(u8, "2026-06-01"),
            .source = try allocator.dupe(u8, "edgar"),
        } },
    };
    defer for (etf_records) |r| r.deinit(allocator);
    s.write(Edgar.EtfMetricRecord, "SOXX", etf_records[0..], cache.DataType.etf_metrics.ttl());

    // Wikidata returned only the index name (sparse).
    var sparse = try allocator.alloc(Wikidata.ClassificationRecord, 1);
    sparse[0] = .{
        .symbol = try allocator.dupe(u8, "SOXX"),
        .name = try allocator.dupe(u8, "PHLX Semiconductor Sector"),
        .as_of = try allocator.dupe(u8, "2026-06-01"),
        .source = try allocator.dupe(u8, "wikidata"),
    };

    svc.panic_on_network_attempt = true;
    const merged = try svc.synthesizeClassification("SOXX", sparse, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(allocator, merged);

    try testing.expectEqual(@as(usize, 1), merged.len);
    const c = merged[0];
    // Series_name from NPORT-P wins -- not Wikidata's index name.
    try testing.expectEqualStrings("iShares Semiconductor ETF", c.name.?);
}

test "getEntityFacts: skip_network with no cache returns FetchFailed" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();
    svc.panic_on_network_attempt = true;

    // Empty cache plus skip_network leaves nothing to return.
    try testing.expectError(
        DataError.FetchFailed,
        svc.getEntityFacts("0000999999", .{ .skip_network = true }),
    );
}

test "getEntityFacts: cache hit returns cached shares-outstanding" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();
    var s = svc.store();
    var records = [_]Edgar.EntityFactRecord{
        .{ .shares_outstanding = .{
            .symbol = "",
            .shares_outstanding = 14687356000,
            .period_end = "2026-04-17",
            .form = "10-Q",
            .cik = "0000320193",
            .as_of = "2026-05-25",
            .source = "edgar_xbrl",
        } },
    };
    s.write(Edgar.EntityFactRecord, "0000320193",
records[0..], .{ .seconds = cache.Ttl.entity_facts });
    svc.panic_on_network_attempt = true;
    const result = try svc.getEntityFacts("0000320193", .{});
    defer result.deinit();
    try std.testing.expectEqual(@as(usize, 1), result.data.len);
    switch (result.data[0]) {
        .shares_outstanding => |so| {
            try std.testing.expectEqual(@as(u64, 14687356000), so.shares_outstanding);
            try std.testing.expectEqualStrings("0000320193", so.cik);
        },
    }
    try std.testing.expectEqual(Source.cached, result.source);
}

test "getEtfMetrics: skip_network with no cache returns FetchFailed" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();
    svc.panic_on_network_attempt = true;

    // No cached metrics and no network permitted: expect FetchFailed.
    try testing.expectError(
        DataError.FetchFailed,
        svc.getEtfMetrics("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}

test "getEtfMetrics: cache hit returns cached profile + sectors + holdings" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();
    var s = svc.store();
    var records = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = "VTI",
            .cik = "0000036405",
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .sector = .{
            .symbol = "VTI",
            .code = "EC/CORP",
            .description = "Equity / Corporate",
            .pct_of_portfolio = 99.7,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .holding = .{
            .symbol = "VTI",
            .name = "NVIDIA Corp",
            .pct_of_portfolio = 6.57,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
    };
    s.write(Edgar.EtfMetricRecord, "VTI",
records[0..], .{ .seconds = cache.Ttl.etf_metrics });
    svc.panic_on_network_attempt = true;
    const result = try svc.getEtfMetrics("VTI", .{});
    defer result.deinit();
    try std.testing.expectEqual(@as(usize, 3), result.data.len);
    try std.testing.expect(result.data[0] == .profile);
    try std.testing.expect(result.data[1] == .sector);
    try std.testing.expect(result.data[2] == .holding);
    try std.testing.expectEqualStrings("VTI", result.data[0].profile.symbol);
    try std.testing.expectEqual(Source.cached, result.source);
}

test "DataService getProvider initializes Wikidata with user_email" {
    const testing = std.testing;
    const allocator = testing.allocator;

    var svc = DataService.init(testing.io, allocator, Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .user_email = "test@example.com",
    });
    defer svc.deinit();

    // First request populates svc.wikidata and carries the configured email.
    const wd1 = try svc.getProvider(Wikidata);
    try testing.expect(svc.wikidata != null);
    try testing.expectEqualStrings("test@example.com", wd1.user_email);

    // Second call returns same instance.
    const wd2 = try svc.getProvider(Wikidata);
    try testing.expect(wd1 == wd2);
}

test "DataService getProvider returns NoApiKey for Wikidata without user_email" {
    const testing = std.testing;
    const allocator = testing.allocator;

    var svc = DataService.init(testing.io, allocator, Config{ .cache_dir = "/tmp/zfin-test-cache" });
    defer svc.deinit();

    // With no user_email configured, both the Wikidata and the Edgar
    // provider requests are rejected with NoApiKey.
    try testing.expectError(DataError.NoApiKey, svc.getProvider(Wikidata));
    try testing.expectError(DataError.NoApiKey, svc.getProvider(Edgar));
}

test "estimateWaitSeconds returns null when relevant provider not instantiated" {
    const allocator = std.testing.allocator;
    const config = Config{ .cache_dir = "/tmp/zfin-test-cache" };
    var svc = DataService.init(std.testing.io, allocator, config);
    defer svc.deinit();
    // No providers initialized yet (lazy). Each rate-limited data
    // type returns null because its provider is missing.
try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(.dividends));
    try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(.splits));
    try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(.earnings));
    try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(.options));
    try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(.etf_metrics));
    try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(.entity_facts));
}

test "estimateWaitSeconds returns 0 for types without rate limiters" {
    // candles_daily, classification, etc. are served by providers
    // that don't have a rate limiter (Tiingo, Wikidata). The
    // function returns 0 for these regardless of provider state --
    // there's nothing to wait for.
    const testing = std.testing;
    const allocator = testing.allocator;

    var svc = DataService.init(testing.io, allocator, Config{ .cache_dir = "/tmp/zfin-test-cache" });
    defer svc.deinit();

    const no_wait: ?u64 = 0;
    try testing.expectEqual(no_wait, svc.estimateWaitSeconds(.candles_daily));
    try testing.expectEqual(no_wait, svc.estimateWaitSeconds(.candles_meta));
    try testing.expectEqual(no_wait, svc.estimateWaitSeconds(.classification));
    try testing.expectEqual(no_wait, svc.estimateWaitSeconds(.meta));
}

test "estimateWaitSeconds returns 0 for fresh rate-limited providers" {
    // Once the provider is instantiated, an unused rate limiter
    // returns 0 (no wait). This is the steady-state happy path
    // for the call at the top of each refresh iteration.
    const allocator = std.testing.allocator;
    const config = Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .polygon_key = "test-polygon-key",
        .fmp_key = "test-fmp-key",
    };
    var svc = DataService.init(std.testing.io, allocator, config);
    defer svc.deinit();
    // Touch each provider to lazy-init it. We don't care about the
    // returned pointer; just need svc.pg / svc.fmp to be non-null.
_ = try svc.getProvider(Polygon);
    _ = try svc.getProvider(Fmp);
    // Fresh limiters have full token bucket -> 0 wait.
    try std.testing.expectEqual(@as(?u64, 0), svc.estimateWaitSeconds(.dividends));
    try std.testing.expectEqual(@as(?u64, 0), svc.estimateWaitSeconds(.splits));
    try std.testing.expectEqual(@as(?u64, 0), svc.estimateWaitSeconds(.earnings));
}

// ── lookupInTickerMaps ────────────────────────────────────────
//
// Pure function — no I/O. Consumed by `lookupEdgarFallback`,
// which loads the maps then calls this. Tests construct
// synthetic ticker-map data directly to exercise every branch
// without touching the cache or network.

/// Test helper: builds a mutual-fund ticker entry whose `symbol` and `cik`
/// are fresh copies made with `allocator`. The caller owns both strings.
/// If the second copy fails, the first is released before the error
/// propagates, so a failed call leaves nothing allocated.
fn testNewMfEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8) !Edgar.MutualFundTickerEntry {
    const owned_symbol = try allocator.dupe(u8, symbol);
    errdefer allocator.free(owned_symbol);
    const owned_cik = try allocator.dupe(u8, cik);
    return .{
        .symbol = owned_symbol,
        .cik = owned_cik,
    };
}

/// Test helper: builds a company ticker entry whose `symbol`, `cik` and
/// (when non-null) `title` are fresh copies made with `allocator`. The
/// caller owns every returned string. Copies made so far are released if a
/// later copy fails, so a failed call leaves nothing allocated.
fn testNewCoEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8, title: ?[]const u8) !Edgar.CompanyTickerEntry {
    const owned_symbol = try allocator.dupe(u8, symbol);
    errdefer allocator.free(owned_symbol);
    const owned_cik = try allocator.dupe(u8, cik);
    errdefer allocator.free(owned_cik);
    // Last fallible step: nothing after it can fail, so the title copy
    // needs no errdefer of its own.
    const owned_title: ?[]const u8 = if (title) |t| try allocator.dupe(u8, t) else null;
    return .{
        .symbol = owned_symbol,
        .cik = owned_cik,
        .title = owned_title,
    };
}

test "lookupInTickerMaps: both maps null -> .none" {
    const allocator = std.testing.allocator;
    const result = lookupInTickerMaps(allocator, "ANY", null, null);
    defer freeEdgarLookup(allocator, result);
    try std.testing.expect(result == .none);
}

test "lookupInTickerMaps: symbol in MF map -> .managed_fund" {
    const allocator = std.testing.allocator;
    const entries = try allocator.alloc(Edgar.MutualFundTickerEntry, 1);
    entries[0] = try testNewMfEntry(allocator, "FAGIX", "0000225322");
    var map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(allocator, entries);
    defer map.deinit();
    const result = lookupInTickerMaps(allocator, "FAGIX", &map, null);
    defer freeEdgarLookup(allocator, result);
    try std.testing.expect(result == .managed_fund);
}

test "lookupInTickerMaps: symbol in company map with TRUST title -> ETF hint" {
    const allocator =
std.testing.allocator;
    const entries = try allocator.alloc(Edgar.CompanyTickerEntry, 1);
    entries[0] = try testNewCoEntry(allocator, "SPY", "0000884394", "SPDR S&P 500 ETF TRUST");
    var map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(allocator, entries);
    defer map.deinit();
    const result = lookupInTickerMaps(allocator, "SPY", null, &map);
    defer freeEdgarLookup(allocator, result);
    try std.testing.expect(result == .company_or_uit);
    try std.testing.expect(result.company_or_uit.is_etf);
    try std.testing.expectEqualStrings("SPDR S&P 500 ETF TRUST", result.company_or_uit.title.?);
}

test "lookupInTickerMaps: company map with operating-company title -> not ETF" {
    const testing = std.testing;
    const allocator = testing.allocator;

    // Single-entry company map for AAPL.
    const entries = try allocator.alloc(Edgar.CompanyTickerEntry, 1);
    entries[0] = try testNewCoEntry(allocator, "AAPL", "0000320193", "Apple Inc.");
    var map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(allocator, entries);
    defer map.deinit();

    const result = lookupInTickerMaps(allocator, "AAPL", null, &map);
    defer freeEdgarLookup(allocator, result);

    try testing.expect(result == .company_or_uit);
    try testing.expect(!result.company_or_uit.is_etf);
}

test "lookupInTickerMaps: not in either map -> .none" {
    const testing = std.testing;
    const allocator = testing.allocator;

    // The MF map only knows FAGIX; no company map is supplied.
    const mf_entries = try allocator.alloc(Edgar.MutualFundTickerEntry, 1);
    mf_entries[0] = try testNewMfEntry(allocator, "FAGIX", "0000225322");
    var mf_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(allocator, mf_entries);
    defer mf_map.deinit();

    const result = lookupInTickerMaps(allocator, "MISSING", &mf_map, null);
    defer freeEdgarLookup(allocator, result);

    try testing.expect(result == .none);
}

test "lookupInTickerMaps: MF map takes precedence over company map" {
    // If a symbol appears in both (rare but possible — class
    // shares of an open-end fund vs the fund's parent company),
    // we prefer the MF answer. Lock in the contract.
const allocator = std.testing.allocator;
    const mf_entries = try allocator.alloc(Edgar.MutualFundTickerEntry, 1);
    mf_entries[0] = try testNewMfEntry(allocator, "DUP", "0000000001");
    const co_entries = try allocator.alloc(Edgar.CompanyTickerEntry, 1);
    co_entries[0] = try testNewCoEntry(allocator, "DUP", "0000000002", "DUP TRUST");
    var mf_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(allocator, mf_entries);
    defer mf_map.deinit();
    var co_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(allocator, co_entries);
    defer co_map.deinit();
    const result = lookupInTickerMaps(allocator, "DUP", &mf_map, &co_map);
    defer freeEdgarLookup(allocator, result);
    try std.testing.expect(result == .managed_fund);
}

test "lookupInTickerMaps: company map with null title -> .company_or_uit, no ETF" {
    // Defensive: if EDGAR's company file has a row with no
    // title, we still return the lookup but can't infer ETF
    // status from a missing string.
    const testing = std.testing;
    const allocator = testing.allocator;

    const entries = try allocator.alloc(Edgar.CompanyTickerEntry, 1);
    entries[0] = try testNewCoEntry(allocator, "BARE", "0000000001", null);
    var map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(allocator, entries);
    defer map.deinit();

    const result = lookupInTickerMaps(allocator, "BARE", null, &map);
    defer freeEdgarLookup(allocator, result);

    try testing.expect(result == .company_or_uit);
    try testing.expect(!result.company_or_uit.is_etf);
    try testing.expect(result.company_or_uit.title == null);
}

test "lookupInTickerMaps: returned title is owned (survives map deinit)" {
    // Critical for the service.lookupEdgarFallback contract:
    // the maps get freed before the EdgarLookup is returned to
    // the caller. The title must survive that.
const allocator = std.testing.allocator;
    const entries = try allocator.alloc(Edgar.CompanyTickerEntry, 1);
    entries[0] = try testNewCoEntry(allocator, "VTI", "0000884394", "VANGUARD TOTAL STOCK MARKET ETF");
    const result = blk: {
        var map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(allocator, entries);
        defer map.deinit();
        break :blk lookupInTickerMaps(allocator, "VTI", null, &map);
    };
    defer freeEdgarLookup(allocator, result);
    // Map is gone. Title must still be readable.
    try std.testing.expect(result == .company_or_uit);
    try std.testing.expectEqualStrings("VANGUARD TOTAL STOCK MARKET ETF", result.company_or_uit.title.?);
    try std.testing.expect(result.company_or_uit.is_etf);
}

test "freeEdgarLookup: handles all three union variants without leak" {
    const allocator = std.testing.allocator;

    // Variants that carry no owned memory: each call is a no-op.
    freeEdgarLookup(allocator, .managed_fund);
    freeEdgarLookup(allocator, .none);
    freeEdgarLookup(allocator, .{ .company_or_uit = .{ .title = null, .is_etf = false } });

    // .company_or_uit with non-null title — frees the title.
    const owned_title = try allocator.dupe(u8, "Some Title");
    freeEdgarLookup(allocator, .{ .company_or_uit = .{ .title = owned_title, .is_etf = true } });
    // testing.allocator reports leaks, so this test passing means
    // the title was freed.
}

// ── CUSIP->ticker cache (loadCusipTickerMap / cacheCusipTicker) ──

test "loadCusipTickerMap: missing file returns empty map" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();

    // Nothing was ever written to the fresh temp dir.
    var map = svc.loadCusipTickerMap(allocator);
    defer map.deinit();
    try testing.expectEqual(@as(usize, 0), map.count());
}

test "cacheCusipTicker + loadCusipTickerMap: write/read round-trip" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();

    // Placeholder CUSIPs/tickers — never real PII.
    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("222222222", "BBB");

    var map = svc.loadCusipTickerMap(allocator);
    defer map.deinit();
    try testing.expectEqual(@as(usize, 2), map.count());
    try testing.expectEqualStrings("AAA", map.get("111111111").?);
    try testing.expectEqualStrings("BBB", map.get("222222222").?);
}

test "cacheCusipTicker: dedups repeated CUSIP (the historical bug)" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();
    // Write the same CUSIP three times — must collapse to one row.
svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("111111111", "AAA");
    var map = svc.loadCusipTickerMap(allocator);
    defer map.deinit();
    try std.testing.expectEqual(@as(usize, 1), map.count());
    try std.testing.expectEqualStrings("AAA", map.get("111111111").?);
    // The on-disk file should physically contain exactly one data
    // row (plus the directive header), proving dedup at the writer.
    const path = try std.fs.path.join(allocator, &.{ dir_path, "cusip_tickers.srf" });
    defer allocator.free(path);
    const data = try std.Io.Dir.cwd().readFileAlloc(io, path, allocator, .limited(64 * 1024));
    defer allocator.free(data);
    var row_count: usize = 0;
    var lines = std.mem.splitScalar(u8, data, '\n');
    while (lines.next()) |line| {
        if (std.mem.indexOf(u8, line, "cusip::") != null) row_count += 1;
    }
    try std.testing.expectEqual(@as(usize, 1), row_count);
}

test "loadCusipTickerMap: first occurrence wins on duplicate rows" {
    // Tolerate a pre-existing file written by the buggy appender
    // (duplicate rows). The reader must not crash and must keep the
    // first mapping.
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    // Hand-write a file with a duplicate row (as the old bug did).
    const path = try std.fs.path.join(allocator, &.{ dir_path, "cusip_tickers.srf" });
    defer allocator.free(path);
    try std.Io.Dir.cwd().writeFile(io, .{
        .sub_path = path,
        .data = "#!srfv1\ncusip::111111111,ticker::AAA\ncusip::111111111,ticker::AAA\n",
    });

    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();

    var map = svc.loadCusipTickerMap(allocator);
    defer map.deinit();
    try testing.expectEqual(@as(usize, 1), map.count());
    try testing.expectEqualStrings("AAA", map.get("111111111").?);
}

// ── CUSIP resolution cascade (resolveCusips / appendCusipEntries) ──

test "appendCusipEntries: batches, dedups vs file and within batch" {
    const testing = std.testing;
    const allocator = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    const config = Config{ .cache_dir = dir_path };
    var svc = DataService.init(io, allocator, config);
    defer svc.deinit();

    // Seed one entry on disk.
    svc.cacheCusipTicker("111111111", "AAA");

    // Batch: 111 already on disk (skip), 222 + 333 new, 222 repeated
    // within the batch (skip the second).
    const batch = [_]DataService.CusipEntry{
        .{ .cusip = "111111111", .ticker = "ZZZ" },
        .{ .cusip = "222222222", .ticker = "BBB" },
        .{ .cusip = "333333333", .ticker = "CCC" },
        .{ .cusip = "222222222", .ticker = "BBB" },
    };
    svc.appendCusipEntries(batch[0..]);

    var map = svc.loadCusipTickerMap(allocator);
    defer map.deinit();
    try testing.expectEqual(@as(u32, 3), map.count());
    try testing.expectEqualStrings("AAA", map.get("111111111").?); // file wins
    try testing.expectEqualStrings("BBB", map.get("222222222").?);
    try testing.expectEqualStrings("CCC", map.get("333333333").?);

    // Physically exactly 3 data rows (plus the directive header).
    const path = try std.fs.path.join(allocator, &.{ dir_path, "cusip_tickers.srf" });
    defer allocator.free(path);
    const data = try std.Io.Dir.cwd().readFileAlloc(io, path, allocator, .limited(64 * 1024));
    defer allocator.free(data);

    var rows: usize = 0;
    var lines = std.mem.splitScalar(u8, data, '\n');
    while (lines.next()) |line| {
        // Only lines carrying a cusip field count as data rows.
        if (std.mem.indexOf(u8, line, "cusip::") == null) continue;
        rows += 1;
    }
    try testing.expectEqual(@as(usize, 3), rows);
}

test "mergeCusipBody: merges new entries, skips those already in `have` or the batch" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);
    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();
    // `have` already maps 111 -> AAA (local is authoritative).
    svc.cacheCusipTicker("111111111", "AAA");
    var have = svc.loadCusipTickerMap(allocator);
    defer have.deinit();
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();
    var out = std.StringHashMap([]const u8).init(arena.allocator());
    // Server body: 111 conflicts with `have` (ignored), 222 + 333 are
    // new, 222 repeated (the second is skipped).
const body = "#!srfv1\n" ++ "cusip::111111111,ticker::ZZZ\n" ++ "cusip::222222222,ticker::BBB\n" ++ "cusip::333333333,ticker::CCC\n" ++ "cusip::222222222,ticker::BBB\n"; DataService.mergeCusipBody(arena.allocator(), &out, have, body); try std.testing.expectEqual(@as(u32, 2), out.count()); try std.testing.expectEqualStrings("BBB", out.get("222222222").?); try std.testing.expectEqualStrings("CCC", out.get("333333333").?); try std.testing.expect(out.get("111111111") == null); // have wins } test "resolveCusips: warm cache resolves without touching the network" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path }); defer svc.deinit(); // No server_url; assert L2/L3 are never reached for an all-hit set. svc.panic_on_network_attempt = true; svc.cacheCusipTicker("111111111", "AAA"); svc.cacheCusipTicker("222222222", "BBB"); // Duplicate + empty CUSIP in the request must be tolerated. const want = [_][]const u8{ "111111111", "222222222", "111111111", "" }; var map = svc.resolveCusips(allocator, want[0..], false); defer map.deinit(); try std.testing.expectEqualStrings("AAA", map.get("111111111").?); try std.testing.expectEqualStrings("BBB", map.get("222222222").?); } test "resolveCusips: skip_network serves L1 only, never hits the network" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path }); defer svc.deinit(); // A miss would normally fall through to L2/L3; skip_network must // prevent any network attempt even so. 
svc.panic_on_network_attempt = true; svc.cacheCusipTicker("111111111", "AAA"); // "999999999" is absent from L1 — with skip_network it stays // unresolved rather than triggering a server/OpenFIGI lookup. const want = [_][]const u8{ "111111111", "999999999" }; var map = svc.resolveCusips(allocator, want[0..], true); defer map.deinit(); try std.testing.expectEqualStrings("AAA", map.get("111111111").?); try std.testing.expect(map.get("999999999") == null); } test "getEtfProfile: carries holding CUSIP through the model boundary" { const allocator = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); defer allocator.free(dir_path); var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path }); defer svc.deinit(); // Seed etf_metrics: a profile row + a holding carrying a CUSIP but // no ticker (the common NPORT-P shape — placeholder values only). var etf_records = [_]Edgar.EtfMetricRecord{ .{ .profile = .{ .symbol = try allocator.dupe(u8, "TESTF"), .series_name = try allocator.dupe(u8, "Test Fund"), .cik = try allocator.dupe(u8, "0000000002"), .as_of = try allocator.dupe(u8, "2026-06-01"), .source = try allocator.dupe(u8, "edgar"), } }, .{ .holding = .{ .symbol = try allocator.dupe(u8, "TESTF"), .name = try allocator.dupe(u8, "Placeholder Corp"), .cusip = try allocator.dupe(u8, "999999999"), .pct_of_portfolio = 12.5, .as_of = try allocator.dupe(u8, "2026-06-01"), .source = try allocator.dupe(u8, "edgar"), } }, }; defer for (etf_records) |r| r.deinit(allocator); var s = svc.store(); s.write(Edgar.EtfMetricRecord, "TESTF", etf_records[0..], cache.DataType.etf_metrics.ttl()); svc.panic_on_network_attempt = true; const result = try svc.getEtfProfile("TESTF", .{ .skip_network = true }); defer result.deinit(); const holdings = result.data.holdings orelse return error.NoHoldings; try std.testing.expectEqual(@as(usize, 1), holdings.len); try 
std.testing.expectEqualStrings("999999999", holdings[0].cusip orelse return error.NoCusip); try std.testing.expect(holdings[0].symbol == null); // filing had no ticker }