zfin/src/service.zig

4366 lines
198 KiB
Zig

//! DataService -- unified data access layer for zfin.
//!
//! Encapsulates the "check cache -> fresh? return -> else fetch from provider -> cache -> return"
//! pattern that was previously duplicated between CLI and TUI. Both frontends should use this
//! as their sole data source.
//!
//! Provider selection is internal: each data type routes to the appropriate provider
//! based on available API keys. Callers never need to know which provider was used.
const std = @import("std");
const builtin = @import("builtin");
const log = std.log.scoped(.service);
const Date = @import("Date.zig");
const Candle = @import("models/candle.zig").Candle;
const Dividend = @import("models/dividend.zig").Dividend;
const Split = @import("models/split.zig").Split;
const OptionsChain = @import("models/option.zig").OptionsChain;
const EarningsEvent = @import("models/earnings.zig").EarningsEvent;
const Quote = @import("models/quote.zig").Quote;
const EtfProfile = @import("models/etf_profile.zig").EtfProfile;
const Holding = @import("models/etf_profile.zig").Holding;
const SectorWeight = @import("models/etf_profile.zig").SectorWeight;
const Config = @import("Config.zig");
const cache = @import("cache/store.zig");
const srf = @import("srf");
const analysis = @import("analytics/analysis.zig");
const transaction_log = @import("models/transaction_log.zig");
const TwelveData = @import("providers/twelvedata.zig").TwelveData;
const Polygon = @import("providers/polygon.zig").Polygon;
const Fmp = @import("providers/fmp.zig").Fmp;
const Cboe = @import("providers/cboe.zig").Cboe;
const OpenFigi = @import("providers/openfigi.zig");
const Yahoo = @import("providers/yahoo.zig").Yahoo;
const Tiingo = @import("providers/tiingo.zig").Tiingo;
const Wikidata = @import("providers/Wikidata.zig");
const Edgar = @import("providers/Edgar.zig");
const classification = @import("models/classification.zig");
const fmt = @import("format.zig");
const performance = @import("analytics/performance.zig");
const http = @import("net/http.zig");
const atomic = @import("atomic.zig");
// ── Wall-clock policy ────────────────────────────────────────
//
// `FetchResult.timestamp` records when a given fetch or cached-read
// completed. Each `std.Io.Timestamp.now(self.io, .real)` call in
// this file stamps one specific fetch — a single command invocation
// produces many fetches, each with its own real-time stamp. Threading
// `now_s` in from the caller would collapse all per-fetch timestamps to
// the command-entry time, which is not what callers want when they
// display "fetched 3s ago" for some symbols and "cached 2d ago" for
// others in the same command.
/// Error set returned by `DataService` operations.
///
/// Members without a doc comment below are not produced by the code
/// visible in this part of the file; see their use sites for details.
pub const DataError = error{
/// A provider needs a credential that is not configured: an API
/// key, or the contact email used by the Wikidata / EDGAR providers.
NoApiKey,
/// The request could not be satisfied: every provider attempt
/// failed, or offline mode (`skip_network`) hit a cache miss.
FetchFailed,
CacheError,
ParseError,
OutOfMemory,
/// Transient provider failure (server error, connection issue).
/// Caller should stop and retry later.
TransientError,
/// Provider auth failure (bad API key). Entire refresh should stop.
AuthError,
/// Provider returned a rate-limit response (e.g. SEC EDGAR's
/// 10-req/sec ceiling, or a free-tier candle API's per-minute
/// cap). Caller should stop the current batch and surface a
/// "try again later" message; retrying immediately will just
/// hit the same limit.
RateLimited,
/// Provider responded but doesn't have data for the requested
/// symbol (404, "Error Message" body, or equivalent). Distinct
/// from `FetchFailed` so callers (e.g. `enrich`) can tell the
/// user "this symbol isn't in the provider's catalog; mark it
/// manually" instead of an opaque "fetch failed."
NotFound,
};
/// Per-call options controlling cache vs network behavior. Drives
/// the `--refresh-data` global flag's three modes:
///
/// - `--refresh-data=auto` → `.{}` (default; respect TTL, fetch on stale/miss).
/// - `--refresh-data=never` → `.{ .skip_network = true }` (offline mode;
/// return cached data even if stale, treat cache miss as unavailable).
/// - `--refresh-data=force` → `.{ .force_refresh = true }` (ignore cache TTL,
/// fetch fresh from provider).
///
/// `skip_network` and `force_refresh` express contradictory intents.
/// The CLI flag cannot produce the combination — `RefreshPolicy` is a
/// 3-variant enum, so the user can never set both. But because the
/// underlying shape is two independent booleans, an internal caller
/// constructing `FetchOptions` directly *could* set both at once.
/// When both are true, **`skip_network` wins**:
///
/// - The call returns cached data (fresh or stale, whatever's there).
/// - `force_refresh` has no effect — no network is touched.
///
/// This is the safe default: when in doubt, don't reach the network.
/// Internal callers that genuinely want fresh data should set
/// `force_refresh = true, skip_network = false`.
pub const FetchOptions = struct {
/// Skip provider fetches and server sync. Returns cached data
/// (even if stale) or null/empty on cache miss. Wins over
/// `force_refresh` when both are set.
skip_network: bool = false,
/// Force a fresh fetch, ignoring cache TTL (server sync is
/// skipped as well). No-op when `skip_network` is also set.
force_refresh: bool = false,
};
/// Report whether a provider error is permanent enough to justify a
/// negative-cache entry.
///
/// A negative entry suppresses retries until the next manual
/// `--refresh-data=force` / `cache clear`, so it must only be written
/// when further attempts cannot succeed. Today that means exactly one
/// error: `NotFound` (the provider has no data of this type for the
/// symbol). Rate limits, network blips, 5xx responses, auth problems
/// and parse errors are transient or fixable, so they return false and
/// the caller reports `FetchFailed` without poisoning the cache.
///
/// `error.RateLimited` never needs special handling here: callers deal
/// with it before consulting this classifier (single retry after
/// backoff).
pub fn isPermanentProviderFailure(err: anyerror) bool {
    return switch (err) {
        error.NotFound => true,
        else => false,
    };
}
/// Result of a CUSIP-to-ticker lookup (provider-agnostic).
/// Currently an alias for the OpenFIGI provider's `FigiResult`.
pub const CusipResult = OpenFigi.FigiResult;
/// Result of an EDGAR ticker-map fallback lookup. Returned by
/// `DataService.lookupEdgarFallback` so commands consume a
/// digested shape instead of pulling in `TickerMap` /
/// `MutualFundTickerEntry` / `CompanyTickerEntry` (those are
/// provider-internal).
///
/// `enrich` uses this to decide what metadata.srf line to emit
/// when Wikidata had no match for a symbol.
///
/// Ownership: only the `.company_or_uit` variant can carry an
/// allocated string; release it with `freeEdgarLookup`.
pub const EdgarLookup = union(enum) {
/// Symbol matched the EDGAR mutual-fund / managed-fund map.
/// Generic "Fund" label (the `tickers_funds.srf` file mixes
/// mutual funds and series-of-trust ETFs; we can't tell
/// which without digging into submissions metadata).
managed_fund,
/// Symbol matched the EDGAR company / UIT map. `title` is
/// the entry's `title` (e.g. "SPDR S&P 500 ETF TRUST"),
/// allocated by the service's allocator — caller frees with
/// `freeEdgarLookup` when done. `title` is null when the entry
/// has no title or the copy could not be allocated. The `is_etf`
/// flag is set when the title contains "ETF" or "TRUST"
/// (case-insensitive) — operating companies usually have
/// Wikidata coverage and wouldn't reach this fallback, so a
/// UIT-style hit is almost certainly an ETF.
company_or_uit: struct { title: ?[]const u8, is_etf: bool },
/// Symbol not in either EDGAR map.
none,
};
/// Release whatever heap memory an `EdgarLookup` owns.
///
/// The only owned field is the optional `title` of the
/// `.company_or_uit` variant; every other variant (and a null title)
/// makes this a no-op. `allocator` must be the one the title was
/// allocated with.
pub fn freeEdgarLookup(allocator: std.mem.Allocator, lookup: EdgarLookup) void {
    if (lookup != .company_or_uit) return;
    const owned_title = lookup.company_or_uit.title orelse return;
    allocator.free(owned_title);
}
/// Look up `sym` in the supplied EDGAR ticker maps. Pure data
/// transform; no I/O.
///
/// The mutual-fund map is consulted first; a hit there returns
/// `.managed_fund` without looking at the company map.
///
/// Both maps may be null (caller failed to load one or both).
/// A null map simply contributes no match for that pass.
///
/// On `.company_or_uit`, the returned `title` is an owned copy made
/// with `allocator`, so the caller can keep using it after the maps
/// are freed. Free with `freeEdgarLookup`. If the copy cannot be
/// allocated, `title` is null (a warning is logged), but `is_etf` is
/// still accurate: the classification reads the map's own title, not
/// the copy.
fn lookupInTickerMaps(
    allocator: std.mem.Allocator,
    sym: []const u8,
    mf_map: ?*const Edgar.TickerMap(Edgar.MutualFundTickerEntry),
    co_map: ?*const Edgar.TickerMap(Edgar.CompanyTickerEntry),
) EdgarLookup {
    if (mf_map) |m| {
        if (m.get(sym)) |_| return .managed_fund;
    }

    const companies = co_map orelse return .none;
    const entry = companies.get(sym) orelse return .none;

    // Classify from the entry's own title so an allocation failure
    // below cannot silently turn an ETF hit into a non-ETF hit.
    const source_title: []const u8 = entry.title orelse "";
    const is_etf =
        std.ascii.indexOfIgnoreCase(source_title, "ETF") != null or
        std.ascii.indexOfIgnoreCase(source_title, "TRUST") != null;

    // The signature has no error channel, so a failed copy degrades
    // to "no title" instead of propagating; make that visible in logs.
    const title_owned: ?[]const u8 = if (entry.title) |t|
        allocator.dupe(u8, t) catch |err| blk: {
            log.warn("{s}: could not copy EDGAR title ({s}); returning lookup without title", .{ sym, @errorName(err) });
            break :blk null;
        }
    else
        null;

    return .{ .company_or_uit = .{ .title = title_owned, .is_etf = is_etf } };
}
/// Provenance of a `FetchResult`: whether its data came from the
/// cache or was freshly fetched.
pub const Source = enum {
/// Data was read back from the local cache.
cached,
/// Data was obtained from a provider during this call.
fetched,
};
/// Map a fetched model type `T` to the shape of its in-memory payload.
///
/// Nearly every type is delivered as a slice of records (`[]Candle`,
/// `[]Dividend`, …), matching what the cache stores. `EtfProfile` is
/// the single exception: `getEtfProfile` builds one struct from the
/// `etf_metrics` cache instead of returning a slice, so its payload is
/// the struct itself. The cache layer never stores `EtfProfile`
/// directly, which is why this special case lives in the fetch layer
/// rather than in `Store.DataFor`.
fn PayloadFor(comptime T: type) type {
    if (T == EtfProfile) return EtfProfile;
    return []T;
}
/// Result wrapper shared by every fetch operation: the payload plus
/// where it came from and when.
///
/// `data` belongs to `allocator`. Release it with `result.deinit()`,
/// which frees the outer slice/struct and any nested owned fields.
/// Carrying the allocator in the result replaces the older "caller
/// frees with whatever allocator it has" convention, which broke when
/// the caller's allocator (e.g. an arena) was not the service's.
pub fn FetchResult(comptime T: type) type {
    return struct {
        data: PayloadFor(T),
        source: Source,
        timestamp: i64,
        /// Owner of `data`. The service fills this in on every return
        /// path; callers go through `deinit` instead of using it
        /// directly.
        allocator: std.mem.Allocator,

        /// Release `data` together with anything it owns.
        ///
        /// The strategy is picked at comptime, in this order:
        /// 1. `T` declares `freeSlice` (Dividend, OptionsChain): call
        ///    it — it deinits the elements and frees the slice.
        /// 2. The payload is a slice (Candle, Split, EarningsEvent):
        ///    plain slice free.
        /// 3. `T` declares `deinit` (EtfProfile): call it on the
        ///    struct payload.
        pub fn deinit(self: @This()) void {
            const owner = self.allocator;
            if (comptime @hasDecl(T, "freeSlice")) {
                T.freeSlice(owner, self.data);
                return;
            }
            switch (@typeInfo(@TypeOf(self.data))) {
                .pointer => owner.free(self.data),
                else => if (comptime @hasDecl(T, "deinit")) self.data.deinit(owner),
            }
        }
    };
}
// ── PostProcess callbacks ────────────────────────────────────
// `Store.read` parses with `parse_allocator = .{ .allocator = ... }`,
// so SRF dupes every owned string into the caller's allocator
// automatically. PostProcess callbacks remain only for non-trivial
// post-parse logic (e.g. recomputing derived fields). String duping
// is NOT a valid reason to add a postProcess.
/// Post-parse hook for `EarningsEvent`: rebuild the derived surprise
/// fields, which SRF does not persist (only `actual` and `estimate`
/// are stored).
///
/// Nothing is touched unless both `actual` and `estimate` are present.
/// `surprise_percent` is only written when `estimate` is non-zero, to
/// avoid dividing by zero.
fn earningsPostProcess(ev: *EarningsEvent, _: std.mem.Allocator) anyerror!void {
    const actual = ev.actual orelse return;
    const estimate = ev.estimate orelse return;

    ev.surprise = actual - estimate;
    if (estimate == 0) return;
    ev.surprise_percent = (ev.surprise.? / @abs(estimate)) * 100.0;
}
pub const DataService = struct {
/// Thread-safe allocator used for all DataService-internal allocations.
///
/// Why thread safety matters: `parallelServerSync` spawns worker
/// threads that each allocate through `DataService` — HTTP client
/// init, TLS cert bundle parsing, request/response buffers, and
/// `Store.writeRaw` path joins. Unsynchronized concurrent allocs from
/// workers corrupt a non-thread-safe allocator's internal state.
///
/// History: the CLI's root allocator (`src/main.zig`) was an
/// `ArenaAllocator` that was NOT thread-safe at the time, and
/// concurrent worker allocations corrupted the arena's free list.
/// Symptoms seen in the wild:
///
///     thread N panic: reached unreachable code
///     std/mem/Allocator.zig:147 grow
///     std/hash_map.zig:1296 addCertsFromFile
///     std/crypto/Certificate/Bundle.zig:206 request
///     std/http/Client.zig:1789 request
///     src/net/http.zig:43 syncFromServer
///
/// and bare segfaults mid-heap on whatever pointer the arena
/// scrambled that run. This field used to be a wrapper over the
/// caller-provided base allocator that serialized every allocation
/// with a mutex; the cost was one lock acquire/release per alloc —
/// negligible next to the I/O.
///
/// In Zig 0.16, the Juicy-Main-provided `init.gpa` (DebugAllocator)
/// is thread-safe by default when not single-threaded, and
/// `ArenaAllocator` is thread-safe and lock-free. Callers should
/// pass whichever thread-safe allocator is appropriate — we no
/// longer wrap it ourselves.
///
/// DO NOT add an "unwrap" method or pass a non-thread-safe
/// allocator. The point is that internal callers don't need to
/// know whether they're running under threads — the allocator
/// itself guarantees safety.
allocator: std.mem.Allocator,
io: std.Io,
config: Config,
// Lazily initialized providers (null until first use)
td: ?TwelveData = null,
pg: ?Polygon = null,
fmp: ?Fmp = null,
cboe: ?Cboe = null,
yh: ?Yahoo = null,
tg: ?Tiingo = null,
wikidata: ?Wikidata = null,
edgar: ?Edgar = null,
/// Test-only guard: when true, any code path that would touch
/// the network panics with a clear message. Used by offline-mode
/// tests to verify that `FetchOptions.skip_network = true`
/// genuinely doesn't reach the network. Default false; never
/// set in production.
panic_on_network_attempt: bool = false,
/// Build a `DataService` around the given I/O handle, allocator and
/// configuration. Providers are not created here; they are set up
/// lazily on first use.
///
/// Outside of test builds this also logs which API keys are missing.
/// Under `zig build test` the warnings are suppressed: every test that
/// spins up a DataService would otherwise repeat the whole block.
/// Real users always see them at CLI/TUI startup.
pub fn init(io: std.Io, allocator: std.mem.Allocator, config: Config) DataService {
    const service: DataService = .{
        .io = io,
        .config = config,
        .allocator = allocator,
    };
    if (builtin.is_test) return service;
    service.logMissingKeys();
    return service;
}
/// Emit one log line per missing credential so users can tell which
/// features are unavailable. Everything is a warning except the
/// OpenFIGI key, which is informational.
fn logMissingKeys(self: DataService) void {
    const cfg = &self.config;
    const tiingo_missing = cfg.tiingo_key == null;
    const twelvedata_missing = cfg.twelvedata_key == null;

    // Primary candle provider.
    if (tiingo_missing)
        log.warn("TIINGO_API_KEY not set — candle data will fall back to TwelveData/Yahoo", .{});

    // Dividend/split data.
    if (cfg.polygon_key == null)
        log.warn("POLYGON_API_KEY not set — dividend and split data unavailable", .{});

    // Earnings data.
    if (cfg.fmp_key == null)
        log.warn("FMP_API_KEY not set — earnings data unavailable", .{});

    // ETF profiles and portfolio enrichment use public SEC EDGAR +
    // Wikidata, which both need a contact email in outbound
    // User-Agents (SEC's policy).
    if (cfg.user_email == null)
        log.warn("ZFIN_USER_EMAIL not set — ETF profiles + enrichment unavailable", .{});

    // Candle fallback: only reported when neither keyed candle
    // provider is configured.
    if (twelvedata_missing and tiingo_missing)
        log.warn("TWELVEDATA_API_KEY not set — no candle fallback if Yahoo fails", .{});

    // CUSIP lookups still work without a key, just rate limited.
    if (cfg.openfigi_key == null)
        log.info("OPENFIGI_API_KEY not set — CUSIP lookups will use anonymous rate limits", .{});
}
/// Tear down every provider that was actually created. Slots that are
/// still null (never used) are skipped.
pub fn deinit(self: *DataService) void {
    // Same order as the field declarations.
    const provider_slots = .{ "td", "pg", "fmp", "cboe", "yh", "tg", "wikidata", "edgar" };
    inline for (provider_slots) |slot| {
        if (@field(self, slot)) |*provider| provider.deinit();
    }
}
// ── Provider accessor ──────────────────────────────────────────
/// Return the lazily-created provider of type `T`, constructing it on
/// first use and storing it in the matching optional field.
///
/// Construction depends on the provider kind:
/// - `Cboe` / `Yahoo`: no credentials at all.
/// - `Wikidata` / `Edgar`: identified by `config.user_email`.
/// - anything else: an API key read from the config field named
///   `<lowercased type name>_key` (e.g. `Tiingo` → `tiingo_key`).
///
/// Returns `DataError.NoApiKey` when the required email or key is not
/// configured.
fn getProvider(self: *DataService, comptime T: type) DataError!*T {
    const slot = comptime providerField(T);
    if (@field(self, slot)) |*existing| return existing;

    const keyless = comptime (T == Cboe or T == Yahoo);
    const email_identified = comptime (T == Wikidata or T == Edgar);

    if (keyless) {
        // CBOE and Yahoo have no API key.
        @field(self, slot) = T.init(self.io, self.allocator);
    } else if (email_identified) {
        // Open-data providers are identified by a contact email, not
        // an API key. It goes into the User-Agent + From headers per
        // each provider's politeness contract.
        const contact = self.config.user_email orelse return DataError.NoApiKey;
        @field(self, slot) = T.init(self.io, self.allocator, contact);
    } else {
        // Derive the config field name at comptime: take the last
        // dot-separated segment of the type name, lowercase it and
        // append "_key".
        const key_field = comptime name: {
            const qualified = @typeName(T);
            var after_dot: usize = 0;
            for (qualified, 0..) |ch, idx| {
                if (ch == '.') after_dot = idx + 1;
            }
            const bare = qualified[after_dot..];
            const suffix = "_key";
            var out: [bare.len + suffix.len]u8 = undefined;
            _ = std.ascii.lowerString(out[0..bare.len], bare);
            @memcpy(out[bare.len..][0..suffix.len], suffix);
            break :name out[0 .. bare.len + suffix.len];
        };
        const api_key = @field(self.config, key_field) orelse return DataError.NoApiKey;
        @field(self, slot) = T.init(self.io, self.allocator, api_key);
    }

    return &@field(self, slot).?;
}
/// Comptime lookup: name of the `DataService` field whose type is
/// `?T`. Fails compilation if no field matches.
fn providerField(comptime T: type) []const u8 {
    const Slot = ?T;
    inline for (std.meta.fields(DataService)) |field| {
        if (field.type == Slot) return field.name;
    }
    @compileError("unknown provider type");
}
// ── Cache helper ─────────────────────────────────────────────
/// Build a cache `Store` handle over the configured cache directory,
/// sharing this service's I/O handle and allocator.
fn store(self: *DataService) cache.Store {
    const cache_dir = self.config.cache_dir;
    return cache.Store.init(self.io, self.allocator, cache_dir);
}
/// Shared cache-then-provider flow for the simple data types
/// (dividends, splits, options).
///
/// Steps, in order:
/// 1. Unless `force_refresh`, return a fresh cache hit.
/// 2. If `skip_network`, return any cached copy (even stale) or
///    `FetchFailed` on a miss — the network is never touched.
/// 3. Unless `force_refresh`, try a server sync and return the result
///    if it is fresh.
/// 4. Fetch from the provider. A rate-limit error gets one retry after
///    a backoff. A permanent failure (see
///    `isPermanentProviderFailure`) on the first attempt writes a
///    negative cache entry. Any unrecovered failure is reported as
///    `FetchFailed`.
/// 5. Write the fetched data to the cache and return it.
fn fetchCached(
    self: *DataService,
    comptime T: type,
    symbol: []const u8,
    comptime postProcess: ?*const fn (*T, std.mem.Allocator) anyerror!void,
    opts: FetchOptions,
) DataError!FetchResult(T) {
    var s = self.store();
    const data_type = comptime cache.Store.dataTypeFor(T);
    const type_name = @tagName(data_type);
    const gpa = self.allocator;

    // A forced refresh must not short-circuit on a fresh cache entry.
    if (!opts.force_refresh) {
        if (s.read(gpa, T, symbol, postProcess, .fresh_only)) |hit| {
            log.debug("{s}: {s} fresh in local cache", .{ symbol, type_name });
            return .{
                .data = hit.data,
                .source = .cached,
                .timestamp = hit.timestamp,
                .allocator = gpa,
            };
        }
    }

    // Offline mode: hand back whatever is cached, stale or not. A
    // miss is FetchFailed (not a network error).
    if (opts.skip_network) {
        const stale = s.read(gpa, T, symbol, postProcess, .any) orelse
            return DataError.FetchFailed;
        log.info("{s}: {s} stale-cached returned (skip_network)", .{ symbol, type_name });
        return .{
            .data = stale.data,
            .source = .cached,
            .timestamp = stale.timestamp,
            .allocator = gpa,
        };
    }

    // Server sync comes before providers, but not on a forced refresh.
    if (!opts.force_refresh and self.syncFromServer(symbol, data_type)) {
        if (s.read(gpa, T, symbol, postProcess, .fresh_only)) |synced| {
            log.debug("{s}: {s} synced from server and fresh", .{ symbol, type_name });
            return .{
                .data = synced.data,
                .source = .cached,
                .timestamp = synced.timestamp,
                .allocator = gpa,
            };
        }
        log.debug("{s}: {s} synced from server but stale, falling through to provider", .{ symbol, type_name });
    }

    log.debug("{s}: fetching {s} from provider", .{ symbol, type_name });
    self.assertNetworkAllowed("fetchCached fetchFromProvider");

    const fetched = self.fetchFromProvider(T, symbol) catch |first_err| recover: {
        if (first_err != error.RateLimited) {
            // Only NotFound ("this symbol genuinely has no data of
            // this type") earns a negative-cache entry. Transient
            // failures (network, 5xx, auth misconfig, parse error)
            // surface as FetchFailed without poisoning the cache, so
            // the next call retries naturally.
            if (isPermanentProviderFailure(first_err)) s.writeNegative(symbol, data_type);
            return DataError.FetchFailed;
        }
        // Rate limited: wait, then retry exactly once.
        self.rateLimitBackoff();
        break :recover self.fetchFromProvider(T, symbol) catch return DataError.FetchFailed;
    };

    s.writeWithSource(T, symbol, fetched, data_type.ttl(), sourceHintFor(T));
    return .{
        .data = fetched,
        .source = .fetched,
        .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(),
        .allocator = gpa,
    };
}
/// Name the provider that `fetchCached` uses for model type `T`, so
/// the merge primitive's `info(cache)` log lines can attribute new
/// entries / field upgrades to a named source.
///
/// Only `Dividend` and `Split` get a hint ("polygon"); the merge
/// primitive consults the hint for those two types alone, so every
/// other type yields null.
fn sourceHintFor(comptime T: type) ?[]const u8 {
    if (T == Dividend or T == Split) return "polygon";
    return null;
}
/// Route a fetch for model type `T` to the provider responsible for
/// it: Polygon for dividends and splits, CBOE for options chains. Any
/// other `T` is a compile error.
///
/// Errors from obtaining the provider (e.g. a missing API key) and
/// from the provider call itself propagate to the caller.
fn fetchFromProvider(self: *DataService, comptime T: type, symbol: []const u8) !cache.Store.DataFor(T) {
    const gpa = self.allocator;
    if (T == Dividend) {
        // Polygon is the primary dividend source because it carries
        // forward-looking declared dividends (e.g. ARCC's 2026-06-15
        // ex_date), which Tiingo's price-series response lacks.
        // Tiingo still supplements the cache opportunistically through
        // `populateAllFromTiingo` during candle fetches; that path
        // relies on the sorted-union semantics of
        // `cache.Store.writeMerged`, so Polygon's and Tiingo's entries
        // coexist in `dividends.srf` without overwriting each other.
        const polygon = try self.getProvider(Polygon);
        return polygon.fetchDividends(gpa, symbol, null, null);
    } else if (T == Split) {
        // Same reasoning as for dividends: Polygon also surfaces
        // forward-looking split announcements that Tiingo's
        // price-series does not.
        const polygon = try self.getProvider(Polygon);
        return polygon.fetchSplits(gpa, symbol);
    } else if (T == OptionsChain) {
        const options_provider = try self.getProvider(Cboe);
        return options_provider.fetchOptionsChain(gpa, symbol);
    } else {
        @compileError("unsupported type for fetchFromProvider");
    }
}
/// Pull candles, dividends and splits from Tiingo with one HTTP call,
/// write all three caches, and return the triple so the caller does
/// not have to re-read from disk.
///
/// This is the orchestrated "cold cache" path used by `getCandles`
/// for a full fetch: a single Tiingo request populates
/// `candles_daily.srf`, `candles_meta.srf`, `dividends.srf` and
/// `splits.srf` together. Tiingo's per-row `divCash` and
/// `splitFactor` make this almost free.
///
/// Dividends and splits are written via `writeWithSource` with
/// `"tiingo"` as the source hint. The underlying `writeMerged`
/// primitive merges Tiingo's view into what is already on disk
/// (typically Polygon-sourced records), preserving the forward-looking
/// entries only Polygon carries. New entries produce an `info(cache)`
/// log line crediting Tiingo — useful when Tiingo surfaces a corporate
/// action Polygon missed (the canonical case is SPYM's 2017-10-16 4:1
/// split).
///
/// The start date is pinned to 2000-01-01 so any 10Y trailing-return
/// window is covered even when `--as-of` back-dates the reference to
/// the earliest imported portfolio data (currently 2014). The extra
/// pre-2004 candles cost ~150 KB per symbol on disk plus a one-time
/// bandwidth bump on a cold-cache fetch, both trivial, and leave a
/// comfortable buffer for older corporate actions (e.g. SPYM's
/// 2017-10-16 split, deep-history reverse splits on legacy tickers).
fn populateAllFromTiingo(self: *DataService, symbol: []const u8) !@import("providers/tiingo.zig").CandleAndCorporateActions {
    const tiingo = try self.getProvider(Tiingo);
    const range_end = fmt.todayDate(self.io);
    const range_start = Date.fromYmd(2000, 1, 1);
    const bundle = try tiingo.fetchCandlesAndCorporateActions(self.allocator, symbol, range_start, range_end);

    var s = self.store();

    // `cacheCandles` writes candles_daily.srf and candles_meta.srf in
    // one shot (last_close, last_date, provider, fail_count=0). An
    // empty candle set is not written.
    if (bundle.candles.len != 0) s.cacheCandles(symbol, bundle.candles, .tiingo, 0);

    // Merge-write path: Tiingo's records supplement rather than
    // replace existing (typically Polygon-sourced) ones, and new
    // entries are logged with "tiingo" attribution.
    const dividend_ttl = cache.DataType.dividends.ttl();
    s.writeWithSource(Dividend, symbol, bundle.dividends, dividend_ttl, "tiingo");
    const split_ttl = cache.DataType.splits.ttl();
    s.writeWithSource(Split, symbol, bundle.splits, split_ttl, "tiingo");

    return bundle;
}
/// Drop the cached `data_type` entry for `symbol` so the next `get*`
/// call has to fetch again. Invalidating daily candles also drops the
/// candle metadata.
pub fn invalidate(self: *DataService, symbol: []const u8, data_type: cache.DataType) void {
    var s = self.store();
    s.clearData(symbol, data_type);
    switch (data_type) {
        // Candle metadata describes the candle file; clear them together.
        .candles_daily => s.clearData(symbol, .candles_meta),
        else => {},
    }
}
// ── Public data methods ──────────────────────────────────────
/// Fetch candles for `symbol` between `from` and `to`, trying
/// providers in a fixed order and classifying failures.
///
/// Provider order:
/// 1. Yahoo, but only when `preferred == .yahoo`. A failure there is
/// logged and the flow continues with Tiingo.
/// 2. Tiingo (primary).
/// 3. Yahoo as fallback, only when `preferred != .yahoo` (otherwise it
/// was already tried in step 1).
///
/// Tiingo error handling:
/// - Unauthorized → `AuthError` (config problem, stop refresh).
/// - RateLimited → back off and retry; if the retry is rate limited
/// again, back off and retry one last time. Any retry that does not
/// succeed → `TransientError`.
/// - ServerError/RequestFailed → `TransientError` (stop refresh, retry
/// later; no Yahoo fallback).
/// - Anything else (NotFound/ParseError/InvalidResponse) → try Yahoo
/// (symbol-level issue).
/// - Tiingo provider unavailable (e.g. no API key) → warn, then try
/// the Yahoo fallback.
///
/// Returns `FetchFailed` when no provider produced candles.
///
/// The `preferred` param controls incremental fetch consistency: use the same
/// provider that sourced the existing cache data.
///
/// NOTE(review): `DataError` already contains `NotFound`, so the
/// `|| error{NotFound}` in the return type looks redundant.
fn fetchCandlesFromProviders(
self: *DataService,
symbol: []const u8,
from: Date,
to: Date,
preferred: cache.Store.CandleProvider,
) (DataError || error{NotFound})!struct { candles: []Candle, provider: cache.Store.CandleProvider } {
// If preferred is Yahoo (degraded symbol), try Yahoo first
if (preferred == .yahoo) {
if (self.getProvider(Yahoo)) |yh| {
if (yh.fetchCandles(self.allocator, symbol, from, to)) |candles| {
log.debug("{s}: candles from Yahoo (preferred)", .{symbol});
return .{ .candles = candles, .provider = .yahoo };
} else |err| {
log.warn("{s}: Yahoo (preferred) failed: {s}", .{ symbol, @errorName(err) });
}
} else |_| {}
}
// Primary: Tiingo
if (self.getProvider(Tiingo)) |tg| {
if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
log.debug("{s}: candles from Tiingo", .{symbol});
return .{ .candles = candles, .provider = .tiingo };
} else |err| {
log.warn("{s}: Tiingo failed: {s}", .{ symbol, @errorName(err) });
if (err == error.Unauthorized) {
log.err("{s}: Tiingo auth failed — check TIINGO_API_KEY", .{symbol});
return DataError.AuthError;
}
if (err == error.RateLimited) {
// Rate limited: back off and retry — this is expected, not a failure
log.info("{s}: Tiingo rate limited, backing off", .{symbol});
self.rateLimitBackoff();
if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
log.debug("{s}: candles from Tiingo (after rate limit backoff)", .{symbol});
return .{ .candles = candles, .provider = .tiingo };
} else |retry_err| {
log.warn("{s}: Tiingo retry after backoff failed: {s}", .{ symbol, @errorName(retry_err) });
if (retry_err == error.RateLimited) {
// Still rate limited after backoff — one more try
self.rateLimitBackoff();
if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
log.debug("{s}: candles from Tiingo (after second backoff)", .{symbol});
return .{ .candles = candles, .provider = .tiingo };
} else |_| {}
}
// Retries exhausted (or the retry failed for another
// reason) — treat as transient
return DataError.TransientError;
}
}
if (isTransientError(err)) {
// Server error or connection failure — stop, don't fall back
return DataError.TransientError;
}
// NotFound, ParseError, InvalidResponse — symbol-level issue, try Yahoo
log.info("{s}: Tiingo does not have this symbol, trying Yahoo", .{symbol});
}
} else |_| {
log.warn("{s}: Tiingo provider not available (no API key?)", .{symbol});
}
// Fallback: Yahoo (symbol not on Tiingo). Skipped when Yahoo was
// the preferred provider, because it was already tried above.
if (preferred != .yahoo) {
if (self.getProvider(Yahoo)) |yh| {
if (yh.fetchCandles(self.allocator, symbol, from, to)) |candles| {
log.info("{s}: candles from Yahoo (Tiingo fallback)", .{symbol});
return .{ .candles = candles, .provider = .yahoo };
} else |err| {
log.warn("{s}: Yahoo fallback also failed: {s}", .{ symbol, @errorName(err) });
}
} else |_| {
log.warn("{s}: Yahoo provider not available", .{symbol});
}
}
return DataError.FetchFailed;
}
/// Report whether a provider error means the provider itself is down.
///
/// `ServerError` is an HTTP 5xx response and `RequestFailed` is a
/// connection/network failure. `RateLimited` and `Unauthorized` are
/// handled separately by callers and are not treated as transient here.
fn isTransientError(err: anyerror) bool {
    return switch (err) {
        error.ServerError, error.RequestFailed => true,
        else => false,
    };
}
/// Single gate to call right before touching the network.
///
/// Offline-mode tests set `panic_on_network_attempt` to prove that
/// their code paths never get here; with the flag set this panics,
/// naming `context` as the offending call site. The flag defaults to
/// false, so production callers always pass through.
inline fn assertNetworkAllowed(self: *DataService, context: []const u8) void {
    if (!self.panic_on_network_attempt) return;
    std.debug.panic("network attempted in offline-mode test: {s}", .{context});
}
/// Fetch daily candles for a symbol (10+ years for trailing returns).
/// Checks cache first; fetches from Tiingo (primary) or Yahoo (fallback) if stale/missing.
/// Uses incremental updates: when the cache is stale, only fetches
/// candles newer than the last cached date rather than re-fetching
/// the entire history.
///
/// `opts.skip_network = true` → returns cached data even if stale,
/// returns FetchFailed on cache miss without touching the network.
/// `opts.force_refresh = true` → treats cache as stale and fetches.
pub fn getCandles(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Candle) {
var s = self.store();
const today = fmt.todayDate(self.io);
// Check candle metadata for freshness (tiny file, no candle deserialization)
const meta_result = s.readCandleMeta(symbol);
// Phase 1: candle metadata exists. Every branch below either returns or
// deliberately falls out of this `if` into the "no usable cache" path
// (Phase 2) further down.
if (meta_result) |mr| {
const m = mr.meta;
// Offline mode: return cached data without touching the
// network. Cache miss / TwelveData-only cache is treated
// as unavailable.
if (opts.skip_network) {
if (m.provider == .twelvedata) {
log.debug("{s}: skip_network and only TwelveData cached — treating as unavailable", .{symbol});
return DataError.FetchFailed;
}
if (s.read(self.allocator, Candle, symbol, null, .any)) |r| {
// Staleness is only logged here; the data is returned either way.
if (!s.isCandleMetaFresh(symbol)) {
log.info("{s}: candles stale-cached returned (skip_network)", .{symbol});
}
return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
}
return DataError.FetchFailed;
}
// If cached data is from TwelveData (deprecated for candles due to
// unreliable adj_close), skip cache and fall through to full re-fetch.
if (m.provider == .twelvedata) {
log.debug("{s}: cached candles from TwelveData — forcing full re-fetch", .{symbol});
} else if (!opts.force_refresh and s.isCandleMetaFresh(symbol)) {
// Fresh — deserialize candles and return.
// If the read fails despite fresh meta, control falls out of
// the outer `if` into the full-fetch path below.
log.debug("{s}: candles fresh in local cache", .{symbol});
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
} else {
// Stale — try server sync before incremental fetch.
// (Force-refresh skips server sync too: the user explicitly
// asked for fresh provider data.)
if (!opts.force_refresh and self.syncCandlesFromServer(symbol)) {
if (s.isCandleMetaFresh(symbol)) {
log.debug("{s}: candles synced from server and fresh", .{symbol});
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
log.debug("{s}: candles synced from server but stale, falling through to incremental fetch", .{symbol});
}
// Stale — try incremental update using last_date from meta.
// NOTE: `m` was captured before any server sync above, so
// `fetch_from` is based on the pre-sync last_date.
const fetch_from = m.last_date.addDays(1);
// If the day after the last cached candle is today or later
// (i.e. the last cached date is yesterday or later), there is
// nothing to fetch: just refresh the TTL (meta only).
if (!fetch_from.lessThan(today)) {
s.updateCandleMeta(symbol, m.last_close, m.last_date, m.provider, m.fail_count);
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
} else {
// Incremental fetch from day after last cached candle
self.assertNetworkAllowed("getCandles incremental fetchCandlesFromProviders");
const result = self.fetchCandlesFromProviders(symbol, fetch_from, today, m.provider) catch |err| {
if (err == DataError.TransientError) {
// Increment fail_count for this symbol
const new_fail_count = m.fail_count +| 1; // saturating add
log.warn("{s}: transient failure (fail_count now {d})", .{ symbol, new_fail_count });
s.updateCandleMeta(symbol, m.last_close, m.last_date, m.provider, new_fail_count);
// If degraded (fail_count >= 3), return stale data rather than failing
if (new_fail_count >= 3) {
log.warn("{s}: degraded after {d} consecutive failures, returning stale data", .{ symbol, new_fail_count });
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
}
return DataError.TransientError;
}
// Non-transient failure — return stale data if available
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = mr.created, .allocator = self.allocator };
return DataError.FetchFailed;
};
const new_candles = result.candles;
if (new_candles.len == 0) {
// No new candles (weekend/holiday) — refresh TTL, reset fail_count
self.allocator.free(new_candles);
s.updateCandleMeta(symbol, m.last_close, m.last_date, result.provider, 0);
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
} else {
// Append new candles to existing file + update meta, reset fail_count
s.appendCandles(symbol, new_candles, result.provider, 0);
// Prefer re-reading the merged file so the caller gets the
// full history; the fetched slice is then redundant.
if (s.read(self.allocator, Candle, symbol, null, .any)) |r| {
self.allocator.free(new_candles);
return .{ .data = r.data, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
// Re-read failed: hand back only the newly fetched candles
// (ownership of `new_candles` moves to the result).
return .{ .data = new_candles, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
}
}
}
// Phase 2: no metadata, a TwelveData-sourced cache, or a cache read
// that failed above.
// Offline mode + no usable cache — give up.
if (opts.skip_network) {
log.debug("{s}: skip_network and no cached candles — unavailable", .{symbol});
return DataError.FetchFailed;
}
// No usable cache — try server sync first (skipped on force_refresh).
if (!opts.force_refresh and self.syncCandlesFromServer(symbol)) {
if (s.isCandleMetaFresh(symbol)) {
log.debug("{s}: candles synced from server and fresh (no prior cache)", .{symbol});
if (s.read(self.allocator, Candle, symbol, null, .any)) |r|
return .{ .data = r.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
log.debug("{s}: candles synced from server but stale, falling through to full fetch", .{symbol});
}
// No usable cache — full fetch via the orchestrated Tiingo
// helper, which writes candles + dividends + splits caches in
// one shot from a single HTTP response. The fixed start date
// (see `populateAllFromTiingo`) is 2000-01-01, deep enough to
// cover a 10Y trailing-return window even when `--as-of`
// back-dates the reference into 2014-era imported portfolio
// history, plus a buffer for older corporate actions like
// SPYM's 2017-10-16 split.
log.debug("{s}: fetching full candle history from provider", .{symbol});
self.assertNetworkAllowed("getCandles full populateAllFromTiingo");
const triple = self.populateAllFromTiingo(symbol) catch |err| {
if (err == error.RateLimited or err == error.ServerError or err == error.RequestFailed) {
// Transient: increment fail_count on existing meta so
// we know to back off if this keeps happening.
// (Nothing is recorded when there was no prior meta.)
if (meta_result) |mr| {
const new_fail_count = mr.meta.fail_count +| 1;
s.updateCandleMeta(symbol, mr.meta.last_close, mr.meta.last_date, mr.meta.provider, new_fail_count);
}
return DataError.TransientError;
}
// NotFound, ParseError, InvalidResponse, AuthError —
// symbol genuinely has no candle data on Tiingo (the only
// provider for historical candles since the 2026-05
// audit). Negative-cache so we don't keep retrying.
s.writeNegative(symbol, .candles_daily);
return DataError.FetchFailed;
};
// populateAllFromTiingo writes all three caches itself; we
// free the slices we don't return. Only `triple.candles` is
// handed to the caller.
defer Dividend.freeSlice(self.allocator, triple.dividends);
defer self.allocator.free(triple.splits);
return .{ .data = triple.candles, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
/// Fetch dividend history for a symbol.
///
/// Thin wrapper: delegates to `fetchCached` for the `Dividend` type,
/// passing `null` as the third argument and forwarding `opts` unchanged.
/// Errors are whatever `fetchCached` returns.
pub fn getDividends(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Dividend) {
return self.fetchCached(Dividend, symbol, null, opts);
}
/// Fetch split history for a symbol.
///
/// Thin wrapper: delegates to `fetchCached` for the `Split` type,
/// passing `null` as the third argument and forwarding `opts` unchanged.
/// Errors are whatever `fetchCached` returns.
pub fn getSplits(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Split) {
return self.fetchCached(Split, symbol, null, opts);
}
/// Fetch options chain for a symbol (all expirations, no API key needed).
///
/// Thin wrapper: delegates to `fetchCached` for the `OptionsChain` type,
/// passing `null` as the third argument and forwarding `opts` unchanged.
/// Errors are whatever `fetchCached` returns.
pub fn getOptions(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(OptionsChain) {
return self.fetchCached(OptionsChain, symbol, null, opts);
}
/// Fetch earnings history for a symbol.
///
/// Resolution order:
///   1. Symbols matching `isMutualFund` short-circuit with an empty result
///      (mutual funds don't report quarterly earnings).
///   2. Fresh local cache, unless an event dated today or earlier still has
///      no `actual` value -- in that case the results have probably just
///      been published, so the cached slice is dropped and we re-fetch.
///   3. `opts.skip_network = true`: any cached entry (even stale), otherwise
///      `FetchFailed`. The network is never touched.
///   4. Server sync (skipped on `opts.force_refresh`).
///   5. FMP, with a single retry after a rate-limit backoff.
///
/// `opts.force_refresh = true` skips steps 2 and 4.
pub fn getEarnings(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EarningsEvent) {
    // Mutual funds (5-letter tickers ending in X) have no earnings calendar.
    if (isMutualFund(symbol)) {
        const now_secs = std.Io.Timestamp.now(self.io, .real).toSeconds();
        return .{ .data = &.{}, .source = .cached, .timestamp = now_secs, .allocator = self.allocator };
    }

    var s = self.store();
    const today = fmt.todayDate(self.io);

    local_cache: {
        if (opts.force_refresh) break :local_cache;
        const cached = s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .fresh_only) orelse break :local_cache;

        // A past-or-today event without actuals means the announcement
        // likely just landed. Offline mode never triggers a re-fetch.
        const awaiting_results = scan: {
            if (opts.skip_network) break :scan false;
            for (cached.data) |ev| {
                if (ev.actual == null and !today.lessThan(ev.date)) break :scan true;
            }
            break :scan false;
        };

        if (!awaiting_results) {
            log.debug("{s}: earnings fresh in local cache", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        // Treat as stale: release the cached events and continue below.
        self.allocator.free(cached.data);
    }

    if (opts.skip_network) {
        // Offline: accept whatever is cached, however old, before giving up.
        const stale = s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .any) orelse
            return DataError.FetchFailed;
        log.info("{s}: earnings stale-cached returned (skip_network)", .{symbol});
        return .{ .data = stale.data, .source = .cached, .timestamp = stale.timestamp, .allocator = self.allocator };
    }

    // Give the sync server a chance before spending an FMP request.
    if (!opts.force_refresh and self.syncFromServer(symbol, .earnings)) {
        if (s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .fresh_only)) |synced| {
            log.debug("{s}: earnings synced from server and fresh", .{symbol});
            return .{ .data = synced.data, .source = .cached, .timestamp = synced.timestamp, .allocator = self.allocator };
        }
        log.debug("{s}: earnings synced from server but stale, falling through to provider", .{symbol});
    }

    log.debug("{s}: fetching earnings from provider", .{symbol});
    self.assertNetworkAllowed("getEarnings fmp.fetchEarnings");
    var fmp = try self.getProvider(Fmp);
    const fetched = fmp.fetchEarnings(self.allocator, symbol) catch |err| retry: {
        if (err != error.RateLimited) {
            // Only permanent failures are negative-cached.
            if (isPermanentProviderFailure(err)) s.writeNegative(symbol, .earnings);
            return DataError.FetchFailed;
        }
        // Rate limited: back off once, then make a single second attempt.
        self.rateLimitBackoff();
        break :retry fmp.fetchEarnings(self.allocator, symbol) catch return DataError.FetchFailed;
    };

    s.write(EarningsEvent, symbol, fetched, .{ .seconds = cache.Ttl.earnings });
    const fetched_at = std.Io.Timestamp.now(self.io, .real).toSeconds();
    return .{ .data = fetched, .source = .fetched, .timestamp = fetched_at, .allocator = self.allocator };
}
/// Fetch ETF profile for a symbol. Assembles a unified
/// `EtfProfile` view from the EDGAR `etf_metrics` cache (profile
/// + sectors + holdings) plus the Wikidata `classification`
/// cache (inception_date, fund name fallback). Both underlying
/// caches are managed by `getEtfMetrics` / `getClassification`;
/// this function does not maintain its own cache.
///
/// Several legacy fields that AlphaVantage used to populate
/// (`expense_ratio`, `dividend_yield`, `portfolio_turnover`,
/// `leveraged`) remain on `EtfProfile` but stay null here —
/// EDGAR NPORT-P doesn't carry them. They'll fill in once a
/// prospectus parser lands.
///
/// `opts` is forwarded unchanged to both `getEtfMetrics` and
/// `getClassification`.
///
/// Ownership: every string and slice in the returned profile is
/// freshly allocated with `self.allocator`. On any error exit, all
/// partially built allocations are released.
///
/// Errors: whatever `getEtfMetrics` returns, plus allocation
/// failure while copying. A classification failure is NOT an error
/// (best-effort enrichment).
pub fn getEtfProfile(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EtfProfile) {
    // Primary source: EDGAR ETF metrics. If the symbol isn't a
    // fund (or isn't in EDGAR), surface the error to the caller —
    // matches the old AlphaVantage behavior of returning empty
    // profiles for non-ETFs.
    const metrics = try self.getEtfMetrics(symbol, opts);
    defer metrics.deinit();

    // Walk the EtfMetricRecord slice to extract profile + sectors
    // + holdings. The slice shape is "one .profile, then N
    // .sector, then M .holding" per `appendEtfMetricRecords`.
    var name: ?[]const u8 = null;
    errdefer if (name) |n| self.allocator.free(n);
    var net_assets: ?f64 = null;

    var sectors_buf: std.ArrayList(SectorWeight) = .empty;
    errdefer {
        for (sectors_buf.items) |s| self.allocator.free(s.name);
        sectors_buf.deinit(self.allocator);
    }
    var holdings_buf: std.ArrayList(Holding) = .empty;
    errdefer {
        for (holdings_buf.items) |h| {
            self.allocator.free(h.name);
            if (h.symbol) |s| self.allocator.free(s);
            if (h.cusip) |c| self.allocator.free(c);
        }
        holdings_buf.deinit(self.allocator);
    }

    for (metrics.data) |rec| switch (rec) {
        .profile => |p| {
            if (p.series_name) |sn| {
                // Dupe first, then release any earlier value, so an
                // unexpected second .profile row cannot leak a name.
                const series_dup = try self.allocator.dupe(u8, sn);
                if (name) |old| self.allocator.free(old);
                name = series_dup;
            }
            net_assets = p.net_assets;
        },
        .sector => |s| {
            // The name is owned by nothing until `append` succeeds;
            // guard it so a failed append doesn't leak it.
            const sector_name = try self.allocator.dupe(u8, s.description);
            errdefer self.allocator.free(sector_name);
            try sectors_buf.append(self.allocator, .{
                .name = sector_name,
                .weight = s.pct_of_portfolio / 100.0,
            });
        },
        .holding => |h| {
            const sym_dup: ?[]const u8 = if (h.ticker) |t|
                try self.allocator.dupe(u8, t)
            else
                null;
            errdefer if (sym_dup) |s| self.allocator.free(s);
            const cusip_dup: ?[]const u8 = if (h.cusip) |c|
                try self.allocator.dupe(u8, c)
            else
                null;
            errdefer if (cusip_dup) |c| self.allocator.free(c);
            const name_dup = try self.allocator.dupe(u8, h.name);
            errdefer self.allocator.free(name_dup);
            try holdings_buf.append(self.allocator, .{
                .symbol = sym_dup,
                .name = name_dup,
                .weight = h.pct_of_portfolio / 100.0,
                .cusip = cusip_dup,
            });
        },
    };

    // Wikidata classification provides inception_date and a
    // fallback name. Best-effort: if the fetch fails we still
    // return the EDGAR-only profile.
    var inception_date: ?Date = null;
    if (self.getClassification(symbol, opts)) |class_result| {
        defer class_result.deinit();
        for (class_result.data) |c| {
            if (c.inception_date) |idate_str| {
                // Unparseable dates are ignored rather than failing the profile.
                if (Date.parse(idate_str)) |d| inception_date = d else |_| {}
            }
            // Use Wikidata's name only if EDGAR didn't provide one.
            if (name == null) {
                if (c.name) |n| name = try self.allocator.dupe(u8, n);
            }
        }
    } else |err| {
        log.debug("{s}: classification unavailable for ETF profile: {s}", .{ symbol, @errorName(err) });
    }

    // Materialize every owned piece BEFORE assembling the struct,
    // each with its own errdefer, so a late allocation failure
    // releases the earlier successes.
    const symbol_owned = try self.allocator.dupe(u8, symbol);
    errdefer self.allocator.free(symbol_owned);

    const holdings_count = holdings_buf.items.len;
    const sectors_count = sectors_buf.items.len;

    const holdings: ?[]Holding = if (holdings_count > 0)
        try holdings_buf.toOwnedSlice(self.allocator)
    else
        null;
    // After toOwnedSlice the ArrayList is empty, so its errdefer no
    // longer covers these elements; this one takes over.
    errdefer if (holdings) |owned| {
        for (owned) |h| {
            self.allocator.free(h.name);
            if (h.symbol) |s| self.allocator.free(s);
            if (h.cusip) |c| self.allocator.free(c);
        }
        self.allocator.free(owned);
    };

    const sectors: ?[]SectorWeight = if (sectors_count > 0)
        try sectors_buf.toOwnedSlice(self.allocator)
    else
        null;

    // No fallible operations below this point.
    const profile: EtfProfile = .{
        .symbol = symbol_owned,
        .name = name,
        .net_assets = net_assets,
        .holdings = holdings,
        .total_holdings = if (holdings_count > 0) @intCast(holdings_count) else null,
        .sectors = sectors,
        .inception_date = inception_date,
    };

    // Lists that were never handed off via toOwnedSlice are empty;
    // deinit them for symmetry.
    if (holdings_count == 0) holdings_buf.deinit(self.allocator);
    if (sectors_count == 0) sectors_buf.deinit(self.allocator);

    return .{
        .data = profile,
        .source = metrics.source,
        .timestamp = metrics.timestamp,
        .allocator = self.allocator,
    };
}
// ── Wikidata + EDGAR providers ─────────────────────────────────
/// Fetch the Wikidata classification record for a single symbol
/// (name, sector, industry, country, inception date, CIK,
/// instance-of).
///
/// Resolution order: fresh local cache, then (offline only) any
/// cached entry, then server sync, then a one-symbol Wikidata
/// fetch whose result is post-processed by `finalizeClassification`.
///
/// `opts.skip_network = true` returns cached data even if stale and
/// `FetchFailed` on a cache miss. `opts.force_refresh = true`
/// bypasses both the local cache and the server sync.
pub fn getClassification(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Wikidata.ClassificationRecord) {
    const Record = Wikidata.ClassificationRecord;
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Record, symbol, null, .fresh_only)) |fresh| {
            log.debug("{s}: classification fresh in local cache", .{symbol});
            return .{ .data = fresh.data, .source = .cached, .timestamp = fresh.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        // Offline: any cached entry will do; otherwise there is nothing to return.
        const stale = s.read(self.allocator, Record, symbol, null, .any) orelse
            return DataError.FetchFailed;
        log.info("{s}: classification stale-cached returned (skip_network)", .{symbol});
        return .{ .data = stale.data, .source = .cached, .timestamp = stale.timestamp, .allocator = self.allocator };
    }

    // Ask the sync server before querying Wikidata directly.
    if (!opts.force_refresh and self.syncFromServer(symbol, .classification)) {
        if (s.read(self.allocator, Record, symbol, null, .fresh_only)) |synced| {
            log.debug("{s}: classification synced from server", .{symbol});
            return .{ .data = synced.data, .source = .cached, .timestamp = synced.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("{s}: fetching classification from Wikidata", .{symbol});
    self.assertNetworkAllowed("getClassification wikidata.fetch");
    var wd = try self.getProvider(Wikidata);
    const symbols = [_][]const u8{symbol};

    const fetched = wd.fetch(self.allocator, &symbols) catch |first_err| retry: {
        if (first_err == error.RateLimited) {
            // One backoff-and-retry; a second failure is reported using
            // the original error below.
            self.rateLimitBackoff();
            if (wd.fetch(self.allocator, &symbols)) |retried| break :retry retried else |_| {}
        }
        log.warn("{s}: wikidata fetch failed: {s}", .{ symbol, @errorName(first_err) });
        return DataError.FetchFailed;
    };
    return self.finalizeClassification(symbol, fetched, opts);
}
/// Common post-Wikidata path: decide if the result is useful as
/// returned, otherwise consult EDGAR to fill in the gaps,
/// otherwise negative-cache. On success the cache is written and
/// a `FetchResult` is returned.
///
/// Takes ownership of `wikidata_records` on EVERY path, including
/// error exits. The slice is either returned as the result data,
/// freed and replaced by a synthesized slice, or freed (and, for
/// `NotFound`, the symbol negative-cached).
///
/// Errors:
///   - `DataError.NotFound` when neither Wikidata nor EDGAR can
///     classify the symbol (negative cache entry written).
///   - `DataError.FetchFailed` for any other synthesis failure.
///   - the allocation error from `populateGeo`, propagated as-is.
fn finalizeClassification(
    self: *DataService,
    symbol: []const u8,
    wikidata_records: []Wikidata.ClassificationRecord,
    opts: FetchOptions,
) DataError!FetchResult(Wikidata.ClassificationRecord) {
    var s = self.store();
    const ttl = cache.DataType.classification.ttl();

    // Wikidata returned a useful row -> populate geo from
    // geoFor(country) and cache as-is.
    if (wikidata_records.len > 0 and wikidataLooksUseful(wikidata_records[0])) {
        // We own the slice, so it must not leak if populating geo
        // fails: free it before propagating the error.
        self.populateGeo(&wikidata_records[0]) catch |err| {
            Wikidata.ClassificationRecord.freeSlice(self.allocator, wikidata_records);
            return err;
        };
        s.write(Wikidata.ClassificationRecord, symbol, wikidata_records, ttl);
        return .{ .data = wikidata_records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    // Sparse or empty: try EDGAR fallback. `synthesizeClassification`
    // takes ownership of the wikidata slice (frees it, returns a
    // new one-element slice with the merged record). Returns
    // `error.NotFound` when even EDGAR has nothing.
    const merged = self.synthesizeClassification(symbol, wikidata_records, opts) catch |err| {
        if (err == error.NotFound) {
            s.writeNegative(symbol, .classification);
            return DataError.NotFound;
        }
        return DataError.FetchFailed;
    };
    s.write(Wikidata.ClassificationRecord, symbol, merged, ttl);
    return .{ .data = merged, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
/// Fill in `record.geo` from `classification.geoFor(record.country)`.
///
/// No-op when `geo` is already set, when `country` is null, or when
/// the lookup yields `classification.geo.unknown`. Otherwise `geo`
/// receives a copy allocated with `self.allocator`.
///
/// If the copy cannot be allocated the field stays null and the
/// error is returned, leaving the bail-or-continue decision to the
/// caller.
fn populateGeo(self: *DataService, record: *Wikidata.ClassificationRecord) !void {
    if (record.geo) |_| return;
    if (record.country) |country_code| {
        const region = classification.geoFor(country_code);
        const is_unknown = std.mem.eql(u8, region, classification.geo.unknown);
        if (!is_unknown) {
            record.geo = try self.allocator.dupe(u8, region);
        }
    }
}
/// Report whether a Wikidata classification record is rich enough
/// to be used directly, without the EDGAR fallback.
///
/// "Useful" means `is_etf` is true or any of `asset_class`,
/// `country`, `sector` is non-null. Anything sparser (e.g. SOXX
/// getting only a `name` from Wikidata) goes through the EDGAR
/// ticker-map fallback, which fills in `is_etf=true,
/// asset_class=ETF, country=US`.
fn wikidataLooksUseful(c: Wikidata.ClassificationRecord) bool {
    return c.is_etf or
        c.asset_class != null or
        c.country != null or
        c.sector != null;
}
/// Synthesize a `ClassificationRecord` for a symbol that
/// Wikidata couldn't classify usefully. Consults the EDGAR
/// ticker maps (via `lookupEdgarFallback`); if found, also calls
/// `getEtfMetrics` to recover the NPORT-P series_name (more
/// authoritative than the company_tickers title). Title-keyword
/// inference fills in `sector` and `geo` when the name carries an
/// unambiguous keyword.
///
/// Takes ownership of `wikidata_records`: frees them at exit, on
/// success and on error alike. Wikidata's `industry` and
/// `inception_date` are always carried over when present; its
/// `country`, `sector` and `cik` take priority over the EDGAR and
/// inferred values; its `name` is used only when EDGAR supplies
/// none.
///
/// Returns a freshly allocated one-element slice whose string
/// fields are all owned copies. Returns `error.NotFound` when
/// EDGAR has nothing either.
fn synthesizeClassification(
self: *DataService,
symbol: []const u8,
wikidata_records: []Wikidata.ClassificationRecord,
opts: FetchOptions,
) !cache.Store.DataFor(Wikidata.ClassificationRecord) {
// Everything borrowed from `wikidata_records`, `lookup` and
// `etf_metrics_result` below stays valid until these defers run
// at function exit, i.e. after all dupes have been made.
defer Wikidata.ClassificationRecord.freeSlice(self.allocator, wikidata_records);
const lookup = self.lookupEdgarFallback(symbol, opts);
defer freeEdgarLookup(self.allocator, lookup);
if (lookup == .none) return error.NotFound;
// Try to get the richer series_name from NPORT-P. Note this
// runs for every EDGAR hit, not only fund hits. Cache hit is
// cheap; cache miss triggers an EDGAR fetch but is bounded by
// EDGAR's rate limiter. If the call fails (e.g. money-market
// funds with no NPORT-P), the error is deliberately discarded
// and we fall back to the ticker-map title.
var etf_metrics_result: ?FetchResult(Edgar.EtfMetricRecord) = null;
defer if (etf_metrics_result) |*r| r.deinit();
etf_metrics_result = self.getEtfMetrics(symbol, opts) catch null;
// Extract series_name and cik from the first etf_metrics profile
// row; both are borrowed from `etf_metrics_result`.
var series_name: ?[]const u8 = null;
var etf_cik: ?[]const u8 = null;
if (etf_metrics_result) |r| {
for (r.data) |rec| switch (rec) {
.profile => |p| {
if (p.series_name) |sn| series_name = sn;
etf_cik = p.cik;
break;
},
else => {},
};
}
// Pull whatever Wikidata's sparse record carried so we
// don't lose data on the merge. Only the first record is used.
const wd: ?Wikidata.ClassificationRecord = if (wikidata_records.len > 0) wikidata_records[0] else null;
// Pick the best name source: NPORT-P series_name >
// EDGAR ticker-map title > Wikidata name > nothing.
//
// We're on the EDGAR-fallback path because Wikidata's
// record was sparse. For funds, Wikidata's `name` (when
// present) is frequently the underlying INDEX rather than
// the FUND itself -- e.g. SOXX's Wikidata `name` is "PHLX
// Semiconductor Sector" but the fund is "iShares
// Semiconductor ETF" per NPORT-P seriesName. Prefer the
// fund-authoritative source so downstream comments and
// labels show the fund name, not the index name.
const ticker_title: ?[]const u8 = switch (lookup) {
.company_or_uit => |c| c.title,
else => null,
};
const best_name: ?[]const u8 = blk: {
if (series_name) |n| break :blk n;
if (ticker_title) |n| break :blk n;
if (wd) |w| {
if (w.name) |n| break :blk n;
}
break :blk null;
};
// Name source for title-keyword inference. Uses the same
// priority order as `best_name` above (series_name, then
// ticker title, then Wikidata's name), for the same reason:
// the fund-authoritative name is more informative than the
// index name Wikidata often carries.
const inference_name: ?[]const u8 = series_name orelse ticker_title orelse if (wd) |w| w.name else null;
const inferred_sector = classification.inferSectorFromTitle(inference_name);
const inferred_geo = classification.inferGeoFromTitle(inference_name);
// `is_etf` here means "this is fund-shaped, emit multi-row
// breakdown" -- true for ANY EDGAR-found symbol. The
// `tickers_funds.srf` map mixes mutual funds and
// series-of-trust ETFs alike. The `tickers_companies.srf`
// map carries operating companies, closed-end funds, and
// UITs; operating companies usually have Wikidata coverage
// and wouldn't reach this fallback, so anything that
// dropped here is also fund-shaped (e.g. PIMCO closed-end
// funds whose title says "FUND" but not "ETF" or "TRUST").
//
// The lookup's own `is_etf` flag still drives the
// asset_class label below ("ETF" vs "Fund"), but the
// fund-shaped routing decision applies regardless.
const is_etf = true;
const asset_class: []const u8 = switch (lookup) {
.managed_fund => "Fund",
.company_or_uit => |c| if (c.is_etf) "ETF" else "Fund",
// Ruled out by the early `error.NotFound` return above.
.none => unreachable,
};
// Country: prefer Wikidata's. Default to "US" for
// EDGAR-found symbols (they're SEC filers).
const country_str: []const u8 = if (wd) |w| (w.country orelse "US") else "US";
// Sector: prefer Wikidata's existing sector (rare in this
// sparse-fallback path), else fall back to inferred.
const sector_str: ?[]const u8 = blk: {
if (wd) |w| {
if (w.sector) |sec| break :blk sec;
}
break :blk inferred_sector;
};
// CIK: prefer Wikidata's, fall back to NPORT-P's.
const cik_str: ?[]const u8 = blk: {
if (wd) |w| {
if (w.cik) |c| break :blk c;
}
if (etf_cik) |c| break :blk c;
break :blk null;
};
// Geo: prefer the Wikidata-derived geo (computed from
// `geoFor(country)` against the country code), else use
// title-keyword inference. Default to "US" when neither
// is available -- EDGAR-found symbols are SEC filers.
const geo_str: []const u8 = blk: {
if (wd) |w| {
if (w.country) |c| {
const g = classification.geoFor(c);
if (!std.mem.eql(u8, g, classification.geo.unknown)) break :blk g;
}
}
if (inferred_geo) |g| break :blk g;
break :blk classification.geo.us;
};
// `as_of` is today's date formatted into a 10-byte stack
// buffer; a formatting failure propagates to the caller.
const today = fmt.todayDate(self.io);
var as_of_buf: [10]u8 = undefined;
const as_of_str = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today});
// Allocate each owned field up front with its own errdefer
// so a partial-build on OOM doesn't leak the earlier
// successful dupes. The only fallible operation after the
// dupes is the final one-element slice allocation, which the
// same errdefers cover.
const symbol_owned = try self.allocator.dupe(u8, symbol);
errdefer self.allocator.free(symbol_owned);
const name_owned: ?[]const u8 = if (best_name) |n| try self.allocator.dupe(u8, n) else null;
errdefer if (name_owned) |s| self.allocator.free(s);
const sector_owned: ?[]const u8 = if (sector_str) |s| try self.allocator.dupe(u8, s) else null;
errdefer if (sector_owned) |s| self.allocator.free(s);
const industry_owned: ?[]const u8 = if (wd) |w|
(if (w.industry) |i| try self.allocator.dupe(u8, i) else null)
else
null;
errdefer if (industry_owned) |s| self.allocator.free(s);
const country_owned = try self.allocator.dupe(u8, country_str);
errdefer self.allocator.free(country_owned);
const geo_owned = try self.allocator.dupe(u8, geo_str);
errdefer self.allocator.free(geo_owned);
const asset_class_owned = try self.allocator.dupe(u8, asset_class);
errdefer self.allocator.free(asset_class_owned);
const inception_owned: ?[]const u8 = if (wd) |w|
(if (w.inception_date) |i| try self.allocator.dupe(u8, i) else null)
else
null;
errdefer if (inception_owned) |s| self.allocator.free(s);
const cik_owned: ?[]const u8 = if (cik_str) |c| try self.allocator.dupe(u8, c) else null;
errdefer if (cik_owned) |s| self.allocator.free(s);
const as_of_owned = try self.allocator.dupe(u8, as_of_str);
errdefer self.allocator.free(as_of_owned);
const source_owned = try self.allocator.dupe(u8, "edgar_fallback");
errdefer self.allocator.free(source_owned);
const result = try self.allocator.alloc(Wikidata.ClassificationRecord, 1);
// Infallible from here: ownership of every dupe moves into the record.
result[0] = .{
.symbol = symbol_owned,
.name = name_owned,
.sector = sector_owned,
.industry = industry_owned,
.country = country_owned,
.geo = geo_owned,
.asset_class = asset_class_owned,
.is_etf = is_etf,
.inception_date = inception_owned,
.cik = cik_owned,
.as_of = as_of_owned,
.source = source_owned,
};
return result;
}
/// Fetch XBRL-derived entity facts for a CIK (currently
/// shares-outstanding; extensible to revenue / net income / EPS
/// as new variants are added to `Edgar.EntityFactRecord`).
///
/// CIK is the cache key — the file lives at
/// `<cache_dir>/<cik>/entity_facts.srf`. A single dual-class
/// issuer (BRK.A / BRK.B) shares one entity_facts file because
/// both class symbols resolve to the same CIK.
///
/// Resolution order: fresh local cache, then (offline only) any
/// cached entry, then server sync, then EDGAR.
///
/// `opts.skip_network = true` returns cached data even if stale
/// and `FetchFailed` on a cache miss. `opts.force_refresh = true`
/// bypasses the local cache and the server sync.
///
/// Errors:
///   - `DataError.FetchFailed` when the EDGAR request fails or
///     offline mode finds no cache.
///   - `DataError.NotFound` when EDGAR has no shares-outstanding
///     data for the CIK (negative cache entry written).
///   - allocation failure while building the record; nothing
///     built so far is leaked.
pub fn getEntityFacts(self: *DataService, cik: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EntityFactRecord) {
    var s = self.store();
    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts fresh in local cache", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }
    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .any)) |cached| {
            log.info("CIK {s}: entity_facts stale-cached returned (skip_network)", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }
    if (!opts.force_refresh and self.syncFromServer(cik, .entity_facts)) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts synced from server", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("CIK {s}: fetching entity facts from EDGAR", .{cik});
    self.assertNetworkAllowed("getEntityFacts edgar.fetchSharesOutstanding");
    var edgar = try self.getProvider(Edgar);
    const so_opt = edgar.fetchSharesOutstanding(self.allocator, cik) catch |err| {
        log.warn("CIK {s}: shares fetch failed: {s}", .{ cik, @errorName(err) });
        return DataError.FetchFailed;
    };

    if (so_opt) |so_in| {
        var so = so_in;
        defer so.deinit(self.allocator);

        const today = fmt.todayDate(self.io);
        var as_of_buf: [10]u8 = undefined;
        // [10]u8 always fits "YYYY-MM-DD" (10 chars exactly).
        const as_of = std.fmt.bufPrint(&as_of_buf, "{f}", .{today}) catch
            @panic("getEntityFacts: 10-byte buffer cannot hold YYYY-MM-DD — unreachable");

        // Dupe every owned field up front, each guarded by its own
        // errdefer, so an allocation failure part-way through does
        // not leak the earlier copies. Allocation order matches the
        // record's field order.
        const form_dup: ?[]u8 = if (so.form.len > 0) try self.allocator.dupe(u8, so.form) else null;
        errdefer if (form_dup) |f| self.allocator.free(f);
        // The record is keyed by CIK, so the symbol is an (owned) empty string.
        const symbol_dup = try self.allocator.dupe(u8, "");
        errdefer self.allocator.free(symbol_dup);
        const period_end_dup = try self.allocator.dupe(u8, so.period_end);
        errdefer self.allocator.free(period_end_dup);
        const cik_dup = try self.allocator.dupe(u8, cik);
        errdefer self.allocator.free(cik_dup);
        const as_of_dup = try self.allocator.dupe(u8, as_of);
        errdefer self.allocator.free(as_of_dup);

        const records = try self.allocator.alloc(Edgar.EntityFactRecord, 1);
        // Infallible from here: ownership moves into the record.
        const shares_record = Edgar.SharesRecord{
            .symbol = symbol_dup,
            .shares_outstanding = so.value,
            .period_end = period_end_dup,
            .form = form_dup,
            .cik = cik_dup,
            .as_of = as_of_dup,
            // NOTE(review): unlike the other fields this is a string
            // literal, not an allocation — unchanged from the previous
            // code; confirm `SharesRecord` never frees `source`.
            .source = "edgar_xbrl",
        };
        records[0] = .{ .shares_outstanding = shares_record };
        s.write(Edgar.EntityFactRecord, cik, records, cache.DataType.entity_facts.ttl());
        return .{ .data = records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    // No shares-outstanding data for this CIK (e.g. 20-F-only
    // filers like BP, XBRL-light filers like META). Negative-
    // cache so we don't keep retrying.
    s.writeNegative(cik, .entity_facts);
    return DataError.NotFound;
}
/// Fetch ETF metrics (NPORT-P profile + sectors + holdings) for
/// a fund symbol. Cache-first via `<symbol>/etf_metrics.srf`.
///
/// Resolution order: fresh local cache, then (offline only) any
/// cached entry, then server sync, then EDGAR. The EDGAR path
/// loads both ticker maps (`loadMutualFundTickerMap`,
/// `loadCompanyTickerMap`) and runs `Edgar.fetchEtfMetrics`.
///
/// `opts.skip_network = true` returns cached data even if stale
/// and `FetchFailed` on a cache miss. `opts.force_refresh = true`
/// bypasses the local cache and the server sync.
///
/// Errors:
///   - `DataError.FetchFailed` when a ticker map cannot be loaded,
///     the EDGAR fetch fails, or offline mode finds no cache.
///   - `DataError.NotFound` when the symbol is not a fund or is in
///     neither ticker map (negative cache entry written).
pub fn getEtfMetrics(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EtfMetricRecord) {
    const Record = Edgar.EtfMetricRecord;
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Record, symbol, null, .fresh_only)) |fresh| {
            log.debug("{s}: etf_metrics fresh in local cache", .{symbol});
            return .{ .data = fresh.data, .source = .cached, .timestamp = fresh.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        // Offline: a stale entry beats nothing.
        const stale = s.read(self.allocator, Record, symbol, null, .any) orelse
            return DataError.FetchFailed;
        log.info("{s}: etf_metrics stale-cached returned (skip_network)", .{symbol});
        return .{ .data = stale.data, .source = .cached, .timestamp = stale.timestamp, .allocator = self.allocator };
    }

    if (!opts.force_refresh and self.syncFromServer(symbol, .etf_metrics)) {
        if (s.read(self.allocator, Record, symbol, null, .fresh_only)) |synced| {
            log.debug("{s}: etf_metrics synced from server", .{symbol});
            return .{ .data = synced.data, .source = .cached, .timestamp = synced.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("{s}: fetching ETF metrics from EDGAR", .{symbol});
    self.assertNetworkAllowed("getEtfMetrics edgar.fetchEtfMetrics");

    // Load the ticker maps. These are big (3-5 MB each). Each map
    // is released when this call returns (see the `defer`s), so
    // the parsed form is rebuilt on every call that reaches here.
    var mf_map = self.loadMutualFundTickerMap(opts) catch |err| {
        log.warn("failed to load mutual-fund ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer mf_map.deinit();
    var co_map = self.loadCompanyTickerMap(opts) catch |err| {
        log.warn("failed to load company ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer co_map.deinit();

    var edgar = try self.getProvider(Edgar);
    // NOTE(review): the trailing literal 20 is passed straight to
    // `fetchEtfMetrics`; presumably a holdings cap — confirm
    // against the provider.
    const outcome = edgar.fetchEtfMetrics(self.io, self.allocator, &mf_map, &co_map, symbol, 20) catch |err| {
        log.warn("{s}: etf_metrics fetch failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };

    switch (outcome) {
        // Both variants are flattened to records, cached and returned
        // identically. `inline` instantiates the prong once per tag.
        inline .full, .profile_only => |payload| {
            var fetched_metrics = payload;
            defer fetched_metrics.deinit(self.allocator);

            var records: std.ArrayList(Record) = .empty;
            errdefer {
                for (records.items) |*r| r.deinit(self.allocator);
                records.deinit(self.allocator);
            }
            try Edgar.appendEtfMetricRecords(self.allocator, &records, fetched_metrics);

            const owned = try records.toOwnedSlice(self.allocator);
            s.write(Record, symbol, owned, cache.DataType.etf_metrics.ttl());
            const fetched_at = std.Io.Timestamp.now(self.io, .real).toSeconds();
            return .{ .data = owned, .source = .fetched, .timestamp = fetched_at, .allocator = self.allocator };
        },
        // `.not_a_fund`: the symbol exists but isn't a fund (stock-level
        // facts are available separately via `getEntityFacts(cik)`).
        // `.not_in_edgar`: the symbol is in neither ticker map.
        // Either way, write a negative entry to suppress retries.
        .not_a_fund, .not_in_edgar => {
            s.writeNegative(symbol, .etf_metrics);
            return DataError.NotFound;
        },
    }
}
/// Load the EDGAR mutual-fund ticker map. Reads `[]MutualFundTickerEntry`
/// from cache when fresh; otherwise fetches via the provider
/// and writes the parsed slice to cache. The returned
/// `TickerMap` takes ownership of the entries; caller frees via
/// a single `mf_map.deinit()`.
///
/// `opts.force_refresh` skips the fresh-cache read. `opts.skip_network`
/// forbids the provider fetch: a cached slice of any age is used, and
/// `error.FetchFailed` is returned when the cache has nothing usable.
///
/// Heavy: ~28k entries. Cheap on cache hit (fast SRF read);
/// expensive on miss (one HTTP round-trip + JSON parse).
/// Exposed publicly so commands like `enrich` can use the
/// ticker map as a fallback classifier when Wikidata returns
/// no rows for a symbol.
pub fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.MutualFundTickerEntry) {
    const Map = Edgar.TickerMap(Edgar.MutualFundTickerEntry);
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.MutualFundTickerEntry, "_edgar", null, .fresh_only)) |cached| {
            if (cached.data.len > 0) {
                return Map.fromEntries(self.allocator, cached.data);
            }
            // An empty cached slice is useless as a lookup table; release
            // it and fall through to the next source.
            Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
        }
    }

    // Offline mode: never reach the provider. Serve whatever the cache
    // holds (even if stale) and treat a miss as a failure, matching the
    // `skip_network` handling in `getEtfMetrics`.
    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.MutualFundTickerEntry, "_edgar", null, .any)) |cached| {
            if (cached.data.len > 0) {
                log.info("EDGAR mutual-fund ticker map stale-cached returned (skip_network)", .{});
                return Map.fromEntries(self.allocator, cached.data);
            }
            Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
        }
        return error.FetchFailed;
    }

    log.debug("fetching EDGAR mutual-fund ticker map", .{});
    self.assertNetworkAllowed("loadMutualFundTickerMap edgar.fetchMutualFundTickerMap");
    var edgar = try self.getProvider(Edgar);
    // Fetch + parse via the provider (correct UA + From + Accept
    // + rate-limit token), cache the parsed slice, then build
    // the lookup map (which takes ownership of the slice).
    const entries = try edgar.fetchMutualFundTickerMap(self.allocator);
    s.write(Edgar.MutualFundTickerEntry, "_edgar", entries, cache.DataType.tickers_funds.ttl());
    return Map.fromEntries(self.allocator, entries);
}
/// Load the EDGAR company ticker map (stocks + UITs). Same shape
/// as `loadMutualFundTickerMap` for the `CompanyTickerEntry`
/// type. See that function's doc-comment for cost / use-case
/// guidance.
///
/// `opts.force_refresh` skips the fresh-cache read. `opts.skip_network`
/// forbids the provider fetch: a cached slice of any age is used, and
/// `error.FetchFailed` is returned when the cache has nothing usable.
/// The returned `TickerMap` is released by the caller via `deinit()`.
pub fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.CompanyTickerEntry) {
    const Map = Edgar.TickerMap(Edgar.CompanyTickerEntry);
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.CompanyTickerEntry, "_edgar", null, .fresh_only)) |cached| {
            if (cached.data.len > 0) {
                return Map.fromEntries(self.allocator, cached.data);
            }
            // An empty cached slice is useless as a lookup table; release
            // it and fall through to the next source.
            Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
        }
    }

    // Offline mode: never reach the provider. Serve whatever the cache
    // holds (even if stale) and treat a miss as a failure, matching the
    // `skip_network` handling in `getEtfMetrics`.
    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.CompanyTickerEntry, "_edgar", null, .any)) |cached| {
            if (cached.data.len > 0) {
                log.info("EDGAR company ticker map stale-cached returned (skip_network)", .{});
                return Map.fromEntries(self.allocator, cached.data);
            }
            Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
        }
        return error.FetchFailed;
    }

    log.debug("fetching EDGAR company ticker map", .{});
    self.assertNetworkAllowed("loadCompanyTickerMap edgar.fetchCompanyTickerMap");
    var edgar = try self.getProvider(Edgar);
    // Fetch + parse via the provider, cache the parsed slice, then hand
    // the slice to the lookup map.
    const entries = try edgar.fetchCompanyTickerMap(self.allocator);
    s.write(Edgar.CompanyTickerEntry, "_edgar", entries, cache.DataType.tickers_companies.ttl());
    return Map.fromEntries(self.allocator, entries);
}
/// Look up a symbol in the EDGAR ticker maps. Used by the
/// `enrich` command as a fallback classifier when Wikidata
/// returns no rows for the symbol. Loads both maps (cache or
/// network), runs the lookup, frees the maps, returns the
/// digested `EdgarLookup` union.
///
/// Commands consume the union directly — they never see
/// `TickerMap` / `MutualFundTickerEntry` / `CompanyTickerEntry`
/// shapes. Provider details stay inside the service layer.
///
/// Caller owns the `title` string when the result is
/// `.company_or_uit{ .title = non-null }`. Free with the
/// allocator passed to this method (typically the same one
/// the service was initialized with).
pub fn lookupEdgarFallback(
    self: *DataService,
    sym: []const u8,
    opts: FetchOptions,
) EdgarLookup {
    // Best-effort: a map that fails to load is passed on as `null` so the
    // lookup can still consult the other one. The failure is logged
    // rather than silently discarded so it can be diagnosed.
    var mf_opt: ?Edgar.TickerMap(Edgar.MutualFundTickerEntry) = self.loadMutualFundTickerMap(opts) catch |err| blk: {
        log.debug("{s}: mutual-fund ticker map unavailable for EDGAR fallback: {s}", .{ sym, @errorName(err) });
        break :blk null;
    };
    defer if (mf_opt) |*m| m.deinit();

    var co_opt: ?Edgar.TickerMap(Edgar.CompanyTickerEntry) = self.loadCompanyTickerMap(opts) catch |err| blk: {
        log.debug("{s}: company ticker map unavailable for EDGAR fallback: {s}", .{ sym, @errorName(err) });
        break :blk null;
    };
    defer if (co_opt) |*m| m.deinit();

    // Both maps are released by the defers above after the lookup returns.
    return lookupInTickerMaps(
        self.allocator,
        sym,
        if (mf_opt) |*m| m else null,
        if (co_opt) |*m| m else null,
    );
}
// ──────────────────────────────────────────────────────────────
/// Fetch a real-time quote for a symbol.
/// Yahoo Finance is primary (free, no API key, no 15-min delay).
/// Falls back to TwelveData if Yahoo fails.
///
/// Quotes are never cached, so `opts.force_refresh` is a no-op
/// (every call goes to the provider). `opts.skip_network = true`
/// returns FetchFailed unconditionally — there's no cached price
/// to fall back to.
pub fn getQuote(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!Quote {
    // Nothing is cached for quotes, so offline mode has nothing to serve.
    if (opts.skip_network) {
        log.debug("{s}: skip_network — quote unavailable (never cached)", .{symbol});
        return DataError.FetchFailed;
    }
    self.assertNetworkAllowed("getQuote");

    // Primary: Yahoo Finance (free, real-time). Any failure here is
    // non-fatal — log it and fall through to TwelveData.
    if (self.getProvider(Yahoo)) |yh| {
        if (yh.fetchQuote(self.allocator, symbol)) |quote| {
            log.debug("{s}: quote from Yahoo", .{symbol});
            return quote;
        } else |err| {
            log.debug("{s}: Yahoo quote failed: {s}", .{ symbol, @errorName(err) });
        }
    } else |err| {
        log.debug("{s}: Yahoo provider unavailable: {s}", .{ symbol, @errorName(err) });
    }

    // Fallback: TwelveData (requires API key, may be 15-min delayed).
    // A provider-construction error propagates as-is; a fetch error is
    // collapsed to FetchFailed after being logged.
    var td = try self.getProvider(TwelveData);
    log.debug("{s}: quote fallback to TwelveData", .{symbol});
    return td.fetchQuote(self.allocator, symbol) catch |err| {
        log.debug("{s}: TwelveData quote failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };
}
/// Compute trailing returns for a symbol (fetches candles + dividends + splits).
/// Returns both as-of-date and month-end trailing returns.
/// As-of-date: end = latest close. Matches Morningstar "Trailing Returns" page.
/// Month-end: end = last business day of prior month. Matches Morningstar "Performance" page.
///
/// `*_price` columns are split-adjusted, NOT dividend-adjusted (matches the
/// "price return" numbers public sources like Yahoo's chart-bar / FMP / Barchart
/// publish). `*_total` columns include dividend reinvestment (matches Morningstar
/// "Trailing Returns" / Yahoo "Performance Overview" / Koyfin "Total Return").
/// See `tmp/multi-ticker-audit.md` for the cross-validation evidence.
///
/// Fails with `FetchFailed` when the candle fetch yields no candles; a
/// candle fetch error propagates unchanged. Split and dividend fetch
/// failures are tolerated (see the inline notes). The fetched candle and
/// dividend slices are handed back in the result rather than freed here.
pub fn getTrailingReturns(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!struct {
    asof_price: performance.TrailingReturns,
    asof_total: ?performance.TrailingReturns,
    me_price: performance.TrailingReturns,
    me_total: ?performance.TrailingReturns,
    candles: []Candle,
    dividends: ?[]Dividend,
    source: Source,
    timestamp: i64,
} {
    const candle_result = try self.getCandles(symbol, opts);
    const candles = candle_result.data;
    if (candles.len == 0) return DataError.FetchFailed;

    const today = fmt.todayDate(self.io);

    // Splits make raw `close` ratios meaningful across split boundaries
    // (e.g. NVDA 10:1 on 2024-06-10). When the fetch fails we carry on
    // with an empty slice: price returns stay correct for any ticker
    // with no split inside the window, which is most of them.
    var splits_buf: ?FetchResult(Split) = null;
    defer if (splits_buf) |sb| sb.deinit();
    var splits: []const Split = &.{};
    if (self.getSplits(symbol, opts)) |sr| {
        splits_buf = sr;
        splits = sr.data;
    } else |_| {}

    // Price-only returns: as-of-date ends at the last candle, month-end
    // ends at the last business day of the prior month.
    const price_asof = performance.trailingReturnsPriceOnly(candles, splits);
    const price_me = performance.trailingReturnsPriceOnlyMonthEnd(candles, splits, today);

    // adj_close-based total return is the baseline. With Tiingo (the
    // default provider) adj_close is already dividend-adjusted, so this
    // is a reasonable total-return estimate even without explicit
    // dividend records.
    const adj_asof = performance.trailingReturns(candles);
    const adj_me = performance.trailingReturnsMonthEnd(candles, today);

    // Start from the adj_close baseline; upgrade to the dividend-
    // reinvested figures (with per-period fallback to the baseline) only
    // when dividend records could be fetched.
    var dividends: ?[]Dividend = null;
    var total_asof: ?performance.TrailingReturns = adj_asof;
    var total_me: ?performance.TrailingReturns = adj_me;
    if (self.getDividends(symbol, opts)) |div_result| {
        dividends = div_result.data;
        const div_asof = performance.trailingReturnsWithDividends(candles, div_result.data);
        const div_me = performance.trailingReturnsMonthEndWithDividends(candles, div_result.data, today);
        total_asof = performance.withDividendFallback(div_asof, adj_asof);
        total_me = performance.withDividendFallback(div_me, adj_me);
    } else |_| {}

    return .{
        .asof_price = price_asof,
        .asof_total = total_asof,
        .me_price = price_me,
        .me_total = total_me,
        .candles = candles,
        .dividends = dividends,
        .source = candle_result.source,
        .timestamp = candle_result.timestamp,
    };
}
/// Check if candle data is fresh in cache without full deserialization.
pub fn isCandleCacheFresh(self: *DataService, symbol: []const u8) bool {
    // Delegates to the store's candle-metadata freshness check.
    var candle_store = self.store();
    const fresh = candle_store.isCandleMetaFresh(symbol);
    return fresh;
}
/// Read only the latest close price from cached candles (no full deserialization).
/// Returns null if no cached data exists.
pub fn getCachedLastClose(self: *DataService, symbol: []const u8) ?f64 {
    // Delegates to the store; no freshness check is applied here, so the
    // value may come from a stale entry.
    var candle_store = self.store();
    const last_close = candle_store.readLastClose(symbol);
    return last_close;
}
/// Read the latest cached candle date for `symbol` without deserializing
/// the full candle history. Returns null if no cached metadata exists.
///
/// Callers should pair this with `isCandleCacheFresh` before trusting
/// the date: a stale cache entry can return a date from days or weeks
/// ago, which is fine for diagnostics but wrong for anything that
/// needs "the current market date".
pub fn getCachedLastDate(self: *DataService, symbol: []const u8) ?Date {
    var candle_store = self.store();
    // Only the metadata record is consulted; missing metadata means null.
    return if (candle_store.readCandleMeta(symbol)) |entry| entry.meta.last_date else null;
}
/// Estimate wait time (in seconds) before a fetch for `data_type`
/// can proceed without blocking on its provider's rate limiter.
/// Returns 0 if a request can be made immediately, or if the
/// provider for this data type has no rate limiter. Returns null
/// if the relevant provider isn't instantiated yet (e.g., no API
/// key, or first call hasn't happened to lazy-init it).
///
/// The caller asks "how long until getX can proceed?" -- the
/// service maps data type to provider internally so the caller
/// doesn't have to know which provider serves which data.
pub fn estimateWaitSeconds(self: *DataService, data_type: cache.DataType) ?u64 {
    const wait_ns: u64 = switch (data_type) {
        // Polygon-served: dividends and splits.
        .dividends, .splits => ns: {
            if (self.pg) |*polygon| break :ns polygon.rate_limiter.estimateWaitNs();
            return null;
        },
        // FMP-served: earnings.
        .earnings => ns: {
            if (self.fmp) |*fmp_provider| break :ns fmp_provider.rate_limiter.estimateWaitNs();
            return null;
        },
        // Cboe-served: options chains.
        .options => ns: {
            if (self.cboe) |*cboe_provider| break :ns cboe_provider.rate_limiter.estimateWaitNs();
            return null;
        },
        // EDGAR-served: ETF metrics, entity facts, ticker maps.
        .etf_metrics, .entity_facts, .tickers_funds, .tickers_companies => ns: {
            if (self.edgar) |*edgar_provider| break :ns edgar_provider.rate_limiter.estimateWaitNs();
            return null;
        },
        // Tiingo-served candles: 50/hour token bucket. Without a Tiingo
        // instance (no key) candles fall back to keyless Yahoo, which has
        // no proactive limiter — hence 0 instead of null. `candles_meta`
        // shares Tiingo's budget.
        .candles_daily, .candles_meta => if (self.tg) |*tiingo| tiingo.rate_limiter.estimateWaitNs() else 0,
        // `meta` isn't fetched; Wikidata (classification) has no
        // published quota.
        .classification, .meta => 0,
    };

    // Zero stays zero; any positive wait is reported as at least one
    // second (the division itself truncates).
    if (wait_ns == 0) return 0;
    return @max(1, wait_ns / std.time.ns_per_s);
}
/// Read candles from cache only (no network fetch). Used by TUI for display.
/// Returns null if no cached data exists or if the entry is a negative cache (fetch_failed).
///
/// `allocator` owns the returned `FetchResult.data`. Pass an
/// arena for "lives until reload" use cases (TUI per-portfolio
/// data); pass a per-call arena for CLI batch commands.
pub fn getCachedCandles(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(Candle) {
    var candle_store = self.store();
    // A negative-cache marker means "known bad": report no data.
    if (candle_store.isNegative(symbol, .candles_daily)) return null;
    // `.any` accepts entries regardless of freshness.
    if (candle_store.read(allocator, Candle, symbol, null, .any)) |hit| {
        return .{
            .data = hit.data,
            .source = .cached,
            .timestamp = hit.timestamp,
            .allocator = allocator,
        };
    }
    return null;
}
/// Read dividends from cache only (no network fetch). See
/// `getCachedCandles` for the allocator contract.
pub fn getCachedDividends(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(Dividend) {
    var dividend_store = self.store();
    // `.any` accepts entries regardless of freshness.
    if (dividend_store.read(allocator, Dividend, symbol, null, .any)) |hit| {
        return .{
            .data = hit.data,
            .source = .cached,
            .timestamp = hit.timestamp,
            .allocator = allocator,
        };
    }
    return null;
}
/// Read earnings from cache only (no network fetch). See
/// `getCachedCandles` for the allocator contract.
pub fn getCachedEarnings(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(EarningsEvent) {
    var earnings_store = self.store();
    // `earningsPostProcess` is handed to the store's read as its
    // post-process argument; `.any` accepts entries regardless of
    // freshness.
    if (earnings_store.read(allocator, EarningsEvent, symbol, earningsPostProcess, .any)) |hit| {
        return .{
            .data = hit.data,
            .source = .cached,
            .timestamp = hit.timestamp,
            .allocator = allocator,
        };
    }
    return null;
}
/// Read options from cache only (no network fetch). See
/// `getCachedCandles` for the allocator contract.
pub fn getCachedOptions(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(OptionsChain) {
    var options_store = self.store();
    // `.any` accepts entries regardless of freshness.
    if (options_store.read(allocator, OptionsChain, symbol, null, .any)) |hit| {
        return .{
            .data = hit.data,
            .source = .cached,
            .timestamp = hit.timestamp,
            .allocator = allocator,
        };
    }
    return null;
}
// ── Portfolio price loading ──────────────────────────────────
/// Status emitted for each symbol during price loading.
pub const SymbolStatus = enum {
// Passed to `ProgressCallback.on_progress` as the per-symbol status.
// In the sequential provider fetch, `.fetching` is emitted first and is
// followed by exactly one of `.fetched`, `.failed_used_stale`, or
// `.failed` for the same symbol.
/// Price resolved from fresh cache.
cached,
/// About to attempt an API fetch (emitted before the network call).
fetching,
/// Price fetched successfully from API.
fetched,
/// API fetch failed but stale cached price was used.
failed_used_stale,
/// API fetch failed and no cached price exists.
failed,
};
/// Callback for progress reporting during price loading.
/// `context` is an opaque pointer to caller-owned state.
pub const ProgressCallback = struct {
    /// Opaque caller state, handed back verbatim as the first argument
    /// of `on_progress`.
    context: *anyopaque,
    /// Invoked once per status change of a symbol.
    on_progress: *const fn (ctx: *anyopaque, index: usize, total: usize, symbol: []const u8, status: SymbolStatus) void,

    /// Forward one progress event to the registered callback.
    fn emit(cb: ProgressCallback, index: usize, total: usize, symbol: []const u8, status: SymbolStatus) void {
        const notify = cb.on_progress;
        notify(cb.context, index, total, symbol, status);
    }
};
// ── Consolidated Price Loading (Parallel Server + Sequential Provider) ──
/// Configuration for loadAllPrices.
pub const LoadAllConfig = struct {
    /// Skip the fresh-cache fast path so every symbol goes through the
    /// fetch phases.
    force_refresh: bool = false,
    /// Skip provider fetches and server sync. Returns cached
    /// data (even if stale) and treats cache miss as failure.
    /// Drives `--refresh-data=never`.
    skip_network: bool = false,
    color: bool = true,

    /// Project this config onto the per-call `FetchOptions` shape, for
    /// paths that pass through to `getCandles`/`getDividends`/etc.
    /// `color` has no counterpart there and is not carried over.
    pub fn fetchOptions(self: LoadAllConfig) FetchOptions {
        return .{
            .force_refresh = self.force_refresh,
            .skip_network = self.skip_network,
        };
    }
};
/// Result of loadAllPrices operation.
pub const LoadAllResult = struct {
/// Symbol -> last close price. The loaders in this file insert the
/// caller-provided symbol slices as keys without copying them, so those
/// slices must stay alive for as long as the map is used.
prices: std.StringHashMap(f64),
/// Number of symbols resolved from fresh local cache.
cached_count: usize,
/// Number of symbols synced from server.
server_synced_count: usize,
/// Number of symbols fetched from providers (rate-limited APIs).
provider_fetched_count: usize,
/// Number of symbols that failed all sources but used stale cache.
stale_count: usize,
/// Number of symbols that failed completely (no data).
failed_count: usize,
/// Latest candle date seen.
latest_date: ?Date,
/// Free the prices hashmap. Call this if you don't transfer ownership.
/// Only the map's own table is released; the key slices are not freed.
pub fn deinit(self: *LoadAllResult) void {
self.prices.deinit();
}
};
/// Progress callback for aggregate (parallel) progress reporting.
/// Called periodically during parallel operations with current counts.
pub const AggregateProgressCallback = struct {
    /// Opaque caller state, handed back verbatim as the first argument
    /// of `on_progress`.
    context: *anyopaque,
    /// Receives the running completed/total counts plus the current phase.
    on_progress: *const fn (ctx: *anyopaque, completed: usize, total: usize, phase: Phase) void,

    /// Stage of the bulk price load being reported.
    pub const Phase = enum {
        /// Checking local cache
        cache_check,
        /// Syncing from ZFIN_SERVER
        server_sync,
        /// Fetching from rate-limited providers
        provider_fetch,
        /// Done
        complete,
    };

    /// Forward one aggregate progress event to the registered callback.
    fn emit(cb: AggregateProgressCallback, completed: usize, total: usize, phase: Phase) void {
        const notify = cb.on_progress;
        notify(cb.context, completed, total, phase);
    }
};
/// Thread-safe counter for parallel progress tracking.
const AtomicCounter = struct {
    /// Current count; starts at zero.
    value: std.atomic.Value(usize) = .init(0),

    /// Atomically add one and return the value held BEFORE the addition.
    fn increment(self: *AtomicCounter) usize {
        const previous = self.value.fetchAdd(1, .monotonic);
        return previous;
    }

    /// Atomically read the current count.
    fn load(self: *const AtomicCounter) usize {
        const current = self.value.load(.monotonic);
        return current;
    }
};
/// Per-symbol result from parallel server sync.
const ServerSyncResult = struct {
// One slot per symbol in the parallel server sync; each spawned task
// writes only to its own slot.
/// Symbol this slot belongs to (copied as a slice from the input list,
/// not duplicated).
symbol: []const u8,
/// Result of `syncCandlesFromServer` for `symbol`. Slots are
/// initialized to `false` before the tasks are spawned.
success: bool,
};
/// Load prices for portfolio and watchlist symbols with automatic parallelization.
///
/// When ZFIN_SERVER is configured:
/// 1. Check local cache (fast, parallel-safe)
/// 2. Parallel sync from server for cache misses
/// 3. Sequential provider fallback for server failures
///
/// When ZFIN_SERVER is not configured:
/// Falls back to sequential loading with per-symbol progress.
///
/// Progress is reported via `aggregate_progress` for parallel phases
/// and `symbol_progress` for sequential provider fallback.
pub fn loadAllPrices(
    self: *DataService,
    portfolio_syms: ?[]const []const u8,
    watch_syms: []const []const u8,
    config: LoadAllConfig,
    aggregate_progress: ?AggregateProgressCallback,
    symbol_progress: ?ProgressCallback,
) LoadAllResult {
    var result: LoadAllResult = .{
        .prices = std.StringHashMap(f64).init(self.allocator),
        .cached_count = 0,
        .server_synced_count = 0,
        .provider_fetched_count = 0,
        .stale_count = 0,
        .failed_count = 0,
        .latest_date = null,
    };

    // A missing portfolio list behaves like an empty one.
    const portfolio: []const []const u8 = portfolio_syms orelse &.{};
    const total_count = portfolio.len + watch_syms.len;
    if (total_count == 0) return result;

    // Combined work list: portfolio symbols first, then watchlist symbols.
    var all_symbols = std.ArrayList([]const u8).initCapacity(self.allocator, total_count) catch return result;
    defer all_symbols.deinit(self.allocator);
    for (portfolio) |sym| all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append portfolio sym({s}): {t}", .{ sym, err });
    for (watch_syms) |sym| all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append watch sym({s}): {t}", .{ sym, err });

    // force_refresh does NOT wipe the candle cache. It flows through to
    // getCandles (via config.fetchOptions()), which ignores the TTL and
    // does an incremental top-up — see the `--refresh-data=force`
    // contract. The Phase-1 fast path below is skipped on force_refresh
    // so every symbol is re-validated against the provider. A full wipe
    // + re-download from scratch is reserved for `cache clear`.

    // Phase 1: local cache fast path.
    var needs_fetch: std.ArrayList([]const u8) = .empty;
    defer needs_fetch.deinit(self.allocator);

    if (aggregate_progress) |p| p.emit(0, total_count, .cache_check);
    for (all_symbols.items) |sym| {
        if (config.force_refresh or !self.isCandleCacheFresh(sym)) {
            needs_fetch.append(self.allocator, sym) catch |err| log.warn("loadAllPrices needs_fetch append({s}): {t}", .{ sym, err });
            continue;
        }
        if (self.getCachedLastClose(sym)) |close| {
            result.prices.put(sym, close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
            self.updateLatestDate(&result, sym);
        }
        // Counted as cached whenever the metadata is fresh, even if the
        // last close itself could not be read.
        result.cached_count += 1;
    }
    if (aggregate_progress) |p| p.emit(result.cached_count, total_count, .cache_check);

    if (needs_fetch.items.len == 0) {
        if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
        return result;
    }

    // Offline mode: no server sync, no provider fetch. Symbols without a
    // fresh cache entry get the stale close when one exists; otherwise
    // they count as failed.
    if (config.skip_network) {
        for (needs_fetch.items) |sym| {
            const close = self.getCachedLastClose(sym) orelse {
                result.failed_count += 1;
                continue;
            };
            result.prices.put(sym, close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
            self.updateLatestDate(&result, sym);
            result.stale_count += 1;
        }
        if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
        return result;
    }

    // Phase 2: server sync (parallel) when a server is configured;
    // otherwise every pending symbol goes straight to the providers.
    var provider_queue: std.ArrayList([]const u8) = .empty;
    defer provider_queue.deinit(self.allocator);

    if (self.config.server_url == null) {
        for (needs_fetch.items) |sym| {
            provider_queue.append(self.allocator, sym) catch |err| log.warn("loadAllPrices server_failures append({s}): {t}", .{ sym, err });
        }
    } else {
        self.parallelServerSync(
            needs_fetch.items,
            &result,
            &provider_queue,
            aggregate_progress,
            total_count,
        );
    }

    // Phase 3: sequential provider fallback for whatever the server
    // could not supply.
    const remaining = provider_queue.items.len;
    if (remaining > 0) {
        if (aggregate_progress) |p| {
            const resolved = result.cached_count + result.server_synced_count;
            p.emit(resolved, total_count, .provider_fetch);
        }
        self.sequentialProviderFetch(
            provider_queue.items,
            &result,
            symbol_progress,
            total_count - remaining, // offset for progress display
            config.fetchOptions(),
        );
    }

    if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
    return result;
}
/// Fetch live intraday quotes for `symbols` in parallel, returning
/// a map of symbol → live last price. Symbols whose quote fetch
/// fails (or that the provider can't price) are simply absent; the
/// caller falls back to the last cached close.
///
/// This is a pure live-price fetch: quotes are never cached, so it
/// neither reads nor writes the candle cache. It exists for the
/// TUI refresh key (`r`), whose job is "give me current prices,"
/// distinct from candle-history maintenance (TTL/startup) and from
/// `--refresh-data=force` (incremental candle top-up).
///
/// Unlike `getQuote` (single-symbol, Yahoo→TwelveData fallback),
/// this is Yahoo-only: Yahoo is keyless with no shared rate
/// limiter, so each worker can safely own its HTTP client.
/// TwelveData's shared rate limiter makes it unsafe to fan out, and
/// its fallback role isn't worth the complexity for a bulk refresh.
///
/// Concurrency mirrors `parallelServerSync`: one task per symbol in
/// a single `std.Io.Group`, each with its own `Yahoo` client (a
/// shared `std.http.Client` is not safe across threads — see
/// `tryOneSync`). Relies on a thread-safe `allocator`/`io`, the
/// same assumption the server-sync fan-out already makes.
///
/// The returned map's keys borrow `symbols`: keep `symbols` alive
/// while using the map, and `deinit()` the map when done.
pub fn loadLiveQuotes(self: *DataService, symbols: []const []const u8) std.StringHashMap(f64) {
    var prices = std.StringHashMap(f64).init(self.allocator);
    if (symbols.len == 0) return prices;
    self.assertNetworkAllowed("loadLiveQuotes");

    // One result slot per symbol; each task writes only to its own slot.
    const QuoteSlot = struct { symbol: []const u8, price: ?f64 = null };
    const slots = self.allocator.alloc(QuoteSlot, symbols.len) catch return prices;
    defer self.allocator.free(slots);
    for (slots, symbols) |*slot, sym| slot.* = .{ .symbol = sym };

    const worker = struct {
        fn run(io: std.Io, allocator: std.mem.Allocator, slot: *QuoteSlot) std.Io.Cancelable!void {
            try io.checkCancel();
            // Each task owns its own Yahoo client.
            var yh = Yahoo.init(io, allocator);
            defer yh.deinit();
            // Quote borrows `symbol` and carries no owned memory, so the
            // f64 close is all we keep — nothing to free.
            const quote = yh.fetchQuote(allocator, slot.symbol) catch {
                slot.price = null;
                return;
            };
            slot.price = quote.close;
        }
    };

    // Fan out one task per symbol, then wait for all of them.
    var group: std.Io.Group = .init;
    for (slots) |*slot| {
        group.async(self.io, worker.run, .{ self.io, self.allocator, slot });
    }
    group.await(self.io) catch |err| log.debug("loadLiveQuotes group await: {t}", .{err});

    // Collect only the symbols that produced a price.
    for (slots) |slot| {
        const live = slot.price orelse continue;
        prices.put(slot.symbol, live) catch |err| log.warn("loadLiveQuotes put({s}): {t}", .{ slot.symbol, err });
    }
    return prices;
}
/// Parallel server sync via `std.Io.Group`.
///
/// Concurrency shape: one task per symbol, spawned into a
/// single `Group`. The `std.Io` implementation owns
/// scheduling and concurrency limits (e.g. `Io.Threaded`
/// sizes its pool from CPU count); we don't second-guess it
/// with our own worker cap or work-stealing queue.
///
/// Each task hits `io.checkCancel()` before its sync, so a
/// cancelation request propagating through `Group.await`
/// stops pending work at task granularity.
fn parallelServerSync(
    self: *DataService,
    symbols: []const []const u8,
    result: *LoadAllResult,
    failures: *std.ArrayList([]const u8),
    aggregate_progress: ?AggregateProgressCallback,
    total_count: usize,
) void {
    if (aggregate_progress) |p| p.emit(result.cached_count, total_count, .server_sync);

    // State shared with the tasks: a completion counter and one result
    // slot per symbol.
    var completed = AtomicCounter{};
    const sync_results = self.allocator.alloc(ServerSyncResult, symbols.len) catch {
        // No slots, no fan-out: hand every symbol to the caller as a
        // failure so the provider fallback can pick it up.
        for (symbols) |sym| failures.append(self.allocator, sym) catch |err| log.warn("parallelServerSync slots-alloc-fallback failures append({s}): {t}", .{ sym, err });
        return;
    };
    defer self.allocator.free(sync_results);

    for (sync_results, symbols) |*slot, sym| {
        slot.* = .{ .symbol = sym, .success = false };
    }

    const worker = struct {
        fn run(io: std.Io, svc: *DataService, slot: *ServerSyncResult, done: *AtomicCounter) std.Io.Cancelable!void {
            // Count the task as done on every exit path, including
            // cancelation, so the progress poll below can terminate.
            defer _ = done.increment();
            try io.checkCancel();
            slot.success = svc.syncCandlesFromServer(slot.symbol);
        }
    };

    // Spawn one task per symbol. Group.async requires an eventual
    // Group.await/cancel to release resources; the single await below
    // covers all paths.
    var group: std.Io.Group = .init;
    for (sync_results) |*slot| {
        group.async(self.io, worker.run, .{ self.io, self, slot, &completed });
    }

    // While the group runs, poll the counter every 50 ms and report.
    if (aggregate_progress) |p| {
        while (completed.load() < symbols.len) {
            std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(50), .awake) catch |err| {
                log.debug("parallelServerSync progress-poll sleep interrupted: {t}", .{err});
                break;
            };
            p.emit(result.cached_count + completed.load(), total_count, .server_sync);
        }
    }

    // Wait for all tasks. On cancelation the unstarted tasks exit at
    // their checkCancel point; slots that did complete are still
    // processed below — they came from successful cache writes.
    group.await(self.io) catch |err| {
        log.debug("parallelServerSync group await: {t}", .{err});
    };

    // Fold the slots into `result` / `failures`.
    for (sync_results) |slot| {
        if (!slot.success) {
            failures.append(self.allocator, slot.symbol) catch |err| log.warn("syncFromServer fail-result failures append({s}): {t}", .{ slot.symbol, err });
            continue;
        }
        // Sync reported success: the price must now be readable from the
        // cache. If it is not, treat the symbol as a failure after all.
        const close = self.getCachedLastClose(slot.symbol) orelse {
            failures.append(self.allocator, slot.symbol) catch |err| log.warn("syncFromServer success-but-no-cache failures append({s}): {t}", .{ slot.symbol, err });
            continue;
        };
        result.prices.put(slot.symbol, close) catch |err| log.warn("syncFromServer cache-after-sync put({s}): {t}", .{ slot.symbol, err });
        self.updateLatestDate(result, slot.symbol);
        result.server_synced_count += 1;
    }
}
/// Sequential provider fetch for symbols that failed server sync.
fn sequentialProviderFetch(
    self: *DataService,
    symbols: []const []const u8,
    result: *LoadAllResult,
    progress: ?ProgressCallback,
    index_offset: usize,
    opts: FetchOptions,
) void {
    // `index_offset` shifts the displayed index past the symbols that
    // were already resolved before this phase.
    const total = index_offset + symbols.len;
    for (symbols, 0..) |sym, i| {
        const display_idx = index_offset + i;

        // Notify: about to fetch.
        if (progress) |p| p.emit(display_idx, total, sym, .fetching);

        // Try the provider fetch. Only the last candle is needed, so the
        // candle slice is released before moving to the next symbol.
        if (self.getCandles(sym, opts)) |candle_result| {
            defer self.allocator.free(candle_result.data);
            if (candle_result.data.len > 0) {
                const last = candle_result.data[candle_result.data.len - 1];
                result.prices.put(sym, last.close) catch |err| log.warn("loadAllPrices candle-close put({s}): {t}", .{ sym, err });
                if (result.latest_date == null or last.date.days > result.latest_date.?.days) {
                    result.latest_date = last.date;
                }
            }
            result.provider_fetched_count += 1;
            if (progress) |p| p.emit(display_idx, total, sym, .fetched);
            continue;
        } else |err| {
            log.debug("{s}: provider candle fetch failed: {s}", .{ sym, @errorName(err) });
        }

        // Provider failed — try stale cache. A symbol served from stale
        // cache counts toward `stale_count` only; `failed_count` is
        // reserved for symbols that end up with no price at all, as
        // documented on `LoadAllResult` and as done by the offline branch
        // of `loadAllPrices`.
        if (self.getCachedLastClose(sym)) |close| {
            result.prices.put(sym, close) catch |err| log.warn("loadAllPrices stale-fallback put({s}): {t}", .{ sym, err });
            result.stale_count += 1;
            if (progress) |p| p.emit(display_idx, total, sym, .failed_used_stale);
        } else {
            result.failed_count += 1;
            if (progress) |p| p.emit(display_idx, total, sym, .failed);
        }
    }
}
/// Update latest_date in result from cached candle metadata.
fn updateLatestDate(self: *DataService, result: *LoadAllResult, symbol: []const u8) void {
    var meta_store = self.store();
    // Without cached metadata there is nothing to compare against.
    const entry = meta_store.readCandleMeta(symbol) orelse return;
    const candidate = entry.meta.last_date;

    // Keep the running maximum: adopt the candidate when no date has
    // been recorded yet or when it is strictly later.
    if (result.latest_date) |current| {
        if (candidate.days > current.days) result.latest_date = candidate;
    } else {
        result.latest_date = candidate;
    }
}
// ── CUSIP Resolution ──────────────────────────────────────────
/// Look up multiple CUSIPs in a single batch request via OpenFIGI.
/// Results array is parallel to the input cusips array (same length, same order).
/// Caller owns the returned slice and all strings within each CusipResult.
pub fn lookupCusips(self: *DataService, cusips: []const []const u8) DataError![]CusipResult {
    // Any provider error is collapsed to FetchFailed for callers.
    const results = OpenFigi.lookupCusips(
        self.io,
        self.allocator,
        cusips,
        self.config.openfigi_key,
    ) catch return DataError.FetchFailed;
    return results;
}
/// A single CUSIP-to-ticker mapping record in the cache file.
const CusipEntry = struct {
// Both fields default to the empty string.
// NOTE(review): field names presumably mirror the on-disk
// `cusip::X,ticker::Y` record keys — confirm against the SRF parser.
/// CUSIP identifier (lookup key).
cusip: []const u8 = "",
/// Ticker symbol the CUSIP maps to.
ticker: []const u8 = "",
};
/// CUSIP->ticker lookup table loaded from `cusip_tickers.srf`.
///
/// Zero-copy: keys and values are slices into `backing` (the raw
/// file bytes parsed with `parse_allocator = .none`). Nothing is
/// duped per entry — the whole-file buffer IS the storage, and it
/// stays alive for the table's lifetime, released together with
/// the map table in `deinit`.
///
/// This is the L1 tier of CUSIP resolution: callers consult it
/// before reaching for the server or OpenFIGI.
pub const CusipTickerMap = struct {
    /// CUSIP -> ticker table.
    map: std.StringHashMap([]const u8),
    /// Raw bytes of `cusip_tickers.srf`; every map key and value
    /// points into this buffer. `&.{}` when the file was missing
    /// or unreadable (freeing a zero-length slice is a no-op).
    backing: []const u8,

    /// Ticker for `cusip`, or null when the CUSIP is not in the table.
    /// The returned slice is borrowed from the table.
    pub fn get(self: CusipTickerMap, cusip: []const u8) ?[]const u8 {
        const ticker = self.map.get(cusip) orelse return null;
        return ticker;
    }

    /// Whether `cusip` has an entry in the table.
    pub fn contains(self: CusipTickerMap, cusip: []const u8) bool {
        return self.map.get(cusip) != null;
    }

    /// Number of entries in the table.
    pub fn count(self: CusipTickerMap) u32 {
        const entries = self.map.count();
        return entries;
    }

    /// Release the map table and the backing buffer. Both were
    /// allocated with the map's allocator at load time, so it is
    /// reused here. The allocator is copied out first because it is
    /// read from the map, which is torn down before the buffer is freed.
    pub fn deinit(self: *CusipTickerMap) void {
        const gpa = self.map.allocator;
        self.map.deinit();
        gpa.free(self.backing);
    }
};
/// Read `cusip_tickers.srf` from the cache directory into a
/// `CusipTickerMap`. The returned table owns the file bytes; release
/// it with `CusipTickerMap.deinit`.
///
/// A missing or unreadable file yields an empty table (the common
/// first-run case). If parsing stops part-way, the entries read so
/// far are returned. On duplicate CUSIPs the first occurrence wins,
/// which tolerates cache files written before `cacheCusipTicker`
/// learned to dedup.
///
/// The on-disk format is CUSIP-keyed (`cusip::X,ticker::Y`) and the
/// returned map is keyed the same way for O(1) forward lookup.
pub fn loadCusipTickerMap(self: *DataService, allocator: std.mem.Allocator) CusipTickerMap {
    var table: CusipTickerMap = .{
        .map = std.StringHashMap([]const u8).init(allocator),
        .backing = &.{},
    };

    const file_path = std.fs.path.join(allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch
        return table;
    defer allocator.free(file_path);

    // Once read, the bytes become the table's backing store: the parser
    // borrows from them (parse_allocator = .none) instead of copying, so
    // they are freed by `CusipTickerMap.deinit` and never here.
    table.backing = std.Io.Dir.cwd().readFileAlloc(self.io, file_path, allocator, .limited(4 * 1024 * 1024)) catch
        return table;

    var reader = std.Io.Reader.fixed(table.backing);
    var records = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return table;
    defer records.deinit();

    while (records.next() catch return table) |fields| {
        const record = fields.to(CusipEntry, .{}) catch continue;
        const usable = record.cusip.len != 0 and record.ticker.len != 0;
        if (!usable) continue;
        // Insert only when the CUSIP is new; the stored slices point
        // straight into `backing`, so nothing is duplicated.
        const slot = table.map.getOrPut(record.cusip) catch continue;
        if (slot.found_existing) continue;
        slot.value_ptr.* = record.ticker;
    }
    return table;
}
/// Append CUSIP->ticker mappings to `cusip_tickers.srf`, skipping
/// any whose CUSIP is already on disk and any duplicates within
/// `entries`. Entries with an empty CUSIP or ticker are dropped.
/// One read + one atomic write regardless of batch size (plus a
/// one-byte probe when the file appears to be new, see below).
///
/// Read-append-atomic-write (rather than open-for-append) so a
/// concurrent reader never sees a valid header plus a partial
/// trailing record — see `cache/store.zig appendRaw` for the same
/// pattern and rationale. `#!srfv1` directives are emitted only
/// when the file is being created.
///
/// Best-effort: allocation or encoding failures end the call without
/// writing; directory-creation and write failures are logged. The
/// existing file is never replaced when it could not be loaded:
/// `loadCusipTickerMap` reports every read failure as an empty table,
/// so an empty backing buffer alone does not prove the file is absent.
fn appendCusipEntries(self: *DataService, entries: []const CusipEntry) void {
    if (entries.len == 0) return;

    // One load gives us both the dedup set and the existing bytes
    // to concat (`backing`).
    var existing_map = self.loadCusipTickerMap(self.allocator);
    defer existing_map.deinit();
    const existing = existing_map.backing;

    // Keep only entries new to the file and unique within the batch.
    var seen = std.StringHashMap(void).init(self.allocator);
    defer seen.deinit();
    var to_write: std.ArrayList(CusipEntry) = .empty;
    defer to_write.deinit(self.allocator);
    for (entries) |e| {
        if (e.cusip.len == 0 or e.ticker.len == 0) continue;
        if (existing_map.contains(e.cusip)) continue;
        const gop = seen.getOrPut(e.cusip) catch continue;
        if (gop.found_existing) continue;
        to_write.append(self.allocator, e) catch continue;
    }
    if (to_write.items.len == 0) return;

    const path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return;
    defer self.allocator.free(path);

    // An empty backing buffer means "missing", "empty", OR "could not
    // be read" (too large, permissions, I/O error, OOM). Only the first
    // two make it safe to write a brand-new file; in the third case a
    // write would discard every mapping already on disk. Probe with a
    // one-byte limit: absent or zero-length is fine, anything else
    // (including a too-long or failed read) is not.
    if (existing.len == 0) {
        const safe_to_create = probe: {
            const head = std.Io.Dir.cwd().readFileAlloc(self.io, path, self.allocator, .limited(1)) catch |err|
                break :probe err == error.FileNotFound;
            defer self.allocator.free(head);
            break :probe head.len == 0;
        };
        if (!safe_to_create) {
            log.warn("cusip-cache {s}: existing file could not be loaded; skipping append to avoid overwriting it", .{path});
            return;
        }
    }

    if (std.fs.path.dirnamePosix(path)) |dir| {
        std.Io.Dir.cwd().createDirPath(self.io, dir) catch |err| log.warn("cusip-cache createDirPath({s}): {t}", .{ dir, err });
    }

    // Directives belong only at the top of a freshly created file.
    const emit_directives = existing.len == 0;
    var aw: std.Io.Writer.Allocating = .init(self.allocator);
    defer aw.deinit();
    aw.writer.print("{f}", .{srf.fmt(CusipEntry, to_write.items, .{ .emit_directives = emit_directives })}) catch return;
    const encoded = aw.writer.buffered();
    if (encoded.len == 0) return;

    // Concat existing + new, then atomic-write.
    const combined = self.allocator.alloc(u8, existing.len + encoded.len) catch return;
    defer self.allocator.free(combined);
    @memcpy(combined[0..existing.len], existing);
    @memcpy(combined[existing.len..], encoded);
    atomic.writeFileAtomic(self.io, self.allocator, path, combined) catch |err| log.warn("cusip-cache writeFileAtomic({s}): {t}", .{ path, err });
}
/// Record one CUSIP->ticker mapping in the cache file. Delegates to
/// `appendCusipEntries` with a one-element batch, so the same dedup
/// rules apply. This is the `lookup` command's single-CUSIP path.
pub fn cacheCusipTicker(self: *DataService, cusip: []const u8, ticker: []const u8) void {
    const single = [_]CusipEntry{.{ .cusip = cusip, .ticker = ticker }};
    self.appendCusipEntries(&single);
}
/// Resolve CUSIPs to tickers through a three-tier cascade and persist
/// anything newly learned to `cusip_tickers.srf` (union policy: the
/// local file accumulates every mapping it ever learns).
///
/// Tiers, cheapest first:
///   L1  local `cusip_tickers.srf`            (always; no network)
///   L2  server `GET /cusips` whole-file sync (if a server URL is configured)
///   L3  OpenFIGI batch lookup                (whatever still misses)
///
/// `skip_network = true` limits resolution to L1 — offline mode
/// (`--refresh-data=never`). L2, L3 and the persist-back are skipped;
/// cached CUSIPs still resolve, uncached ones stay unresolved.
///
/// Best-effort: a network failure means fewer resolved entries, never
/// an error. The returned `CusipTickerMap` is a zero-copy view over
/// the local file, reloaded after a rewrite. Callers resolve
/// forward-per-holding by looking each holding's CUSIP up in it.
///
/// Empty and duplicate CUSIPs in `cusips` are ignored. The caller owns
/// the returned map (`deinit`); pass a scratch allocator to scope it
/// to a single command invocation.
pub fn resolveCusips(self: *DataService, allocator: std.mem.Allocator, cusips: []const []const u8, skip_network: bool) CusipTickerMap {
    var local = self.loadCusipTickerMap(allocator);

    // Offline mode serves L1 only.
    if (skip_network) return local;
    // Warm-cache fast path: nothing missing, so no scratch, no network,
    // no rewrite.
    if (!anyMissing(local, cusips)) return local;

    // Newly learned entries live in a scratch arena so they outlive the
    // server body and OpenFIGI result buffers that are freed along the way.
    var arena_state = std.heap.ArenaAllocator.init(self.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var learned = std.StringHashMap([]const u8).init(arena); // cusip -> ticker

    // L2: whole-file sync from the server. `fetchServerCusips` returns
    // null on any failure, in which case this tier contributes nothing
    // and resolution falls through to L3.
    if (self.config.server_url) |server_url| {
        if (self.fetchServerCusips(server_url)) |body| {
            defer self.allocator.free(body);
            mergeCusipBody(arena, &learned, local, body);
        }
    }

    // L3: OpenFIGI for whatever is still unresolved.
    self.mintMissingViaOpenFigi(arena, &learned, local, cusips);

    const learned_count = learned.count();
    if (learned_count == 0) return local; // nothing new learned

    // Persist the union, then reload so the caller gets a clean
    // single-buffer zero-copy view over the updated file.
    var pending: std.ArrayList(CusipEntry) = .empty;
    // Reserving up front makes the copy loop infallible. If the
    // reservation itself fails, skip persistence and hand back the L1
    // view: some CUSIPs stay unresolved this run instead of erroring.
    pending.ensureTotalCapacity(arena, learned_count) catch return local;
    var learned_it = learned.iterator();
    while (learned_it.next()) |kv| {
        pending.appendAssumeCapacity(.{ .cusip = kv.key_ptr.*, .ticker = kv.value_ptr.* });
    }
    self.appendCusipEntries(pending.items);

    local.deinit();
    return self.loadCusipTickerMap(allocator);
}
/// Report whether at least one non-empty CUSIP in `cusips` has no
/// entry in `map`. Empty strings are ignored.
fn anyMissing(map: CusipTickerMap, cusips: []const []const u8) bool {
    for (cusips) |cusip| {
        if (cusip.len != 0 and !map.contains(cusip)) return true;
    }
    return false;
}
/// Fold a CUSIP->ticker SRF body (the shape served by `GET /cusips`)
/// into `out`. Records with an empty field, and CUSIPs already present
/// in `have` or `out`, are skipped. Inserted keys and values are copied
/// into `arena`, so `body` may be freed afterwards.
///
/// Performs no I/O. A parse failure stops the merge and keeps whatever
/// was merged up to that point.
fn mergeCusipBody(arena: std.mem.Allocator, out: *std.StringHashMap([]const u8), have: CusipTickerMap, body: []const u8) void {
    var reader = std.Io.Reader.fixed(body);
    var records = srf.iterator(&reader, arena, .{ .parse_allocator = .none }) catch return;
    defer records.deinit();

    while (records.next() catch return) |fields| {
        const record = fields.to(CusipEntry, .{}) catch continue;

        const usable = record.cusip.len != 0 and record.ticker.len != 0;
        if (!usable) continue;

        const already_known = have.contains(record.cusip) or out.contains(record.cusip);
        if (already_known) continue;

        // The parsed slices borrow from `body`; copy before storing.
        const owned_cusip = arena.dupe(u8, record.cusip) catch continue;
        const owned_ticker = arena.dupe(u8, record.ticker) catch continue;
        out.put(owned_cusip, owned_ticker) catch continue;
    }
}
/// L2 seam: download the whole CUSIP->ticker map with
/// `GET {server}/cusips`.
///
/// Returns a copy of the raw SRF body (the caller frees it with
/// `self.allocator`) or null on any failure: URL allocation, the HTTP
/// request, a body that fails `looksCompleteSrf`, or the final copy.
/// Best-effort by design: no retry and no torn-body archival (this is
/// a shared reference file, not per-symbol cache), so a bad or absent
/// response just degrades to the OpenFIGI tier.
fn fetchServerCusips(self: *DataService, server_url: []const u8) ?[]u8 {
    const endpoint = std.fmt.allocPrint(self.allocator, "{s}/cusips", .{server_url}) catch return null;
    defer self.allocator.free(endpoint);

    var http_client = http.Client.init(self.io, self.allocator);
    defer http_client.deinit();

    var resp = http_client.get(endpoint) catch |err| {
        log.debug("cusips server sync failed: {s}", .{@errorName(err)});
        return null;
    };
    defer resp.deinit();

    if (cache.Store.looksCompleteSrf(resp.body)) {
        // The response buffer is released on return; hand back a copy.
        return self.allocator.dupe(u8, resp.body) catch null;
    }

    log.debug("cusips server response not complete SRF ({d} bytes) — ignoring", .{resp.body.len});
    return null;
}
/// L3: look up still-unresolved CUSIPs through OpenFIGI, 100 per
/// request (the API's job limit), and record every hit in `out`
/// (strings copied into `arena`).
///
/// The lookup set excludes empty strings, CUSIPs already in `have` or
/// `out`, and repeats within `cusips`. Best-effort: a failed batch is
/// logged and skipped; later batches still run.
fn mintMissingViaOpenFigi(self: *DataService, arena: std.mem.Allocator, out: *std.StringHashMap([]const u8), have: CusipTickerMap, cusips: []const []const u8) void {
    var queued = std.StringHashMap(void).init(arena);
    var pending: std.ArrayList([]const u8) = .empty;
    for (cusips) |cusip| {
        const skip = cusip.len == 0 or have.contains(cusip) or out.contains(cusip);
        if (skip) continue;
        const slot = queued.getOrPut(cusip) catch continue;
        if (slot.found_existing) continue;
        pending.append(arena, cusip) catch continue;
    }
    if (pending.items.len == 0) return;

    const batch_size = 100; // OpenFIGI accepts up to 100 jobs/request.
    var remaining: []const []const u8 = pending.items;
    while (remaining.len > 0) {
        const take: usize = @min(batch_size, remaining.len);
        const batch = remaining[0..take];
        // Advance before the lookup so a failed batch is simply skipped.
        remaining = remaining[take..];

        const figi = self.lookupCusips(batch) catch |err| {
            log.warn("resolveCusips: OpenFIGI lookup of {d} CUSIP(s) failed: {s}", .{ batch.len, @errorName(err) });
            continue;
        };
        // The result slice and its strings belong to us; release them
        // at the end of this iteration, after the hits have been copied.
        defer {
            for (figi) |r| {
                if (r.ticker) |t| self.allocator.free(t);
                if (r.name) |n| self.allocator.free(n);
                if (r.security_type) |s| self.allocator.free(s);
            }
            self.allocator.free(figi);
        }

        // Results are parallel to `batch` (same length + order).
        for (figi, 0..) |hit, idx| {
            if (!hit.found) continue;
            const ticker = hit.ticker orelse continue;
            const owned_cusip = arena.dupe(u8, batch[idx]) catch continue;
            const owned_ticker = arena.dupe(u8, ticker) catch continue;
            out.put(owned_cusip, owned_ticker) catch continue;
        }
    }
}
// ── Utility ──────────────────────────────────────────────────
/// Pause before retrying after a rate-limit error. When the `td`
/// provider has been initialized its rate limiter performs the
/// backoff; otherwise sleep for a fixed 10 seconds. An interrupted
/// sleep is logged at debug level and otherwise ignored.
fn rateLimitBackoff(self: *DataService) void {
    if (self.td) |*td| {
        td.rate_limiter.backoff();
        return;
    }
    std.Io.sleep(self.io, std.Io.Duration.fromSeconds(10), .awake) catch |err| log.debug("rate-limit backoff sleep interrupted: {t}", .{err});
}
// ── Server sync ──────────────────────────────────────────────
/// Pull one cache file for `symbol` from the configured zfin-server.
/// Returns true when the file was synced, false on any failure, when
/// no server is configured, or when `data_type` is not served.
///
/// A failed attempt — at the HTTP layer or a torn body (integrity
/// mismatch / `looksCompleteSrf` rejection) — is retried once after a
/// short delay. Motivation: refreshes fan out 20+ symbols across 8
/// parallel threads, and the tears observed so far look transient
/// per-connection, so one retry papers over single-packet hiccups
/// without dramatically extending refresh wall time. If the retry
/// also fails, the torn-body archive gains a second `.bin`/`.meta`
/// pair; two captures from the same refresh are the most valuable
/// diagnostic signal available (same body shape? same byte offset?
/// same time delta?).
fn syncFromServer(self: *DataService, symbol: []const u8, data_type: cache.DataType) bool {
    const base_url = self.config.server_url orelse return false;

    const route: []const u8 = switch (data_type) {
        .candles_daily => "/candles",
        .candles_meta => "/candles_meta",
        .dividends => "/dividends",
        .earnings => "/earnings",
        .options => "/options",
        .splits => "/splits",
        .classification => "/classification",
        .etf_metrics => "/etf_metrics",
        .entity_facts => "/entity_facts",
        // Not served. `.tickers_funds` / `.tickers_companies` are
        // provider-internal ticker-map indexes that clients fetch
        // directly from the SEC and cache locally; the server has no
        // role in them.
        .meta, .tickers_funds, .tickers_companies => return false,
    };

    const url = std.fmt.allocPrint(self.allocator, "{s}/{s}{s}", .{ base_url, symbol, route }) catch return false;
    defer self.allocator.free(url);

    const max_attempts: u8 = 2;
    const retry_delay_ms: u64 = 250;

    for (0..max_attempts) |attempt| {
        if (attempt != 0) {
            log.debug(
                "{s}: retrying {s} server sync (attempt {d}/{d}) after {d}ms delay",
                .{ symbol, @tagName(data_type), attempt + 1, max_attempts, retry_delay_ms },
            );
            std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(retry_delay_ms), .awake) catch |err| log.debug("syncFromServer retry-delay sleep interrupted: {t}", .{err});
        }

        switch (self.tryOneSync(symbol, data_type, url)) {
            .ok => return true,
            // Torn body or network error: go around again if an
            // attempt remains.
            .torn, .net_err => continue,
        }
    }
    return false;
}
/// Outcome of a single `tryOneSync` attempt.
///   .ok       body passed validation and was written to the local cache
///   .torn     body failed the integrity check or `looksCompleteSrf`
///   .net_err  the HTTP request failed, or writing the body to the cache failed
const SyncAttempt = enum { ok, torn, net_err };
/// Perform a single server-sync attempt for one cache file. A torn
/// body is archived when detected, but nothing is retried here — the
/// caller owns the retry policy.
///
/// Returns `.ok` once the body has been written to the local cache,
/// `.torn` when the body fails validation, and `.net_err` when the
/// request fails or the cache write fails.
fn tryOneSync(self: *DataService, symbol: []const u8, data_type: cache.DataType, full_url: []const u8) SyncAttempt {
    const kind = @tagName(data_type);

    // Start/finish trace for each attempt. "started" is logged before
    // any blocking call and "finished" on every exit path, so a sync
    // that wedges in `client.get` shows up as a started line with no
    // matching finished line. Combine with the per-stage
    // `http: stage=...` lines from `net/http.zig` to see which
    // transport stage stalled.
    //
    // wall-clock required: per-attempt elapsed time is what exposes
    // partial-success/stall patterns under parallel fan-out. `.awake`
    // (monotonic) avoids spurious negatives on clock skew.
    const started_ns = std.Io.Timestamp.now(self.io, .awake).nanoseconds;
    log.debug("{s}: tryOneSync started ({s})", .{ symbol, kind });

    var http_client = http.Client.init(self.io, self.allocator);
    defer http_client.deinit();

    var resp = http_client.get(full_url) catch |err| {
        const failed_after_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - started_ns, std.time.ns_per_ms);
        // Deliberately warn-level: network-shaped errors
        // (`NoAddressReturned`, `ConnectionRefused`,
        // `TlsInitializationFailed`, ...) are exactly what an operator
        // needs to see when sync stops working. At debug level a
        // DNS-truncation bug stayed invisible to anyone not running
        // with debug logging, which cost hours of diagnosis.
        log.warn("{s}: server sync failed for {s}: {s} (elapsed_ms={d})", .{ symbol, kind, @errorName(err), failed_after_ms });
        log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, kind, failed_after_ms });
        return .net_err;
    };
    defer resp.deinit();

    // Integrity check. When the server advertised an ETag of the form
    // `"sha256:<hex>"`, the body's real sha256 must match it. This
    // catches mid-stream truncation that std.http.Client.fetch accepts
    // silently on the Content-Length path (EndOfStream from a cut
    // transport is treated as normal termination). A mismatching body
    // is archived together with the advertised etag so a post-mortem
    // can compare what was promised with what arrived. Responses with
    // no ETag, or a non-sha256 one, fall through to the
    // `looksCompleteSrf` check (backward-compatible with pre-fix
    // servers).
    switch (resp.verifyIntegrity()) {
        .ok, .not_applicable => {},
        .mismatch => |m| {
            cache.Store.archiveTornBody(
                self.io,
                self.allocator,
                self.config.cache_dir,
                symbol,
                data_type,
                resp.body,
                .{
                    .failure_reason = .etag_mismatch,
                    .http_status = @intFromEnum(resp.status),
                    .server_url = full_url,
                    .server_etag = resp.etag,
                },
            ) catch |err| {
                log.debug(
                    "{s}: failed to archive etag-mismatch {s} body: {s}",
                    .{ symbol, kind, @errorName(err) },
                );
            };
            log.debug(
                "{s}: {s} server response failed integrity check ({d} bytes, expected sha256={s}, actual={s}) — archived under _torn/, not writing to cache",
                .{ symbol, kind, resp.body.len, m.expected_hex, m.actual_hex },
            );
            const mismatch_after_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - started_ns, std.time.ns_per_ms);
            log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, kind, mismatch_after_ms });
            return .torn;
        },
    }

    // Shape check: the body must look like a complete SRF file before
    // it is allowed into the cache. This guards against HTTP body
    // truncation (TCP reset, Content-Length mismatch, a proxy flushing
    // a partial response, ...). Without it a torn body is written
    // atomically to the cache and surfaces on the next read as the
    // classic SRF parse error:
    //   error(srf): custom parse of value YYYY-MM failed : InvalidDateFormat
    //
    // A rejected body is archived with its context under
    // `{cache_dir}/_torn/` as evidence for root-cause analysis. The
    // log line stays at debug level on purpose — routine rejections
    // must not be noisy in production runs; the `.meta` sidecar on
    // disk is the durable signal.
    if (!cache.Store.looksCompleteSrf(resp.body)) {
        cache.Store.archiveTornBody(
            self.io,
            self.allocator,
            self.config.cache_dir,
            symbol,
            data_type,
            resp.body,
            .{
                .failure_reason = .looks_complete_srf_failed,
                .http_status = @intFromEnum(resp.status),
                .server_url = full_url,
                .server_etag = resp.etag,
            },
        ) catch |err| {
            log.debug(
                "{s}: failed to archive torn {s} body: {s}",
                .{ symbol, kind, @errorName(err) },
            );
        };
        log.debug(
            "{s}: rejecting torn {s} server response ({d} bytes) — archived under _torn/, not writing to cache",
            .{ symbol, kind, resp.body.len },
        );
        const rejected_after_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - started_ns, std.time.ns_per_ms);
        log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, kind, rejected_after_ms });
        return .torn;
    }

    // The body passed both checks; persist it to the local cache.
    var cache_store = self.store();
    cache_store.writeRaw(symbol, data_type, resp.body) catch |err| {
        log.debug("{s}: failed to write synced {s} to cache: {s}", .{ symbol, kind, @errorName(err) });
        const write_failed_after_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - started_ns, std.time.ns_per_ms);
        // A failed cache write is reported with the same tag as a
        // failed request.
        log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, kind, write_failed_after_ms });
        return .net_err;
    };

    log.debug("{s}: synced {s} from server ({d} bytes)", .{ symbol, kind, resp.body.len });
    const done_after_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - started_ns, std.time.ns_per_ms);
    log.debug("{s}: tryOneSync finished ({s}) result=ok elapsed_ms={d}", .{ symbol, kind, done_after_ms });
    return .ok;
}
/// Sync candle data (daily candles, then their metadata) from the
/// server. Returns true only when both files were synced.
///
/// The metadata is fetched only after the daily candles succeeded.
/// The result is already false once the daily sync fails, so fetching
/// the metadata anyway would spend further HTTP attempts for nothing
/// and could leave a freshly written `candles_meta` on disk next to
/// candle data that was never updated. Code such as `updateLatestDate`
/// reads that metadata's `last_date` as the truth about the cached
/// candles.
fn syncCandlesFromServer(self: *DataService, symbol: []const u8) bool {
    if (!self.syncFromServer(symbol, .candles_daily)) return false;
    return self.syncFromServer(symbol, .candles_meta);
}
/// Heuristic mutual-fund check: a symbol counts as a mutual fund when
/// it is exactly five bytes long and ends in an uppercase `X`
/// (e.g. FDSCX, VSTCX, FAGIX). Such funds have no quarterly earnings,
/// so callers use this to skip the earnings fetch instead of
/// round-tripping to the provider for an empty response.
fn isMutualFund(symbol: []const u8) bool {
    if (symbol.len != 5) return false;
    return symbol[symbol.len - 1] == 'X';
}
// ── User config files ─────────────────────────────────────────
/// Load `accounts.srf` from the directory that contains
/// `portfolio_path` and parse it into an `AccountMap`.
///
/// Returns null when the file cannot be read (at most 1 MiB is
/// accepted) or cannot be parsed. The path and the file bytes are
/// temporary and use the service allocator; the parsed map is built
/// with `allocator`. The caller owns the returned AccountMap and must
/// call deinit().
pub fn loadAccountMap(self: *DataService, allocator: std.mem.Allocator, portfolio_path: []const u8) ?analysis.AccountMap {
    // Keep everything up to and including the last separator; a bare
    // file name yields an empty prefix (current directory).
    var prefix_len: usize = 0;
    if (std.mem.lastIndexOfScalar(u8, portfolio_path, std.fs.path.sep)) |sep_idx| prefix_len = sep_idx + 1;
    const dir_prefix = portfolio_path[0..prefix_len];

    const accounts_path = std.fmt.allocPrint(self.allocator, "{s}accounts.srf", .{dir_prefix}) catch return null;
    defer self.allocator.free(accounts_path);

    const contents = std.Io.Dir.cwd().readFileAlloc(self.io, accounts_path, self.allocator, .limited(1024 * 1024)) catch return null;
    defer self.allocator.free(contents);

    return analysis.parseAccountsFile(allocator, contents) catch null;
}
/// Load `transaction_log.srf` from the directory that contains
/// `portfolio_path` and parse it.
///
/// Returns null when the file cannot be read (at most 1 MiB is
/// accepted) or cannot be parsed; the contributions pipeline then
/// falls back to the pre-transaction-log behavior (no transfer
/// netting). The log is parsed with the service allocator.
///
/// Caller owns the returned `TransactionLog` and must call
/// `deinit()`.
pub fn loadTransferLog(self: *DataService, portfolio_path: []const u8) ?transaction_log.TransactionLog {
    // Keep everything up to and including the last separator; a bare
    // file name yields an empty prefix (current directory).
    var prefix_len: usize = 0;
    if (std.mem.lastIndexOfScalar(u8, portfolio_path, std.fs.path.sep)) |sep_idx| prefix_len = sep_idx + 1;
    const dir_prefix = portfolio_path[0..prefix_len];

    const log_path = std.fmt.allocPrint(self.allocator, "{s}transaction_log.srf", .{dir_prefix}) catch return null;
    defer self.allocator.free(log_path);

    const contents = std.Io.Dir.cwd().readFileAlloc(self.io, log_path, self.allocator, .limited(1024 * 1024)) catch return null;
    defer self.allocator.free(contents);

    return transaction_log.parseTransactionLogFile(self.allocator, contents) catch null;
}
};
// ── Tests ─────────────────────────────────────────────────────────
test "isPermanentProviderFailure: NotFound is permanent" {
    // The one error classified as permanent.
    try std.testing.expectEqual(true, isPermanentProviderFailure(error.NotFound));
}
test "isPermanentProviderFailure: RequestFailed is transient" {
    try std.testing.expectEqual(false, isPermanentProviderFailure(error.RequestFailed));
}
test "isPermanentProviderFailure: ServerError is transient" {
    try std.testing.expectEqual(false, isPermanentProviderFailure(error.ServerError));
}
test "isPermanentProviderFailure: Unauthorized is transient" {
    // An auth misconfiguration is something the user can fix (set the
    // API key), so it must not permanently suppress retries.
    try std.testing.expectEqual(false, isPermanentProviderFailure(error.Unauthorized));
}
test "isPermanentProviderFailure: InvalidResponse is transient" {
    // A parse error usually means a provider format change or a one-off
    // garbage response; trying again later is fine.
    try std.testing.expectEqual(false, isPermanentProviderFailure(error.InvalidResponse));
}
test "isPermanentProviderFailure: PaymentRequired is transient" {
    // FMP answers HTTP 402 for plan-locked symbols. The user can upgrade
    // the plan or rotate providers, so the cache must not be poisoned.
    try std.testing.expectEqual(false, isPermanentProviderFailure(error.PaymentRequired));
}
test "isPermanentProviderFailure: RateLimited is transient" {
    // The textbook transient case; the caller already handles it
    // specially with backoff + retry.
    try std.testing.expectEqual(false, isPermanentProviderFailure(error.RateLimited));
}
test "isMutualFund identifies mutual funds" {
    // Standard mutual fund tickers: five letters ending in X.
    const funds = [_][]const u8{ "FDSCX", "VSTCX", "FAGIX", "VFINX" };
    for (funds) |ticker| {
        try std.testing.expect(DataService.isMutualFund(ticker));
    }

    // Everything else: ordinary tickers, the empty string, a lone "X"
    // (too short), five letters not ending in X, and six letters.
    const others = [_][]const u8{ "AAPL", "VTI", "SPY", "GOOGL", "", "X", "FDSCA", "FDSCXA" };
    for (others) |ticker| {
        try std.testing.expect(!DataService.isMutualFund(ticker));
    }
}
test "DataService init/deinit lifecycle" {
    const cache_dir = "/tmp/zfin-test-cache";
    var svc = DataService.init(std.testing.io, std.testing.allocator, Config{ .cache_dir = cache_dir });
    defer svc.deinit();

    // The config handed to init is reachable through the service.
    try std.testing.expectEqualStrings(cache_dir, svc.config.cache_dir);

    // No provider is constructed up front (lazy init).
    try std.testing.expect(svc.td == null);
    try std.testing.expect(svc.pg == null);
    try std.testing.expect(svc.fmp == null);
    try std.testing.expect(svc.yh == null);
    try std.testing.expect(svc.tg == null);
}
test "DataService store helper creates valid store" {
    const cache_dir = "/tmp/zfin-test-cache";
    var svc = DataService.init(std.testing.io, std.testing.allocator, Config{ .cache_dir = cache_dir });
    defer svc.deinit();

    // The store handed out by the service points at the configured cache dir.
    const cache_store = svc.store();
    try std.testing.expectEqualStrings(cache_dir, cache_store.cache_dir);
}
test "DataService getProvider returns NoApiKey without key" {
    // Only the cache dir is configured: no API keys at all.
    var svc = DataService.init(std.testing.io, std.testing.allocator, Config{
        .cache_dir = "/tmp/zfin-test-cache",
    });
    defer svc.deinit();

    // Key-gated providers refuse to initialize.
    try std.testing.expectError(DataError.NoApiKey, svc.getProvider(TwelveData));
    try std.testing.expectError(DataError.NoApiKey, svc.getProvider(Polygon));

    // Yahoo needs no key, so whatever it returns is not NoApiKey.
    const yahoo = svc.getProvider(Yahoo);
    try std.testing.expect(yahoo != error.NoApiKey);
}
test "DataService getProvider initializes provider with key" {
    var svc = DataService.init(std.testing.io, std.testing.allocator, Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .tiingo_key = "test-tiingo-key",
    });
    defer svc.deinit();

    // The first request constructs the provider and stores it on the service.
    const first = try svc.getProvider(Tiingo);
    try std.testing.expect(svc.tg != null);

    // A second request hands back the very same instance.
    const second = try svc.getProvider(Tiingo);
    try std.testing.expect(first == second);
}
test "DataService LoadAllResult default values" {
    // A result built with zeroed counters and an empty price map.
    var empty_result: DataService.LoadAllResult = .{
        .prices = std.StringHashMap(f64).init(std.testing.allocator),
        .cached_count = 0,
        .server_synced_count = 0,
        .provider_fetched_count = 0,
        .stale_count = 0,
        .failed_count = 0,
        .latest_date = null,
    };
    defer empty_result.deinit();

    const price_count = empty_result.prices.count();
    try std.testing.expectEqual(@as(usize, 0), price_count);
}
test "FetchResult type construction" {
    // FetchResult must instantiate for more than one payload type.
    const gpa = std.testing.allocator;

    const from_cache: FetchResult(Candle) = .{
        .data = &.{},
        .source = .cached,
        .timestamp = 0,
        .allocator = gpa,
    };
    try std.testing.expect(from_cache.source == .cached);

    const from_provider: FetchResult(Dividend) = .{
        .data = &.{},
        .source = .fetched,
        .timestamp = 12345,
        .allocator = gpa,
    };
    try std.testing.expect(from_provider.source == .fetched);
    try std.testing.expectEqual(@as(i64, 12345), from_provider.timestamp);
}
test "FetchOptions default is fully permissive" {
    // A default-initialized FetchOptions leaves normal fetching enabled:
    // the network is allowed and no refresh is forced.
    const defaults: FetchOptions = .{};
    try std.testing.expectEqual(false, defaults.skip_network);
    try std.testing.expectEqual(false, defaults.force_refresh);
}
test "LoadAllConfig.fetchOptions maps fields through" {
    // force_refresh set, skip_network explicitly clear.
    const forced: DataService.LoadAllConfig = .{
        .force_refresh = true,
        .skip_network = false,
    };
    const forced_opts = forced.fetchOptions();
    try std.testing.expect(forced_opts.force_refresh);
    try std.testing.expect(!forced_opts.skip_network);

    // skip_network set, force_refresh left at its default.
    const offline: DataService.LoadAllConfig = .{
        .skip_network = true,
    };
    const offline_opts = offline.fetchOptions();
    try std.testing.expect(offline_opts.skip_network);
    try std.testing.expect(!offline_opts.force_refresh);
}
test "getCandles offline mode returns cached data without network" {
    const io = std.testing.io;
    const gpa = std.testing.allocator;

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    // A service whose cache lives in the temp directory.
    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Seed the cache with two candles through the Store API.
    const first_day = Date.fromYmd(2026, 5, 19);
    const second_day = Date.fromYmd(2026, 5, 20);
    var seeded = [_]Candle{
        .{ .date = first_day, .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
        .{ .date = second_day, .open = 104, .high = 106, .low = 103, .close = 105, .adj_close = 105, .volume = 1100 },
    };
    var cache_store = svc.store();
    cache_store.cacheCandles("TEST", seeded[0..], .tiingo, 0);

    // Arm the guard: any network call now panics, so a passing test
    // shows the offline path never touched the network.
    svc.panic_on_network_attempt = true;

    const fetched = try svc.getCandles("TEST", .{ .skip_network = true });
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 2), fetched.data.len);
    try std.testing.expect(fetched.data[0].date.eql(first_day));
    try std.testing.expect(fetched.data[1].date.eql(second_day));
    try std.testing.expectEqual(Source.cached, fetched.source);
}
test "getCandles offline mode with no cache returns FetchFailed" {
    const io = std.testing.io;
    const gpa = std.testing.allocator;

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // With the guard armed, an empty cache and skip_network=true, the
    // call has to fail with FetchFailed rather than panic.
    svc.panic_on_network_attempt = true;
    try std.testing.expectError(
        DataError.FetchFailed,
        svc.getCandles("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}
test "fetchCached offline mode returns stale-cached data" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Seed one dividend via writeWithSource using a TTL of
    // -1_000_000 seconds, so the entry is meant to count as expired
    // (stale) from the moment it is written.
    var cache_store = ds.store();
    var stale_divs = [_]Dividend{
        .{ .ex_date = Date.fromYmd(2026, 3, 15), .amount = 0.50, .type = .regular },
    };
    cache_store.writeWithSource(Dividend, "TEST", &stale_divs, .{ .seconds = -1_000_000 }, "test");

    ds.panic_on_network_attempt = true;

    // skip_network must hand back the stale entry instead of
    // reaching for the network.
    const fetched = try ds.getDividends("TEST", .{ .skip_network = true });
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 1), fetched.data.len);
    try std.testing.expectEqual(Source.cached, fetched.source);
}
test "getQuote offline mode returns FetchFailed (quotes never cached)" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    ds.panic_on_network_attempt = true;

    // There is no quote cache to fall back on, so an offline quote
    // request can only fail.
    try std.testing.expectError(
        DataError.FetchFailed,
        ds.getQuote("AAPL", .{ .skip_network = true }),
    );
}
test "loadAllPrices offline mode skips network and returns cached" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // "FRESH" gets a cached candle; "MISSING" deliberately gets no
    // cache entry at all.
    var cache_store = ds.store();
    var seeded = [_]Candle{
        .{ .date = Date.fromYmd(2026, 5, 20), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
    };
    cache_store.cacheCandles("FRESH", &seeded, .tiingo, 0);

    ds.panic_on_network_attempt = true;

    const wanted = [_][]const u8{ "FRESH", "MISSING" };
    var loaded = ds.loadAllPrices(&wanted, &.{}, .{ .skip_network = true }, null, null);
    defer loaded.prices.deinit();

    // The cached symbol resolves to its stored close.
    try std.testing.expect(loaded.prices.contains("FRESH"));
    try std.testing.expectEqual(@as(f64, 104), loaded.prices.get("FRESH").?);

    // The uncached symbol is absent from the map and is counted as
    // the single failure.
    try std.testing.expect(!loaded.prices.contains("MISSING"));
    try std.testing.expectEqual(@as(usize, 1), loaded.failed_count);
}
test "loadAllPrices force_refresh tops up without wiping the candle cache" {
    // Regression guard. force_refresh is supposed to mean "ignore
    // the TTL and do an incremental top-up", not "delete the cache
    // and download everything again". The earlier behavior
    // invalidated candles_daily before fetching, forcing a full
    // network re-download. When the cache already reaches today,
    // force_refresh has to be served from the surviving cache with
    // no network traffic.
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // The candle is dated far in the future so that getCandles'
    // "last cached date is today-or-later" branch is taken whatever
    // the test clock says; an incremental fetch then has nothing to
    // pull and never gets as far as the network.
    var cache_store = ds.store();
    var seeded = [_]Candle{
        .{ .date = Date.fromYmd(2099, 12, 31), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
    };
    cache_store.cacheCandles("HELD", &seeded, .tiingo, 0);

    // With the guard armed, any provider/network attempt panics. A
    // force_refresh that wiped the cache (the old behavior) would
    // drop into a full re-fetch and trip it.
    ds.panic_on_network_attempt = true;

    const wanted = [_][]const u8{"HELD"};
    var loaded = ds.loadAllPrices(&wanted, &.{}, .{ .force_refresh = true }, null, null);
    defer loaded.prices.deinit();

    // Price came out of the cache that was left in place.
    try std.testing.expect(loaded.prices.contains("HELD"));
    try std.testing.expectEqual(@as(f64, 104), loaded.prices.get("HELD").?);

    // And the candle cache itself is still there afterwards.
    try std.testing.expect(ds.getCachedLastClose("HELD") != null);
}
test "getClassification: skip_network with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Empty cache, network disallowed, guard armed.
    ds.panic_on_network_attempt = true;
    try std.testing.expectError(
        DataError.FetchFailed,
        ds.getClassification("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}
test "getClassification: cache hit returns cached data without network" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Write one classification record for AAPL with the standard
    // classification TTL.
    var cache_store = ds.store();
    var seeded = [_]Wikidata.ClassificationRecord{.{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .country = "US",
        .as_of = "2026-05-25",
        .source = "wikidata",
    }};
    cache_store.write(Wikidata.ClassificationRecord, "AAPL", &seeded, .{ .seconds = cache.Ttl.classification });

    // Default options (no skip_network), but the guard is armed:
    // the lookup has to be answered from the cache.
    ds.panic_on_network_attempt = true;
    const fetched = try ds.getClassification("AAPL", .{});
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 1), fetched.data.len);
    try std.testing.expectEqualStrings("AAPL", fetched.data[0].symbol);
    try std.testing.expectEqualStrings("Apple Inc.", fetched.data[0].name.?);
    try std.testing.expectEqual(Source.cached, fetched.source);
}
test "populateGeo: country US -> geo US" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Heap-backed record with country "US" and no geo yet; released
    // through record.deinit at scope exit.
    var rec: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .country = try gpa.dupe(u8, "US"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer rec.deinit(gpa);

    try ds.populateGeo(&rec);

    try std.testing.expect(rec.geo != null);
    try std.testing.expectEqualStrings("US", rec.geo.?);
}
test "populateGeo: country GB -> geo International Developed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Same shape as the US case, but with country "GB".
    var rec: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .country = try gpa.dupe(u8, "GB"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer rec.deinit(gpa);

    try ds.populateGeo(&rec);

    try std.testing.expect(rec.geo != null);
    try std.testing.expectEqualStrings("International Developed", rec.geo.?);
}
test "populateGeo: null country -> noop" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // No country supplied, so there is nothing to derive geo from.
    var rec: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer rec.deinit(gpa);

    try ds.populateGeo(&rec);

    // geo must still be unset afterwards.
    try std.testing.expectEqual(@as(?[]const u8, null), rec.geo);
}
test "populateGeo: existing geo not overwritten" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // geo is pre-filled with a value that the country "US" would
    // not produce, so an overwrite would be visible.
    var rec: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .country = try gpa.dupe(u8, "US"),
        .geo = try gpa.dupe(u8, "Already Set"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer rec.deinit(gpa);

    try ds.populateGeo(&rec);

    try std.testing.expectEqualStrings("Already Set", rec.geo.?);
}
test "getClassification: sparse Wikidata + EDGAR managed_fund hit produces merged record" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    var cache_store = ds.store();

    // Both EDGAR ticker-map caches need at least one row: the load
    // helpers treat an empty cached slice as a miss and would fall
    // through to a network fetch from synthesizeClassification.
    var fund_rows = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "FAGIX",
        .cik = "0000275309",
    }};
    cache_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_rows, cache.DataType.tickers_funds.ttl());

    var company_rows = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    cache_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_rows, cache.DataType.tickers_companies.ttl());

    // A negative etf_metrics entry keeps getEtfMetrics off the
    // network as well.
    cache_store.writeNegative("FAGIX", .etf_metrics);

    // One sparse Wikidata row: only the name is filled in, which is
    // not enough to be useful by itself.
    var wikidata_rows = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    wikidata_rows[0] = .{
        .symbol = try gpa.dupe(u8, "FAGIX"),
        .name = try gpa.dupe(u8, "Test Fund"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    // Call synthesizeClassification directly, bypassing the Wikidata
    // fetch. Ownership of `wikidata_rows` passes to the callee.
    ds.panic_on_network_attempt = true; // any provider call -> panic
    const merged_rows = try ds.synthesizeClassification("FAGIX", wikidata_rows, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(gpa, merged_rows);

    try std.testing.expectEqual(@as(usize, 1), merged_rows.len);
    const merged = merged_rows[0];
    try std.testing.expectEqualStrings("FAGIX", merged.symbol);
    try std.testing.expect(merged.is_etf);
    try std.testing.expectEqualStrings("Fund", merged.asset_class.?);
    try std.testing.expectEqualStrings("US", merged.country.?);
    try std.testing.expectEqualStrings("US", merged.geo.?);
    try std.testing.expectEqualStrings("edgar_fallback", merged.source);
    // The name supplied by Wikidata is carried into the merged row.
    try std.testing.expectEqualStrings("Test Fund", merged.name.?);
}
test "synthesizeClassification: no EDGAR hit returns NotFound" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Fill both ticker maps with unrelated rows: the EDGAR lookup
    // then yields .none for the symbol under test, yet the maps
    // themselves never have to be fetched over the network.
    var cache_store = ds.store();
    var fund_rows = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY1",
        .cik = "0000000001",
    }};
    cache_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_rows, cache.DataType.tickers_funds.ttl());

    var company_rows = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "DUMMY2",
        .cik = "0000000002",
    }};
    cache_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_rows, cache.DataType.tickers_companies.ttl());

    var wikidata_rows = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    wikidata_rows[0] = .{
        .symbol = try gpa.dupe(u8, "NEVERHEARDOFIT"),
        .name = try gpa.dupe(u8, "ghost"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    ds.panic_on_network_attempt = true;
    try std.testing.expectError(
        error.NotFound,
        ds.synthesizeClassification("NEVERHEARDOFIT", wikidata_rows, .{ .skip_network = true }),
    );
}
test "synthesizeClassification: company_or_uit without ETF/TRUST keyword still routes to multi-row" {
    // Modeled on PTY, a closed-end fund listed in company_tickers as
    // "PIMCO CORPORATE & INCOME OPPORTUNITY FUND". The title has
    // neither "ETF" nor "TRUST", so lookupInTickerMaps reports
    // .company_or_uit{is_etf=false}; the security is nonetheless
    // fund-shaped and enrich should emit multi-row metadata for it.
    //
    // Downstream, ClassificationRecord.is_etf is the flag meaning
    // "fund-like, emit multi-row". It is therefore expected to be
    // true for every EDGAR-found .company_or_uit hit, keyword or
    // not, so that PTY-style closed-end funds are handled like ETFs.
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    var cache_store = ds.store();

    // Unrelated mutual-fund row: the MF lookup for PTY comes back
    // empty.
    var fund_rows = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    cache_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_rows, cache.DataType.tickers_funds.ttl());

    // PTY sits in the company map under a title lacking ETF/TRUST.
    var company_rows = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "PTY",
        .cik = "0001202604",
        .title = "PIMCO CORPORATE & INCOME OPPORTUNITY FUND",
    }};
    cache_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_rows, cache.DataType.tickers_companies.ttl());
    cache_store.writeNegative("PTY", .etf_metrics);

    var wikidata_rows = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    wikidata_rows[0] = .{
        .symbol = try gpa.dupe(u8, "PTY"),
        .name = try gpa.dupe(u8, "PIMCO Corporate & Income Opportunity Fund"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    ds.panic_on_network_attempt = true;
    const merged_rows = try ds.synthesizeClassification("PTY", wikidata_rows, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(gpa, merged_rows);

    try std.testing.expectEqual(@as(usize, 1), merged_rows.len);
    const merged = merged_rows[0];
    // is_etf has to be set so enrich goes through emitEtfRows (the
    // multi-row sleeve breakdown); asset_class remains "Fund" since
    // the title carries no ETF/TRUST keyword.
    try std.testing.expect(merged.is_etf);
    try std.testing.expectEqualStrings("Fund", merged.asset_class.?);
}
test "synthesizeClassification: NPORT-P series_name beats Wikidata's index name for funds" {
    // Modeled on SOXX. Wikidata answers with the name of the
    // underlying INDEX ("PHLX Semiconductor Sector"), whereas
    // consumers downstream want the FUND's name ("iShares
    // Semiconductor ETF"), which is what the NPORT-P <seriesName>
    // holds. For the fund itself the series name is treated as the
    // more authoritative of the two.
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    var cache_store = ds.store();

    var fund_rows = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    cache_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_rows, cache.DataType.tickers_funds.ttl());

    var company_rows = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "SOXX",
        .cik = "0001100663",
        .title = "iShares Trust",
    }};
    cache_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_rows, cache.DataType.tickers_companies.ttl());

    // Cache an etf_metrics profile row that carries the NPORT-P
    // seriesName. Its strings are heap copies, released by the
    // deferred loop below.
    var metric_rows = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = try gpa.dupe(u8, "SOXX"),
            .series_name = try gpa.dupe(u8, "iShares Semiconductor ETF"),
            .cik = try gpa.dupe(u8, "0001100663"),
            .as_of = try gpa.dupe(u8, "2026-06-01"),
            .source = try gpa.dupe(u8, "edgar"),
        } },
    };
    defer for (metric_rows) |row| row.deinit(gpa);
    cache_store.write(Edgar.EtfMetricRecord, "SOXX", &metric_rows, cache.DataType.etf_metrics.ttl());

    // Sparse Wikidata input holding just the index name.
    var wikidata_rows = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    wikidata_rows[0] = .{
        .symbol = try gpa.dupe(u8, "SOXX"),
        .name = try gpa.dupe(u8, "PHLX Semiconductor Sector"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    ds.panic_on_network_attempt = true;
    const merged_rows = try ds.synthesizeClassification("SOXX", wikidata_rows, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(gpa, merged_rows);

    try std.testing.expectEqual(@as(usize, 1), merged_rows.len);
    const merged = merged_rows[0];
    // The NPORT-P series name is what ends up in `name`, not the
    // index name from Wikidata.
    try std.testing.expectEqualStrings("iShares Semiconductor ETF", merged.name.?);
}
test "getEntityFacts: skip_network with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Uncached CIK, network disallowed, guard armed.
    ds.panic_on_network_attempt = true;
    try std.testing.expectError(
        DataError.FetchFailed,
        ds.getEntityFacts("0000999999", .{ .skip_network = true }),
    );
}
test "getEntityFacts: cache hit returns cached shares-outstanding" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Cache a single shares-outstanding fact keyed by its CIK.
    var cache_store = ds.store();
    var seeded = [_]Edgar.EntityFactRecord{
        .{ .shares_outstanding = .{
            .symbol = "",
            .shares_outstanding = 14687356000,
            .period_end = "2026-04-17",
            .form = "10-Q",
            .cik = "0000320193",
            .as_of = "2026-05-25",
            .source = "edgar_xbrl",
        } },
    };
    cache_store.write(Edgar.EntityFactRecord, "0000320193", &seeded, .{ .seconds = cache.Ttl.entity_facts });

    ds.panic_on_network_attempt = true;
    const fetched = try ds.getEntityFacts("0000320193", .{});
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 1), fetched.data.len);
    switch (fetched.data[0]) {
        .shares_outstanding => |fact| {
            try std.testing.expectEqual(@as(u64, 14687356000), fact.shares_outstanding);
            try std.testing.expectEqualStrings("0000320193", fact.cik);
        },
    }
    try std.testing.expectEqual(Source.cached, fetched.source);
}
test "getEtfMetrics: skip_network with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // Uncached symbol, network disallowed, guard armed.
    ds.panic_on_network_attempt = true;
    try std.testing.expectError(
        DataError.FetchFailed,
        ds.getEtfMetrics("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}
test "getEtfMetrics: cache hit returns cached profile + sectors + holdings" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    var ds = DataService.init(test_io, gpa, Config{ .cache_dir = cache_root });
    defer ds.deinit();

    // One row of each record variant, in the order the assertions
    // below expect to read them back: profile, sector, holding.
    var cache_store = ds.store();
    var seeded = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = "VTI",
            .cik = "0000036405",
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .sector = .{
            .symbol = "VTI",
            .code = "EC/CORP",
            .description = "Equity / Corporate",
            .pct_of_portfolio = 99.7,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .holding = .{
            .symbol = "VTI",
            .name = "NVIDIA Corp",
            .pct_of_portfolio = 6.57,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
    };
    cache_store.write(Edgar.EtfMetricRecord, "VTI", &seeded, .{ .seconds = cache.Ttl.etf_metrics });

    ds.panic_on_network_attempt = true;
    const fetched = try ds.getEtfMetrics("VTI", .{});
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 3), fetched.data.len);
    try std.testing.expect(fetched.data[0] == .profile);
    try std.testing.expect(fetched.data[1] == .sector);
    try std.testing.expect(fetched.data[2] == .holding);
    try std.testing.expectEqualStrings("VTI", fetched.data[0].profile.symbol);
    try std.testing.expectEqual(Source.cached, fetched.source);
}
test "DataService getProvider initializes Wikidata with user_email" {
    const gpa = std.testing.allocator;
    var ds = DataService.init(std.testing.io, gpa, Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .user_email = "test@example.com",
    });
    defer ds.deinit();

    // First request creates the provider and stores it on the
    // service, configured with the email from Config.
    const first = try ds.getProvider(Wikidata);
    try std.testing.expect(ds.wikidata != null);
    try std.testing.expectEqualStrings("test@example.com", first.user_email);

    // Asking again yields the very same instance.
    const second = try ds.getProvider(Wikidata);
    try std.testing.expect(first == second);
}
test "DataService getProvider returns NoApiKey for Wikidata without user_email" {
    const gpa = std.testing.allocator;
    var ds = DataService.init(std.testing.io, gpa, Config{ .cache_dir = "/tmp/zfin-test-cache" });
    defer ds.deinit();

    // With no user_email configured, both the Wikidata and the
    // Edgar provider requests are refused with NoApiKey.
    try std.testing.expectError(DataError.NoApiKey, ds.getProvider(Wikidata));
    try std.testing.expectError(DataError.NoApiKey, ds.getProvider(Edgar));
}
test "estimateWaitSeconds returns null when relevant provider not instantiated" {
    const gpa = std.testing.allocator;
    var ds = DataService.init(std.testing.io, gpa, Config{ .cache_dir = "/tmp/zfin-test-cache" });
    defer ds.deinit();

    // Providers are created lazily and none has been requested, so
    // every rate-limited data type reports null: the provider it
    // would consult is not there.
    inline for (.{ .dividends, .splits, .earnings, .options, .etf_metrics, .entity_facts }) |kind| {
        try std.testing.expectEqual(@as(?u64, null), ds.estimateWaitSeconds(kind));
    }
}
test "estimateWaitSeconds returns 0 for types without rate limiters" {
    // Data types such as candles_daily and classification come from
    // providers with no rate limiter (Tiingo, Wikidata). For those
    // the answer is 0 whatever the provider state, because there is
    // nothing to wait on.
    const gpa = std.testing.allocator;
    var ds = DataService.init(std.testing.io, gpa, Config{ .cache_dir = "/tmp/zfin-test-cache" });
    defer ds.deinit();

    inline for (.{ .candles_daily, .candles_meta, .classification, .meta }) |kind| {
        try std.testing.expectEqual(@as(?u64, 0), ds.estimateWaitSeconds(kind));
    }
}
test "estimateWaitSeconds returns 0 for fresh rate-limited providers" {
    // After a provider exists, a rate limiter that has not been
    // used yet reports 0 (no wait) — the steady-state happy path
    // for the check made at the top of each refresh iteration.
    const gpa = std.testing.allocator;
    var ds = DataService.init(std.testing.io, gpa, Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .polygon_key = "test-polygon-key",
        .fmp_key = "test-fmp-key",
    });
    defer ds.deinit();

    // Request each provider purely to trigger its lazy creation;
    // the returned pointers are not needed, only that svc.pg and
    // svc.fmp end up non-null.
    _ = try ds.getProvider(Polygon);
    _ = try ds.getProvider(Fmp);

    // An untouched limiter has a full token bucket, hence 0 wait.
    try std.testing.expectEqual(@as(?u64, 0), ds.estimateWaitSeconds(.dividends));
    try std.testing.expectEqual(@as(?u64, 0), ds.estimateWaitSeconds(.splits));
    try std.testing.expectEqual(@as(?u64, 0), ds.estimateWaitSeconds(.earnings));
}
// ── lookupInTickerMaps ────────────────────────────────────────
//
// Pure function — no I/O. Consumed by `lookupEdgarFallback`,
// which loads the maps then calls this. Tests construct
// synthetic ticker-map data directly to exercise every branch
// without touching the cache or network.
/// Test helper: builds a `MutualFundTickerEntry` whose `symbol` and
/// `cik` are fresh copies allocated from `allocator`.
///
/// Returns the allocator's error if either copy cannot be made; in
/// that case nothing allocated here is left behind. On success the
/// caller owns both strings.
/// NOTE(review): the tests in this file hand the entry to
/// `Edgar.TickerMap.fromEntries` and rely on `map.deinit()` for
/// cleanup — confirm that is where the strings are freed.
fn testNewMfEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8) !Edgar.MutualFundTickerEntry {
    const owned_symbol = try allocator.dupe(u8, symbol);
    // Without this, a failure copying `cik` would leak the symbol.
    errdefer allocator.free(owned_symbol);
    const owned_cik = try allocator.dupe(u8, cik);
    return .{
        .symbol = owned_symbol,
        .cik = owned_cik,
    };
}
/// Test helper: builds a `CompanyTickerEntry` whose `symbol`, `cik`
/// and (when given) `title` are fresh copies allocated from
/// `allocator`. A null `title` is stored as null with no allocation.
///
/// Returns the allocator's error if any copy cannot be made; copies
/// made before the failure are released first. On success the caller
/// owns the strings.
/// NOTE(review): the tests in this file hand the entry to
/// `Edgar.TickerMap.fromEntries` and rely on `map.deinit()` for
/// cleanup — confirm that is where the strings are freed.
fn testNewCoEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8, title: ?[]const u8) !Edgar.CompanyTickerEntry {
    const owned_symbol = try allocator.dupe(u8, symbol);
    errdefer allocator.free(owned_symbol);
    const owned_cik = try allocator.dupe(u8, cik);
    errdefer allocator.free(owned_cik);
    // Last fallible step: if it fails, the two errdefers above undo
    // the earlier copies.
    const owned_title: ?[]const u8 = if (title) |t| try allocator.dupe(u8, t) else null;
    return .{
        .symbol = owned_symbol,
        .cik = owned_cik,
        .title = owned_title,
    };
}
test "lookupInTickerMaps: both maps null -> .none" {
    const gpa = std.testing.allocator;

    // Neither ticker map is supplied, so there is nowhere to look.
    const hit = lookupInTickerMaps(gpa, "ANY", null, null);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .none);
}
test "lookupInTickerMaps: symbol in MF map -> .managed_fund" {
    const gpa = std.testing.allocator;

    // Mutual-fund map with a single FAGIX row; no company map.
    const rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    rows[0] = try testNewMfEntry(gpa, "FAGIX", "0000225322");
    var mf_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, rows);
    defer mf_map.deinit();

    const hit = lookupInTickerMaps(gpa, "FAGIX", &mf_map, null);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .managed_fund);
}
test "lookupInTickerMaps: symbol in company map with TRUST title -> ETF hint" {
    const gpa = std.testing.allocator;

    // Company map with SPY, whose title contains "ETF TRUST".
    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "SPY", "0000884394", "SPDR S&P 500 ETF TRUST");
    var co_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    defer co_map.deinit();

    const hit = lookupInTickerMaps(gpa, "SPY", null, &co_map);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .company_or_uit);
    try std.testing.expect(hit.company_or_uit.is_etf);
    try std.testing.expectEqualStrings("SPDR S&P 500 ETF TRUST", hit.company_or_uit.title.?);
}
test "lookupInTickerMaps: company map with operating-company title -> not ETF" {
    const gpa = std.testing.allocator;

    // Company map with AAPL under an ordinary corporate title.
    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "AAPL", "0000320193", "Apple Inc.");
    var co_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    defer co_map.deinit();

    const hit = lookupInTickerMaps(gpa, "AAPL", null, &co_map);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .company_or_uit);
    try std.testing.expect(!hit.company_or_uit.is_etf);
}
test "lookupInTickerMaps: not in either map -> .none" {
    const gpa = std.testing.allocator;

    // The only map supplied knows FAGIX; the lookup asks for a
    // different symbol.
    const rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    rows[0] = try testNewMfEntry(gpa, "FAGIX", "0000225322");
    var funds = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, rows);
    defer funds.deinit();

    const hit = lookupInTickerMaps(gpa, "MISSING", &funds, null);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .none);
}
test "lookupInTickerMaps: MF map takes precedence over company map" {
    // A symbol can occasionally show up in both maps (class shares
    // of an open-end fund versus the fund's parent company). The
    // mutual-fund answer is the preferred one; this test pins that
    // contract.
    const gpa = std.testing.allocator;

    const fund_rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    fund_rows[0] = try testNewMfEntry(gpa, "DUP", "0000000001");
    const company_rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    company_rows[0] = try testNewCoEntry(gpa, "DUP", "0000000002", "DUP TRUST");

    var funds = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, fund_rows);
    defer funds.deinit();
    var companies = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, company_rows);
    defer companies.deinit();

    const hit = lookupInTickerMaps(gpa, "DUP", &funds, &companies);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .managed_fund);
}
test "lookupInTickerMaps: company map with null title -> .company_or_uit, no ETF" {
    // Defensive case: a row in EDGAR's company file may lack a
    // title. The lookup is still returned, but ETF status cannot be
    // inferred from a string that is not there.
    const gpa = std.testing.allocator;

    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "BARE", "0000000001", null);
    var co_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    defer co_map.deinit();

    const hit = lookupInTickerMaps(gpa, "BARE", null, &co_map);
    defer freeEdgarLookup(gpa, hit);

    try std.testing.expect(hit == .company_or_uit);
    try std.testing.expect(!hit.company_or_uit.is_etf);
    try std.testing.expect(hit.company_or_uit.title == null);
}
test "lookupInTickerMaps: returned title is owned (survives map deinit)" {
    // This matters for the service.lookupEdgarFallback contract:
    // the maps are freed before the EdgarLookup reaches the caller,
    // so the title has to outlive them.
    const gpa = std.testing.allocator;

    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "VTI", "0000884394", "VANGUARD TOTAL STOCK MARKET ETF");

    // The map lives only inside this block; its deferred deinit
    // runs as the lookup result is handed out.
    const hit = scoped: {
        var co_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
        defer co_map.deinit();
        break :scoped lookupInTickerMaps(gpa, "VTI", null, &co_map);
    };
    defer freeEdgarLookup(gpa, hit);

    // The map has been torn down; the title must remain readable.
    try std.testing.expect(hit == .company_or_uit);
    try std.testing.expectEqualStrings("VANGUARD TOTAL STOCK MARKET ETF", hit.company_or_uit.title.?);
    try std.testing.expect(hit.company_or_uit.is_etf);
}
test "freeEdgarLookup: handles all three union variants without leak" {
    const gpa = std.testing.allocator;

    // Variants that carry no heap data: nothing to release.
    freeEdgarLookup(gpa, .managed_fund);
    freeEdgarLookup(gpa, .none);

    // .company_or_uit without a title: still nothing to release.
    freeEdgarLookup(gpa, .{ .company_or_uit = .{ .title = null, .is_etf = false } });

    // .company_or_uit with a heap-allocated title: the call must free it.
    const title_copy = try gpa.dupe(u8, "Some Title");
    freeEdgarLookup(gpa, .{ .company_or_uit = .{ .title = title_copy, .is_etf = true } });

    // No explicit assertion is needed: std.testing.allocator reports any
    // leaked allocation and fails the test, so a pass means the title
    // was freed.
}
// ── CUSIP->ticker cache (loadCusipTickerMap / cacheCusipTicker) ──
test "loadCusipTickerMap: missing file returns empty map" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    // A fresh temp directory serves as the cache dir; nothing has been
    // written into it before the load.
    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();

    try testing.expectEqual(@as(usize, 0), loaded.count());
}
test "cacheCusipTicker + loadCusipTickerMap: write/read round-trip" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Synthetic CUSIP/ticker pairs only -- never real identifiers.
    const pairs = [_]struct { cusip: []const u8, ticker: []const u8 }{
        .{ .cusip = "111111111", .ticker = "AAA" },
        .{ .cusip = "222222222", .ticker = "BBB" },
    };

    // Write every pair through the service...
    for (pairs) |pair| svc.cacheCusipTicker(pair.cusip, pair.ticker);

    // ...then read the whole mapping back and compare.
    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();

    try testing.expectEqual(@as(usize, 2), loaded.count());
    for (pairs) |pair| {
        try testing.expectEqualStrings(pair.ticker, loaded.get(pair.cusip).?);
    }
}
test "cacheCusipTicker: dedups repeated CUSIP (the historical bug)" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Three writes of the identical pair must end up as a single row.
    var attempt: usize = 0;
    while (attempt < 3) : (attempt += 1) {
        svc.cacheCusipTicker("111111111", "AAA");
    }

    // Logical view: exactly one mapping.
    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try testing.expectEqual(@as(usize, 1), loaded.count());
    try testing.expectEqualStrings("AAA", loaded.get("111111111").?);

    // Physical view: the file itself holds exactly one data row (the
    // directive header does not contain "cusip::"), which shows the
    // writer deduplicates rather than the reader hiding duplicates.
    const path = try std.fs.path.join(gpa, &.{ cache_root, "cusip_tickers.srf" });
    defer gpa.free(path);
    const contents = try std.Io.Dir.cwd().readFileAlloc(io, path, gpa, .limited(64 * 1024));
    defer gpa.free(contents);

    var data_rows: usize = 0;
    var line_iter = std.mem.splitScalar(u8, contents, '\n');
    while (line_iter.next()) |line| {
        if (std.mem.indexOf(u8, line, "cusip::")) |_| data_rows += 1;
    }
    try testing.expectEqual(@as(usize, 1), data_rows);
}
test "loadCusipTickerMap: first occurrence wins on duplicate rows" {
    // A file written by the old buggy appender can contain the same
    // CUSIP more than once. The reader must not crash on such a file and
    // must keep the first mapping it encounters.
    //
    // The duplicate row below deliberately carries a DIFFERENT ticker
    // ("ZZZ"). With two identical rows the assertions would also pass
    // under a last-wins reader, leaving the precedence rule named in
    // this test unchecked.
    const allocator = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
    defer allocator.free(dir_path);

    // Hand-write the cache file with a conflicting duplicate row.
    const path = try std.fs.path.join(allocator, &.{ dir_path, "cusip_tickers.srf" });
    defer allocator.free(path);
    try std.Io.Dir.cwd().writeFile(io, .{
        .sub_path = path,
        .data = "#!srfv1\ncusip::111111111,ticker::AAA\ncusip::111111111,ticker::ZZZ\n",
    });

    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
    defer svc.deinit();
    var map = svc.loadCusipTickerMap(allocator);
    defer map.deinit();

    // One key survives, and it maps to the ticker from the FIRST row.
    try std.testing.expectEqual(@as(usize, 1), map.count());
    try std.testing.expectEqualStrings("AAA", map.get("111111111").?);
}
// ── CUSIP resolution cascade (resolveCusips / appendCusipEntries) ──
test "appendCusipEntries: batches, dedups vs file and within batch" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Start with a single mapping already persisted.
    svc.cacheCusipTicker("111111111", "AAA");

    // The batch mixes three situations:
    //   - 111: already on disk, so it is skipped (its "ZZZ" must not win)
    //   - 222, 333: new, so they are written
    //   - 222 again: repeated inside the batch, so the repeat is skipped
    const incoming = [_]DataService.CusipEntry{
        .{ .cusip = "111111111", .ticker = "ZZZ" },
        .{ .cusip = "222222222", .ticker = "BBB" },
        .{ .cusip = "333333333", .ticker = "CCC" },
        .{ .cusip = "222222222", .ticker = "BBB" },
    };
    svc.appendCusipEntries(&incoming);

    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try testing.expectEqual(@as(u32, 3), loaded.count());
    try testing.expectEqualStrings("AAA", loaded.get("111111111").?); // on-disk value wins
    try testing.expectEqualStrings("BBB", loaded.get("222222222").?);
    try testing.expectEqualStrings("CCC", loaded.get("333333333").?);

    // The file itself must contain exactly three data rows; the
    // directive header does not contain "cusip::" and is not counted.
    const path = try std.fs.path.join(gpa, &.{ cache_root, "cusip_tickers.srf" });
    defer gpa.free(path);
    const contents = try std.Io.Dir.cwd().readFileAlloc(io, path, gpa, .limited(64 * 1024));
    defer gpa.free(contents);

    var data_rows: usize = 0;
    var line_iter = std.mem.splitScalar(u8, contents, '\n');
    while (line_iter.next()) |line| {
        data_rows += @intFromBool(std.mem.indexOf(u8, line, "cusip::") != null);
    }
    try testing.expectEqual(@as(usize, 3), data_rows);
}
test "mergeCusipBody: merges new entries, skips those already in `have` or the batch" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Build `have` from the local cache: 111 -> AAA. Local data is
    // authoritative over whatever the server body says.
    svc.cacheCusipTicker("111111111", "AAA");
    var have = svc.loadCusipTickerMap(gpa);
    defer have.deinit();

    // The merge output lives in an arena that is dropped at test exit.
    var arena = std.heap.ArenaAllocator.init(gpa);
    defer arena.deinit();
    const scratch = arena.allocator();
    var out = std.StringHashMap([]const u8).init(scratch);

    // Simulated server response:
    //   - 111 disagrees with `have` and must be ignored
    //   - 222 and 333 are new
    //   - the second 222 row is a repeat and must be skipped
    const body =
        \\#!srfv1
        \\cusip::111111111,ticker::ZZZ
        \\cusip::222222222,ticker::BBB
        \\cusip::333333333,ticker::CCC
        \\cusip::222222222,ticker::BBB
        \\
    ;
    DataService.mergeCusipBody(scratch, &out, have, body);

    try testing.expectEqual(@as(u32, 2), out.count());
    try testing.expectEqualStrings("BBB", out.get("222222222").?);
    try testing.expectEqualStrings("CCC", out.get("333333333").?);
    try testing.expect(out.get("111111111") == null); // `have` takes precedence
}
test "resolveCusips: warm cache resolves without touching the network" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // No server_url is configured. The flag asserts that L2/L3 are never
    // reached when every requested CUSIP is already a local hit.
    svc.panic_on_network_attempt = true;

    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("222222222", "BBB");

    // The request intentionally repeats one CUSIP and includes an empty
    // one; both must be tolerated.
    const requested = [_][]const u8{ "111111111", "222222222", "111111111", "" };
    var resolved = svc.resolveCusips(gpa, &requested, false);
    defer resolved.deinit();

    try testing.expectEqualStrings("AAA", resolved.get("111111111").?);
    try testing.expectEqualStrings("BBB", resolved.get("222222222").?);
}
test "resolveCusips: skip_network serves L1 only, never hits the network" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // An L1 miss would ordinarily fall through to L2/L3. With
    // skip_network set, no network attempt may happen even for a miss.
    svc.panic_on_network_attempt = true;
    svc.cacheCusipTicker("111111111", "AAA");

    // "999999999" is not in L1. It must simply remain unresolved instead
    // of triggering a server or OpenFIGI lookup.
    const requested = [_][]const u8{ "111111111", "999999999" };
    var resolved = svc.resolveCusips(gpa, &requested, true);
    defer resolved.deinit();

    try testing.expectEqualStrings("AAA", resolved.get("111111111").?);
    try testing.expect(resolved.get("999999999") == null);
}
test "getEtfProfile: carries holding CUSIP through the model boundary" {
    const testing = std.testing;
    const gpa = testing.allocator;
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();
    const cache_root = try tmp.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Cached etf_metrics fixture: one profile row plus one holding that
    // has a CUSIP but no ticker (the common NPORT-P shape). All values
    // are placeholders.
    var etf_records = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = try gpa.dupe(u8, "TESTF"),
            .series_name = try gpa.dupe(u8, "Test Fund"),
            .cik = try gpa.dupe(u8, "0000000002"),
            .as_of = try gpa.dupe(u8, "2026-06-01"),
            .source = try gpa.dupe(u8, "edgar"),
        } },
        .{ .holding = .{
            .symbol = try gpa.dupe(u8, "TESTF"),
            .name = try gpa.dupe(u8, "Placeholder Corp"),
            .cusip = try gpa.dupe(u8, "999999999"),
            .pct_of_portfolio = 12.5,
            .as_of = try gpa.dupe(u8, "2026-06-01"),
            .source = try gpa.dupe(u8, "edgar"),
        } },
    };
    defer for (etf_records) |rec| rec.deinit(gpa);

    // Persist the fixture under the fund's symbol with the etf_metrics TTL.
    var cache_store = svc.store();
    cache_store.write(
        Edgar.EtfMetricRecord,
        "TESTF",
        etf_records[0..],
        cache.DataType.etf_metrics.ttl(),
    );

    // The profile is requested with skip_network; the flag guards
    // against any network attempt while it is served.
    svc.panic_on_network_attempt = true;
    const fetched = try svc.getEtfProfile("TESTF", .{ .skip_network = true });
    defer fetched.deinit();

    const holdings = fetched.data.holdings orelse return error.NoHoldings;
    try testing.expectEqual(@as(usize, 1), holdings.len);

    const only = holdings[0];
    try testing.expectEqualStrings("999999999", only.cusip orelse return error.NoCusip);
    try testing.expect(only.symbol == null); // the filing carried no ticker
}