4366 lines
198 KiB
Zig
4366 lines
198 KiB
Zig
//! DataService -- unified data access layer for zfin.
|
|
//!
|
|
//! Encapsulates the "check cache -> fresh? return -> else fetch from provider -> cache -> return"
|
|
//! pattern that was previously duplicated between CLI and TUI. Both frontends should use this
|
|
//! as their sole data source.
|
|
//!
|
|
//! Provider selection is internal: each data type routes to the appropriate provider
|
|
//! based on available API keys. Callers never need to know which provider was used.
|
|
|
|
const std = @import("std");
|
|
const builtin = @import("builtin");
|
|
const log = std.log.scoped(.service);
|
|
|
|
const Date = @import("Date.zig");
|
|
const Candle = @import("models/candle.zig").Candle;
|
|
const Dividend = @import("models/dividend.zig").Dividend;
|
|
const Split = @import("models/split.zig").Split;
|
|
const OptionsChain = @import("models/option.zig").OptionsChain;
|
|
const EarningsEvent = @import("models/earnings.zig").EarningsEvent;
|
|
const Quote = @import("models/quote.zig").Quote;
|
|
const EtfProfile = @import("models/etf_profile.zig").EtfProfile;
|
|
const Holding = @import("models/etf_profile.zig").Holding;
|
|
const SectorWeight = @import("models/etf_profile.zig").SectorWeight;
|
|
const Config = @import("Config.zig");
|
|
const cache = @import("cache/store.zig");
|
|
const srf = @import("srf");
|
|
const analysis = @import("analytics/analysis.zig");
|
|
const transaction_log = @import("models/transaction_log.zig");
|
|
const TwelveData = @import("providers/twelvedata.zig").TwelveData;
|
|
const Polygon = @import("providers/polygon.zig").Polygon;
|
|
const Fmp = @import("providers/fmp.zig").Fmp;
|
|
const Cboe = @import("providers/cboe.zig").Cboe;
|
|
const OpenFigi = @import("providers/openfigi.zig");
|
|
const Yahoo = @import("providers/yahoo.zig").Yahoo;
|
|
const Tiingo = @import("providers/tiingo.zig").Tiingo;
|
|
const Wikidata = @import("providers/Wikidata.zig");
|
|
const Edgar = @import("providers/Edgar.zig");
|
|
const classification = @import("models/classification.zig");
|
|
const fmt = @import("format.zig");
|
|
const performance = @import("analytics/performance.zig");
|
|
const http = @import("net/http.zig");
|
|
const atomic = @import("atomic.zig");
|
|
|
|
// ── Wall-clock policy ────────────────────────────────────────
|
|
//
|
|
// `FetchResult.timestamp` records when a given fetch or cached-read
|
|
// completed. Each `std.Io.Timestamp.now(self.io, .real)` call in
|
|
// this file stamps one specific fetch — a single command invocation
|
|
// produces many fetches, each with its own real-time stamp. Threading
|
|
// `now_s` in from the caller would collapse all per-fetch timestamps to
|
|
// the command-entry time, which is not what callers want when they
|
|
// display "fetched 3s ago" for some symbols and "cached 2d ago" for
|
|
// others in the same command.
|
|
|
|
pub const DataError = error{
    /// A provider needs a credential (API key or contact email) that
    /// is not present in the configuration.
    NoApiKey,
    /// The data could not be obtained: a provider fetch failed, or
    /// offline mode hit a cache miss.
    FetchFailed,
    CacheError,
    ParseError,
    OutOfMemory,
    /// The provider failed in a transient way (server error,
    /// connection issue). The caller should stop and retry later.
    TransientError,
    /// The provider rejected our credentials (bad API key). The
    /// entire refresh should stop.
    AuthError,
    /// The provider answered with a rate-limit response (e.g. SEC
    /// EDGAR's 10-req/sec ceiling, or a free-tier candle API's
    /// per-minute cap). The caller should stop the current batch and
    /// surface a "try again later" message; an immediate retry will
    /// just hit the same limit.
    RateLimited,
    /// The provider responded but has no data for the requested
    /// symbol (404, "Error Message" body, or equivalent). Kept
    /// distinct from `FetchFailed` so callers (e.g. `enrich`) can
    /// tell the user "this symbol isn't in the provider's catalog;
    /// mark it manually" instead of an opaque "fetch failed."
    NotFound,
};
|
|
|
|
/// Per-call options controlling cache vs network behavior. Drives
|
|
/// the `--refresh-data` global flag's three modes:
|
|
///
|
|
/// - `--refresh-data=auto` → `.{}` (default; respect TTL, fetch on stale/miss).
|
|
/// - `--refresh-data=never` → `.{ .skip_network = true }` (offline mode;
|
|
/// return cached data even if stale, treat cache miss as unavailable).
|
|
/// - `--refresh-data=force` → `.{ .force_refresh = true }` (ignore cache TTL,
|
|
/// fetch fresh from provider).
|
|
///
|
|
/// `skip_network` and `force_refresh` represent contradictory intents.
|
|
/// The CLI flag cannot produce the combination — `RefreshPolicy` is a
|
|
/// 3-variant enum, so the user can never set both. But because the
|
|
/// underlying shape is two independent booleans, an internal caller
|
|
/// constructing `FetchOptions` directly *could* produce the
|
|
/// combination. When both are true, **`skip_network` wins**:
|
|
///
|
|
/// - The call returns cached data (fresh or stale, whatever's there).
|
|
/// - `force_refresh` has no effect — no network is touched.
|
|
///
|
|
/// This is the safe default: when in doubt, don't reach the network.
|
|
/// Internal callers that genuinely want fresh data should set
|
|
/// `force_refresh = true, skip_network = false`.
|
|
pub const FetchOptions = struct {
    /// Offline switch. While set, neither provider fetches nor
    /// server sync are performed: the call yields whatever is cached
    /// (stale entries included), or null/empty when nothing is
    /// cached. Takes precedence over `force_refresh` if both are set.
    skip_network: bool = false,
    /// Ignore the cache TTL and fetch fresh data. Has no effect
    /// while `skip_network` is also set.
    force_refresh: bool = false,
};
|
|
|
|
/// Decide whether a provider failure is permanent enough to merit a
|
|
/// negative-cache entry. Negative entries suppress retries until the
|
|
/// next manual `--refresh-data=force` / `cache clear`, so writing one is only
|
|
/// safe when we're confident more attempts won't succeed.
|
|
///
|
|
/// Today the only certain-permanent failure is `NotFound`: the symbol
|
|
/// just doesn't have data of this type at this provider. Everything
|
|
/// else (rate limit, network blip, server 5xx, auth, parse error) is
|
|
/// either transient or fixable; recording a negative entry would
|
|
/// silently suppress retries for hours/days.
|
|
///
|
|
/// Rate-limit (`error.RateLimited`) is excluded here because callers
|
|
/// handle it specially (single retry after backoff). Anything that
|
|
/// reaches this classifier and isn't `NotFound` returns false →
|
|
/// caller returns `FetchFailed` without poisoning the cache.
|
|
pub fn isPermanentProviderFailure(err: anyerror) bool {
    // `NotFound` is the single failure class treated as permanent;
    // every other error leaves the cache untouched.
    return switch (err) {
        error.NotFound => true,
        else => false,
    };
}
|
|
|
|
/// Result of a CUSIP-to-ticker lookup (provider-agnostic).
|
|
pub const CusipResult = OpenFigi.FigiResult;
|
|
|
|
/// Result of an EDGAR ticker-map fallback lookup. Returned by
|
|
/// `DataService.lookupEdgarFallback` so commands consume a
|
|
/// digested shape instead of pulling in `TickerMap` /
|
|
/// `MutualFundTickerEntry` / `CompanyTickerEntry` (those are
|
|
/// provider-internal).
|
|
///
|
|
/// `enrich` uses this to decide what metadata.srf line to emit
|
|
/// when Wikidata had no match for a symbol.
|
|
pub const EdgarLookup = union(enum) {
    /// The symbol was found in the EDGAR mutual-fund / managed-fund
    /// map. Labelled generically as "Fund": the `tickers_funds.srf`
    /// file mixes mutual funds and series-of-trust ETFs, and telling
    /// them apart would require digging into submissions metadata.
    managed_fund,
    /// The symbol was found in the EDGAR company / UIT map.
    ///
    /// `title` is the entry's `title` (e.g. "SPDR S&P 500 ETF TRUST"),
    /// allocated by the service's allocator; the caller releases it
    /// with `freeEdgarLookup` when done.
    ///
    /// `is_etf` is set when the title contains "ETF" or "TRUST".
    /// Operating companies usually have Wikidata coverage and would
    /// not reach this fallback, so a UIT-style hit is almost
    /// certainly an ETF.
    company_or_uit: struct { title: ?[]const u8, is_etf: bool },
    /// The symbol is in neither EDGAR map.
    none,
};
|
|
|
|
/// Release the heap-owned parts of an `EdgarLookup`.
///
/// The only owned field today is `.company_or_uit.title`, which is
/// freed through `allocator` when present. `.managed_fund` and
/// `.none` carry nothing to free.
pub fn freeEdgarLookup(allocator: std.mem.Allocator, lookup: EdgarLookup) void {
    switch (lookup) {
        .managed_fund, .none => {},
        .company_or_uit => |hit| {
            const owned_title = hit.title orelse return;
            allocator.free(owned_title);
        },
    }
}
|
|
|
|
/// Look up `sym` in the supplied EDGAR ticker maps. Performs no I/O.
///
/// The mutual-fund map is consulted first; a hit there returns
/// `.managed_fund` without looking at the company map.
///
/// Both maps may be null (caller failed to load one or both).
/// A null map produces no match for that pass.
///
/// On `.company_or_uit`, the returned `title` is duped from the
/// underlying entry using `allocator` so the caller can use it
/// after the maps are freed. Free with `freeEdgarLookup`. If the
/// dupe fails, `title` is null (the failure is logged) but `is_etf`
/// is still accurate: it is derived from the map's own copy of the
/// title, not from the duplicate.
fn lookupInTickerMaps(
    allocator: std.mem.Allocator,
    sym: []const u8,
    mf_map: ?*const Edgar.TickerMap(Edgar.MutualFundTickerEntry),
    co_map: ?*const Edgar.TickerMap(Edgar.CompanyTickerEntry),
) EdgarLookup {
    if (mf_map) |m| {
        if (m.get(sym)) |_| return .managed_fund;
    }
    if (co_map) |m| {
        if (m.get(sym)) |entry| {
            // Classify from the borrowed title so the ETF flag does not
            // depend on whether the allocation below succeeds.
            const borrowed_title: []const u8 = entry.title orelse "";
            const is_etf =
                std.ascii.indexOfIgnoreCase(borrowed_title, "ETF") != null or
                std.ascii.indexOfIgnoreCase(borrowed_title, "TRUST") != null;

            // This function has no error channel, so an allocation
            // failure degrades to "no title" rather than propagating.
            const title_owned: ?[]const u8 = if (entry.title) |t|
                allocator.dupe(u8, t) catch blk: {
                    log.warn("{s}: out of memory duplicating EDGAR title", .{sym});
                    break :blk null;
                }
            else
                null;

            return .{ .company_or_uit = .{ .title = title_owned, .is_etf = is_etf } };
        }
    }
    return .none;
}
|
|
|
|
/// Provenance of a result: served from the cache, or freshly fetched.
pub const Source = enum {
    /// The data was read from the cache.
    cached,
    /// The data was freshly fetched.
    fetched,
};
|
|
|
|
/// In-memory payload shape for a fetched type `T`.
|
|
///
|
|
/// Almost everything is a slice of records (`[]Candle`, `[]Dividend`,
|
|
/// …) — the same shape the cache stores. `EtfProfile` is the lone
|
|
/// exception: `getEtfProfile` assembles a single struct from the
|
|
/// `etf_metrics` cache rather than returning a slice, so its payload
|
|
/// is the struct itself. The cache layer never stores `EtfProfile`
|
|
/// directly, which is why this single-struct knowledge lives here in
|
|
/// the fetch layer rather than in `Store.DataFor`.
|
|
fn PayloadFor(comptime T: type) type {
    // `EtfProfile` is carried as the struct itself; every other
    // type is carried as a slice of records.
    if (T == EtfProfile) return EtfProfile;
    return []T;
}
|
|
|
|
/// Generic result type for all fetch operations: data payload + provenance metadata.
|
|
///
|
|
/// `data` is owned by `allocator` — call `result.deinit()` to release
|
|
/// it (both the outer slice/struct and any nested owned fields). This
|
|
/// replaces the earlier "caller frees with whatever allocator they
|
|
/// happen to have" pattern, which was error-prone when the caller's
|
|
/// allocator (e.g. an arena) differed from the service's allocator.
|
|
pub fn FetchResult(comptime T: type) type {
    return struct {
        /// The payload, shaped by `PayloadFor(T)`.
        data: PayloadFor(T),
        /// Whether `data` came from the cache or a fresh fetch.
        source: Source,
        /// Timestamp associated with this result.
        timestamp: i64,
        /// The allocator that owns `data`. The service fills this in
        /// on every return path; callers should release the result
        /// through `deinit` instead of using this field directly.
        allocator: std.mem.Allocator,

        /// Release `data` together with any nested owned fields.
        ///
        /// The strategy is selected at comptime:
        /// - `T` declares `freeSlice` (Dividend, OptionsChain): that
        ///   helper is called; it deinits the elements and frees the
        ///   outer slice.
        /// - otherwise, `data` is a slice (Candle, Split,
        ///   EarningsEvent): a plain slice free.
        /// - otherwise, `T` declares `deinit` (EtfProfile): it is
        ///   called on the struct itself.
        pub fn deinit(self: @This()) void {
            if (@hasDecl(T, "freeSlice")) {
                T.freeSlice(self.allocator, self.data);
            } else switch (@typeInfo(@TypeOf(self.data))) {
                .pointer => self.allocator.free(self.data),
                else => if (@hasDecl(T, "deinit")) self.data.deinit(self.allocator),
            }
        }
    };
}
|
|
|
|
// ── PostProcess callbacks ────────────────────────────────────
|
|
// `Store.read` parses with `parse_allocator = .{ .allocator = ... }`,
|
|
// so SRF dupes every owned string into the caller's allocator
|
|
// automatically. PostProcess callbacks remain only for non-trivial
|
|
// post-parse logic (e.g. recomputing derived fields). String duping
|
|
// is NOT a valid reason to add a postProcess.
|
|
|
|
/// Derive `surprise` and `surprise_percent` from `actual` and `estimate`.
///
/// SRF persists only `actual` and `estimate`, so the surprise fields
/// are recomputed after parsing. Nothing is written unless both
/// inputs are present; `surprise_percent` is additionally left
/// untouched when the estimate is zero (no division by zero).
fn earningsPostProcess(ev: *EarningsEvent, _: std.mem.Allocator) anyerror!void {
    const actual = ev.actual orelse return;
    const estimate = ev.estimate orelse return;

    const surprise = actual - estimate;
    ev.surprise = surprise;

    if (estimate == 0) return;
    ev.surprise_percent = (surprise / @abs(estimate)) * 100.0;
}
|
|
|
|
pub const DataService = struct {
|
|
/// Thread-safe wrapper over the caller-provided base allocator.
|
|
///
|
|
/// Why this exists: `parallelServerSync` spawns worker threads that
|
|
/// each allocate through `DataService` — HTTP client init, TLS cert
|
|
/// bundle parsing, request/response buffers, and `Store.writeRaw`
|
|
/// path joins. The CLI's root allocator is an `ArenaAllocator`
|
|
/// (`src/main.zig`), which is NOT thread-safe. Unsynchronized
|
|
/// concurrent allocs from workers corrupt the arena's free list.
|
|
/// Symptoms seen in the wild:
|
|
///
|
|
/// thread N panic: reached unreachable code
|
|
/// std/mem/Allocator.zig:147 grow
|
|
/// std/hash_map.zig:1296 addCertsFromFile
|
|
/// std/crypto/Certificate/Bundle.zig:206 request
|
|
/// std/http/Client.zig:1789 request
|
|
/// src/net/http.zig:43 syncFromServer
|
|
///
|
|
/// and bare segfaults mid-heap on whatever pointer the arena
|
|
/// scrambled that run.
|
|
///
|
|
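/// The wrapper serializes every allocation with a mutex. Cost is
/// one lock acquire/release per alloc — negligible next to the I/O.
///
/// NOTE(review): the paragraphs above appear to describe an earlier
/// mutex-wrapper design; per the text below, the service no longer
/// wraps the allocator itself. Confirm and prune the stale text.
///
/// Thread-safe allocator used for all DataService-internal allocations.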
|
|
///
|
|
/// In Zig 0.16, the Juicy-Main-provided `init.gpa` (DebugAllocator)
|
|
/// is thread-safe by default when not single-threaded, and
|
|
/// `ArenaAllocator` is thread-safe and lock-free. Callers should
|
|
/// pass whichever thread-safe allocator is appropriate — we no
|
|
/// longer wrap it ourselves.
|
|
///
|
|
/// DO NOT add an "unwrap" method or pass a non-thread-safe
|
|
/// allocator. The point is that internal callers don't need to
|
|
/// know whether they're running under threads — the allocator
|
|
/// itself guarantees safety.
|
|
allocator: std.mem.Allocator,
|
|
io: std.Io,
|
|
config: Config,
|
|
|
|
// Lazily initialized providers (null until first use)
|
|
td: ?TwelveData = null,
|
|
pg: ?Polygon = null,
|
|
fmp: ?Fmp = null,
|
|
cboe: ?Cboe = null,
|
|
yh: ?Yahoo = null,
|
|
tg: ?Tiingo = null,
|
|
wikidata: ?Wikidata = null,
|
|
edgar: ?Edgar = null,
|
|
|
|
/// Test-only guard: when true, any code path that would touch
|
|
/// the network panics with a clear message. Used by offline-mode
|
|
/// tests to verify that `FetchOptions.skip_network = true`
|
|
/// genuinely doesn't reach the network. Default false; never
|
|
/// set in production.
|
|
panic_on_network_attempt: bool = false,
|
|
|
|
pub fn init(io: std.Io, allocator: std.mem.Allocator, config: Config) DataService {
    const service: DataService = .{
        .allocator = allocator,
        .io = io,
        .config = config,
    };
    // Under `zig build test`, every test that constructs a
    // DataService would repeat the whole missing-key block, so the
    // warnings are suppressed there. Real users always get them at
    // CLI/TUI startup.
    if (!builtin.is_test) {
        service.logMissingKeys();
    }
    return service;
}
|
|
|
|
/// Emit one log line per missing credential so users can see which
/// features are unavailable. Everything is a warning except the
/// OpenFIGI key, which is informational.
fn logMissingKeys(self: DataService) void {
    const cfg = &self.config;
    const tiingo_missing = cfg.tiingo_key == null;
    const twelvedata_missing = cfg.twelvedata_key == null;

    // Primary candle provider.
    if (tiingo_missing) {
        log.warn("TIINGO_API_KEY not set — candle data will fall back to TwelveData/Yahoo", .{});
    }

    // Dividend/split data.
    if (cfg.polygon_key == null) {
        log.warn("POLYGON_API_KEY not set — dividend and split data unavailable", .{});
    }

    // Earnings data.
    if (cfg.fmp_key == null) {
        log.warn("FMP_API_KEY not set — earnings data unavailable", .{});
    }

    // ETF profiles and portfolio enrichment are served by public
    // SEC EDGAR + Wikidata, both of which require a contact email in
    // outbound User-Agents (SEC's policy).
    if (cfg.user_email == null) {
        log.warn("ZFIN_USER_EMAIL not set — ETF profiles + enrichment unavailable", .{});
    }

    // Candle fallback: only reported when Tiingo is missing as well.
    if (twelvedata_missing and tiingo_missing) {
        log.warn("TWELVEDATA_API_KEY not set — no candle fallback if Yahoo fails", .{});
    }

    // CUSIP lookups.
    if (cfg.openfigi_key == null) {
        log.info("OPENFIGI_API_KEY not set — CUSIP lookups will use anonymous rate limits", .{});
    }
}
|
|
|
|
/// Tear down every provider that was lazily initialized. Provider
/// slots that are still null are skipped.
pub fn deinit(self: *DataService) void {
    // Same order as the field declarations.
    inline for (.{ "td", "pg", "fmp", "cboe", "yh", "tg", "wikidata", "edgar" }) |slot_name| {
        if (@field(self, slot_name)) |*provider| provider.deinit();
    }
}
|
|
|
|
// ── Provider accessor ──────────────────────────────────────────
|
|
|
|
/// Return a pointer to the provider of type `T`, initializing it on
/// first use and storing it in its `DataService` slot.
///
/// Returns `DataError.NoApiKey` when the credential the provider
/// needs (API key, or contact email for Wikidata/Edgar) is missing
/// from the config.
fn getProvider(self: *DataService, comptime T: type) DataError!*T {
    const slot = comptime providerField(T);

    if (@field(self, slot) == null) {
        if (T == Cboe or T == Yahoo) {
            // CBOE and Yahoo have no API key.
            @field(self, slot) = T.init(self.io, self.allocator);
        } else if (T == Wikidata or T == Edgar) {
            // Open-data providers are identified by a contact email
            // rather than an API key. The email goes in User-Agent +
            // From headers per each provider's politeness contract.
            const contact = self.config.user_email orelse return DataError.NoApiKey;
            @field(self, slot) = T.init(self.io, self.allocator, contact);
        } else {
            // Derive the config field name from the type name:
            // strip any namespace prefix, lower-case the rest, and
            // append "_key" (so Tiingo -> tiingo_key).
            const key_field = comptime key: {
                const qualified = @typeName(T);
                const base_start = if (std.mem.lastIndexOfScalar(u8, qualified, '.')) |dot| dot + 1 else 0;
                const base = qualified[base_start..];
                var name: [base.len + 4]u8 = undefined;
                _ = std.ascii.lowerString(name[0..base.len], base);
                @memcpy(name[base.len..][0..4], "_key");
                break :key name[0 .. base.len + 4];
            };
            const api_key = @field(self.config, key_field) orelse return DataError.NoApiKey;
            @field(self, slot) = T.init(self.io, self.allocator, api_key);
        }
    }

    return &@field(self, slot).?;
}
|
|
|
|
/// Resolve, at comptime, the name of the `DataService` field whose
/// type is `?T`. Fails compilation if no field has that type.
fn providerField(comptime T: type) []const u8 {
    const Slot = ?T;
    inline for (std.meta.fields(DataService)) |field| {
        if (field.type == Slot) return field.name;
    }
    @compileError("unknown provider type");
}
|
|
|
|
// ── Cache helper ─────────────────────────────────────────────
|
|
|
|
/// Build a `cache.Store` over the configured cache directory, using
/// the service's own `io` and allocator.
fn store(self: *DataService) cache.Store {
    const cache_dir = self.config.cache_dir;
    return cache.Store.init(self.io, self.allocator, cache_dir);
}
|
|
|
|
/// Generic fetch-or-cache for simple data types (dividends, splits, options).
///
/// Order of operations:
/// 1. Return a fresh cache entry if one exists (skipped on `force_refresh`).
/// 2. In offline mode (`skip_network`), return any cached entry, even
///    a stale one; a cache miss is `FetchFailed`.
/// 3. Try a server sync (skipped on `force_refresh`); if that leaves
///    a fresh cache entry, return it.
/// 4. Fetch from the provider, write the cache, and return.
///
/// A rate-limited provider fetch is retried once after a backoff.
/// Whichever error ends the attempt (the first one, or the retry's)
/// is logged and classified: permanent failures write a negative
/// cache entry, everything else leaves the cache untouched. All
/// provider failures surface to the caller as `FetchFailed`.
///
/// `opts.skip_network = true` → returns cached data even if stale,
/// returns FetchFailed on cache miss without touching the network.
/// `opts.force_refresh = true` → treats cache as stale and fetches.
fn fetchCached(
    self: *DataService,
    comptime T: type,
    symbol: []const u8,
    comptime postProcess: ?*const fn (*T, std.mem.Allocator) anyerror!void,
    opts: FetchOptions,
) DataError!FetchResult(T) {
    var s = self.store();
    const data_type = comptime cache.Store.dataTypeFor(T);

    // Force-refresh skips the fresh-cache early return and falls
    // through to the provider fetch. Skip-network does the opposite:
    // it returns cached data even if stale and never touches the
    // network.
    if (!opts.force_refresh) {
        if (s.read(self.allocator, T, symbol, postProcess, .fresh_only)) |cached| {
            log.debug("{s}: {s} fresh in local cache", .{ symbol, @tagName(data_type) });
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        // Offline mode: return whatever's cached, even if stale.
        // Cache miss is FetchFailed (not a network error).
        if (s.read(self.allocator, T, symbol, postProcess, .any)) |cached| {
            log.info("{s}: {s} stale-cached returned (skip_network)", .{ symbol, @tagName(data_type) });
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    // Try server sync before hitting providers (skipped on force_refresh).
    if (!opts.force_refresh and self.syncFromServer(symbol, data_type)) {
        if (s.read(self.allocator, T, symbol, postProcess, .fresh_only)) |cached| {
            log.debug("{s}: {s} synced from server and fresh", .{ symbol, @tagName(data_type) });
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        log.debug("{s}: {s} synced from server but stale, falling through to provider", .{ symbol, @tagName(data_type) });
    }

    log.debug("{s}: fetching {s} from provider", .{ symbol, @tagName(data_type) });
    self.assertNetworkAllowed("fetchCached fetchFromProvider");
    const fetched = self.fetchFromProvider(T, symbol) catch |first_err| recovered: {
        var final_err: anyerror = first_err;
        if (first_err == error.RateLimited) {
            // Wait and retry once. If the retry fails, its error (not
            // the original rate limit) is the one classified below.
            self.rateLimitBackoff();
            if (self.fetchFromProvider(T, symbol)) |retried| {
                break :recovered retried;
            } else |retry_err| {
                final_err = retry_err;
            }
        }

        log.warn("{s}: {s} provider fetch failed: {s}", .{ symbol, @tagName(data_type), @errorName(final_err) });

        // Only NotFound (provider says "this symbol genuinely has
        // no data of this type") gets a negative-cache entry.
        // Transient failures (network, 5xx, auth misconfig, parse
        // error) propagate as FetchFailed without poisoning the
        // cache, so the next call retries naturally.
        if (isPermanentProviderFailure(final_err)) {
            s.writeNegative(symbol, data_type);
        }
        return DataError.FetchFailed;
    };

    s.writeWithSource(T, symbol, fetched, data_type.ttl(), sourceHintFor(T));
    return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
|
|
|
|
/// Name the provider behind a model type fetched via `fetchCached`,
/// so the merge primitive's `info(cache)` log lines can attribute
/// new entries / field upgrades to a named source.
///
/// Yields "polygon" for `Dividend` and `Split`, and null for every
/// other type, where the source name isn't useful (the merge
/// primitive only consults this for Dividend and Split).
fn sourceHintFor(comptime T: type) ?[]const u8 {
    if (T == Dividend or T == Split) return "polygon";
    return null;
}
|
|
|
|
/// Route a fetch for model type `T` to the provider that serves it:
/// Polygon for `Dividend` and `Split`, CBOE for `OptionsChain`. Any
/// other `T` is a compile error.
fn fetchFromProvider(self: *DataService, comptime T: type, symbol: []const u8) !cache.Store.DataFor(T) {
    if (T == Dividend) {
        // Polygon is the primary source: it carries forward-looking
        // declared dividends (e.g. ARCC's 2026-06-15 ex_date), which
        // Tiingo's price-series response does not. Tiingo
        // opportunistically supplements the cache via
        // `populateAllFromTiingo` when candle fetches happen — that
        // path uses the sorted-union write semantics in
        // `cache.Store.writeMerged`, so Polygon's entries and
        // Tiingo's entries coexist in `dividends.srf` without
        // overwriting each other.
        const polygon = try self.getProvider(Polygon);
        return polygon.fetchDividends(self.allocator, symbol, null, null);
    } else if (T == Split) {
        // Same rationale as Dividend above. Polygon also carries
        // forward-looking split announcements that Tiingo's
        // price-series doesn't surface.
        const polygon = try self.getProvider(Polygon);
        return polygon.fetchSplits(self.allocator, symbol);
    } else if (T == OptionsChain) {
        const options_provider = try self.getProvider(Cboe);
        return options_provider.fetchOptionsChain(self.allocator, symbol);
    } else {
        @compileError("unsupported type for fetchFromProvider");
    }
}
|
|
|
|
/// Fetch candles, dividends, and splits from Tiingo in a single
|
|
/// HTTP call and write all three caches. Returns the triple so
|
|
/// the caller can use the data without re-reading from disk.
|
|
///
|
|
/// This is the orchestrated "cold cache" path. `getCandles`
|
|
/// (cold-cache full fetch) calls this so a single Tiingo HTTP
|
|
/// request populates `candles_daily.srf`, `candles_meta.srf`,
|
|
/// `dividends.srf`, and `splits.srf` together. Tiingo's
|
|
/// per-row `divCash` and `splitFactor` make this almost free.
|
|
///
|
|
/// For dividends and splits the writes go through
|
|
/// `writeWithSource` with `"tiingo"` as the source hint. The
|
|
/// underlying `writeMerged` primitive merges Tiingo's view
|
|
/// into whatever's already on disk (typically Polygon-sourced
|
|
/// records), preserving forward-looking entries Polygon
|
|
/// uniquely carries. New entries trigger an `info(cache)` log
|
|
/// line attributing the discovery to Tiingo — useful when
|
|
/// Tiingo surfaces a corporate action Polygon missed (the
|
|
/// canonical case is SPYM's 2017-10-16 4:1 split).
|
|
///
|
|
/// `from` is fixed at 2000-01-01 to cover any 10Y trailing-return
|
|
/// window even when `--as-of` back-dates the reference to the
|
|
/// earliest imported portfolio data (currently 2014). The extra
|
|
/// few years of pre-2004 candles cost ~150 KB per symbol on disk
|
|
/// and a one-time bandwidth bump on cold-cache fetch, both
|
|
/// trivial. Also gives a comfortable buffer for older corporate
|
|
/// actions (e.g. SPYM's 2017-10-16 split, deep-history reverse
|
|
/// splits on legacy tickers).
|
|
fn populateAllFromTiingo(self: *DataService, symbol: []const u8) !@import("providers/tiingo.zig").CandleAndCorporateActions {
    const tiingo = try self.getProvider(Tiingo);
    const range_end = fmt.todayDate(self.io);
    const range_start = Date.fromYmd(2000, 1, 1);
    const bundle = try tiingo.fetchCandlesAndCorporateActions(self.allocator, symbol, range_start, range_end);

    var cache_store = self.store();

    // Candles + meta: `cacheCandles` writes candles_daily.srf and
    // candles_meta.srf together (last_close, last_date, provider,
    // fail_count=0). An empty candle list is not cached.
    if (bundle.candles.len > 0) {
        cache_store.cacheCandles(symbol, bundle.candles, .tiingo, 0);
    }

    // Dividends and splits go through the merge write path, so
    // Tiingo's view supplements rather than replaces existing
    // (typically Polygon-sourced) records. New entries are logged
    // with "tiingo" attribution.
    const dividend_ttl = cache.DataType.dividends.ttl();
    cache_store.writeWithSource(Dividend, symbol, bundle.dividends, dividend_ttl, "tiingo");
    const split_ttl = cache.DataType.splits.ttl();
    cache_store.writeWithSource(Split, symbol, bundle.splits, split_ttl, "tiingo");

    return bundle;
}
|
|
|
|
/// Drop the cached `data_type` entry for `symbol`, so the next get*
/// call forces a fresh fetch. Invalidating `.candles_daily` also
/// clears the companion `.candles_meta` entry.
pub fn invalidate(self: *DataService, symbol: []const u8, data_type: cache.DataType) void {
    var cache_store = self.store();
    cache_store.clearData(symbol, data_type);
    switch (data_type) {
        // Candle metadata must not outlive the candles it describes.
        .candles_daily => cache_store.clearData(symbol, .candles_meta),
        else => {},
    }
}
|
|
|
|
// ── Public data methods ──────────────────────────────────────
|
|
|
|
/// Fetch candles from providers with error classification.
///
/// Provider order: Yahoo first when `preferred == .yahoo`, then
/// Tiingo (primary), then Yahoo as a fallback (only if it was not
/// already tried as the preferred provider).
///
/// Error handling for Tiingo failures:
/// - Unauthorized → AuthError (config problem, stop refresh).
/// - RateLimited → back off and retry. A retry that is rate limited
///   again gets one more backoff + retry. If no retry succeeds →
///   TransientError.
/// - ServerError/RequestFailed → TransientError (stop refresh, retry
///   later; no Yahoo fallback).
/// - Anything else (NotFound/ParseError/InvalidResponse) → try Yahoo
///   (symbol-level issue).
///
/// Returns FetchFailed when every provider that was tried failed
/// without hitting one of the cases above.
///
/// The `preferred` param controls incremental fetch consistency: use the same
/// provider that sourced the existing cache data.
fn fetchCandlesFromProviders(
    self: *DataService,
    symbol: []const u8,
    from: Date,
    to: Date,
    preferred: cache.Store.CandleProvider,
) (DataError || error{NotFound})!struct { candles: []Candle, provider: cache.Store.CandleProvider } {
    // If preferred is Yahoo (degraded symbol), try Yahoo first
    if (preferred == .yahoo) {
        if (self.getProvider(Yahoo)) |yh| {
            if (yh.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                log.debug("{s}: candles from Yahoo (preferred)", .{symbol});
                return .{ .candles = candles, .provider = .yahoo };
            } else |err| {
                log.warn("{s}: Yahoo (preferred) failed: {s}", .{ symbol, @errorName(err) });
            }
        } else |err| {
            // Not fatal: fall through to Tiingo, but leave a trace.
            log.warn("{s}: Yahoo (preferred) provider not available: {s}", .{ symbol, @errorName(err) });
        }
    }

    // Primary: Tiingo
    if (self.getProvider(Tiingo)) |tg| {
        if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
            log.debug("{s}: candles from Tiingo", .{symbol});
            return .{ .candles = candles, .provider = .tiingo };
        } else |err| {
            log.warn("{s}: Tiingo failed: {s}", .{ symbol, @errorName(err) });

            if (err == error.Unauthorized) {
                log.err("{s}: Tiingo auth failed — check TIINGO_API_KEY", .{symbol});
                return DataError.AuthError;
            }

            if (err == error.RateLimited) {
                // Rate limited: back off and retry — this is expected, not a failure
                log.info("{s}: Tiingo rate limited, backing off", .{symbol});
                self.rateLimitBackoff();
                if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                    log.debug("{s}: candles from Tiingo (after rate limit backoff)", .{symbol});
                    return .{ .candles = candles, .provider = .tiingo };
                } else |retry_err| {
                    log.warn("{s}: Tiingo retry after backoff failed: {s}", .{ symbol, @errorName(retry_err) });
                    if (retry_err == error.RateLimited) {
                        // Still rate limited after backoff — one more try
                        self.rateLimitBackoff();
                        if (tg.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                            log.debug("{s}: candles from Tiingo (after second backoff)", .{symbol});
                            return .{ .candles = candles, .provider = .tiingo };
                        } else |final_err| {
                            log.warn("{s}: Tiingo retry after second backoff failed: {s}", .{ symbol, @errorName(final_err) });
                        }
                    }
                    // Exhausted rate limit retries — treat as transient
                    return DataError.TransientError;
                }
            }

            if (isTransientError(err)) {
                // Server error or connection failure — stop, don't fall back
                return DataError.TransientError;
            }

            // NotFound, ParseError, InvalidResponse — symbol-level issue, try Yahoo
            log.info("{s}: Tiingo does not have this symbol, trying Yahoo", .{symbol});
        }
    } else |_| {
        log.warn("{s}: Tiingo provider not available (no API key?)", .{symbol});
    }

    // Fallback: Yahoo (symbol not on Tiingo). Skipped when Yahoo was
    // already tried above as the preferred provider.
    if (preferred != .yahoo) {
        if (self.getProvider(Yahoo)) |yh| {
            if (yh.fetchCandles(self.allocator, symbol, from, to)) |candles| {
                log.info("{s}: candles from Yahoo (Tiingo fallback)", .{symbol});
                return .{ .candles = candles, .provider = .yahoo };
            } else |err| {
                log.warn("{s}: Yahoo fallback also failed: {s}", .{ symbol, @errorName(err) });
            }
        } else |_| {
            log.warn("{s}: Yahoo provider not available", .{symbol});
        }
    }

    return DataError.FetchFailed;
}
|
|
|
|
/// Report whether a provider error means the provider itself is
/// down (transient) rather than the symbol being at fault.
///
/// `ServerError` is an HTTP 5xx; `RequestFailed` is a
/// connection/network failure. `RateLimited` and `Unauthorized` are
/// deliberately not covered here; they are handled separately.
fn isTransientError(err: anyerror) bool {
    return switch (err) {
        error.ServerError, error.RequestFailed => true,
        else => false,
    };
}
|
|
|
|
/// Single gate that network-bound code paths call right before they
/// would reach out to a provider or server.
///
/// When `self.panic_on_network_attempt` is set, the call panics and
/// includes `context` in the message; tests set that flag to prove that
/// offline-mode paths never get this far. When the flag is clear the
/// call does nothing.
///
/// Kept `inline`; the stated rationale is that the panic body is then
/// only generated where the field is actually checked.
inline fn assertNetworkAllowed(self: *DataService, context: []const u8) void {
    if (!self.panic_on_network_attempt) return;
    std.debug.panic("network attempted in offline-mode test: {s}", .{context});
}
|
|
|
|
/// Return daily candles for `symbol` (deep history, intended to cover
/// 10+ year trailing returns), using the local cache whenever possible.
///
/// When candle metadata exists the decision order is:
///   * `opts.skip_network`: return whatever is cached (even if stale);
///     TwelveData-sourced caches and unreadable caches yield `FetchFailed`.
///   * cache written by TwelveData: ignored, falls through to a full fetch.
///   * cache fresh and no `opts.force_refresh`: return it.
///   * otherwise (stale or forced): try a server sync (not when forced),
///     then either refresh the metadata TTL (last cached date is today
///     or later) or fetch only the candles after the last cached date
///     and append them.
///
/// Without usable metadata: `skip_network` gives `FetchFailed`; otherwise
/// a server sync is attempted (not when forced) and finally a full fetch
/// through `populateAllFromTiingo`, whose dividends and splits slices
/// are freed here because only the candles are returned.
///
/// Errors: `DataError.TransientError` for provider-down / rate-limit
/// situations (the per-symbol `fail_count` is bumped), and
/// `DataError.FetchFailed` otherwise. A non-transient full-fetch failure
/// also writes a negative cache entry.
pub fn getCandles(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Candle) {
    var s = self.store();
    const today = fmt.todayDate(self.io);

    // Freshness is decided from the small metadata record; candles are
    // only deserialized once we know we are going to return them.
    const meta_result = s.readCandleMeta(symbol);
    if (meta_result) |entry| {
        const cmeta = entry.meta;

        if (opts.skip_network) {
            // Offline: serve the cache as-is. A TwelveData-only cache
            // counts as "nothing cached".
            if (cmeta.provider == .twelvedata) {
                log.debug("{s}: skip_network and only TwelveData cached — treating as unavailable", .{symbol});
                return DataError.FetchFailed;
            }
            const hit = s.read(self.allocator, Candle, symbol, null, .any) orelse return DataError.FetchFailed;
            if (!s.isCandleMetaFresh(symbol)) {
                log.info("{s}: candles stale-cached returned (skip_network)", .{symbol});
            }
            return .{ .data = hit.data, .source = .cached, .timestamp = entry.created, .allocator = self.allocator };
        }

        if (cmeta.provider == .twelvedata) {
            // TwelveData candles are deprecated (unreliable adj_close):
            // skip the cache entirely and do a full re-fetch below.
            log.debug("{s}: cached candles from TwelveData — forcing full re-fetch", .{symbol});
        } else if (!opts.force_refresh and s.isCandleMetaFresh(symbol)) {
            log.debug("{s}: candles fresh in local cache", .{symbol});
            if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                return .{ .data = hit.data, .source = .cached, .timestamp = entry.created, .allocator = self.allocator };
            }
        } else {
            // Stale (or forced). A forced refresh bypasses the server
            // sync because the caller asked for provider data.
            if (!opts.force_refresh and self.syncCandlesFromServer(symbol)) {
                if (s.isCandleMetaFresh(symbol)) {
                    log.debug("{s}: candles synced from server and fresh", .{symbol});
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                        return .{ .data = hit.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                    }
                }
                log.debug("{s}: candles synced from server but stale, falling through to incremental fetch", .{symbol});
            }

            // NOTE(review): `cmeta` was read before the server sync above;
            // confirm that resuming from its `last_date` is still intended
            // when the sync replaced the cached file.
            const resume_from = cmeta.last_date.addDays(1);

            if (resume_from.lessThan(today)) {
                // There is at least one calendar day to catch up on.
                self.assertNetworkAllowed("getCandles incremental fetchCandlesFromProviders");
                const fetched = self.fetchCandlesFromProviders(symbol, resume_from, today, cmeta.provider) catch |err| {
                    if (err != DataError.TransientError) {
                        // Non-transient: prefer stale data over an error.
                        if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                            return .{ .data = hit.data, .source = .cached, .timestamp = entry.created, .allocator = self.allocator };
                        }
                        return DataError.FetchFailed;
                    }

                    // Transient: record the failure (saturating add).
                    const new_fail_count = cmeta.fail_count +| 1;
                    log.warn("{s}: transient failure (fail_count now {d})", .{ symbol, new_fail_count });
                    s.updateCandleMeta(symbol, cmeta.last_close, cmeta.last_date, cmeta.provider, new_fail_count);

                    // After three consecutive failures serve stale data
                    // instead of failing.
                    if (new_fail_count >= 3) {
                        log.warn("{s}: degraded after {d} consecutive failures, returning stale data", .{ symbol, new_fail_count });
                        if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                            return .{ .data = hit.data, .source = .cached, .timestamp = entry.created, .allocator = self.allocator };
                        }
                    }
                    return DataError.TransientError;
                };
                const delta = fetched.candles;

                if (delta.len > 0) {
                    // Append to the cached file, update metadata and
                    // reset fail_count; then return the merged history.
                    s.appendCandles(symbol, delta, fetched.provider, 0);
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                        self.allocator.free(delta);
                        return .{ .data = hit.data, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                    }
                    // Re-read failed: hand back just the new candles.
                    return .{ .data = delta, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                } else {
                    // Nothing new (weekend/holiday): refresh the TTL and
                    // reset fail_count.
                    self.allocator.free(delta);
                    s.updateCandleMeta(symbol, cmeta.last_close, cmeta.last_date, fetched.provider, 0);
                    if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                        return .{ .data = hit.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                    }
                }
            } else {
                // Last cached candle is today or later: only the
                // metadata TTL needs refreshing.
                s.updateCandleMeta(symbol, cmeta.last_close, cmeta.last_date, cmeta.provider, cmeta.fail_count);
                if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                    return .{ .data = hit.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
                }
            }
        }
    }

    // From here on there is no usable cache.
    if (opts.skip_network) {
        log.debug("{s}: skip_network and no cached candles — unavailable", .{symbol});
        return DataError.FetchFailed;
    }

    // Server sync comes before the provider, unless a refresh was forced.
    if (!opts.force_refresh and self.syncCandlesFromServer(symbol)) {
        if (s.isCandleMetaFresh(symbol)) {
            log.debug("{s}: candles synced from server and fresh (no prior cache)", .{symbol});
            if (s.read(self.allocator, Candle, symbol, null, .any)) |hit| {
                return .{ .data = hit.data, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
            }
        }
        log.debug("{s}: candles synced from server but stale, falling through to full fetch", .{symbol});
    }

    // Full fetch through the orchestrated Tiingo helper, which writes
    // the candle, dividend and split caches from one HTTP response. Per
    // the helper's documentation its fixed start date is 2000-01-01,
    // deep enough for a 10Y trailing window even when `--as-of`
    // back-dates into 2014-era imported history, with room for older
    // corporate actions such as SPYM's 2017-10-16 split.
    log.debug("{s}: fetching full candle history from provider", .{symbol});
    self.assertNetworkAllowed("getCandles full populateAllFromTiingo");

    const bundle = self.populateAllFromTiingo(symbol) catch |err| {
        const transient = err == error.RateLimited or isTransientError(err);
        if (!transient) {
            // NotFound, ParseError, InvalidResponse, AuthError: the
            // symbol has no candle data on Tiingo (the only provider for
            // historical candles since the 2026-05 audit).
            // Negative-cache so it is not retried over and over.
            s.writeNegative(symbol, .candles_daily);
            return DataError.FetchFailed;
        }
        // Transient: bump fail_count on the existing metadata (if any)
        // so repeated failures can be detected.
        if (meta_result) |entry| {
            s.updateCandleMeta(symbol, entry.meta.last_close, entry.meta.last_date, entry.meta.provider, entry.meta.fail_count +| 1);
        }
        return DataError.TransientError;
    };
    // The helper already wrote all three caches; release the two slices
    // that are not part of the result.
    defer Dividend.freeSlice(self.allocator, bundle.dividends);
    defer self.allocator.free(bundle.splits);

    return .{ .data = bundle.candles, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
|
|
|
|
/// Return the dividend history for `symbol`.
///
/// Thin wrapper: delegates to `fetchCached` for the `Dividend` type,
/// passing `null` as its third argument and forwarding `opts` unchanged.
pub fn getDividends(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Dividend) {
    const dividends = try self.fetchCached(Dividend, symbol, null, opts);
    return dividends;
}
|
|
|
|
/// Return the split history for `symbol`.
///
/// Thin wrapper: delegates to `fetchCached` for the `Split` type,
/// passing `null` as its third argument and forwarding `opts` unchanged.
pub fn getSplits(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Split) {
    const splits = try self.fetchCached(Split, symbol, null, opts);
    return splits;
}
|
|
|
|
/// Return the options chain for `symbol` (all expirations; no API key
/// is needed for this data type).
///
/// Thin wrapper: delegates to `fetchCached` for the `OptionsChain` type,
/// passing `null` as its third argument and forwarding `opts` unchanged.
pub fn getOptions(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(OptionsChain) {
    const chains = try self.fetchCached(OptionsChain, symbol, null, opts);
    return chains;
}
|
|
|
|
/// Return the earnings history for `symbol`, cache first, FMP second.
///
/// Mutual funds (as decided by `isMutualFund`) have no quarterly
/// earnings and get an empty result immediately.
///
/// Smart refresh: a fresh cache entry is still re-fetched when it holds
/// an event dated today or earlier that has no `actual` value yet,
/// because the results have probably just been published.
///
/// `opts.skip_network = true`: never re-fetches; returns cached data even
/// if stale and `FetchFailed` on a cache miss.
/// `opts.force_refresh = true`: skips the cache read and the server sync
/// and goes straight to the provider.
///
/// On a rate-limit error the provider call is retried once after
/// `rateLimitBackoff`. Failures classified by `isPermanentProviderFailure`
/// are negative-cached. All provider failures surface as `FetchFailed`.
pub fn getEarnings(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EarningsEvent) {
    // Mutual funds (5-letter tickers ending in X) report no quarterly earnings.
    if (isMutualFund(symbol)) {
        return .{ .data = &.{}, .source = .cached, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    var s = self.store();
    const today = fmt.todayDate(self.io);

    if (!opts.force_refresh) {
        if (s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .fresh_only)) |cached| {
            // Does any event that is already due still lack actual
            // results? Offline mode never triggers a refresh.
            const awaiting_results = scan: {
                if (opts.skip_network) break :scan false;
                for (cached.data) |ev| {
                    const already_due = !today.lessThan(ev.date);
                    if (already_due and ev.actual == null) break :scan true;
                }
                break :scan false;
            };

            if (!awaiting_results) {
                log.debug("{s}: earnings fresh in local cache", .{symbol});
                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
            }
            // Treat as stale: drop the cached events and re-fetch below.
            self.allocator.free(cached.data);
        }
    }

    if (opts.skip_network) {
        // Offline: any cached entry, even a stale one, beats failing.
        const stale = s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .any) orelse return DataError.FetchFailed;
        log.info("{s}: earnings stale-cached returned (skip_network)", .{symbol});
        return .{ .data = stale.data, .source = .cached, .timestamp = stale.timestamp, .allocator = self.allocator };
    }

    // Server sync precedes the provider call, unless a refresh was forced.
    if (!opts.force_refresh and self.syncFromServer(symbol, .earnings)) {
        if (s.read(self.allocator, EarningsEvent, symbol, earningsPostProcess, .fresh_only)) |synced| {
            log.debug("{s}: earnings synced from server and fresh", .{symbol});
            return .{ .data = synced.data, .source = .cached, .timestamp = synced.timestamp, .allocator = self.allocator };
        }
        log.debug("{s}: earnings synced from server but stale, falling through to provider", .{symbol});
    }

    log.debug("{s}: fetching earnings from provider", .{symbol});
    self.assertNetworkAllowed("getEarnings fmp.fetchEarnings");
    var fmp = try self.getProvider(Fmp);

    const fetched = fmp.fetchEarnings(self.allocator, symbol) catch |first_err| retry: {
        if (first_err != error.RateLimited) {
            if (isPermanentProviderFailure(first_err)) {
                s.writeNegative(symbol, .earnings);
            }
            return DataError.FetchFailed;
        }
        // Rate limited: back off once and try again; a second failure
        // of any kind gives up.
        self.rateLimitBackoff();
        break :retry fmp.fetchEarnings(self.allocator, symbol) catch return DataError.FetchFailed;
    };

    s.write(EarningsEvent, symbol, fetched, .{ .seconds = cache.Ttl.earnings });

    return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
|
|
|
|
/// Fetch the ETF profile for `symbol`.
///
/// Assembles a unified `EtfProfile` view from the EDGAR `etf_metrics`
/// cache (profile + sectors + holdings) plus the Wikidata
/// `classification` cache (inception_date, fund-name fallback). Both
/// underlying caches are managed by `getEtfMetrics` /
/// `getClassification`; this function keeps no cache of its own.
///
/// Several legacy fields that AlphaVantage used to populate
/// (`expense_ratio`, `dividend_yield`, `portfolio_turnover`,
/// `leveraged`) remain on `EtfProfile` but are not set here — EDGAR
/// NPORT-P doesn't carry them. They'll fill in once a prospectus
/// parser lands.
///
/// `opts` is forwarded unchanged to `getEtfMetrics` and
/// `getClassification`. Errors from `getEtfMetrics` propagate (so a
/// symbol EDGAR does not know surfaces that error); a classification
/// failure is tolerated and only logged.
///
/// Ownership: every string and slice in the returned profile is
/// allocated with `self.allocator`. On any error, everything allocated
/// so far is released.
pub fn getEtfProfile(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(EtfProfile) {
    // Primary source: EDGAR ETF metrics. A symbol that isn't a fund (or
    // isn't in EDGAR) fails here and the error goes to the caller.
    const metrics = try self.getEtfMetrics(symbol, opts);
    defer metrics.deinit();

    // Walk the EtfMetricRecord slice to extract profile + sectors +
    // holdings. The documented shape is "one .profile, then N .sector,
    // then M .holding" per `appendEtfMetricRecords`.
    var name: ?[]const u8 = null;
    errdefer if (name) |n| self.allocator.free(n);
    var net_assets: ?f64 = null;
    var sectors_buf: std.ArrayList(SectorWeight) = .empty;
    errdefer {
        for (sectors_buf.items) |s| self.allocator.free(s.name);
        sectors_buf.deinit(self.allocator);
    }
    var holdings_buf: std.ArrayList(Holding) = .empty;
    errdefer {
        for (holdings_buf.items) |h| {
            self.allocator.free(h.name);
            if (h.symbol) |s| self.allocator.free(s);
            if (h.cusip) |c| self.allocator.free(c);
        }
        holdings_buf.deinit(self.allocator);
    }

    for (metrics.data) |rec| switch (rec) {
        .profile => |p| {
            if (p.series_name) |sn| {
                // Dupe first, then release any earlier name, so a
                // repeated .profile record neither leaks nor leaves
                // `name` dangling if the dupe fails.
                const fresh_name = try self.allocator.dupe(u8, sn);
                if (name) |previous| self.allocator.free(previous);
                name = fresh_name;
            }
            net_assets = p.net_assets;
        },
        .sector => |s| {
            // The name is not owned by the list until `append`
            // succeeds, so guard it separately.
            const sector_name = try self.allocator.dupe(u8, s.description);
            errdefer self.allocator.free(sector_name);
            try sectors_buf.append(self.allocator, .{
                .name = sector_name,
                .weight = s.pct_of_portfolio / 100.0,
            });
        },
        .holding => |h| {
            const sym_dup: ?[]const u8 = if (h.ticker) |t|
                try self.allocator.dupe(u8, t)
            else
                null;
            errdefer if (sym_dup) |s| self.allocator.free(s);
            const cusip_dup: ?[]const u8 = if (h.cusip) |c|
                try self.allocator.dupe(u8, c)
            else
                null;
            errdefer if (cusip_dup) |c| self.allocator.free(c);
            const name_dup = try self.allocator.dupe(u8, h.name);
            errdefer self.allocator.free(name_dup);
            try holdings_buf.append(self.allocator, .{
                .symbol = sym_dup,
                .name = name_dup,
                .weight = h.pct_of_portfolio / 100.0,
                .cusip = cusip_dup,
            });
        },
    };

    // Wikidata classification provides inception_date and a fallback
    // name. Best-effort: if the fetch fails we still return the
    // EDGAR-only profile.
    var inception_date: ?Date = null;
    if (self.getClassification(symbol, opts)) |class_result| {
        defer class_result.deinit();
        for (class_result.data) |c| {
            if (c.inception_date) |idate_str| {
                if (Date.parse(idate_str)) |d| inception_date = d else |_| {}
            }
            // Use Wikidata's name only if EDGAR didn't provide one.
            if (name == null) {
                if (c.name) |n| name = try self.allocator.dupe(u8, n);
            }
        }
    } else |err| {
        log.debug("{s}: classification unavailable for ETF profile ({s}); using EDGAR data only", .{ symbol, @errorName(err) });
    }

    const sectors_count = sectors_buf.items.len;
    const holdings_count = holdings_buf.items.len;

    // Detach the slices one at a time, each with its own cleanup.
    // `toOwnedSlice` empties the list, so the list-level errdefers above
    // no longer cover the elements once it has succeeded.
    const holdings_owned: ?[]Holding = if (holdings_count > 0)
        try holdings_buf.toOwnedSlice(self.allocator)
    else
        null;
    errdefer {
        if (holdings_owned) |hs| {
            for (hs) |h| {
                self.allocator.free(h.name);
                if (h.symbol) |s| self.allocator.free(s);
                if (h.cusip) |c| self.allocator.free(c);
            }
            self.allocator.free(hs);
        }
    }
    const sectors_owned: ?[]SectorWeight = if (sectors_count > 0)
        try sectors_buf.toOwnedSlice(self.allocator)
    else
        null;
    errdefer {
        if (sectors_owned) |ss| {
            for (ss) |sw| self.allocator.free(sw.name);
            self.allocator.free(ss);
        }
    }
    // Last fallible step; nothing below can fail.
    const symbol_owned = try self.allocator.dupe(u8, symbol);

    const profile: EtfProfile = .{
        .symbol = symbol_owned,
        .name = name,
        .net_assets = net_assets,
        .holdings = if (holdings_owned) |hs| hs else null,
        .total_holdings = if (holdings_count > 0) @intCast(holdings_count) else null,
        .sectors = if (sectors_owned) |ss| ss else null,
        .inception_date = inception_date,
    };

    // Lists that were never detached via toOwnedSlice hold no elements,
    // but are still deinitialized for symmetry.
    if (holdings_count == 0) holdings_buf.deinit(self.allocator);
    if (sectors_count == 0) sectors_buf.deinit(self.allocator);

    return .{
        .data = profile,
        .source = metrics.source,
        .timestamp = metrics.timestamp,
        .allocator = self.allocator,
    };
}
|
|
|
|
// ── Wikidata + EDGAR providers ─────────────────────────────────
|
|
|
|
/// Return the classification record for one symbol (name, sector,
/// industry, country, inception date, CIK, instance-of).
///
/// Lookup order: fresh local cache, then (offline only) any cached
/// entry, then a server sync, and finally a single-symbol Wikidata
/// fetch whose result is post-processed and cached by
/// `finalizeClassification`.
///
/// `opts.skip_network = true` returns cached data even if stale and
/// `FetchFailed` on a cache miss. `opts.force_refresh = true` skips both
/// the cache read and the server sync.
///
/// A rate-limited Wikidata call is retried once after
/// `rateLimitBackoff`; if there is still no result the original error is
/// logged and `FetchFailed` is returned.
pub fn getClassification(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Wikidata.ClassificationRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: classification fresh in local cache", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        // Offline: accept a stale entry, otherwise give up.
        const stale = s.read(self.allocator, Wikidata.ClassificationRecord, symbol, null, .any) orelse return DataError.FetchFailed;
        log.info("{s}: classification stale-cached returned (skip_network)", .{symbol});
        return .{ .data = stale.data, .source = .cached, .timestamp = stale.timestamp, .allocator = self.allocator };
    }

    // Server sync comes before Wikidata, unless a refresh was forced.
    if (!opts.force_refresh and self.syncFromServer(symbol, .classification)) {
        if (s.read(self.allocator, Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |synced| {
            log.debug("{s}: classification synced from server", .{symbol});
            return .{ .data = synced.data, .source = .cached, .timestamp = synced.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("{s}: fetching classification from Wikidata", .{symbol});
    self.assertNetworkAllowed("getClassification wikidata.fetch");
    var wd = try self.getProvider(Wikidata);

    // The provider API is batched; send a batch of one.
    const batch = [_][]const u8{symbol};
    const records = wd.fetch(self.allocator, &batch) catch |first_err| retry: {
        if (first_err == error.RateLimited) {
            self.rateLimitBackoff();
            if (wd.fetch(self.allocator, &batch)) |second_try| {
                break :retry second_try;
            } else |_| {}
        }
        log.warn("{s}: wikidata fetch failed: {s}", .{ symbol, @errorName(first_err) });
        return DataError.FetchFailed;
    };

    return self.finalizeClassification(symbol, records, opts);
}
|
|
|
|
/// Common post-Wikidata path: decide if the result is useful as
/// returned, otherwise consult EDGAR to fill in the gaps, otherwise
/// negative-cache.
///
/// On success the cache is written and a `FetchResult` with source
/// `.fetched` is returned. When `synthesizeClassification` reports
/// `error.NotFound` the symbol is negative-cached and
/// `DataError.NotFound` is returned; any other synthesis failure becomes
/// `DataError.FetchFailed`.
///
/// Takes ownership of `wikidata_records`. The slice is either returned
/// as the result data, handed to `synthesizeClassification` (which
/// frees it), or freed here when geo population fails.
fn finalizeClassification(
    self: *DataService,
    symbol: []const u8,
    wikidata_records: []Wikidata.ClassificationRecord,
    opts: FetchOptions,
) DataError!FetchResult(Wikidata.ClassificationRecord) {
    var s = self.store();
    const ttl = cache.DataType.classification.ttl();

    // Wikidata returned a useful row -> populate geo from
    // geoFor(country) and cache as-is.
    if (wikidata_records.len > 0 and wikidataLooksUseful(wikidata_records[0])) {
        self.populateGeo(&wikidata_records[0]) catch |err| {
            // We own the slice; a bare `try` here would leak it.
            Wikidata.ClassificationRecord.freeSlice(self.allocator, wikidata_records);
            return err;
        };
        s.write(Wikidata.ClassificationRecord, symbol, wikidata_records, ttl);
        return .{ .data = wikidata_records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    // Sparse or empty: try the EDGAR fallback. `synthesizeClassification`
    // takes ownership of the Wikidata slice (frees it, returns a new
    // one-element slice with the merged record) and returns
    // `error.NotFound` when even EDGAR has nothing.
    const merged = self.synthesizeClassification(symbol, wikidata_records, opts) catch |err| {
        if (err == error.NotFound) {
            s.writeNegative(symbol, .classification);
            return DataError.NotFound;
        }
        return DataError.FetchFailed;
    };

    s.write(Wikidata.ClassificationRecord, symbol, merged, ttl);
    return .{ .data = merged, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
|
|
|
|
/// Fill in `record.geo` from the record's country when it is not set yet.
///
/// Does nothing when `geo` is already present, when the record has no
/// country, or when `classification.geoFor` maps the country to
/// `classification.geo.unknown`. Otherwise stores a copy of the mapped
/// value allocated with `self.allocator`.
///
/// If that allocation fails the field stays null and the error is
/// returned, leaving the decision to bail with the caller.
fn populateGeo(self: *DataService, record: *Wikidata.ClassificationRecord) !void {
    if (record.geo != null) return;
    if (record.country) |country| {
        const region = classification.geoFor(country);
        const is_known = !std.mem.eql(u8, region, classification.geo.unknown);
        if (is_known) {
            record.geo = try self.allocator.dupe(u8, region);
        }
    }
}
|
|
|
|
/// Decide whether a Wikidata classification record is rich enough to be
/// used without the EDGAR fallback.
///
/// A record is "useful" when `is_etf` is true or any of `asset_class`,
/// `country`, or `sector` is set. Sparse records (e.g. SOXX, for which
/// Wikidata supplies only a `name`) need the EDGAR ticker-map fallback
/// to fill in `is_etf=true, asset_class=ETF, country=US`.
fn wikidataLooksUseful(c: Wikidata.ClassificationRecord) bool {
    return c.is_etf or
        c.asset_class != null or
        c.country != null or
        c.sector != null;
}
|
|
|
|
/// Build a one-element classification slice for a symbol that Wikidata
/// could not classify usefully, using EDGAR as the fallback source.
///
/// Steps: consult the EDGAR ticker maps (`lookupEdgarFallback`); if the
/// symbol is unknown there, return `error.NotFound`. Otherwise try
/// `getEtfMetrics` to recover the NPORT-P `series_name` (more
/// authoritative than the company_tickers title) and CIK, then merge
/// with whatever the sparse Wikidata record carried. Title-keyword
/// inference supplies `sector` and `geo` when the name contains an
/// unambiguous keyword.
///
/// Takes ownership of `wikidata_records`: they are freed on every exit
/// path. Wikidata's `industry` / `inception_date` / `cik` / `country` /
/// `sector` values are copied into the new record when present; the
/// Wikidata `name` is used only when neither EDGAR source has one.
/// The returned slice and all of its strings are allocated with
/// `self.allocator` and owned by the caller.
fn synthesizeClassification(
    self: *DataService,
    symbol: []const u8,
    wikidata_records: []Wikidata.ClassificationRecord,
    opts: FetchOptions,
) !cache.Store.DataFor(Wikidata.ClassificationRecord) {
    defer Wikidata.ClassificationRecord.freeSlice(self.allocator, wikidata_records);

    const lookup = self.lookupEdgarFallback(symbol, opts);
    defer freeEdgarLookup(self.allocator, lookup);

    // Asset-class label, and the early exit when EDGAR has nothing.
    // The ETF flag on a ticker-map hit decides "ETF" vs "Fund".
    const asset_class: []const u8 = switch (lookup) {
        .none => return error.NotFound,
        .managed_fund => "Fund",
        .company_or_uit => |c| if (c.is_etf) "ETF" else "Fund",
    };
    const ticker_title: ?[]const u8 = switch (lookup) {
        .company_or_uit => |c| c.title,
        else => null,
    };

    // Try NPORT-P for the richer series name. A cache hit is cheap; a
    // miss triggers an EDGAR fetch bounded by EDGAR's rate limiter. If
    // the call fails (e.g. money-market funds with no NPORT-P) the
    // ticker-map title is used instead.
    var etf_metrics_result: ?FetchResult(Edgar.EtfMetricRecord) = null;
    defer if (etf_metrics_result) |*r| r.deinit();
    etf_metrics_result = self.getEtfMetrics(symbol, opts) catch null;

    // Only the first .profile row is consulted.
    var series_name: ?[]const u8 = null;
    var etf_cik: ?[]const u8 = null;
    if (etf_metrics_result) |metrics| {
        for (metrics.data) |rec| {
            const p = switch (rec) {
                .profile => |row| row,
                else => continue,
            };
            if (p.series_name) |sn| series_name = sn;
            etf_cik = p.cik;
            break;
        }
    }

    // Whatever the sparse Wikidata record carried, so nothing is lost
    // in the merge. These borrow from `wikidata_records`, which stays
    // alive until this function returns.
    const sparse: ?Wikidata.ClassificationRecord = if (wikidata_records.len > 0) wikidata_records[0] else null;
    const wd_name: ?[]const u8 = if (sparse) |w| w.name else null;
    const wd_country: ?[]const u8 = if (sparse) |w| w.country else null;
    const wd_sector: ?[]const u8 = if (sparse) |w| w.sector else null;
    const wd_cik: ?[]const u8 = if (sparse) |w| w.cik else null;
    const wd_industry: ?[]const u8 = if (sparse) |w| w.industry else null;
    const wd_inception: ?[]const u8 = if (sparse) |w| w.inception_date else null;

    // Name precedence: NPORT-P series_name > EDGAR ticker-map title >
    // Wikidata name > nothing. For funds, Wikidata's name is frequently
    // the underlying INDEX rather than the FUND (e.g. SOXX: "PHLX
    // Semiconductor Sector" vs "iShares Semiconductor ETF"), so the
    // fund-authoritative sources win. The same name drives the
    // title-keyword inference below.
    const best_name: ?[]const u8 = series_name orelse ticker_title orelse wd_name;

    const inferred_sector = classification.inferSectorFromTitle(best_name);
    const inferred_geo = classification.inferGeoFromTitle(best_name);

    // Country: Wikidata's if present, else "US" (EDGAR-found symbols
    // are SEC filers).
    const country_str: []const u8 = wd_country orelse "US";

    // Sector: Wikidata's (rare on this path), else the inferred one.
    const sector_str: ?[]const u8 = if (wd_sector) |sec| sec else inferred_sector;

    // CIK: Wikidata's, else NPORT-P's.
    const cik_str: ?[]const u8 = wd_cik orelse etf_cik;

    // Geo: derived from Wikidata's country when `geoFor` knows it, else
    // title-keyword inference, else US.
    const geo_str: []const u8 = pick: {
        if (wd_country) |c| {
            const mapped = classification.geoFor(c);
            if (!std.mem.eql(u8, mapped, classification.geo.unknown)) break :pick mapped;
        }
        if (inferred_geo) |g| break :pick g;
        break :pick classification.geo.us;
    };

    const today = fmt.todayDate(self.io);
    var as_of_buf: [10]u8 = undefined;
    const as_of_str = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today});

    // Every owned field is allocated up front with its own errdefer so
    // that a failure part-way through releases the earlier copies.
    const symbol_owned = try self.allocator.dupe(u8, symbol);
    errdefer self.allocator.free(symbol_owned);
    const name_owned: ?[]const u8 = if (best_name) |n| try self.allocator.dupe(u8, n) else null;
    errdefer if (name_owned) |s| self.allocator.free(s);
    const sector_owned: ?[]const u8 = if (sector_str) |sec| try self.allocator.dupe(u8, sec) else null;
    errdefer if (sector_owned) |s| self.allocator.free(s);
    const industry_owned: ?[]const u8 = if (wd_industry) |ind| try self.allocator.dupe(u8, ind) else null;
    errdefer if (industry_owned) |s| self.allocator.free(s);
    const country_owned = try self.allocator.dupe(u8, country_str);
    errdefer self.allocator.free(country_owned);
    const geo_owned = try self.allocator.dupe(u8, geo_str);
    errdefer self.allocator.free(geo_owned);
    const asset_class_owned = try self.allocator.dupe(u8, asset_class);
    errdefer self.allocator.free(asset_class_owned);
    const inception_owned: ?[]const u8 = if (wd_inception) |inc| try self.allocator.dupe(u8, inc) else null;
    errdefer if (inception_owned) |s| self.allocator.free(s);
    const cik_owned: ?[]const u8 = if (cik_str) |c| try self.allocator.dupe(u8, c) else null;
    errdefer if (cik_owned) |s| self.allocator.free(s);
    const as_of_owned = try self.allocator.dupe(u8, as_of_str);
    errdefer self.allocator.free(as_of_owned);
    const source_owned = try self.allocator.dupe(u8, "edgar_fallback");
    errdefer self.allocator.free(source_owned);

    const result = try self.allocator.alloc(Wikidata.ClassificationRecord, 1);
    result[0] = .{
        .symbol = symbol_owned,
        .name = name_owned,
        .sector = sector_owned,
        .industry = industry_owned,
        .country = country_owned,
        .geo = geo_owned,
        .asset_class = asset_class_owned,
        // `is_etf` means "fund-shaped, emit multi-row breakdown" and is
        // true for ANY EDGAR-found symbol. `tickers_funds.srf` mixes
        // mutual funds and series-of-trust ETFs; `tickers_companies.srf`
        // carries operating companies, closed-end funds and UITs, but
        // operating companies usually have Wikidata coverage and never
        // reach this fallback (e.g. PIMCO closed-end funds whose title
        // says "FUND" but not "ETF" or "TRUST" do).
        .is_etf = true,
        .inception_date = inception_owned,
        .cik = cik_owned,
        .as_of = as_of_owned,
        .source = source_owned,
    };
    return result;
}
|
|
|
|
/// Fetch XBRL-derived entity facts for a CIK (currently
/// shares-outstanding; extensible to revenue / net income / EPS
/// as new variants are added to `Edgar.EntityFactRecord`).
///
/// CIK is the cache key — the file lives at
/// `<cache_dir>/<cik>/entity_facts.srf`. A single dual-class
/// issuer (BRK.A / BRK.B) shares one entity_facts file because
/// both class symbols resolve to the same CIK.
///
/// Resolution order:
///   1. fresh local cache (skipped when `opts.force_refresh`),
///   2. with `opts.skip_network`: any cached copy, else `FetchFailed`,
///   3. server sync followed by a fresh cache read (skipped on
///      `opts.force_refresh`),
///   4. EDGAR shares-outstanding fetch; the result is written to cache.
///
/// Returns `DataError.NotFound` (after writing a negative cache
/// entry) when EDGAR has no shares-outstanding data for the CIK, and
/// `DataError.FetchFailed` when the provider call errors.
pub fn getEntityFacts(self: *DataService, cik: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EntityFactRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts fresh in local cache", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .any)) |cached| {
            log.info("CIK {s}: entity_facts stale-cached returned (skip_network)", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    if (!opts.force_refresh and self.syncFromServer(cik, .entity_facts)) {
        if (s.read(self.allocator, Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts synced from server", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("CIK {s}: fetching entity facts from EDGAR", .{cik});
    self.assertNetworkAllowed("getEntityFacts edgar.fetchSharesOutstanding");
    var edgar = try self.getProvider(Edgar);

    const so_opt = edgar.fetchSharesOutstanding(self.allocator, cik) catch |err| {
        log.warn("CIK {s}: shares fetch failed: {s}", .{ cik, @errorName(err) });
        return DataError.FetchFailed;
    };

    if (so_opt) |so_in| {
        var so = so_in;
        defer so.deinit(self.allocator);
        const today = fmt.todayDate(self.io);
        var as_of_buf: [10]u8 = undefined;
        // [10]u8 always fits "YYYY-MM-DD" (10 chars exactly).
        const as_of = std.fmt.bufPrint(&as_of_buf, "{f}", .{today}) catch
            @panic("getEntityFacts: 10-byte buffer cannot hold YYYY-MM-DD — unreachable");

        // Allocate each owned field up front with its own errdefer so
        // an OOM part-way through does not leak the earlier dupes.
        // The record is assembled only after every allocation has
        // succeeded (no fallible ops after `records` is allocated).
        const symbol_owned = try self.allocator.dupe(u8, "");
        errdefer self.allocator.free(symbol_owned);
        const period_end_owned = try self.allocator.dupe(u8, so.period_end);
        errdefer self.allocator.free(period_end_owned);
        // An empty form string is stored as null rather than "".
        const form_owned: ?[]u8 = if (so.form.len > 0) try self.allocator.dupe(u8, so.form) else null;
        errdefer if (form_owned) |f| self.allocator.free(f);
        const cik_owned = try self.allocator.dupe(u8, cik);
        errdefer self.allocator.free(cik_owned);
        const as_of_owned = try self.allocator.dupe(u8, as_of);
        errdefer self.allocator.free(as_of_owned);

        const records = try self.allocator.alloc(Edgar.EntityFactRecord, 1);

        const shares_record = Edgar.SharesRecord{
            .symbol = symbol_owned,
            .shares_outstanding = so.value,
            .period_end = period_end_owned,
            .form = form_owned,
            .cik = cik_owned,
            .as_of = as_of_owned,
            .source = "edgar_xbrl",
        };
        records[0] = .{ .shares_outstanding = shares_record };
        s.write(Edgar.EntityFactRecord, cik, records, cache.DataType.entity_facts.ttl());

        return .{ .data = records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
    }

    // No shares-outstanding data for this CIK (e.g. 20-F-only
    // filers like BP, XBRL-light filers like META). Negative-
    // cache so we don't keep retrying.
    s.writeNegative(cik, .entity_facts);
    return DataError.NotFound;
}
|
|
|
|
/// Fetch ETF metrics (NPORT-P profile + sectors + holdings) for a
/// fund symbol, cache-first via `<symbol>/etf_metrics.srf`.
///
/// Lookup order: fresh local cache, then (with `opts.skip_network`)
/// any cached copy or `FetchFailed`, then server sync, then EDGAR.
/// `opts.force_refresh` bypasses the fresh-cache and server-sync steps.
///
/// On the EDGAR path both ticker maps are loaded through
/// `loadMutualFundTickerMap` / `loadCompanyTickerMap` and released
/// again before this function returns. Symbols that are not funds,
/// or are absent from both maps, get a negative cache entry and
/// yield `DataError.NotFound`.
pub fn getEtfMetrics(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EtfMetricRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |hit| {
            log.debug("{s}: etf_metrics fresh in local cache", .{symbol});
            return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        // Offline: a stale copy is acceptable, a miss is a failure.
        if (s.read(self.allocator, Edgar.EtfMetricRecord, symbol, null, .any)) |hit| {
            log.info("{s}: etf_metrics stale-cached returned (skip_network)", .{symbol});
            return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    if (!opts.force_refresh and self.syncFromServer(symbol, .etf_metrics)) {
        if (s.read(self.allocator, Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |hit| {
            log.debug("{s}: etf_metrics synced from server", .{symbol});
            return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("{s}: fetching ETF metrics from EDGAR", .{symbol});
    self.assertNetworkAllowed("getEtfMetrics edgar.fetchEtfMetrics");

    // Ticker maps are large; each is loaded here (cache or network)
    // and freed when this call returns.
    var mf_map = self.loadMutualFundTickerMap(opts) catch |err| {
        log.warn("failed to load mutual-fund ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer mf_map.deinit();

    var co_map = self.loadCompanyTickerMap(opts) catch |err| {
        log.warn("failed to load company ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer co_map.deinit();

    var edgar = try self.getProvider(Edgar);
    // NOTE(review): the trailing `20` is passed through unchanged;
    // presumably a holdings limit — confirm against Edgar.fetchEtfMetrics.
    const outcome = edgar.fetchEtfMetrics(self.io, self.allocator, &mf_map, &co_map, symbol, 20) catch |err| {
        log.warn("{s}: etf_metrics fetch failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };

    switch (outcome) {
        // Both payload-carrying outcomes are flattened into cache
        // records the same way; `inline` expands the prong per tag.
        inline .full, .profile_only => |payload| {
            var metrics = payload;
            defer metrics.deinit(self.allocator);

            var records: std.ArrayList(Edgar.EtfMetricRecord) = .empty;
            errdefer {
                for (records.items) |*r| r.deinit(self.allocator);
                records.deinit(self.allocator);
            }
            try Edgar.appendEtfMetricRecords(self.allocator, &records, metrics);
            const owned = try records.toOwnedSlice(self.allocator);
            s.write(Edgar.EtfMetricRecord, symbol, owned, cache.DataType.etf_metrics.ttl());
            return .{ .data = owned, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
        },
        // Either the symbol is not a fund (callers can use
        // `getEntityFacts(cik)` for stock-level facts) or it is in
        // neither ticker map. Negative-cache to suppress retries.
        .not_a_fund, .not_in_edgar => {
            s.writeNegative(symbol, .etf_metrics);
            return DataError.NotFound;
        },
    }
}
|
|
|
|
/// Load the EDGAR mutual-fund ticker map. Reads `[]MutualFundTickerEntry`
/// from cache when fresh; otherwise fetches via the provider
/// and writes the parsed slice to cache. The returned
/// `TickerMap` takes ownership of the entries; caller frees via
/// a single `mf_map.deinit()`.
///
/// `opts.force_refresh` skips the fresh-cache read. With
/// `opts.skip_network` no provider call is made: any non-empty
/// cached copy (even stale) is returned, otherwise the call fails
/// with `DataError.FetchFailed`. This matches how the other
/// `opts`-taking getters on the service treat `skip_network`.
///
/// Heavy: ~28k entries. Cheap on cache hit (fast SRF read);
/// expensive on miss (one HTTP round-trip + JSON parse).
/// Exposed publicly so commands like `enrich` can use the
/// ticker map as a fallback classifier when Wikidata returns
/// no rows for a symbol.
pub fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.MutualFundTickerEntry) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.MutualFundTickerEntry, "_edgar", null, .fresh_only)) |cached| {
            if (cached.data.len > 0) {
                return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, cached.data);
            }
            // An empty cached slice is treated as a miss.
            Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
        }
    }

    if (opts.skip_network) {
        // Offline: accept a stale copy, never touch the provider.
        if (s.read(self.allocator, Edgar.MutualFundTickerEntry, "_edgar", null, .any)) |cached| {
            if (cached.data.len > 0) {
                log.info("EDGAR mutual-fund ticker map stale-cached returned (skip_network)", .{});
                return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, cached.data);
            }
            Edgar.MutualFundTickerEntry.freeSlice(self.allocator, cached.data);
        }
        return DataError.FetchFailed;
    }

    log.debug("fetching EDGAR mutual-fund ticker map", .{});
    self.assertNetworkAllowed("loadMutualFundTickerMap edgar.fetchMutualFundTickerMap");
    var edgar = try self.getProvider(Edgar);

    // Fetch + parse via the provider (correct UA + From + Accept
    // + rate-limit token), cache the parsed slice, then build
    // the lookup map (which takes ownership of the slice).
    const entries = try edgar.fetchMutualFundTickerMap(self.allocator);
    s.write(Edgar.MutualFundTickerEntry, "_edgar", entries, cache.DataType.tickers_funds.ttl());
    return Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(self.allocator, entries);
}
|
|
|
|
/// Load the EDGAR company ticker map (stocks + UITs). Reads
/// `[]CompanyTickerEntry` from cache when fresh; otherwise fetches
/// via the provider and writes the parsed slice to cache. The
/// returned `TickerMap` is built from the entries via `fromEntries`;
/// caller releases it with a single `deinit()`.
///
/// `opts.force_refresh` skips the fresh-cache read. With
/// `opts.skip_network` no provider call is made: any non-empty
/// cached copy (even stale) is returned, otherwise the call fails
/// with `DataError.FetchFailed`.
pub fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap(Edgar.CompanyTickerEntry) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(self.allocator, Edgar.CompanyTickerEntry, "_edgar", null, .fresh_only)) |cached| {
            if (cached.data.len > 0) {
                return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, cached.data);
            }
            // An empty cached slice is treated as a miss.
            Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
        }
    }

    if (opts.skip_network) {
        // Offline: accept a stale copy, never touch the provider.
        if (s.read(self.allocator, Edgar.CompanyTickerEntry, "_edgar", null, .any)) |cached| {
            if (cached.data.len > 0) {
                log.info("EDGAR company ticker map stale-cached returned (skip_network)", .{});
                return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, cached.data);
            }
            Edgar.CompanyTickerEntry.freeSlice(self.allocator, cached.data);
        }
        return DataError.FetchFailed;
    }

    log.debug("fetching EDGAR company ticker map", .{});
    self.assertNetworkAllowed("loadCompanyTickerMap edgar.fetchCompanyTickerMap");
    var edgar = try self.getProvider(Edgar);

    // Fetch + parse via the provider, cache the parsed slice, then
    // hand the slice to the lookup map.
    const entries = try edgar.fetchCompanyTickerMap(self.allocator);
    s.write(Edgar.CompanyTickerEntry, "_edgar", entries, cache.DataType.tickers_companies.ttl());
    return Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(self.allocator, entries);
}
|
|
|
|
/// Look up a symbol in the EDGAR ticker maps. Used by the
/// `enrich` command as a fallback classifier when Wikidata
/// returns no rows for the symbol. Loads both maps (cache or
/// network), runs the lookup, frees the maps, returns the
/// digested `EdgarLookup` union.
///
/// A map that fails to load is logged and passed to the lookup as
/// `null`; the lookup still runs against whichever map is available.
///
/// Commands consume the union directly — they never see
/// `TickerMap` / `MutualFundTickerEntry` / `CompanyTickerEntry`
/// shapes. Provider details stay inside the service layer.
///
/// Caller owns the `title` string when the result is
/// `.company_or_uit{ .title = non-null }`. The lookup is given the
/// service's allocator (`self.allocator`), so free it with that.
pub fn lookupEdgarFallback(
    self: *DataService,
    sym: []const u8,
    opts: FetchOptions,
) EdgarLookup {
    var mf_opt: ?Edgar.TickerMap(Edgar.MutualFundTickerEntry) = self.loadMutualFundTickerMap(opts) catch |err| blk: {
        log.warn("{s}: mutual-fund ticker map unavailable for EDGAR fallback: {s}", .{ sym, @errorName(err) });
        break :blk null;
    };
    defer if (mf_opt) |*m| m.deinit();

    var co_opt: ?Edgar.TickerMap(Edgar.CompanyTickerEntry) = self.loadCompanyTickerMap(opts) catch |err| blk: {
        log.warn("{s}: company ticker map unavailable for EDGAR fallback: {s}", .{ sym, @errorName(err) });
        break :blk null;
    };
    defer if (co_opt) |*m| m.deinit();

    return lookupInTickerMaps(
        self.allocator,
        sym,
        if (mf_opt) |*m| m else null,
        if (co_opt) |*m| m else null,
    );
}
|
|
|
|
// ──────────────────────────────────────────────────────────────
|
|
/// Fetch a real-time quote for a symbol.
/// Yahoo Finance is primary (free, no API key, no 15-min delay).
/// Falls back to TwelveData if Yahoo fails; each failure is logged
/// before moving on.
///
/// Quotes are never cached, so `opts.force_refresh` is a no-op
/// (every call goes to the provider). `opts.skip_network = true`
/// returns FetchFailed unconditionally — there's no cached price
/// to fall back to.
///
/// Errors: `DataError.FetchFailed` when TwelveData's fetch also
/// fails; any error from `getProvider(TwelveData)` is propagated.
pub fn getQuote(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!Quote {
    if (opts.skip_network) {
        log.debug("{s}: skip_network — quote unavailable (never cached)", .{symbol});
        return DataError.FetchFailed;
    }

    self.assertNetworkAllowed("getQuote");

    // Primary: Yahoo Finance (free, real-time)
    if (self.getProvider(Yahoo)) |yh| {
        if (yh.fetchQuote(self.allocator, symbol)) |quote| {
            log.debug("{s}: quote from Yahoo", .{symbol});
            return quote;
        } else |err| {
            // Not fatal: fall through to TwelveData, but leave a trace.
            log.debug("{s}: Yahoo quote failed: {s}", .{ symbol, @errorName(err) });
        }
    } else |err| {
        log.debug("{s}: Yahoo provider unavailable: {s}", .{ symbol, @errorName(err) });
    }

    // Fallback: TwelveData (requires API key, may be 15-min delayed)
    var td = try self.getProvider(TwelveData);
    log.debug("{s}: quote fallback to TwelveData", .{symbol});
    return td.fetchQuote(self.allocator, symbol) catch |err| {
        log.warn("{s}: TwelveData quote failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };
}
|
|
|
|
/// Compute trailing returns for a symbol (fetches candles + dividends + splits).
/// Returns both as-of-date and month-end trailing returns.
/// As-of-date: end = latest close. Matches Morningstar "Trailing Returns" page.
/// Month-end: end = last business day of prior month. Matches Morningstar "Performance" page.
///
/// `*_price` columns are split-adjusted, NOT dividend-adjusted (matches the
/// "price return" numbers public sources like Yahoo's chart-bar / FMP / Barchart
/// publish). `*_total` columns include dividend reinvestment (matches Morningstar
/// "Trailing Returns" / Yahoo "Performance Overview" / Koyfin "Total Return").
/// See `tmp/multi-ticker-audit.md` for the cross-validation evidence.
///
/// The returned `candles` and `dividends` slices come straight from
/// the underlying `getCandles` / `getDividends` results; `source`
/// and `timestamp` are those of the candle fetch. Fails with
/// `DataError.FetchFailed` when the candle series is empty.
pub fn getTrailingReturns(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!struct {
    asof_price: performance.TrailingReturns,
    asof_total: ?performance.TrailingReturns,
    me_price: performance.TrailingReturns,
    me_total: ?performance.TrailingReturns,
    candles: []Candle,
    dividends: ?[]Dividend,
    source: Source,
    timestamp: i64,
} {
    const candle_result = try self.getCandles(symbol, opts);
    const candles = candle_result.data;
    if (candles.len == 0) return DataError.FetchFailed;

    const today = fmt.todayDate(self.io);

    // Splits keep raw `close` ratios meaningful across split
    // boundaries (e.g. NVDA 10:1 on 2024-06-10). A failed splits
    // fetch degrades to an empty slice, which is still correct for
    // any ticker without a split inside the window.
    var splits_result: ?FetchResult(Split) = null;
    defer if (splits_result) |held| held.deinit();
    var splits: []const Split = &.{};
    if (self.getSplits(symbol, opts)) |fetched| {
        splits_result = fetched;
        splits = fetched.data;
    } else |_| {}

    // Price return, ending at the last candle and at the prior
    // month's last business day respectively.
    const price_asof = performance.trailingReturnsPriceOnly(candles, splits);
    const price_me = performance.trailingReturnsPriceOnlyMonthEnd(candles, splits, today);

    // adj_close-based total return. With Tiingo (the default
    // provider) adj_close is already dividend-adjusted, so this is a
    // reasonable estimate whenever explicit dividend records are
    // missing; it is the starting value for both totals below.
    const adj_asof = performance.trailingReturns(candles);
    const adj_me = performance.trailingReturnsMonthEnd(candles, today);

    var dividends: ?[]Dividend = null;
    var total_asof: ?performance.TrailingReturns = adj_asof;
    var total_me: ?performance.TrailingReturns = adj_me;

    if (self.getDividends(symbol, opts)) |div_result| {
        // Dividend-reinvested totals, with the adj_close numbers as
        // the per-period fallback.
        dividends = div_result.data;
        const reinvested_asof = performance.trailingReturnsWithDividends(candles, div_result.data);
        const reinvested_me = performance.trailingReturnsMonthEndWithDividends(candles, div_result.data, today);
        total_asof = performance.withDividendFallback(reinvested_asof, adj_asof);
        total_me = performance.withDividendFallback(reinvested_me, adj_me);
    } else |_| {
        // No dividend data: the adj_close-based totals set above stand.
    }

    return .{
        .asof_price = price_asof,
        .asof_total = total_asof,
        .me_price = price_me,
        .me_total = total_me,
        .candles = candles,
        .dividends = dividends,
        .source = candle_result.source,
        .timestamp = candle_result.timestamp,
    };
}
|
|
|
|
/// Report whether the cached candle data for `symbol` is still
/// fresh, as decided by the store's candle-metadata check (no full
/// deserialization of the candle history).
pub fn isCandleCacheFresh(self: *DataService, symbol: []const u8) bool {
    var cache_store = self.store();
    const fresh = cache_store.isCandleMetaFresh(symbol);
    return fresh;
}
|
|
|
|
/// Return just the most recent close price from the cached candles
/// for `symbol`, without deserializing the full history.
/// Yields null when the store has nothing cached for the symbol.
pub fn getCachedLastClose(self: *DataService, symbol: []const u8) ?f64 {
    var cache_store = self.store();
    const last_close = cache_store.readLastClose(symbol);
    return last_close;
}
|
|
|
|
/// Return the date of the newest cached candle for `symbol`, read
/// from the candle metadata rather than the full candle history.
/// Yields null when no cached metadata exists.
///
/// Freshness is not checked here. Pair this with
/// `isCandleCacheFresh` before trusting the date: a stale entry can
/// report a date from days or weeks ago, which is fine for
/// diagnostics but wrong for anything that needs "the current
/// market date".
pub fn getCachedLastDate(self: *DataService, symbol: []const u8) ?Date {
    var cache_store = self.store();
    if (cache_store.readCandleMeta(symbol)) |entry| {
        return entry.meta.last_date;
    }
    return null;
}
|
|
|
|
/// Estimate how many seconds a fetch for `data_type` would have to
/// wait on its provider's rate limiter.
///
/// Returns 0 when a request can go out immediately or when the data
/// type has no rate limiter behind it. Returns null when the
/// provider serving this data type has not been instantiated yet
/// (e.g. no API key, or the lazy init hasn't been triggered by a
/// first call). Any non-zero wait is reported as at least 1 second;
/// longer waits are truncated to whole seconds.
///
/// The data-type → provider mapping lives here so callers can ask
/// "how long until getX can proceed?" without knowing which provider
/// serves which data.
pub fn estimateWaitSeconds(self: *DataService, data_type: cache.DataType) ?u64 {
    const wait_ns: u64 = switch (data_type) {
        // Polygon serves dividends and splits.
        .dividends, .splits => if (self.pg) |*polygon| polygon.rate_limiter.estimateWaitNs() else return null,
        // FMP serves earnings.
        .earnings => if (self.fmp) |*provider| provider.rate_limiter.estimateWaitNs() else return null,
        // Cboe serves options chains.
        .options => if (self.cboe) |*provider| provider.rate_limiter.estimateWaitNs() else return null,
        // EDGAR serves ETF metrics, entity facts and both ticker maps.
        .etf_metrics, .entity_facts, .tickers_funds, .tickers_companies => if (self.edgar) |*provider| provider.rate_limiter.estimateWaitNs() else return null,
        // Candles go through Tiingo's limiter (50/hour token bucket).
        // Without a Tiingo instance (no key) candles fall back to
        // keyless Yahoo, which has no proactive limiter, so the
        // answer is 0 rather than null. `candles_meta` shares
        // Tiingo's budget.
        .candles_daily, .candles_meta => if (self.tg) |*tiingo| tiingo.rate_limiter.estimateWaitNs() else 0,
        // `meta` isn't fetched; Wikidata (classification) has no
        // published quota.
        .classification, .meta => 0,
    };

    if (wait_ns == 0) return 0;
    // Sub-second waits round up to one second instead of down to 0.
    return @max(1, wait_ns / std.time.ns_per_s);
}
|
|
|
|
/// Cache-only candle read (never touches the network). Used by the
/// TUI for display.
/// Returns null when nothing is cached for `symbol` or when the
/// entry is a negative cache (fetch_failed). Stale data is accepted.
///
/// `allocator` owns the returned `FetchResult.data`. Pass an
/// arena for "lives until reload" use cases (TUI per-portfolio
/// data); pass a per-call arena for CLI batch commands.
pub fn getCachedCandles(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(Candle) {
    var cache_store = self.store();
    if (cache_store.isNegative(symbol, .candles_daily)) return null;
    if (cache_store.read(allocator, Candle, symbol, null, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}
|
|
|
|
/// Cache-only dividend read (never touches the network); stale
/// data is accepted. Returns null when the store has no entry.
/// `allocator` owns the returned `FetchResult.data`, the same
/// contract as `getCachedCandles`.
pub fn getCachedDividends(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(Dividend) {
    var cache_store = self.store();
    if (cache_store.read(allocator, Dividend, symbol, null, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}
|
|
|
|
/// Cache-only earnings read (never touches the network); stale
/// data is accepted. The store read is given `earningsPostProcess`
/// as its post-processing hook. Returns null when the store has no
/// entry. `allocator` owns the returned `FetchResult.data`, the
/// same contract as `getCachedCandles`.
pub fn getCachedEarnings(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(EarningsEvent) {
    var cache_store = self.store();
    if (cache_store.read(allocator, EarningsEvent, symbol, earningsPostProcess, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}
|
|
|
|
/// Cache-only options-chain read (never touches the network);
/// stale data is accepted. Returns null when the store has no
/// entry. `allocator` owns the returned `FetchResult.data`, the
/// same contract as `getCachedCandles`.
pub fn getCachedOptions(self: *DataService, allocator: std.mem.Allocator, symbol: []const u8) ?FetchResult(OptionsChain) {
    var cache_store = self.store();
    if (cache_store.read(allocator, OptionsChain, symbol, null, .any)) |hit| {
        return .{ .data = hit.data, .source = .cached, .timestamp = hit.timestamp, .allocator = allocator };
    }
    return null;
}
|
|
|
|
// ── Portfolio price loading ──────────────────────────────────
|
|
|
|
/// Per-symbol status reported through `ProgressCallback` while
/// prices are being loaded.
pub const SymbolStatus = enum {
    /// The price came from a fresh cache entry.
    cached,
    /// An API fetch is about to start (sent before the network call).
    fetching,
    /// The API fetch succeeded and supplied the price.
    fetched,
    /// The API fetch failed; a stale cached price was used instead.
    failed_used_stale,
    /// The API fetch failed and there was no cached price either.
    failed,
};
|
|
|
|
/// Per-symbol progress hook used during price loading.
/// `context` is an opaque pointer to caller-owned state; it is
/// handed back verbatim as the first argument of `on_progress`.
pub const ProgressCallback = struct {
    context: *anyopaque,
    on_progress: *const fn (ctx: *anyopaque, index: usize, total: usize, symbol: []const u8, status: SymbolStatus) void,

    /// Invoke `on_progress` with the stored `context` and the given
    /// position, total, symbol and status.
    fn emit(self: ProgressCallback, index: usize, total: usize, symbol: []const u8, status: SymbolStatus) void {
        const notify = self.on_progress;
        notify(self.context, index, total, symbol, status);
    }
};
|
|
|
|
// ── Consolidated Price Loading (Parallel Server + Sequential Provider) ──
|
|
|
|
/// Options accepted by `loadAllPrices`.
pub const LoadAllConfig = struct {
    force_refresh: bool = false,
    /// Skip provider fetches and server sync. Cached data is
    /// returned even when stale, and a cache miss counts as a
    /// failure. Drives `--refresh-data=never`.
    skip_network: bool = false,
    color: bool = true,

    /// Project this config onto the per-call `FetchOptions` shape
    /// (`force_refresh` and `skip_network` only), for code paths
    /// that pass through to `getCandles`/`getDividends`/etc.
    pub fn fetchOptions(self: LoadAllConfig) FetchOptions {
        const per_call: FetchOptions = .{
            .force_refresh = self.force_refresh,
            .skip_network = self.skip_network,
        };
        return per_call;
    }
};
|
|
|
|
/// Outcome of a `loadAllPrices` run: the resolved prices plus
/// counters describing where each symbol's price came from.
pub const LoadAllResult = struct {
    /// Symbol → price for every symbol that resolved.
    prices: std.StringHashMap(f64),
    /// Symbols served from fresh local cache.
    cached_count: usize,
    /// Symbols synced from the server.
    server_synced_count: usize,
    /// Symbols fetched from providers (rate-limited APIs).
    provider_fetched_count: usize,
    /// Symbols where every source failed and a stale cache entry was used.
    stale_count: usize,
    /// Symbols that failed completely (no data).
    failed_count: usize,
    /// Latest candle date seen.
    latest_date: ?Date,

    /// Release the `prices` map. Call this unless ownership of the
    /// map has been transferred elsewhere.
    pub fn deinit(self: *LoadAllResult) void {
        self.prices.deinit();
    }
};
|
|
|
|
/// Aggregate progress hook for the parallel phases of price
/// loading. It receives running counts (completed / total) together
/// with the phase being reported, rather than per-symbol events.
pub const AggregateProgressCallback = struct {
    /// Opaque caller state, handed back as the first argument of `on_progress`.
    context: *anyopaque,
    on_progress: *const fn (ctx: *anyopaque, completed: usize, total: usize, phase: Phase) void,

    pub const Phase = enum {
        /// Local cache is being checked.
        cache_check,
        /// Syncing from ZFIN_SERVER.
        server_sync,
        /// Fetching from rate-limited providers.
        provider_fetch,
        /// All work finished.
        complete,
    };

    /// Invoke `on_progress` with the stored `context` and the given counts and phase.
    fn emit(self: AggregateProgressCallback, completed: usize, total: usize, phase: Phase) void {
        const notify = self.on_progress;
        notify(self.context, completed, total, phase);
    }
};
|
|
|
|
/// Counter backed by an atomic `usize`, used to track progress
/// from parallel workers. All accesses use `.monotonic` ordering.
const AtomicCounter = struct {
    value: std.atomic.Value(usize) = .init(0),

    /// Add one to the counter and return the value it held
    /// immediately before the increment.
    fn increment(self: *AtomicCounter) usize {
        const previous = self.value.fetchAdd(1, .monotonic);
        return previous;
    }

    /// Read the current count.
    fn load(self: *const AtomicCounter) usize {
        const current = self.value.load(.monotonic);
        return current;
    }
};
|
|
|
|
/// Outcome of one symbol's attempt in the parallel server sync.
const ServerSyncResult = struct {
    /// Symbol the sync was attempted for.
    symbol: []const u8,
    /// Whether the sync for this symbol succeeded.
    success: bool,
};
|
|
|
|
/// Load prices for portfolio and watchlist symbols.
///
/// Phases:
///   1. Local cache check. Symbols with a fresh candle cache are
///      resolved from the cached last close (skipped entirely when
///      `config.force_refresh` is set).
///   2. Server sync for the remaining symbols, run through
///      `parallelServerSync` when `config.server_url` is set. With
///      no server configured every remaining symbol goes to phase 3.
///   3. Sequential provider fetch for whatever the server could not
///      supply.
///
/// With `config.skip_network`, phases 2 and 3 are replaced by a
/// stale-cache lookup: a cached last close counts as stale, a miss
/// counts as failed.
///
/// Progress is reported via `aggregate_progress` for the aggregate
/// phases and via `symbol_progress` for the sequential provider
/// fetch. Allocation failures while building the work lists are
/// logged and the affected symbol is dropped from that list.
///
/// Keys of the returned `prices` map are the caller's symbol
/// slices, not copies.
pub fn loadAllPrices(
    self: *DataService,
    portfolio_syms: ?[]const []const u8,
    watch_syms: []const []const u8,
    config: LoadAllConfig,
    aggregate_progress: ?AggregateProgressCallback,
    symbol_progress: ?ProgressCallback,
) LoadAllResult {
    var result: LoadAllResult = .{
        .prices = std.StringHashMap(f64).init(self.allocator),
        .cached_count = 0,
        .server_synced_count = 0,
        .provider_fetched_count = 0,
        .stale_count = 0,
        .failed_count = 0,
        .latest_date = null,
    };

    // A missing portfolio list behaves like an empty one.
    const held_syms: []const []const u8 = portfolio_syms orelse &.{};
    const total_count = held_syms.len + watch_syms.len;
    if (total_count == 0) return result;

    // Combined work list: portfolio symbols first, then watchlist.
    var all_symbols = std.ArrayList([]const u8).initCapacity(self.allocator, total_count) catch return result;
    defer all_symbols.deinit(self.allocator);

    for (held_syms) |sym| {
        all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append portfolio sym({s}): {t}", .{ sym, err });
    }
    for (watch_syms) |sym| {
        all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append watch sym({s}): {t}", .{ sym, err });
    }

    // force_refresh does NOT wipe the candle cache. It flows through
    // to getCandles (via config.fetchOptions()), which ignores the
    // TTL and does an incremental top-up — see the
    // `--refresh-data=force` contract. The phase-1 fast path is
    // skipped on force_refresh so every symbol is re-validated
    // against the provider. A full wipe + re-download from scratch
    // is reserved for `cache clear`.

    // Phase 1: local cache (fast path).
    var needs_fetch: std.ArrayList([]const u8) = .empty;
    defer needs_fetch.deinit(self.allocator);

    if (aggregate_progress) |p| p.emit(0, total_count, .cache_check);

    for (all_symbols.items) |sym| {
        if (config.force_refresh or !self.isCandleCacheFresh(sym)) {
            needs_fetch.append(self.allocator, sym) catch |err| log.warn("loadAllPrices needs_fetch append({s}): {t}", .{ sym, err });
            continue;
        }
        if (self.getCachedLastClose(sym)) |close| {
            result.prices.put(sym, close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
            self.updateLatestDate(&result, sym);
        }
        // Counted as cached whenever the metadata is fresh, whether
        // or not a last close could be read.
        result.cached_count += 1;
    }

    if (aggregate_progress) |p| p.emit(result.cached_count, total_count, .cache_check);

    if (needs_fetch.items.len == 0) {
        if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
        return result;
    }

    // Offline mode: no server sync, no provider fetch. Symbols
    // without a fresh cache fall back to a stale cached close before
    // being counted as failed.
    if (config.skip_network) {
        for (needs_fetch.items) |sym| {
            const stale_close = self.getCachedLastClose(sym) orelse {
                result.failed_count += 1;
                continue;
            };
            result.prices.put(sym, stale_close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
            self.updateLatestDate(&result, sym);
            result.stale_count += 1;
        }
        if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
        return result;
    }

    // Phase 2: server sync (only when a server is configured).
    var server_failures: std.ArrayList([]const u8) = .empty;
    defer server_failures.deinit(self.allocator);

    if (self.config.server_url == null) {
        // No server — every remaining symbol needs a provider fetch.
        for (needs_fetch.items) |sym| {
            server_failures.append(self.allocator, sym) catch |err| log.warn("loadAllPrices server_failures append({s}): {t}", .{ sym, err });
        }
    } else {
        self.parallelServerSync(
            needs_fetch.items,
            &result,
            &server_failures,
            aggregate_progress,
            total_count,
        );
    }

    // Phase 3: sequential provider fetch for what the server missed.
    const pending = server_failures.items;
    if (pending.len > 0) {
        if (aggregate_progress) |p| {
            p.emit(result.cached_count + result.server_synced_count, total_count, .provider_fetch);
        }

        self.sequentialProviderFetch(
            pending,
            &result,
            symbol_progress,
            total_count - pending.len, // offset for progress display
            config.fetchOptions(),
        );
    }

    if (aggregate_progress) |p| p.emit(total_count, total_count, .complete);
    return result;
}
|
|
|
|
/// Fetch live intraday quotes for every entry of `symbols` concurrently
/// and return a symbol → last-price map.
///
/// A symbol whose quote fetch fails is left out of the map; callers are
/// expected to fall back to the last cached close for it.
///
/// Quotes are never cached: this function neither reads nor writes the
/// candle cache. It serves the TUI refresh key (`r`), which wants
/// "current prices" — a different job from candle-history maintenance
/// (TTL/startup) and from `--refresh-data=force` (incremental top-up).
///
/// Only Yahoo is consulted (unlike single-symbol `getQuote`, which
/// falls back Yahoo→TwelveData). Yahoo is keyless and has no shared
/// rate limiter, so each task can own its HTTP client; TwelveData's
/// shared limiter makes it unsafe to fan out.
///
/// Concurrency follows `parallelServerSync`: one task per symbol in a
/// single `std.Io.Group`, each constructing its own `Yahoo` client
/// because a shared `std.http.Client` is not safe across threads (see
/// `tryOneSync`). The `allocator` and `io` must therefore be
/// thread-safe, the same assumption the server-sync fan-out makes.
///
/// Ownership: map keys borrow from `symbols`, so `symbols` must outlive
/// the map. The caller must `deinit()` the returned map.
pub fn loadLiveQuotes(self: *DataService, symbols: []const []const u8) std.StringHashMap(f64) {
    var prices = std.StringHashMap(f64).init(self.allocator);
    if (symbols.len == 0) return prices;

    self.assertNetworkAllowed("loadLiveQuotes");

    // One result cell per symbol; each task writes only its own cell.
    const QuoteSlot = struct { symbol: []const u8, price: ?f64 = null };
    const slots = self.allocator.alloc(QuoteSlot, symbols.len) catch return prices;
    defer self.allocator.free(slots);
    for (slots, symbols) |*slot, sym| slot.* = .{ .symbol = sym };

    const worker = struct {
        fn run(io: std.Io, allocator: std.mem.Allocator, slot: *QuoteSlot) std.Io.Cancelable!void {
            try io.checkCancel();
            var yh = Yahoo.init(io, allocator);
            defer yh.deinit();
            // Only the close price is kept; the quote itself is not
            // retained, so there is nothing to free afterwards.
            const quote = yh.fetchQuote(allocator, slot.symbol) catch {
                slot.price = null;
                return;
            };
            slot.price = quote.close;
        }
    };

    var group: std.Io.Group = .init;
    for (slots) |*slot| group.async(self.io, worker.run, .{ self.io, self.allocator, slot });
    group.await(self.io) catch |err| log.debug("loadLiveQuotes group await: {t}", .{err});

    // Collect the cells that were filled in.
    for (slots) |slot| {
        const price = slot.price orelse continue;
        prices.put(slot.symbol, price) catch |err| log.warn("loadLiveQuotes put({s}): {t}", .{ slot.symbol, err });
    }
    return prices;
}
|
|
|
|
/// Sync candles for `symbols` from the server concurrently using a
/// single `std.Io.Group`.
///
/// One task is spawned per symbol. Scheduling and any concurrency limit
/// are left to the `std.Io` implementation; no worker cap or work queue
/// is layered on top.
///
/// Every task calls `io.checkCancel()` before syncing, so a cancelation
/// request stops not-yet-started work at task granularity.
///
/// Outputs:
/// - `result.prices`, `result.latest_date` and
///   `result.server_synced_count` are updated for each symbol whose
///   sync succeeded and whose cached close could then be read.
/// - Every other symbol is appended to `failures` for provider fallback.
/// - `aggregate_progress`, when set, receives `.server_sync` updates
///   while tasks run.
fn parallelServerSync(
    self: *DataService,
    symbols: []const []const u8,
    result: *LoadAllResult,
    failures: *std.ArrayList([]const u8),
    aggregate_progress: ?AggregateProgressCallback,
    total_count: usize,
) void {
    if (aggregate_progress) |p| p.emit(result.cached_count, total_count, .server_sync);

    // State shared with the tasks: a completion counter and one result
    // cell per symbol.
    var completed = AtomicCounter{};
    const sync_results = self.allocator.alloc(ServerSyncResult, symbols.len) catch {
        // Without result cells nothing can be synced; hand every symbol
        // to the provider fallback instead.
        for (symbols) |sym| failures.append(self.allocator, sym) catch |err| log.warn("parallelServerSync slots-alloc-fallback failures append({s}): {t}", .{ sym, err });
        return;
    };
    defer self.allocator.free(sync_results);

    for (sync_results, symbols) |*cell, sym| {
        cell.* = .{ .symbol = sym, .success = false };
    }

    const worker = struct {
        fn run(io: std.Io, svc: *DataService, slot: *ServerSyncResult, done: *AtomicCounter) std.Io.Cancelable!void {
            // Registered before the cancel check so that canceled tasks
            // still count as completed for the progress poll below.
            defer _ = done.increment();
            try io.checkCancel();
            slot.success = svc.syncCandlesFromServer(slot.symbol);
        }
    };

    // Every Group.async must be matched by a later Group.await/cancel;
    // the single await further down covers all spawned tasks.
    var group: std.Io.Group = .init;
    for (sync_results) |*cell| {
        group.async(self.io, worker.run, .{ self.io, self, cell, &completed });
    }

    // Poll the completion counter to drive progress while tasks run.
    if (aggregate_progress) |p| {
        while (completed.load() < symbols.len) {
            std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(50), .awake) catch |err| {
                log.debug("parallelServerSync progress-poll sleep interrupted: {t}", .{err});
                break;
            };
            p.emit(result.cached_count + completed.load(), total_count, .server_sync);
        }
    }

    // Join. After a cancelation, cells that did complete are still
    // processed below — they reflect successful cache writes.
    group.await(self.io) catch |err| {
        log.debug("parallelServerSync group await: {t}", .{err});
    };

    for (sync_results) |cell| {
        if (!cell.success) {
            failures.append(self.allocator, cell.symbol) catch |err| log.warn("syncFromServer fail-result failures append({s}): {t}", .{ cell.symbol, err });
            continue;
        }

        // Sync reported success: the price comes from the fresh cache.
        const close = self.getCachedLastClose(cell.symbol) orelse {
            // Reported success but the cache is unreadable — treat it
            // as a failure so the provider fallback gets a chance.
            failures.append(self.allocator, cell.symbol) catch |err| log.warn("syncFromServer success-but-no-cache failures append({s}): {t}", .{ cell.symbol, err });
            continue;
        };

        result.prices.put(cell.symbol, close) catch |err| log.warn("syncFromServer cache-after-sync put({s}): {t}", .{ cell.symbol, err });
        self.updateLatestDate(result, cell.symbol);
        result.server_synced_count += 1;
    }
}
|
|
|
|
/// Fetch candles from the provider, one symbol at a time, for symbols
/// that could not be served by the server sync.
///
/// Parameters:
/// - `symbols`: symbols to fetch, processed in order.
/// - `result`: accumulator; prices, `latest_date` and the
///   `provider_fetched_count` / `failed_count` / `stale_count` counters
///   are updated in place.
/// - `progress`: optional per-symbol callback; receives `.fetching`
///   before each fetch and then one of `.fetched`,
///   `.failed_used_stale` or `.failed`.
/// - `index_offset`: added to the loop index to form the display index
///   (and to `symbols.len` to form the display total).
/// - `opts`: forwarded unchanged to `getCandles`.
///
/// When the provider fetch fails, the last cached close (if any) is
/// used as a stale price. Such a symbol increments both `failed_count`
/// and `stale_count`.
fn sequentialProviderFetch(
    self: *DataService,
    symbols: []const []const u8,
    result: *LoadAllResult,
    progress: ?ProgressCallback,
    index_offset: usize,
    opts: FetchOptions,
) void {
    const total = index_offset + symbols.len;

    for (symbols, 0..) |sym, i| {
        const display_idx = index_offset + i;

        // Notify: about to fetch.
        if (progress) |p| p.emit(display_idx, total, sym, .fetching);

        const candle_result = self.getCandles(sym, opts) catch |err| {
            // Record why the provider failed instead of discarding the
            // error; the stale-cache fallback below is still best-effort.
            log.debug("loadAllPrices provider fetch({s}) failed: {t}", .{ sym, err });

            result.failed_count += 1;
            // NOTE(review): unlike the offline path in `loadAllPrices`,
            // this stale fallback does not call `updateLatestDate` —
            // confirm that is intended.
            if (self.getCachedLastClose(sym)) |close| {
                result.prices.put(sym, close) catch |put_err| log.warn("loadAllPrices stale-fallback put({s}): {t}", .{ sym, put_err });
                result.stale_count += 1;
                if (progress) |p| p.emit(display_idx, total, sym, .failed_used_stale);
            } else {
                if (progress) |p| p.emit(display_idx, total, sym, .failed);
            }
            continue;
        };
        defer self.allocator.free(candle_result.data);

        // An empty candle list still counts as a successful fetch; it
        // just contributes no price.
        if (candle_result.data.len > 0) {
            const last = candle_result.data[candle_result.data.len - 1];
            result.prices.put(sym, last.close) catch |err| log.warn("loadAllPrices candle-close put({s}): {t}", .{ sym, err });
            if (result.latest_date == null or last.date.days > result.latest_date.?.days) {
                result.latest_date = last.date;
            }
        }
        result.provider_fetched_count += 1;
        if (progress) |p| p.emit(display_idx, total, sym, .fetched);
    }
}
|
|
|
|
/// Advance `result.latest_date` to the `last_date` recorded in
/// `symbol`'s cached candle metadata when that date is newer (or when
/// no date has been recorded yet). Does nothing if the metadata cannot
/// be read.
fn updateLatestDate(self: *DataService, result: *LoadAllResult, symbol: []const u8) void {
    var cache_store = self.store();
    const candle_meta = cache_store.readCandleMeta(symbol) orelse return;
    const cached_last = candle_meta.meta.last_date;

    if (result.latest_date) |current| {
        // Keep the existing date unless the cached one is strictly later.
        if (cached_last.days <= current.days) return;
    }
    result.latest_date = cached_last;
}
|
|
|
|
// ── CUSIP Resolution ──────────────────────────────────────────
|
|
|
|
/// Resolve a batch of CUSIPs through OpenFIGI in a single request.
///
/// The returned slice is parallel to `cusips` (same length, same
/// order). The caller owns the slice and every string inside each
/// `CusipResult`. Any underlying failure is reported as
/// `DataError.FetchFailed`.
pub fn lookupCusips(self: *DataService, cusips: []const []const u8) DataError![]CusipResult {
    const api_key = self.config.openfigi_key;
    const results = OpenFigi.lookupCusips(self.io, self.allocator, cusips, api_key) catch {
        return DataError.FetchFailed;
    };
    return results;
}
|
|
|
|
/// A single CUSIP-to-ticker mapping record in the cache file.
///
/// Both fields default to the empty string; readers and writers in this
/// file skip any entry where either field is empty.
const CusipEntry = struct {
    /// CUSIP identifier; used as the lookup key.
    cusip: []const u8 = "",
    /// Ticker symbol the CUSIP resolves to.
    ticker: []const u8 = "",
};
|
|
|
|
/// In-memory CUSIP->ticker table backed by the bytes of
/// `cusip_tickers.srf`.
///
/// Zero-copy: map keys and values are slices into `backing`, the raw
/// file contents (parsed with `parse_allocator = .none`). No entry is
/// duplicated; the file buffer is the storage and is released together
/// with the hash table in `deinit`.
///
/// This is the L1 tier of CUSIP resolution, consulted before the server
/// or OpenFIGI.
pub const CusipTickerMap = struct {
    /// CUSIP -> ticker; both sides borrow from `backing`.
    map: std.StringHashMap([]const u8),
    /// Raw bytes of `cusip_tickers.srf`. `&.{}` when the file was
    /// missing or unreadable (freeing a zero-length slice is a no-op).
    backing: []const u8,

    /// Ticker for `cusip`, or null when the CUSIP is unknown.
    pub fn get(self: CusipTickerMap, cusip: []const u8) ?[]const u8 {
        return self.map.get(cusip);
    }

    /// Whether `cusip` has a mapping.
    pub fn contains(self: CusipTickerMap, cusip: []const u8) bool {
        return self.get(cusip) != null;
    }

    /// Number of distinct CUSIPs in the table.
    pub fn count(self: CusipTickerMap) u32 {
        return self.map.count();
    }

    /// Free the hash table and then the backing buffer. Both are
    /// released with the map's allocator, which is the allocator they
    /// were created with at load time — tying the two lifetimes together
    /// is the purpose of this wrapper.
    pub fn deinit(self: *CusipTickerMap) void {
        // Capture what is needed before the map is torn down.
        const file_bytes = self.backing;
        const allocator = self.map.allocator;
        self.map.deinit();
        allocator.free(file_bytes);
    }
};
|
|
|
|
/// Read `cusip_tickers.srf` from the cache directory into a
/// `CusipTickerMap` allocated with `allocator`.
///
/// The returned table owns the file bytes; release it with
/// `CusipTickerMap.deinit`.
///
/// A missing or unreadable file yields an empty table (the common
/// first-run case). A parse error part-way through returns whatever was
/// loaded up to that point. On duplicate CUSIPs the first occurrence
/// wins, which tolerates cache files written before `cacheCusipTicker`
/// learned to dedup.
///
/// The on-disk records are CUSIP-keyed (`cusip::X,ticker::Y`) and the
/// map is keyed the same way for O(1) forward lookup.
pub fn loadCusipTickerMap(self: *DataService, allocator: std.mem.Allocator) CusipTickerMap {
    var table: CusipTickerMap = .{
        .map = std.StringHashMap([]const u8).init(allocator),
        .backing = &.{},
    };

    const path = std.fs.path.join(allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return table;
    defer allocator.free(path);

    // Once assigned, the file bytes are the table's backing store: keys
    // and values below are slices into them (parse_allocator = .none
    // makes the parser borrow instead of copy). They are freed only by
    // `CusipTickerMap.deinit`, never here.
    table.backing = std.Io.Dir.cwd().readFileAlloc(self.io, path, allocator, .limited(4 * 1024 * 1024)) catch return table;

    var reader = std.Io.Reader.fixed(table.backing);
    var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return table;
    defer it.deinit();

    while (it.next() catch return table) |fields| {
        const entry = fields.to(CusipEntry, .{}) catch continue;
        if (entry.cusip.len == 0 or entry.ticker.len == 0) continue;

        // First occurrence wins. The borrowed slices are stored as-is;
        // they live in `backing`, so nothing is duplicated.
        const slot = table.map.getOrPut(entry.cusip) catch continue;
        if (slot.found_existing) continue;
        slot.value_ptr.* = entry.ticker;
    }
    return table;
}
|
|
|
|
/// Append CUSIP->ticker mappings to `cusip_tickers.srf`.
///
/// Entries are skipped when either field is empty, when the CUSIP is
/// already on disk, or when the CUSIP repeats within `entries`. The
/// whole batch costs one read and one atomic write.
///
/// The file is rewritten (read, concatenate, atomic write) rather than
/// opened for append, so a concurrent reader never sees a valid header
/// followed by a partial trailing record — see `cache/store.zig
/// appendRaw` for the same pattern. `#!srfv1` directives are emitted
/// only when the existing content is empty.
///
/// If the existing content does not end with a newline, one is inserted
/// before the new records so the first new record cannot be glued onto
/// the last existing one.
///
/// Best-effort: allocation and I/O failures abandon the write (logging
/// where the original did) rather than returning an error.
fn appendCusipEntries(self: *DataService, entries: []const CusipEntry) void {
    if (entries.len == 0) return;

    // One load provides both the dedup set and the existing bytes to
    // concatenate (`backing`). Missing/empty file → empty map + empty
    // backing → directives emitted below.
    // NOTE(review): an existing file that fails to read also yields an
    // empty backing here and would be replaced by just the new entries —
    // confirm that is acceptable.
    var existing_map = self.loadCusipTickerMap(self.allocator);
    defer existing_map.deinit();
    const existing = existing_map.backing;

    // Keep only entries new to the file and unique within the batch.
    var seen = std.StringHashMap(void).init(self.allocator);
    defer seen.deinit();
    var to_write: std.ArrayList(CusipEntry) = .empty;
    defer to_write.deinit(self.allocator);
    for (entries) |e| {
        if (e.cusip.len == 0 or e.ticker.len == 0) continue;
        if (existing_map.contains(e.cusip)) continue;
        const gop = seen.getOrPut(e.cusip) catch continue;
        if (gop.found_existing) continue;
        to_write.append(self.allocator, e) catch continue;
    }
    if (to_write.items.len == 0) return;

    const path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return;
    defer self.allocator.free(path);
    if (std.fs.path.dirnamePosix(path)) |dir| {
        std.Io.Dir.cwd().createDirPath(self.io, dir) catch |err| log.warn("cusip-cache createDirPath({s}): {t}", .{ dir, err });
    }

    const emit_directives = existing.len == 0;
    var aw: std.Io.Writer.Allocating = .init(self.allocator);
    defer aw.deinit();
    aw.writer.print("{f}", .{srf.fmt(CusipEntry, to_write.items, .{ .emit_directives = emit_directives })}) catch return;
    const encoded = aw.writer.buffered();
    if (encoded.len == 0) return;

    // Guard against existing content whose last record has no trailing
    // newline: without a separator the first new record would be
    // appended to that line and corrupt both records.
    const needs_separator = existing.len > 0 and existing[existing.len - 1] != '\n';
    const separator_len: usize = @intFromBool(needs_separator);

    // Concat existing + (separator) + new, then atomic-write.
    const combined = self.allocator.alloc(u8, existing.len + separator_len + encoded.len) catch return;
    defer self.allocator.free(combined);
    @memcpy(combined[0..existing.len], existing);
    if (needs_separator) combined[existing.len] = '\n';
    @memcpy(combined[existing.len + separator_len ..], encoded);

    atomic.writeFileAtomic(self.io, self.allocator, path, combined) catch |err| log.warn("cusip-cache writeFileAtomic({s}): {t}", .{ path, err });
}
|
|
|
|
/// Record one CUSIP->ticker mapping in the cache file. Delegates to
/// `appendCusipEntries`, so the write is dedup-aware. Used by the
/// `lookup` command's single-CUSIP path.
pub fn cacheCusipTicker(self: *DataService, cusip: []const u8, ticker: []const u8) void {
    const single = [_]CusipEntry{.{ .cusip = cusip, .ticker = ticker }};
    self.appendCusipEntries(&single);
}
|
|
|
|
/// Resolve `cusips` to tickers through a three-tier cascade and persist
/// anything newly learned to `cusip_tickers.srf` (union policy: the
/// local file accumulates every mapping it ever learns and converges
/// toward the shared server set).
///
/// Tiers, cheapest first:
///   L1  local `cusip_tickers.srf`          (always; no network)
///   L2  server `GET /cusips` whole-file    (if a server URL is configured)
///   L3  OpenFIGI batch lookup              (whatever still misses)
///
/// `skip_network = true` limits resolution to L1 — for offline mode
/// (`--refresh-data=never`). L2, L3 and the persist-back are skipped;
/// cached CUSIPs still resolve and uncached ones stay unresolved.
///
/// Best-effort: network failures mean fewer CUSIPs resolved, never an
/// error. The result is a zero-copy view over the (possibly
/// just-rewritten) local file. Callers resolve forward, per holding, by
/// looking each holding's CUSIP up in the returned map.
///
/// Empty or duplicate entries in `cusips` are ignored. The caller owns
/// the returned map and must `deinit` it; pass a scratch allocator to
/// scope it to one command invocation.
pub fn resolveCusips(self: *DataService, allocator: std.mem.Allocator, cusips: []const []const u8, skip_network: bool) CusipTickerMap {
    var local = self.loadCusipTickerMap(allocator);

    // Offline mode serves L1 only.
    if (skip_network) return local;
    // Warm-cache fast path: nothing missing means no scratch, no
    // network and no rewrite.
    if (!anyMissing(local, cusips)) return local;

    // Scratch arena for newly learned entries, so their lifetime is
    // independent of the server body / OpenFIGI buffers freed below.
    var scratch = std.heap.ArenaAllocator.init(self.allocator);
    defer scratch.deinit();
    const arena = scratch.allocator();
    var learned = std.StringHashMap([]const u8).init(arena); // cusip -> ticker

    // L2: whole-file sync from the server. Until the `GET /cusips`
    // route exists this is a no-op (a 404 surfaces as NotFound from
    // client.get); once it lands it is purely additive. The server is
    // expected to serve the file through its existing
    // `handleStaticSrfFile` machinery (same shape as
    // `/_edgar/tickers_funds`).
    if (self.config.server_url) |server_url| {
        if (self.fetchServerCusips(server_url)) |body| {
            defer self.allocator.free(body);
            mergeCusipBody(arena, &learned, local, body);
        }
    }

    // L3: OpenFIGI for whatever is still unresolved.
    self.mintMissingViaOpenFigi(arena, &learned, local, cusips);

    const learned_count = learned.count();
    if (learned_count == 0) return local; // nothing new learned

    // Persist the union, then reload so the caller gets a clean
    // single-buffer zero-copy view over the updated file.
    var pending: std.ArrayList(CusipEntry) = .empty;
    // Reserving up front makes the collection loop infallible. On OOM,
    // skip persistence and return the L1 view: some CUSIPs stay
    // unresolved this run instead of erroring.
    pending.ensureTotalCapacity(arena, learned_count) catch return local;
    var learned_it = learned.iterator();
    while (learned_it.next()) |kv| {
        pending.appendAssumeCapacity(.{ .cusip = kv.key_ptr.*, .ticker = kv.value_ptr.* });
    }
    self.appendCusipEntries(pending.items);

    local.deinit();
    return self.loadCusipTickerMap(allocator);
}
|
|
|
|
/// Report whether `cusips` contains at least one non-empty CUSIP that
/// `map` has no entry for. Empty strings are ignored.
fn anyMissing(map: CusipTickerMap, cusips: []const []const u8) bool {
    for (cusips) |cusip| {
        const is_blank = cusip.len == 0;
        if (!is_blank and !map.contains(cusip)) return true;
    }
    return false;
}
|
|
|
|
/// Parse `body` (a CUSIP->ticker SRF document, as served by
/// `GET /cusips`) and add its mappings to `out`.
///
/// A record is skipped when either field is empty or when its CUSIP is
/// already in `have` or `out`. Accepted strings are copied into
/// `arena`, so `body` may be freed afterwards. Parsing stops silently
/// at the first iterator error; records merged before it are kept.
///
/// Performs no I/O, which lets it be unit-tested with fixture bytes
/// (the live L2 path cannot be exercised until the server route exists).
fn mergeCusipBody(arena: std.mem.Allocator, out: *std.StringHashMap([]const u8), have: CusipTickerMap, body: []const u8) void {
    var reader = std.Io.Reader.fixed(body);
    var it = srf.iterator(&reader, arena, .{ .parse_allocator = .none }) catch return;
    defer it.deinit();

    while (it.next() catch return) |fields| {
        const entry = fields.to(CusipEntry, .{}) catch continue;

        const usable = entry.cusip.len != 0 and entry.ticker.len != 0;
        if (!usable) continue;

        const already_known = have.contains(entry.cusip) or out.contains(entry.cusip);
        if (already_known) continue;

        // Copy out of `body` so the mapping outlives the response buffer.
        const cusip_copy = arena.dupe(u8, entry.cusip) catch continue;
        const ticker_copy = arena.dupe(u8, entry.ticker) catch continue;
        out.put(cusip_copy, ticker_copy) catch continue;
    }
}
|
|
|
|
/// L2 seam: download the whole CUSIP->ticker map from the server with
/// `GET {server_url}/cusips`.
///
/// Returns a copy of the raw SRF body, which the caller frees with
/// `self.allocator`, or null on any failure (URL allocation, request
/// error, body rejected by `looksCompleteSrf`, or copy allocation).
///
/// Best-effort: there is no retry and no torn-body archival (this is a
/// shared reference file, not per-symbol cache). A bad or absent
/// response simply leaves resolution to the OpenFIGI tier.
fn fetchServerCusips(self: *DataService, server_url: []const u8) ?[]u8 {
    const endpoint = std.fmt.allocPrint(self.allocator, "{s}/cusips", .{server_url}) catch return null;
    defer self.allocator.free(endpoint);

    var client = http.Client.init(self.io, self.allocator);
    defer client.deinit();

    var response = client.get(endpoint) catch |err| {
        log.debug("cusips server sync failed: {s}", .{@errorName(err)});
        return null;
    };
    defer response.deinit();

    const body = response.body;
    if (cache.Store.looksCompleteSrf(body)) {
        // The response buffer is released on return, so hand back a copy.
        return self.allocator.dupe(u8, body) catch null;
    }

    log.debug("cusips server response not complete SRF ({d} bytes) — ignoring", .{body.len});
    return null;
}
|
|
|
|
/// L3: look up still-unresolved CUSIPs through OpenFIGI and record the
/// hits in `out`.
///
/// The lookup set is `cusips` minus empty strings, minus anything
/// already in `have` or `out`, and de-duplicated against itself.
/// Requests are sent in batches of at most 100 (OpenFIGI's per-request
/// job limit). Keys and values stored in `out` are copied into `arena`.
///
/// Best-effort: a failed batch is logged and skipped, and the remaining
/// batches still run.
fn mintMissingViaOpenFigi(self: *DataService, arena: std.mem.Allocator, out: *std.StringHashMap([]const u8), have: CusipTickerMap, cusips: []const []const u8) void {
    // Both collections live in `arena`; its owner releases them.
    var queued = std.StringHashMap(void).init(arena);
    var to_lookup: std.ArrayList([]const u8) = .empty;
    for (cusips) |cusip| {
        if (cusip.len == 0) continue;
        if (have.contains(cusip) or out.contains(cusip)) continue;
        const slot = queued.getOrPut(cusip) catch continue;
        if (slot.found_existing) continue;
        to_lookup.append(arena, cusip) catch continue;
    }
    if (to_lookup.items.len == 0) return;

    const batch_size = 100; // OpenFIGI accepts up to 100 jobs/request.
    var remaining: []const []const u8 = to_lookup.items;
    while (remaining.len > 0) {
        // Peel the next batch off the front before issuing the request,
        // so a failed batch is skipped rather than retried.
        const take: usize = @min(batch_size, remaining.len);
        const batch = remaining[0..take];
        remaining = remaining[take..];

        const figi = self.lookupCusips(batch) catch |err| {
            log.warn("resolveCusips: OpenFIGI lookup of {d} CUSIP(s) failed: {s}", .{ batch.len, @errorName(err) });
            continue;
        };
        // Results and their strings are owned by us; free them once
        // this batch has been copied into `arena`.
        defer {
            for (figi) |r| {
                if (r.ticker) |t| self.allocator.free(t);
                if (r.name) |n| self.allocator.free(n);
                if (r.security_type) |s| self.allocator.free(s);
            }
            self.allocator.free(figi);
        }

        // Results are parallel to `batch` (same length + order).
        for (figi, 0..) |hit, i| {
            if (!hit.found) continue;
            const ticker = hit.ticker orelse continue;
            const cusip_copy = arena.dupe(u8, batch[i]) catch continue;
            const ticker_copy = arena.dupe(u8, ticker) catch continue;
            out.put(cusip_copy, ticker_copy) catch continue;
        }
    }
}
|
|
|
|
// ── Utility ──────────────────────────────────────────────────
|
|
|
|
/// Pause before retrying after a rate-limit error.
///
/// When `self.td` is set, its rate limiter's `backoff()` decides the
/// wait; otherwise this sleeps for a fixed 10 seconds. An interrupted
/// sleep is logged at debug level and otherwise ignored.
fn rateLimitBackoff(self: *DataService) void {
    const td = if (self.td) |*provider| provider else {
        std.Io.sleep(self.io, std.Io.Duration.fromSeconds(10), .awake) catch |err| log.debug("rate-limit backoff sleep interrupted: {t}", .{err});
        return;
    };
    td.rate_limiter.backoff();
}
|
|
|
|
// ── Server sync ──────────────────────────────────────────────
|
|
|
|
/// Sync one cache file for `symbol` from the configured zfin-server.
///
/// Returns true when the file was fetched, validated and written to the
/// local cache; false on any failure. Also returns false, without any
/// network activity, when no server is configured or when `data_type`
/// is one the server does not serve.
///
/// Up to two attempts are made, with a 250 ms pause before the second.
/// An attempt is retried when it fails at the HTTP layer or yields a
/// torn body (integrity mismatch / `looksCompleteSrf` rejection).
/// Rationale: refreshes fan out many symbols in parallel and the tears
/// observed so far look transient per connection, so one retry absorbs
/// brief hiccups without stretching refresh time much. If the retry
/// also fails, the torn-body archive gains a second capture from the
/// same refresh, which is the most useful diagnostic signal available.
fn syncFromServer(self: *DataService, symbol: []const u8, data_type: cache.DataType) bool {
    const server_url = self.config.server_url orelse return false;

    const endpoint = switch (data_type) {
        .candles_daily => "/candles",
        .candles_meta => "/candles_meta",
        .dividends => "/dividends",
        .earnings => "/earnings",
        .options => "/options",
        .splits => "/splits",
        .classification => "/classification",
        .etf_metrics => "/etf_metrics",
        .entity_facts => "/entity_facts",
        // Not served. `.tickers_funds` / `.tickers_companies` are
        // provider-internal ticker-map indexes: clients fetch them
        // directly from the SEC and the DataService caches the JSON via
        // `Store`, so the server has no role.
        .meta, .tickers_funds, .tickers_companies => return false,
    };

    const full_url = std.fmt.allocPrint(self.allocator, "{s}/{s}{s}", .{ server_url, symbol, endpoint }) catch return false;
    defer self.allocator.free(full_url);

    const max_attempts: u8 = 2;
    const retry_delay_ms: u64 = 250;

    for (0..max_attempts) |attempt| {
        const is_retry = attempt > 0;
        if (is_retry) {
            log.debug(
                "{s}: retrying {s} server sync (attempt {d}/{d}) after {d}ms delay",
                .{ symbol, @tagName(data_type), attempt + 1, max_attempts, retry_delay_ms },
            );
            std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(retry_delay_ms), .awake) catch |err| log.debug("syncFromServer retry-delay sleep interrupted: {t}", .{err});
        }

        const outcome = self.tryOneSync(symbol, data_type, full_url);
        switch (outcome) {
            .ok => return true,
            // Torn body or network error: go around again if an
            // attempt remains.
            .torn, .net_err => {},
        }
    }
    return false;
}
|
|
|
|
/// Outcome of a single `tryOneSync` attempt: `.ok` when the body was
/// written to the cache, `.torn` when the body was rejected as
/// incomplete or mismatching, `.net_err` when the request or the cache
/// write failed.
const SyncAttempt = enum { ok, torn, net_err };
|
|
|
|
/// Make exactly one attempt to download `full_url` and store it as
/// `symbol`'s `data_type` cache file.
///
/// Returns:
/// - `.ok` when the body passed validation and was written to cache.
/// - `.torn` when the body failed the ETag integrity check or the
///   `looksCompleteSrf` check; the body is archived and nothing is
///   written to cache.
/// - `.net_err` when the HTTP request failed or the cache write failed.
///
/// Never retries; the caller decides whether to try again.
fn tryOneSync(self: *DataService, symbol: []const u8, data_type: cache.DataType, full_url: []const u8) SyncAttempt {
    // Milliseconds elapsed since `since_ns` on the monotonic `.awake`
    // clock, which avoids spurious negatives on clock skew.
    const Elapsed = struct {
        fn ms(io: std.Io, since_ns: anytype) @TypeOf(since_ns) {
            return @divTrunc(std.Io.Timestamp.now(io, .awake).nanoseconds - since_ns, std.time.ns_per_ms);
        }
    };

    // Per-attempt start/finish trace. "started" is logged before any
    // blocking call and "finished" on every exit path, so a sync that
    // wedges in `client.get` shows a started line with no matching
    // finished line — that identifies the stuck symbols. Combine with
    // the per-stage `http: stage=...` lines from `net/http.zig` to see
    // which transport stage stalled.
    const t_start = std.Io.Timestamp.now(self.io, .awake).nanoseconds;
    const kind = @tagName(data_type);
    log.debug("{s}: tryOneSync started ({s})", .{ symbol, kind });

    var client = http.Client.init(self.io, self.allocator);
    defer client.deinit();

    var response = client.get(full_url) catch |err| {
        const elapsed_ms = Elapsed.ms(self.io, t_start);
        // Deliberately at warn level: network-shaped failures
        // (`NoAddressReturned`, `ConnectionRefused`,
        // `TlsInitializationFailed`, ...) are what an operator needs to
        // see when sync stops working. At debug level a DNS-truncation
        // bug was invisible without debug logging and cost hours of
        // diagnosis.
        log.warn("{s}: server sync failed for {s}: {s} (elapsed_ms={d})", .{ symbol, kind, @errorName(err), elapsed_ms });
        log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, kind, elapsed_ms });
        return .net_err;
    };
    defer response.deinit();

    // Integrity check. When the server advertised an ETag of the form
    // `"sha256:<hex>"`, the body's sha256 is compared against it. This
    // catches mid-stream truncation that std.http.Client.fetch accepts
    // silently on the Content-Length path (EndOfStream from a cut
    // transport is treated as normal termination). A mismatching body
    // is archived together with the advertised etag for post-mortem.
    // Responses with no ETag, or a non-sha256 one, fall through to
    // `looksCompleteSrf` below (compatible with older servers).
    switch (response.verifyIntegrity()) {
        .ok, .not_applicable => {},
        .mismatch => |m| {
            cache.Store.archiveTornBody(
                self.io,
                self.allocator,
                self.config.cache_dir,
                symbol,
                data_type,
                response.body,
                .{
                    .failure_reason = .etag_mismatch,
                    .http_status = @intFromEnum(response.status),
                    .server_url = full_url,
                    .server_etag = response.etag,
                },
            ) catch |err| {
                log.debug(
                    "{s}: failed to archive etag-mismatch {s} body: {s}",
                    .{ symbol, kind, @errorName(err) },
                );
            };
            log.debug(
                "{s}: {s} server response failed integrity check ({d} bytes, expected sha256={s}, actual={s}) — archived under _torn/, not writing to cache",
                .{ symbol, kind, response.body.len, m.expected_hex, m.actual_hex },
            );
            log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, kind, Elapsed.ms(self.io, t_start) });
            return .torn;
        },
    }

    // Structural check: the body must look like a complete SRF file
    // before it is written to cache. This guards against HTTP body
    // truncation (TCP reset, Content-Length mismatch, a proxy flushing
    // a partial response, ...). Without it a torn body would be written
    // atomically and surface on the next read as the classic SRF parse
    // error:
    //   error(srf): custom parse of value YYYY-MM failed : InvalidDateFormat
    //
    // A rejected body is archived, raw bytes plus context, under
    // `{cache_dir}/_torn/` for root-cause analysis. The log line stays
    // at debug level on purpose so routine rejections are not noisy in
    // production; the `.meta` sidecar on disk is the durable signal.
    if (!cache.Store.looksCompleteSrf(response.body)) {
        cache.Store.archiveTornBody(
            self.io,
            self.allocator,
            self.config.cache_dir,
            symbol,
            data_type,
            response.body,
            .{
                .failure_reason = .looks_complete_srf_failed,
                .http_status = @intFromEnum(response.status),
                .server_url = full_url,
                .server_etag = response.etag,
            },
        ) catch |err| {
            log.debug(
                "{s}: failed to archive torn {s} body: {s}",
                .{ symbol, kind, @errorName(err) },
            );
        };
        log.debug(
            "{s}: rejecting torn {s} server response ({d} bytes) — archived under _torn/, not writing to cache",
            .{ symbol, kind, response.body.len },
        );
        log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, kind, Elapsed.ms(self.io, t_start) });
        return .torn;
    }

    // Validated: persist to the local cache. A failed write is reported
    // as `.net_err`, which the caller treats as retryable.
    var cache_store = self.store();
    cache_store.writeRaw(symbol, data_type, response.body) catch |err| {
        log.debug("{s}: failed to write synced {s} to cache: {s}", .{ symbol, kind, @errorName(err) });
        log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, kind, Elapsed.ms(self.io, t_start) });
        return .net_err;
    };

    log.debug("{s}: synced {s} from server ({d} bytes)", .{ symbol, kind, response.body.len });
    log.debug("{s}: tryOneSync finished ({s}) result=ok elapsed_ms={d}", .{ symbol, kind, Elapsed.ms(self.io, t_start) });
    return .ok;
}
|
|
|
|
/// Pull both candle artifacts for `symbol` from the server: the daily
/// series (`.candles_daily`) and its companion metadata (`.candles_meta`).
///
/// Both requests are always issued, even when the first one fails.
/// Returns true only when both syncs report success.
fn syncCandlesFromServer(self: *DataService, symbol: []const u8) bool {
    const daily_ok = self.syncFromServer(symbol, .candles_daily);
    const meta_ok = self.syncFromServer(symbol, .candles_meta);
    if (!daily_ok) return false;
    return meta_ok;
}
|
|
|
|
/// Heuristic mutual-fund detector: a ticker counts as a mutual fund when it
/// is exactly five bytes long and its final byte is an uppercase 'X'
/// (e.g. FDSCX, VSTCX, FAGIX).
///
/// Such symbols have no quarterly earnings, so callers use this to skip the
/// earnings fetch instead of round-tripping to a provider for an empty reply.
fn isMutualFund(symbol: []const u8) bool {
    if (symbol.len != 5) return false;
    return symbol[symbol.len - 1] == 'X';
}
|
|
|
|
// ── User config files ─────────────────────────────────────────
|
|
|
|
/// Read `accounts.srf` from the directory that contains `portfolio_path`
/// and parse it into an `AccountMap`.
///
/// The path and the raw file bytes are temporary and use the service's own
/// allocator; the parsed map is built with the caller-supplied `allocator`.
/// The file is read with a 1 MiB limit.
///
/// Returns null when the path cannot be built, the file cannot be read, or
/// parsing fails. On success the caller owns the returned map and must call
/// `deinit()` on it.
pub fn loadAccountMap(self: *DataService, allocator: std.mem.Allocator, portfolio_path: []const u8) ?analysis.AccountMap {
    // Length of the directory prefix, including the trailing separator.
    // A bare file name (no separator) yields an empty prefix.
    const prefix_len: usize = blk: {
        const last_sep = std.mem.lastIndexOfScalar(u8, portfolio_path, std.fs.path.sep) orelse break :blk 0;
        break :blk last_sep + 1;
    };

    const sibling_path = std.fmt.allocPrint(
        self.allocator,
        "{s}accounts.srf",
        .{portfolio_path[0..prefix_len]},
    ) catch return null;
    defer self.allocator.free(sibling_path);

    const contents = std.Io.Dir.cwd().readFileAlloc(
        self.io,
        sibling_path,
        self.allocator,
        .limited(1024 * 1024),
    ) catch return null;
    defer self.allocator.free(contents);

    return analysis.parseAccountsFile(allocator, contents) catch null;
}
|
|
|
|
/// Read `transaction_log.srf` from the directory that contains
/// `portfolio_path` and parse it into a `TransactionLog`.
///
/// All allocations, including the parsed log, use the service's allocator.
/// The file is read with a 1 MiB limit.
///
/// Returns null when the path cannot be built, the file cannot be read, or
/// parsing fails. In that case the contributions pipeline falls back to the
/// pre-transaction-log behavior (no transfer netting).
///
/// On success the caller owns the returned `TransactionLog` and must call
/// `deinit()` on it.
pub fn loadTransferLog(self: *DataService, portfolio_path: []const u8) ?transaction_log.TransactionLog {
    // Length of the directory prefix, including the trailing separator.
    // A bare file name (no separator) yields an empty prefix.
    const prefix_len: usize = blk: {
        const last_sep = std.mem.lastIndexOfScalar(u8, portfolio_path, std.fs.path.sep) orelse break :blk 0;
        break :blk last_sep + 1;
    };

    const log_path = std.fmt.allocPrint(
        self.allocator,
        "{s}transaction_log.srf",
        .{portfolio_path[0..prefix_len]},
    ) catch return null;
    defer self.allocator.free(log_path);

    const contents = std.Io.Dir.cwd().readFileAlloc(
        self.io,
        log_path,
        self.allocator,
        .limited(1024 * 1024),
    ) catch return null;
    defer self.allocator.free(contents);

    return transaction_log.parseTransactionLogFile(self.allocator, contents) catch null;
}
|
|
};
|
|
|
|
// ── Tests ─────────────────────────────────────────────────────────
|
|
|
|
test "isPermanentProviderFailure: NotFound is permanent" {
    // NotFound must be classified as a permanent failure.
    const permanent = isPermanentProviderFailure(error.NotFound);
    try std.testing.expect(permanent);
}
|
|
|
|
test "isPermanentProviderFailure: RequestFailed is transient" {
    // RequestFailed must not be classified as permanent.
    const permanent = isPermanentProviderFailure(error.RequestFailed);
    try std.testing.expect(!permanent);
}
|
|
|
|
test "isPermanentProviderFailure: ServerError is transient" {
    // ServerError must not be classified as permanent.
    const permanent = isPermanentProviderFailure(error.ServerError);
    try std.testing.expect(!permanent);
}
|
|
|
|
test "isPermanentProviderFailure: Unauthorized is transient" {
    // An auth misconfiguration is something the user can fix (by setting
    // the API key), so it must not permanently suppress retries.
    const permanent = isPermanentProviderFailure(error.Unauthorized);
    try std.testing.expect(!permanent);
}
|
|
|
|
test "isPermanentProviderFailure: InvalidResponse is transient" {
    // A parse error usually means the provider changed its format or sent
    // a one-off garbage response; retrying later is fine.
    const permanent = isPermanentProviderFailure(error.InvalidResponse);
    try std.testing.expect(!permanent);
}
|
|
|
|
test "isPermanentProviderFailure: PaymentRequired is transient" {
    // FMP signals plan-locked symbols with HTTP 402. The user can upgrade
    // the plan or rotate providers, so the cache must not be poisoned.
    const permanent = isPermanentProviderFailure(error.PaymentRequired);
    try std.testing.expect(!permanent);
}
|
|
|
|
test "isPermanentProviderFailure: RateLimited is transient" {
    // Rate limiting is the textbook transient failure; the caller already
    // treats it specially with backoff + retry.
    const permanent = isPermanentProviderFailure(error.RateLimited);
    try std.testing.expect(!permanent);
}
|
|
|
|
test "isMutualFund identifies mutual funds" {
    // Five-letter tickers ending in X: every one must be recognised.
    const funds = [_][]const u8{ "FDSCX", "VSTCX", "FAGIX", "VFINX" };
    for (funds) |ticker| {
        try std.testing.expect(DataService.isMutualFund(ticker));
    }

    // Everything below must be rejected.
    const non_funds = [_][]const u8{
        "AAPL",
        "VTI",
        "SPY",
        "GOOGL",
        "", // empty
        "X", // too short
        "FDSCA", // five letters, but the last one is not X
        "FDSCXA", // six letters, ends in A
    };
    for (non_funds) |ticker| {
        try std.testing.expect(!DataService.isMutualFund(ticker));
    }
}
|
|
|
|
test "DataService init/deinit lifecycle" {
    const cfg: Config = .{ .cache_dir = "/tmp/zfin-test-cache" };
    var ds = DataService.init(std.testing.io, std.testing.allocator, cfg);
    defer ds.deinit();

    // The config handed to init is readable back through the service.
    try std.testing.expectEqualStrings("/tmp/zfin-test-cache", ds.config.cache_dir);

    // Providers are created lazily, so every slot checked here is still null.
    try std.testing.expect(ds.td == null);
    try std.testing.expect(ds.pg == null);
    try std.testing.expect(ds.fmp == null);
    try std.testing.expect(ds.yh == null);
    try std.testing.expect(ds.tg == null);
}
|
|
|
|
test "DataService store helper creates valid store" {
    const cfg: Config = .{ .cache_dir = "/tmp/zfin-test-cache" };
    var ds = DataService.init(std.testing.io, std.testing.allocator, cfg);
    defer ds.deinit();

    // The store handed out by the service points at the configured cache dir.
    const cache_store = ds.store();
    try std.testing.expectEqualStrings("/tmp/zfin-test-cache", cache_store.cache_dir);
}
|
|
|
|
test "DataService getProvider returns NoApiKey without key" {
    // Only cache_dir is set: no provider API keys are configured.
    const cfg: Config = .{ .cache_dir = "/tmp/zfin-test-cache" };
    var ds = DataService.init(std.testing.io, std.testing.allocator, cfg);
    defer ds.deinit();

    // TwelveData and Polygon both need an API key, so both must refuse.
    try std.testing.expectError(DataError.NoApiKey, ds.getProvider(TwelveData));
    try std.testing.expectError(DataError.NoApiKey, ds.getProvider(Polygon));

    // Yahoo needs no API key, so whatever comes back must not be NoApiKey.
    const yahoo_result = ds.getProvider(Yahoo);
    try std.testing.expect(yahoo_result != error.NoApiKey);
}
|
|
|
|
test "DataService getProvider initializes provider with key" {
    const cfg: Config = .{
        .cache_dir = "/tmp/zfin-test-cache",
        .tiingo_key = "test-tiingo-key",
    };
    var ds = DataService.init(std.testing.io, std.testing.allocator, cfg);
    defer ds.deinit();

    // The first request populates the Tiingo slot on the service.
    const first = try ds.getProvider(Tiingo);
    try std.testing.expect(ds.tg != null);

    // A repeat request hands back the very same instance.
    const second = try ds.getProvider(Tiingo);
    try std.testing.expect(first == second);
}
|
|
|
|
test "DataService LoadAllResult default values" {
    // Build a result with every counter zeroed and an empty price map.
    var empty_result: DataService.LoadAllResult = .{
        .prices = std.StringHashMap(f64).init(std.testing.allocator),
        .cached_count = 0,
        .server_synced_count = 0,
        .provider_fetched_count = 0,
        .stale_count = 0,
        .failed_count = 0,
        .latest_date = null,
    };
    defer empty_result.deinit();

    try std.testing.expectEqual(@as(usize, 0), empty_result.prices.count());
}
|
|
|
|
test "FetchResult type construction" {
    // FetchResult must instantiate for more than one payload type.
    const from_cache: FetchResult(Candle) = .{
        .data = &.{},
        .source = .cached,
        .timestamp = 0,
        .allocator = std.testing.allocator,
    };
    try std.testing.expect(from_cache.source == .cached);

    const from_provider: FetchResult(Dividend) = .{
        .data = &.{},
        .source = .fetched,
        .timestamp = 12345,
        .allocator = std.testing.allocator,
    };
    try std.testing.expect(from_provider.source == .fetched);
    try std.testing.expectEqual(@as(i64, 12345), from_provider.timestamp);
}
|
|
|
|
test "FetchOptions default is fully permissive" {
    // A default-initialized FetchOptions leaves both restrictions off.
    const defaults: FetchOptions = .{};
    try std.testing.expect(!defaults.skip_network);
    try std.testing.expect(!defaults.force_refresh);
}
|
|
|
|
test "LoadAllConfig.fetchOptions maps fields through" {
    // Case 1: force_refresh on, skip_network explicitly off.
    const refresh_cfg: DataService.LoadAllConfig = .{
        .force_refresh = true,
        .skip_network = false,
    };
    const refresh_opts = refresh_cfg.fetchOptions();
    try std.testing.expect(refresh_opts.force_refresh);
    try std.testing.expect(!refresh_opts.skip_network);

    // Case 2: only skip_network set; force_refresh keeps its default.
    const offline_cfg: DataService.LoadAllConfig = .{
        .skip_network = true,
    };
    const offline_opts = offline_cfg.fetchOptions();
    try std.testing.expect(offline_opts.skip_network);
    try std.testing.expect(!offline_opts.force_refresh);
}
|
|
|
|
test "getCandles offline mode returns cached data without network" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    // Service whose cache lives in the scratch directory.
    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Seed two daily candles through the Store API.
    var seed_store = ds.store();
    var bars = [_]Candle{
        .{ .date = Date.fromYmd(2026, 5, 19), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
        .{ .date = Date.fromYmd(2026, 5, 20), .open = 104, .high = 106, .low = 103, .close = 105, .adj_close = 105, .volume = 1100 },
    };
    seed_store.cacheCandles("TEST", &bars, .tiingo, 0);

    // Arm the test guard: from here on any network attempt panics. The
    // offline path is expected never to trip it.
    ds.panic_on_network_attempt = true;

    const fetched = try ds.getCandles("TEST", .{ .skip_network = true });
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 2), fetched.data.len);
    try std.testing.expect(fetched.data[0].date.eql(Date.fromYmd(2026, 5, 19)));
    try std.testing.expect(fetched.data[1].date.eql(Date.fromYmd(2026, 5, 20)));
    try std.testing.expectEqual(Source.cached, fetched.source);
}
|
|
|
|
test "getCandles offline mode with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Guard armed and cache empty: with skip_network=true the call has to
    // come back as FetchFailed rather than panicking on a network attempt.
    ds.panic_on_network_attempt = true;

    const outcome = ds.getCandles("NEVERHEARDOFIT", .{ .skip_network = true });
    try std.testing.expectError(DataError.FetchFailed, outcome);
}
|
|
|
|
test "fetchCached offline mode returns stale-cached data" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Seed one dividend record that is meant to be stale from the start.
    var seed_store = ds.store();
    var payouts = [_]Dividend{
        .{ .ex_date = Date.fromYmd(2026, 3, 15), .amount = 0.50, .type = .regular },
    };
    // Write through writeWithSource with a large negative TTL
    // (-1_000_000 seconds) so the entry counts as long since expired.
    seed_store.writeWithSource(Dividend, "TEST", &payouts, .{ .seconds = -1_000_000 }, "test");

    ds.panic_on_network_attempt = true;

    // Stale or not, skip_network has to serve the cached rows instead of
    // reaching for the network.
    const fetched = try ds.getDividends("TEST", .{ .skip_network = true });
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 1), fetched.data.len);
    try std.testing.expectEqual(Source.cached, fetched.source);
}
|
|
|
|
test "getQuote offline mode returns FetchFailed (quotes never cached)" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    ds.panic_on_network_attempt = true;

    // There is no quote cache to fall back on, so offline mode can only fail.
    const outcome = ds.getQuote("AAPL", .{ .skip_network = true });
    try std.testing.expectError(DataError.FetchFailed, outcome);
}
|
|
|
|
test "loadAllPrices offline mode skips network and returns cached" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // "FRESH" gets a cached candle; "MISSING" deliberately gets no setup
    // at all, so it has nothing in the cache.
    var seed_store = ds.store();
    var bars = [_]Candle{
        .{ .date = Date.fromYmd(2026, 5, 20), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
    };
    seed_store.cacheCandles("FRESH", &bars, .tiingo, 0);

    ds.panic_on_network_attempt = true;

    const tickers = [_][]const u8{ "FRESH", "MISSING" };
    var loaded = ds.loadAllPrices(&tickers, &.{}, .{ .skip_network = true }, null, null);
    defer loaded.prices.deinit();

    // The cached symbol resolves to its close price.
    try std.testing.expect(loaded.prices.contains("FRESH"));
    try std.testing.expectEqual(@as(f64, 104), loaded.prices.get("FRESH").?);
    // The uncached symbol is absent from the price map...
    try std.testing.expect(!loaded.prices.contains("MISSING"));
    // ...and is counted as the single failure.
    try std.testing.expectEqual(@as(usize, 1), loaded.failed_count);
}
|
|
|
|
test "loadAllPrices force_refresh tops up without wiping the candle cache" {
    // Regression guard. force_refresh means "ignore the TTL and do an
    // incremental top-up"; it must NOT mean "delete the cache and download
    // everything again". The old behavior invalidated (deleted)
    // candles_daily before fetching, forcing a full network re-download.
    // When the cache already covers through today, force_refresh has to be
    // served from the surviving cache without any network traffic.
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    var seed_store = ds.store();
    // The candle is dated far in the future so that getCandles' "last
    // cached date is today-or-later" branch fires no matter what the test
    // clock says: an incremental fetch has nothing to pull and never
    // reaches the network.
    var bars = [_]Candle{
        .{ .date = Date.fromYmd(2099, 12, 31), .open = 100, .high = 105, .low = 99, .close = 104, .adj_close = 104, .volume = 1000 },
    };
    seed_store.cacheCandles("HELD", &bars, .tiingo, 0);

    // From here on any provider/network attempt panics. Had force_refresh
    // wiped the cache (old behavior), getCandles would fall through to a
    // full re-fetch and trip this guard.
    ds.panic_on_network_attempt = true;

    const tickers = [_][]const u8{"HELD"};
    var loaded = ds.loadAllPrices(&tickers, &.{}, .{ .force_refresh = true }, null, null);
    defer loaded.prices.deinit();

    // The price came from the cache, which was not wiped.
    try std.testing.expect(loaded.prices.contains("HELD"));
    try std.testing.expectEqual(@as(f64, 104), loaded.prices.get("HELD").?);
    // The candle cache is still there after the force-refresh.
    try std.testing.expect(ds.getCachedLastClose("HELD") != null);
}
|
|
|
|
test "getClassification: skip_network with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Empty cache, network guard armed: the offline lookup must fail
    // cleanly with FetchFailed.
    ds.panic_on_network_attempt = true;
    const outcome = ds.getClassification("NEVERHEARDOFIT", .{ .skip_network = true });
    try std.testing.expectError(DataError.FetchFailed, outcome);
}
|
|
|
|
test "getClassification: cache hit returns cached data without network" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Seed one classification record for AAPL using the classification TTL.
    var seed_store = ds.store();
    var seeded = [_]Wikidata.ClassificationRecord{.{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .country = "US",
        .as_of = "2026-05-25",
        .source = "wikidata",
    }};
    seed_store.write(Wikidata.ClassificationRecord, "AAPL", &seeded, .{ .seconds = cache.Ttl.classification });

    // With the guard armed, the lookup has to be answered from the cache.
    ds.panic_on_network_attempt = true;
    const fetched = try ds.getClassification("AAPL", .{});
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 1), fetched.data.len);
    try std.testing.expectEqualStrings("AAPL", fetched.data[0].symbol);
    try std.testing.expectEqualStrings("Apple Inc.", fetched.data[0].name.?);
    try std.testing.expectEqual(Source.cached, fetched.source);
}
|
|
|
|
test "populateGeo: country US -> geo US" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Heap-owned strings so that record.deinit can free every field.
    var row: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .country = try gpa.dupe(u8, "US"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer row.deinit(gpa);

    // A US country code must yield the "US" geo bucket.
    try ds.populateGeo(&row);
    try std.testing.expect(row.geo != null);
    try std.testing.expectEqualStrings("US", row.geo.?);
}
|
|
|
|
test "populateGeo: country GB -> geo International Developed" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Heap-owned strings so that record.deinit can free every field.
    var row: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .country = try gpa.dupe(u8, "GB"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer row.deinit(gpa);

    // A GB country code must yield the "International Developed" bucket.
    try ds.populateGeo(&row);
    try std.testing.expect(row.geo != null);
    try std.testing.expectEqualStrings("International Developed", row.geo.?);
}
|
|
|
|
test "populateGeo: null country -> noop" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // No country is set on this record.
    var row: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer row.deinit(gpa);

    // Without a country there is nothing to derive, so geo stays null.
    try ds.populateGeo(&row);
    try std.testing.expectEqual(@as(?[]const u8, null), row.geo);
}
|
|
|
|
test "populateGeo: existing geo not overwritten" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // The record arrives with both a country and a pre-set geo value.
    var row: Wikidata.ClassificationRecord = .{
        .symbol = try gpa.dupe(u8, "TEST"),
        .country = try gpa.dupe(u8, "US"),
        .geo = try gpa.dupe(u8, "Already Set"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };
    defer row.deinit(gpa);

    // The pre-set geo must survive untouched.
    try ds.populateGeo(&row);
    try std.testing.expectEqualStrings("Already Set", row.geo.?);
}
|
|
|
|
test "getClassification: sparse Wikidata + EDGAR managed_fund hit produces merged record" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Both EDGAR ticker-map caches need at least one entry each. The load
    // helpers treat an empty cached slice as a miss and would fall through
    // to a network fetch, which synthesizeClassification must not do here.
    var seed_store = ds.store();
    var fund_map = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "FAGIX",
        .cik = "0000275309",
    }};
    seed_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_map, cache.DataType.tickers_funds.ttl());
    var company_map = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    seed_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_map, cache.DataType.tickers_companies.ttl());

    // A negative etf_metrics entry keeps getEtfMetrics off the network.
    seed_store.writeNegative("FAGIX", .etf_metrics);

    // The Wikidata side is sparse: a single record with nothing useful
    // beyond the name.
    var thin = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    thin[0] = .{
        .symbol = try gpa.dupe(u8, "FAGIX"),
        .name = try gpa.dupe(u8, "Test Fund"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    // Call synthesizeClassification directly, bypassing the Wikidata
    // fetch. It takes ownership of `thin`. Any provider call panics.
    ds.panic_on_network_attempt = true;
    const combined = try ds.synthesizeClassification("FAGIX", thin, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(gpa, combined);

    try std.testing.expectEqual(@as(usize, 1), combined.len);
    const row = combined[0];
    try std.testing.expectEqualStrings("FAGIX", row.symbol);
    try std.testing.expect(row.is_etf);
    try std.testing.expectEqualStrings("Fund", row.asset_class.?);
    try std.testing.expectEqualStrings("US", row.country.?);
    try std.testing.expectEqualStrings("US", row.geo.?);
    try std.testing.expectEqualStrings("edgar_fallback", row.source);
    // The name supplied by Wikidata is kept through the merge.
    try std.testing.expectEqualStrings("Test Fund", row.name.?);
}
|
|
|
|
test "synthesizeClassification: no EDGAR hit returns NotFound" {
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    // Throwaway entries in both ticker maps: the EDGAR lookup finds
    // nothing for the test symbol, yet the maps count as cached so no
    // network fetch is attempted for them.
    var seed_store = ds.store();
    var fund_map = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY1",
        .cik = "0000000001",
    }};
    seed_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_map, cache.DataType.tickers_funds.ttl());
    var company_map = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "DUMMY2",
        .cik = "0000000002",
    }};
    seed_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_map, cache.DataType.tickers_companies.ttl());

    // Sparse Wikidata input for a symbol that appears in neither map.
    var thin = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    thin[0] = .{
        .symbol = try gpa.dupe(u8, "NEVERHEARDOFIT"),
        .name = try gpa.dupe(u8, "ghost"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    ds.panic_on_network_attempt = true;
    const outcome = ds.synthesizeClassification("NEVERHEARDOFIT", thin, .{ .skip_network = true });
    try std.testing.expectError(error.NotFound, outcome);
}
|
|
|
|
test "synthesizeClassification: company_or_uit without ETF/TRUST keyword still routes to multi-row" {
    // Models PTY: a closed-end fund whose company_tickers title reads
    // "PIMCO CORPORATE & INCOME OPPORTUNITY FUND". The title contains
    // neither "ETF" nor "TRUST", so lookupInTickerMaps reports
    // .company_or_uit{is_etf=false}. The security is fund-shaped all the
    // same and should get multi-row metadata in enrich.
    //
    // Downstream, "fund-like, emit multi-row" is signalled by
    // ClassificationRecord.is_etf. It is therefore set to true for every
    // EDGAR-found .company_or_uit hit, keyword or not, so that PTY-shaped
    // closed-end funds are handled the same way as ETFs.
    const gpa = std.testing.allocator;
    const test_io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(test_io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg: Config = .{ .cache_dir = cache_root };
    var ds = DataService.init(test_io, gpa, cfg);
    defer ds.deinit();

    var seed_store = ds.store();
    // A throwaway mutual-fund entry, so the MF lookup comes back null for PTY.
    var fund_map = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    seed_store.write(Edgar.MutualFundTickerEntry, "_edgar", &fund_map, cache.DataType.tickers_funds.ttl());
    // PTY sits in the company map with a title lacking ETF/TRUST.
    var company_map = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "PTY",
        .cik = "0001202604",
        .title = "PIMCO CORPORATE & INCOME OPPORTUNITY FUND",
    }};
    seed_store.write(Edgar.CompanyTickerEntry, "_edgar", &company_map, cache.DataType.tickers_companies.ttl());
    seed_store.writeNegative("PTY", .etf_metrics);

    var thin = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    thin[0] = .{
        .symbol = try gpa.dupe(u8, "PTY"),
        .name = try gpa.dupe(u8, "PIMCO Corporate & Income Opportunity Fund"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    ds.panic_on_network_attempt = true;
    const combined = try ds.synthesizeClassification("PTY", thin, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(gpa, combined);

    try std.testing.expectEqual(@as(usize, 1), combined.len);
    const row = combined[0];
    // is_etf has to be true for enrich to route through emitEtfRows (the
    // multi-row sleeve breakdown). asset_class remains "Fund" because the
    // title has no ETF/TRUST keyword.
    try std.testing.expect(row.is_etf);
    try std.testing.expectEqualStrings("Fund", row.asset_class.?);
}
|
|
|
|
test "synthesizeClassification: NPORT-P series_name beats Wikidata's index name for funds" {
    // SOXX scenario: Wikidata names the underlying INDEX
    // ("PHLX Semiconductor Sector"), whereas consumers want the FUND
    // name ("iShares Semiconductor ETF") found in the NPORT-P
    // <seriesName>. The series name is expected to win.
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    var cache_store = svc.store();

    // Fund ticker rows: only an unrelated DUMMY symbol.
    var fund_rows = [_]Edgar.MutualFundTickerEntry{.{
        .symbol = "DUMMY",
        .cik = "0000000001",
    }};
    cache_store.write(Edgar.MutualFundTickerEntry, "_edgar", fund_rows[0..], cache.DataType.tickers_funds.ttl());

    // Company ticker rows: SOXX filed under the umbrella trust title.
    var company_rows = [_]Edgar.CompanyTickerEntry{.{
        .symbol = "SOXX",
        .cik = "0001100663",
        .title = "iShares Trust",
    }};
    cache_store.write(Edgar.CompanyTickerEntry, "_edgar", company_rows[0..], cache.DataType.tickers_companies.ttl());

    // Seed etf_metrics with a profile row that carries the NPORT-P
    // series name the assertion below looks for.
    var seeded_metrics = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = try gpa.dupe(u8, "SOXX"),
            .series_name = try gpa.dupe(u8, "iShares Semiconductor ETF"),
            .cik = try gpa.dupe(u8, "0001100663"),
            .as_of = try gpa.dupe(u8, "2026-06-01"),
            .source = try gpa.dupe(u8, "edgar"),
        } },
    };
    defer for (seeded_metrics) |record| record.deinit(gpa);
    cache_store.write(Edgar.EtfMetricRecord, "SOXX", seeded_metrics[0..], cache.DataType.etf_metrics.ttl());

    // The Wikidata side is sparse: it only knows the index name.
    var wikidata_rows = try gpa.alloc(Wikidata.ClassificationRecord, 1);
    wikidata_rows[0] = .{
        .symbol = try gpa.dupe(u8, "SOXX"),
        .name = try gpa.dupe(u8, "PHLX Semiconductor Sector"),
        .as_of = try gpa.dupe(u8, "2026-06-01"),
        .source = try gpa.dupe(u8, "wikidata"),
    };

    svc.panic_on_network_attempt = true;
    const synthesized = try svc.synthesizeClassification("SOXX", wikidata_rows, .{ .skip_network = true });
    defer Wikidata.ClassificationRecord.freeSlice(gpa, synthesized);

    try std.testing.expectEqual(@as(usize, 1), synthesized.len);
    // The NPORT-P series name must replace Wikidata's index name.
    try std.testing.expectEqualStrings("iShares Semiconductor ETF", synthesized[0].name.?);
}
|
|
|
|
test "getEntityFacts: skip_network with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // The cache directory is empty and the network is off limits, so
    // the lookup has nothing to return.
    svc.panic_on_network_attempt = true;
    try std.testing.expectError(
        DataError.FetchFailed,
        svc.getEntityFacts("0000999999", .{ .skip_network = true }),
    );
}
|
|
|
|
test "getEntityFacts: cache hit returns cached shares-outstanding" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Seed one shares-outstanding fact under the CIK used as cache key.
    var cache_store = svc.store();
    var seeded_facts = [_]Edgar.EntityFactRecord{
        .{ .shares_outstanding = .{
            .symbol = "",
            .shares_outstanding = 14687356000,
            .period_end = "2026-04-17",
            .form = "10-Q",
            .cik = "0000320193",
            .as_of = "2026-05-25",
            .source = "edgar_xbrl",
        } },
    };
    cache_store.write(Edgar.EntityFactRecord, "0000320193", seeded_facts[0..], .{ .seconds = cache.Ttl.entity_facts });

    // Any network attempt from here on would panic the test.
    svc.panic_on_network_attempt = true;
    const fetched = try svc.getEntityFacts("0000320193", .{});
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 1), fetched.data.len);
    switch (fetched.data[0]) {
        .shares_outstanding => |fact| {
            try std.testing.expectEqual(@as(u64, 14687356000), fact.shares_outstanding);
            try std.testing.expectEqualStrings("0000320193", fact.cik);
        },
    }
    try std.testing.expectEqual(Source.cached, fetched.source);
}
|
|
|
|
test "getEtfMetrics: skip_network with no cache returns FetchFailed" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Nothing has been cached for this symbol and the network is
    // forbidden, so the call must fail.
    svc.panic_on_network_attempt = true;
    try std.testing.expectError(
        DataError.FetchFailed,
        svc.getEtfMetrics("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}
|
|
|
|
test "getEtfMetrics: cache hit returns cached profile + sectors + holdings" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    var svc = DataService.init(io, gpa, Config{ .cache_dir = cache_root });
    defer svc.deinit();

    // Seed one record of each variant, in the order asserted below.
    var cache_store = svc.store();
    var seeded_metrics = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = "VTI",
            .cik = "0000036405",
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .sector = .{
            .symbol = "VTI",
            .code = "EC/CORP",
            .description = "Equity / Corporate",
            .pct_of_portfolio = 99.7,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .holding = .{
            .symbol = "VTI",
            .name = "NVIDIA Corp",
            .pct_of_portfolio = 6.57,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
    };
    cache_store.write(Edgar.EtfMetricRecord, "VTI", seeded_metrics[0..], .{ .seconds = cache.Ttl.etf_metrics });

    // Any network attempt from here on would panic the test.
    svc.panic_on_network_attempt = true;
    const fetched = try svc.getEtfMetrics("VTI", .{});
    defer fetched.deinit();

    try std.testing.expectEqual(@as(usize, 3), fetched.data.len);
    try std.testing.expect(fetched.data[0] == .profile);
    try std.testing.expect(fetched.data[1] == .sector);
    try std.testing.expect(fetched.data[2] == .holding);
    try std.testing.expectEqualStrings("VTI", fetched.data[0].profile.symbol);
    try std.testing.expectEqual(Source.cached, fetched.source);
}
|
|
|
|
test "DataService getProvider initializes Wikidata with user_email" {
    var svc = DataService.init(std.testing.io, std.testing.allocator, Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .user_email = "test@example.com",
    });
    defer svc.deinit();

    // The first request populates the service's wikidata slot with a
    // provider carrying the configured e-mail.
    const first = try svc.getProvider(Wikidata);
    try std.testing.expect(svc.wikidata != null);
    try std.testing.expectEqualStrings("test@example.com", first.user_email);

    // Asking again must hand back the very same instance.
    const second = try svc.getProvider(Wikidata);
    try std.testing.expect(first == second);
}
|
|
|
|
test "DataService getProvider returns NoApiKey for Wikidata without user_email" {
    var svc = DataService.init(
        std.testing.io,
        std.testing.allocator,
        Config{ .cache_dir = "/tmp/zfin-test-cache" },
    );
    defer svc.deinit();

    // With no user_email configured, both the Wikidata and the Edgar
    // provider requests are expected to be refused.
    try std.testing.expectError(DataError.NoApiKey, svc.getProvider(Wikidata));
    try std.testing.expectError(DataError.NoApiKey, svc.getProvider(Edgar));
}
|
|
|
|
test "estimateWaitSeconds returns null when relevant provider not instantiated" {
    var svc = DataService.init(
        std.testing.io,
        std.testing.allocator,
        Config{ .cache_dir = "/tmp/zfin-test-cache" },
    );
    defer svc.deinit();

    // No provider has been requested yet, so every rate-limited data
    // type is expected to report "unknown" (null).
    inline for (.{ .dividends, .splits, .earnings, .options, .etf_metrics, .entity_facts }) |data_type| {
        try std.testing.expectEqual(@as(?u64, null), svc.estimateWaitSeconds(data_type));
    }
}
|
|
|
|
test "estimateWaitSeconds returns 0 for types without rate limiters" {
    // These data types are expected to report a zero wait even though
    // no provider has been instantiated on this service.
    var svc = DataService.init(
        std.testing.io,
        std.testing.allocator,
        Config{ .cache_dir = "/tmp/zfin-test-cache" },
    );
    defer svc.deinit();

    inline for (.{ .candles_daily, .candles_meta, .classification, .meta }) |data_type| {
        try std.testing.expectEqual(@as(?u64, 0), svc.estimateWaitSeconds(data_type));
    }
}
|
|
|
|
test "estimateWaitSeconds returns 0 for fresh rate-limited providers" {
    // After a provider has been instantiated but not yet used, the
    // estimate is expected to be zero seconds.
    var svc = DataService.init(std.testing.io, std.testing.allocator, Config{
        .cache_dir = "/tmp/zfin-test-cache",
        .polygon_key = "test-polygon-key",
        .fmp_key = "test-fmp-key",
    });
    defer svc.deinit();

    // The returned pointers are discarded; the calls exist only to
    // make the service instantiate both providers.
    _ = try svc.getProvider(Polygon);
    _ = try svc.getProvider(Fmp);

    inline for (.{ .dividends, .splits, .earnings }) |data_type| {
        try std.testing.expectEqual(@as(?u64, 0), svc.estimateWaitSeconds(data_type));
    }
}
|
|
|
|
// ── lookupInTickerMaps ────────────────────────────────────────
|
|
//
|
|
// Pure function — no I/O. Consumed by `lookupEdgarFallback`,
|
|
// which loads the maps then calls this. Tests construct
|
|
// synthetic ticker-map data directly to exercise every branch
|
|
// without touching the cache or network.
|
|
|
|
/// Test helper: build a `MutualFundTickerEntry` whose `symbol` and `cik`
/// are fresh copies allocated from `allocator`.
///
/// Returns an allocation error if either copy fails. In that case
/// nothing is leaked: a symbol copy that was already made is released
/// before the error propagates.
fn testNewMfEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8) !Edgar.MutualFundTickerEntry {
    const owned_symbol = try allocator.dupe(u8, symbol);
    // Without this, a failure of the second dupe would leak the first.
    errdefer allocator.free(owned_symbol);
    const owned_cik = try allocator.dupe(u8, cik);
    return .{
        .symbol = owned_symbol,
        .cik = owned_cik,
    };
}
|
|
|
|
/// Test helper: build a `CompanyTickerEntry` whose `symbol`, `cik` and
/// (when given) `title` are fresh copies allocated from `allocator`.
/// A null `title` stays null.
///
/// Returns an allocation error if any copy fails. In that case the
/// copies made so far are released before the error propagates.
fn testNewCoEntry(allocator: std.mem.Allocator, symbol: []const u8, cik: []const u8, title: ?[]const u8) !Edgar.CompanyTickerEntry {
    const owned_symbol = try allocator.dupe(u8, symbol);
    errdefer allocator.free(owned_symbol);
    const owned_cik = try allocator.dupe(u8, cik);
    errdefer allocator.free(owned_cik);
    // Last allocation: nothing after it can fail, so no errdefer needed.
    const owned_title = if (title) |t| try allocator.dupe(u8, t) else null;
    return .{
        .symbol = owned_symbol,
        .cik = owned_cik,
        .title = owned_title,
    };
}
|
|
|
|
test "lookupInTickerMaps: both maps null -> .none" {
    const gpa = std.testing.allocator;

    // With neither map supplied there is nothing to search.
    const found = lookupInTickerMaps(gpa, "ANY", null, null);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .none);
}
|
|
|
|
test "lookupInTickerMaps: symbol in MF map -> .managed_fund" {
    const gpa = std.testing.allocator;

    // One-row fund map containing the symbol being looked up.
    const rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    rows[0] = try testNewMfEntry(gpa, "FAGIX", "0000225322");
    var fund_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, rows);
    defer fund_map.deinit();

    const found = lookupInTickerMaps(gpa, "FAGIX", &fund_map, null);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .managed_fund);
}
|
|
|
|
test "lookupInTickerMaps: symbol in company map with TRUST title -> ETF hint" {
    const gpa = std.testing.allocator;

    // One-row company map whose title contains "ETF TRUST".
    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "SPY", "0000884394", "SPDR S&P 500 ETF TRUST");
    var company_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    defer company_map.deinit();

    const found = lookupInTickerMaps(gpa, "SPY", null, &company_map);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .company_or_uit);
    try std.testing.expect(found.company_or_uit.is_etf);
    try std.testing.expectEqualStrings("SPDR S&P 500 ETF TRUST", found.company_or_uit.title.?);
}
|
|
|
|
test "lookupInTickerMaps: company map with operating-company title -> not ETF" {
    const gpa = std.testing.allocator;

    // One-row company map whose title is an ordinary company name.
    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "AAPL", "0000320193", "Apple Inc.");
    var company_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    defer company_map.deinit();

    const found = lookupInTickerMaps(gpa, "AAPL", null, &company_map);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .company_or_uit);
    try std.testing.expect(!found.company_or_uit.is_etf);
}
|
|
|
|
test "lookupInTickerMaps: not in either map -> .none" {
    const gpa = std.testing.allocator;

    // The fund map holds FAGIX only; no company map is supplied.
    const rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    rows[0] = try testNewMfEntry(gpa, "FAGIX", "0000225322");
    var fund_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, rows);
    defer fund_map.deinit();

    const found = lookupInTickerMaps(gpa, "MISSING", &fund_map, null);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .none);
}
|
|
|
|
test "lookupInTickerMaps: MF map takes precedence over company map" {
    // The same symbol is present in both maps. This pins the
    // contract that the mutual-fund answer is the one returned.
    const gpa = std.testing.allocator;

    const fund_rows = try gpa.alloc(Edgar.MutualFundTickerEntry, 1);
    fund_rows[0] = try testNewMfEntry(gpa, "DUP", "0000000001");
    const company_rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    company_rows[0] = try testNewCoEntry(gpa, "DUP", "0000000002", "DUP TRUST");

    var fund_map = try Edgar.TickerMap(Edgar.MutualFundTickerEntry).fromEntries(gpa, fund_rows);
    defer fund_map.deinit();
    var company_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, company_rows);
    defer company_map.deinit();

    const found = lookupInTickerMaps(gpa, "DUP", &fund_map, &company_map);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .managed_fund);
}
|
|
|
|
test "lookupInTickerMaps: company map with null title -> .company_or_uit, no ETF" {
    // A company row without a title must still produce a lookup
    // result, with no ETF inference and a null title.
    const gpa = std.testing.allocator;

    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "BARE", "0000000001", null);
    var company_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
    defer company_map.deinit();

    const found = lookupInTickerMaps(gpa, "BARE", null, &company_map);
    defer freeEdgarLookup(gpa, found);

    try std.testing.expect(found == .company_or_uit);
    try std.testing.expect(!found.company_or_uit.is_etf);
    try std.testing.expect(found.company_or_uit.title == null);
}
|
|
|
|
test "lookupInTickerMaps: returned title is owned (survives map deinit)" {
    // The map is torn down before the result is inspected, so the
    // title in the result has to be a copy rather than a view into
    // the map's storage.
    const gpa = std.testing.allocator;

    const rows = try gpa.alloc(Edgar.CompanyTickerEntry, 1);
    rows[0] = try testNewCoEntry(gpa, "VTI", "0000884394", "VANGUARD TOTAL STOCK MARKET ETF");

    const found = lookup: {
        var company_map = try Edgar.TickerMap(Edgar.CompanyTickerEntry).fromEntries(gpa, rows);
        defer company_map.deinit();
        break :lookup lookupInTickerMaps(gpa, "VTI", null, &company_map);
    };
    defer freeEdgarLookup(gpa, found);

    // The map has been deinitialized by this point.
    try std.testing.expect(found == .company_or_uit);
    try std.testing.expectEqualStrings("VANGUARD TOTAL STOCK MARKET ETF", found.company_or_uit.title.?);
    try std.testing.expect(found.company_or_uit.is_etf);
}
|
|
|
|
test "freeEdgarLookup: handles all three union variants without leak" {
    const gpa = std.testing.allocator;

    // Variants that carry no heap data.
    freeEdgarLookup(gpa, .managed_fund);
    freeEdgarLookup(gpa, .none);
    freeEdgarLookup(gpa, .{ .company_or_uit = .{ .title = null, .is_etf = false } });

    // A heap-allocated title is handed over to be released. The
    // testing allocator reports leaks, so the test passing shows the
    // title was freed.
    const heap_title = try gpa.dupe(u8, "Some Title");
    freeEdgarLookup(gpa, .{ .company_or_uit = .{ .title = heap_title, .is_etf = true } });
}
|
|
|
|
// ── CUSIP->ticker cache (loadCusipTickerMap / cacheCusipTicker) ──
|
|
|
|
test "loadCusipTickerMap: missing file returns empty map" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    // Nothing was ever written to the fresh directory.
    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try std.testing.expectEqual(@as(usize, 0), loaded.count());
}
|
|
|
|
test "cacheCusipTicker + loadCusipTickerMap: write/read round-trip" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    // Synthetic CUSIP/ticker pairs only.
    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("222222222", "BBB");

    // Both pairs must come back from a fresh load.
    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try std.testing.expectEqual(@as(usize, 2), loaded.count());
    try std.testing.expectEqualStrings("AAA", loaded.get("111111111").?);
    try std.testing.expectEqualStrings("BBB", loaded.get("222222222").?);
}
|
|
|
|
test "cacheCusipTicker: dedups repeated CUSIP (the historical bug)" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    // Three identical writes must end up as a single mapping.
    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("111111111", "AAA");

    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try std.testing.expectEqual(@as(usize, 1), loaded.count());
    try std.testing.expectEqualStrings("AAA", loaded.get("111111111").?);

    // Inspect the raw file as well: exactly one line may contain a
    // "cusip::" field, which shows the writer itself deduplicates.
    const file_path = try std.fs.path.join(gpa, &.{ cache_root, "cusip_tickers.srf" });
    defer gpa.free(file_path);
    const contents = try std.Io.Dir.cwd().readFileAlloc(io, file_path, gpa, .limited(64 * 1024));
    defer gpa.free(contents);

    var data_rows: usize = 0;
    var line_iter = std.mem.splitScalar(u8, contents, '\n');
    while (line_iter.next()) |text| {
        if (std.mem.indexOf(u8, text, "cusip::") == null) continue;
        data_rows += 1;
    }
    try std.testing.expectEqual(@as(usize, 1), data_rows);
}
|
|
|
|
test "loadCusipTickerMap: first occurrence wins on duplicate rows" {
    // A file that already contains the same row twice must load
    // without error and yield a single mapping.
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    // Write the duplicated rows by hand, bypassing the service.
    const file_path = try std.fs.path.join(gpa, &.{ cache_root, "cusip_tickers.srf" });
    defer gpa.free(file_path);
    try std.Io.Dir.cwd().writeFile(io, .{
        .sub_path = file_path,
        .data = "#!srfv1\ncusip::111111111,ticker::AAA\ncusip::111111111,ticker::AAA\n",
    });

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try std.testing.expectEqual(@as(usize, 1), loaded.count());
    try std.testing.expectEqualStrings("AAA", loaded.get("111111111").?);
}
|
|
|
|
// ── CUSIP resolution cascade (resolveCusips / appendCusipEntries) ──
|
|
|
|
test "appendCusipEntries: batches, dedups vs file and within batch" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    // Start with a single mapping already persisted.
    svc.cacheCusipTicker("111111111", "AAA");

    // The batch carries: 111 (already persisted, different ticker),
    // 222 and 333 (new), and 222 a second time.
    const pending = [_]DataService.CusipEntry{
        .{ .cusip = "111111111", .ticker = "ZZZ" },
        .{ .cusip = "222222222", .ticker = "BBB" },
        .{ .cusip = "333333333", .ticker = "CCC" },
        .{ .cusip = "222222222", .ticker = "BBB" },
    };
    svc.appendCusipEntries(pending[0..]);

    var loaded = svc.loadCusipTickerMap(gpa);
    defer loaded.deinit();
    try std.testing.expectEqual(@as(u32, 3), loaded.count());
    // The persisted ticker for 111 must not be replaced by the batch.
    try std.testing.expectEqualStrings("AAA", loaded.get("111111111").?);
    try std.testing.expectEqualStrings("BBB", loaded.get("222222222").?);
    try std.testing.expectEqualStrings("CCC", loaded.get("333333333").?);

    // The raw file must hold exactly three lines with a "cusip::"
    // field.
    const file_path = try std.fs.path.join(gpa, &.{ cache_root, "cusip_tickers.srf" });
    defer gpa.free(file_path);
    const contents = try std.Io.Dir.cwd().readFileAlloc(io, file_path, gpa, .limited(64 * 1024));
    defer gpa.free(contents);

    var data_rows: usize = 0;
    var line_iter = std.mem.splitScalar(u8, contents, '\n');
    while (line_iter.next()) |text| {
        if (std.mem.indexOf(u8, text, "cusip::") == null) continue;
        data_rows += 1;
    }
    try std.testing.expectEqual(@as(usize, 3), data_rows);
}
|
|
|
|
test "mergeCusipBody: merges new entries, skips those already in `have` or the batch" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    // The already-known map holds 111 -> AAA, loaded via the service.
    svc.cacheCusipTicker("111111111", "AAA");
    var known = svc.loadCusipTickerMap(gpa);
    defer known.deinit();

    // The output map and the merge both allocate from one arena that
    // is dropped at the end of the test.
    var merge_arena = std.heap.ArenaAllocator.init(gpa);
    defer merge_arena.deinit();
    var merged = std.StringHashMap([]const u8).init(merge_arena.allocator());

    // Incoming body: 111 clashes with the known map, 222 and 333 are
    // new, and 222 appears a second time.
    const body =
        "#!srfv1\n" ++
        "cusip::111111111,ticker::ZZZ\n" ++
        "cusip::222222222,ticker::BBB\n" ++
        "cusip::333333333,ticker::CCC\n" ++
        "cusip::222222222,ticker::BBB\n";
    DataService.mergeCusipBody(merge_arena.allocator(), &merged, known, body);

    try std.testing.expectEqual(@as(u32, 2), merged.count());
    try std.testing.expectEqualStrings("BBB", merged.get("222222222").?);
    try std.testing.expectEqualStrings("CCC", merged.get("333333333").?);
    // The clashing CUSIP must be absent from the merge output.
    try std.testing.expect(merged.get("111111111") == null);
}
|
|
|
|
test "resolveCusips: warm cache resolves without touching the network" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();
    // Every requested CUSIP is cached below, so any network attempt
    // would be a bug. The flag turns such an attempt into a panic.
    svc.panic_on_network_attempt = true;

    svc.cacheCusipTicker("111111111", "AAA");
    svc.cacheCusipTicker("222222222", "BBB");

    // The request deliberately includes a repeated CUSIP and an
    // empty one. Neither may break resolution.
    const requested = [_][]const u8{ "111111111", "222222222", "111111111", "" };
    var resolved = svc.resolveCusips(gpa, requested[0..], false);
    defer resolved.deinit();
    try std.testing.expectEqualStrings("AAA", resolved.get("111111111").?);
    try std.testing.expectEqualStrings("BBB", resolved.get("222222222").?);
}
|
|
|
|
test "resolveCusips: skip_network serves L1 only, never hits the network" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();
    // One requested CUSIP is not cached. With the skip flag set, the
    // miss must not lead to a network attempt, which would panic here.
    svc.panic_on_network_attempt = true;

    svc.cacheCusipTicker("111111111", "AAA");

    // "999999999" was never cached, so it is expected to remain
    // unresolved in the returned map.
    const requested = [_][]const u8{ "111111111", "999999999" };
    var resolved = svc.resolveCusips(gpa, requested[0..], true);
    defer resolved.deinit();
    try std.testing.expectEqualStrings("AAA", resolved.get("111111111").?);
    try std.testing.expect(resolved.get("999999999") == null);
}
|
|
|
|
test "getEtfProfile: carries holding CUSIP through the model boundary" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_root = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_root);

    const cfg = Config{ .cache_dir = cache_root };
    var svc = DataService.init(io, gpa, cfg);
    defer svc.deinit();

    // Cached etf_metrics: one profile row plus one holding that has a
    // CUSIP but no ticker field. All values are placeholders.
    var seeded_metrics = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = try gpa.dupe(u8, "TESTF"),
            .series_name = try gpa.dupe(u8, "Test Fund"),
            .cik = try gpa.dupe(u8, "0000000002"),
            .as_of = try gpa.dupe(u8, "2026-06-01"),
            .source = try gpa.dupe(u8, "edgar"),
        } },
        .{ .holding = .{
            .symbol = try gpa.dupe(u8, "TESTF"),
            .name = try gpa.dupe(u8, "Placeholder Corp"),
            .cusip = try gpa.dupe(u8, "999999999"),
            .pct_of_portfolio = 12.5,
            .as_of = try gpa.dupe(u8, "2026-06-01"),
            .source = try gpa.dupe(u8, "edgar"),
        } },
    };
    defer for (seeded_metrics) |record| record.deinit(gpa);
    var cache_store = svc.store();
    cache_store.write(Edgar.EtfMetricRecord, "TESTF", seeded_metrics[0..], cache.DataType.etf_metrics.ttl());

    // Serve strictly from the cache; a network attempt would panic.
    svc.panic_on_network_attempt = true;
    const profile = try svc.getEtfProfile("TESTF", .{ .skip_network = true });
    defer profile.deinit();

    const held = profile.data.holdings orelse return error.NoHoldings;
    try std.testing.expectEqual(@as(usize, 1), held.len);
    try std.testing.expectEqualStrings("999999999", held[0].cusip orelse return error.NoCusip);
    // The seeded holding carried no ticker, so none may appear here.
    try std.testing.expect(held[0].symbol == null);
}
|