update srf/wire edgar and wikidata into service

This commit is contained in:
Emil Lerch 2026-05-29 12:23:43 -07:00
parent cc2087fd07
commit 52afd75696
25 changed files with 913 additions and 321 deletions

View file

@ -32,7 +32,7 @@ repos:
- id: test
name: Run zig build test
entry: zig
args: ["build", "coverage", "-Dcoverage-threshold=71"]
args: ["build", "coverage", "-Dcoverage-threshold=72"]
language: system
types: [file]
pass_filenames: false

View file

@ -589,7 +589,7 @@ zig build test # run all tests (single binary, discovers all tests
zig build run -- <args> # build and run CLI
zig build docs # generate library documentation
zig build coverage # run tests with kcov coverage (Linux only). See "Coverage" section.
zig build coverage -Dcoverage-threshold=65 # fail build if coverage < N% (pre-commit uses 65)
zig build coverage -Dcoverage-threshold=72 # fail build if coverage < N% (see .pre-commit-config.yaml for current floor)
```
**Tooling** (managed via `.mise.toml`):
@ -729,13 +729,15 @@ stdout:
Total test coverage: 65.15% (15399/23638)
```
**The pre-commit hook enforces a coverage floor.** The current
floor is **65%** (set in `.pre-commit-config.yaml`). The hook runs
`zig build coverage -Dcoverage-threshold=65` and fails the commit
if coverage drops below that threshold. Bumping the floor over time
is encouraged — every time we push the actual coverage materially
higher, raise the floor in the pre-commit config in the same commit
so the gain is locked in.
**The pre-commit hook enforces a coverage floor.** The exact
threshold lives in `.pre-commit-config.yaml` as the
`-Dcoverage-threshold=N` flag on the `test` hook — that's the
source of truth, always. The hook runs
`zig build coverage -Dcoverage-threshold=N` and fails the commit
if coverage drops below `N`. Bumping the floor over time is
encouraged — every time we push the actual coverage materially
higher, raise the threshold in the pre-commit config in the same
commit so the gain is locked in.
**Coverage expectations for new work:**

View file

@ -13,8 +13,8 @@
.hash = "z2d-0.11.0-j5P_HtLzDwBGyQt49DrT0v4BuVqI_SRs6CXsuj7eBVhR",
},
.srf = .{
.url = "git+https://git.lerch.org/lobo/srf#12b755660e96ed65c645975110214fcc9c66ca4d",
.hash = "srf-0.0.0-qZj5743KAQAykeIHzFJdRDwgAA-Yy1RLaj0Lw4W5Rphx",
.url = "git+https://git.lerch.org/lobo/srf#4a3e5f00f15b0e0ba79d06ffe69dbcfa052baa5b",
.hash = "srf-0.0.0-qZj572nkAQAAz3zEg6fdD8A7PJnQ9je3zCeAOJS5PoZj",
},
},
.paths = .{

View file

@ -67,18 +67,20 @@ pub fn parse(str: []const u8) !Date {
return fromYmd(y, m, d);
}
/// Hook for srf Record.to(T) coercion.
pub fn srfParse(str: []const u8) !Date {
return parse(str);
/// Hook for srf coercion via `FieldIterator.to(T, ...)`. Returns a
/// `CoercionResult(Date)` with `require_free_original = true` so SRF
/// frees the consumed source string after parsing.
pub fn srfParse(str: []const u8) !srf.CoercionResult(Date) {
return .initFree(try parse(str));
}
/// Hook for srf Record.from(T) serialization.
pub fn srfFormat(self: Date, allocator: std.mem.Allocator, comptime field_name: []const u8) !srf.Value {
_ = field_name;
/// Hook for srf serialization. Writes "YYYY-MM-DD" to the writer
/// using the "string" type (untyped) so the value parses cleanly
/// back through `Date.srfParse` on read.
pub fn srfFormat(self: Date, comptime field_name: []const u8, writer: *std.Io.Writer) std.Io.Writer.Error!void {
const ymd = epochDaysToYmd(self.days);
const y: u16 = @intCast(ymd.year);
const buf = try std.fmt.allocPrint(allocator, "{d:0>4}-{d:0>2}-{d:0>2}", .{ y, ymd.month, ymd.day });
return .{ .string = buf };
try writer.print("{s}::{d:0>4}-{d:0>2}-{d:0>2}", .{ field_name, y, ymd.month, ymd.day });
}
/// Zig 0.15+ format method: writes "YYYY-MM-DD" to the writer.

View file

@ -193,11 +193,11 @@ pub fn parseAccountsFile(allocator: std.mem.Allocator, data: []const u8) !Accoun
}
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData;
defer it.deinit();
while (try it.next()) |fields| {
const entry = fields.to(AccountTaxEntry) catch continue;
const entry = fields.to(AccountTaxEntry, .{}) catch continue;
try entries.append(allocator, .{
.account = try allocator.dupe(u8, entry.account),
.tax_type = entry.tax_type,

View file

@ -469,12 +469,15 @@ const SrfProjection = union(enum) {
/// Returns default config if data is null or unparseable.
///
/// Uses an internal stack-backed FixedBufferAllocator for the SRF
/// iterator's scratch (`alloc_strings = false` keeps strings borrowing
/// from `data`, so the iterator only needs scratch for field-row
/// bookkeeping). The 8 KB buffer comfortably fits any realistic
/// projections.srf a handful of config + birthdate + event records.
/// On overflow the parse aborts and we return the default config,
/// matching the existing "unparseable → defaults" contract.
/// iterator's scratch. The default `parse_allocator` keeps short
/// string values borrowing from `data` (no copy) and transparently
/// allocates from the iterator's fallback arena for any
/// multi-line/binary values (e.g. an event `name` containing a
/// comma, which `srf.fmt` encodes with a length prefix). The 8 KB
/// buffer comfortably fits any realistic projections.srf — a
/// handful of config + birthdate + event records. On overflow the
/// parse aborts and we return the default config, matching the
/// existing "unparseable → defaults" contract.
///
/// Format (union-tagged SRF records):
/// type::config,target_stock_pct:num:80
@ -491,7 +494,7 @@ pub fn parseProjectionsConfig(data: ?[]const u8) UserConfig {
const scratch = fba.allocator();
var reader = std.Io.Reader.fixed(raw);
var it = srf.iterator(&reader, scratch, .{ .alloc_strings = false }) catch return config;
var it = srf.iterator(&reader, scratch, .{}) catch return config;
defer it.deinit();
var saw_horizon = false;
@ -505,7 +508,7 @@ pub fn parseProjectionsConfig(data: ?[]const u8) UserConfig {
var annotation_count: u8 = 0;
while (it.next() catch null) |field_it| {
const rec = field_it.to(SrfProjection) catch continue;
const rec = field_it.to(SrfProjection, .{}) catch continue;
switch (rec) {
.config => |c| {
config.target_stock_pct = c.target_stock_pct orelse config.target_stock_pct;

View file

@ -19,7 +19,7 @@
//! extractMetric(series, .net_worth) -> []MetricPoint for rendering
//!
//! For rollup generation, `buildRollupRecords` emits a flat slice suitable
//! for `srf.fmtFrom` without any of the per-lot detail the rollup is a
//! for `srf.fmt` without any of the per-lot detail — the rollup is a
//! summary cache, not a replacement for the per-day snapshot files.
const std = @import("std");
@ -392,7 +392,7 @@ pub const RollupRow = struct {
};
/// Produce a rollup-row slice from a TimelineSeries. Pure function —
/// caller owns the result, ready to hand to `srf.fmtFrom`.
/// caller owns the result, ready to hand to `srf.fmt`.
pub fn buildRollupRecords(
allocator: std.mem.Allocator,
points: []const TimelinePoint,

76
src/cache/store.zig vendored
View file

@ -12,6 +12,8 @@ const EarningsEvent = @import("../models/earnings.zig").EarningsEvent;
const EtfProfile = @import("../models/etf_profile.zig").EtfProfile;
const Holding = @import("../models/etf_profile.zig").Holding;
const SectorWeight = @import("../models/etf_profile.zig").SectorWeight;
const Wikidata = @import("../providers/Wikidata.zig");
const Edgar = @import("../providers/Edgar.zig");
// Wall-clock policy
//
@ -264,6 +266,11 @@ pub const Store = struct {
EarningsEvent => .earnings,
OptionsChain => .options,
EtfProfile => .etf_profile,
Wikidata.ClassificationRecord => .classification,
Edgar.EtfMetricRecord => .etf_metrics,
Edgar.EntityFactRecord => .entity_facts,
Edgar.MutualFundTickerMapBlob => .tickers_funds,
Edgar.CompanyTickerMapBlob => .tickers_companies,
else => @compileError("unsupported type for Store"),
};
}
@ -318,7 +325,7 @@ pub const Store = struct {
}
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, self.allocator, .{ .alloc_strings = false }) catch return null;
var it = srf.iterator(&reader, self.allocator, .{}) catch return null;
defer it.deinit();
if (freshness == .fresh_only) {
@ -465,7 +472,7 @@ pub const Store = struct {
// below frees these duped strings after we're done with the
// merged list. Keep the post-process logic in lockstep with
// the deinit handling they're a pair.
const existing_result = self.read(T, symbol, mergePostProcess(T), .any);
const existing_result = self.read(T, symbol, null, .any);
const existing: []const T = if (existing_result) |r| r.data else &.{};
defer if (existing_result != null) {
if (comptime @hasDecl(T, "deinit")) {
@ -544,26 +551,6 @@ pub const Store = struct {
@compileError("mergeKey only defined for Dividend and Split");
}
/// Post-process callback for the merge primitive's `read` call.
/// Dupes any heap-allocated string fields into stable memory so
/// the `existing` slice's records survive past the SRF
/// iterator's backing buffer being freed. Paired with each
/// type's `deinit` to release the duped strings after merge.
/// Splits have no string fields so the callback is null.
fn mergePostProcess(comptime T: type) ?*const fn (*T, std.mem.Allocator) anyerror!void {
return switch (T) {
Dividend => &struct {
fn pp(div: *Dividend, allocator: std.mem.Allocator) anyerror!void {
if (div.currency) |c| {
div.currency = try allocator.dupe(u8, c);
}
}
}.pp,
Split => null,
else => @compileError("mergePostProcess only defined for Dividend and Split"),
};
}
fn findKeyIndex(comptime T: type, items: []const T, key: i32) ?usize {
for (items, 0..) |it, i| {
if (mergeKey(T, it) == key) return i;
@ -872,7 +859,7 @@ pub const Store = struct {
/// Schema for the SRF `.meta` sidecar emitted by `archiveTornBody`.
/// Each field becomes a `key:type:value` entry in a single record
/// under a `#!srfv1` header. Optional fields with `null` defaults
/// are silently skipped by `srf.fmtFrom` when unset which is the
/// are silently skipped by `srf.fmt` when unset — which is the
/// behavior we want for the http_*, server_*, and `?[]const u8`
/// fields that only some detection paths populate.
const TearRecord = struct {
@ -1018,7 +1005,7 @@ pub const Store = struct {
var aw: std.Io.Writer.Allocating = .init(allocator);
defer aw.deinit();
const records = [_]TearRecord{record};
try aw.writer.print("{f}", .{srf.fmtFrom(TearRecord, allocator, &records, .{})});
try aw.writer.print("{f}", .{srf.fmt(TearRecord, &records, .{})});
try atomic.writeFileAtomic(io, allocator, meta_path, aw.writer.buffered());
}
@ -1096,12 +1083,12 @@ pub const Store = struct {
defer self.allocator.free(data);
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, self.allocator, .{ .alloc_strings = false }) catch return null;
var it = srf.iterator(&reader, self.allocator, .{ .parse_allocator = .none }) catch return null;
defer it.deinit();
const created = it.created orelse std.Io.Timestamp.now(self.io, .real).toSeconds();
const fields = (it.next() catch return null) orelse return null;
const meta = fields.to(CandleMeta) catch return null;
const meta = fields.to(CandleMeta, .{}) catch return null;
return .{ .meta = meta, .created = created };
}
@ -1280,7 +1267,16 @@ pub const Store = struct {
comptime freshness: Freshness,
) ?CacheResult(T) {
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return null;
// `.parse_allocator = .{ .custom = .initTo(allocator) }` tells SRF
// to dupe field values (the data we keep) into the caller's
// allocator while letting field keys borrow from `data` (we only
// need them long enough for `fields.to(T, .{})` to match against
// compile-time field names). Records returned from `it.next()`
// then own their value strings via the caller's allocator,
// ready to outlive the iterator without any further duping.
var it = srf.iterator(&reader, allocator, .{
.parse_allocator = .{ .custom = .initTo(allocator) },
}) catch return null;
defer it.deinit();
if (freshness == .fresh_only) {
@ -1305,7 +1301,7 @@ pub const Store = struct {
}
while (it.next() catch return null) |fields| {
var item = fields.to(T) catch continue;
var item = fields.to(T, .{}) catch continue;
if (comptime postProcess) |pp| {
pp(&item, allocator) catch {
if (comptime @hasDecl(T, "deinit")) item.deinit(allocator);
@ -1335,7 +1331,7 @@ pub const Store = struct {
errdefer aw.deinit();
var opts = options;
opts.created = std.Io.Timestamp.now(io, .real).toSeconds();
try aw.writer.print("{f}", .{srf.fmtFrom(T, allocator, items, opts)});
try aw.writer.print("{f}", .{srf.fmt(T, items, opts)});
return aw.toOwnedSlice();
}
@ -1344,7 +1340,7 @@ pub const Store = struct {
fn serializeCandles(allocator: std.mem.Allocator, candles: []const Candle, options: srf.FormatOptions) ![]const u8 {
var aw: std.Io.Writer.Allocating = .init(allocator);
errdefer aw.deinit();
try aw.writer.print("{f}", .{srf.fmtFrom(Candle, allocator, candles, options)});
try aw.writer.print("{f}", .{srf.fmt(Candle, candles, options)});
return aw.toOwnedSlice();
}
@ -1359,17 +1355,17 @@ pub const Store = struct {
const items = [_]CandleMeta{meta};
var opts = options;
opts.created = std.Io.Timestamp.now(io, .real).toSeconds();
try aw.writer.print("{f}", .{srf.fmtFrom(CandleMeta, allocator, &items, opts)});
try aw.writer.print("{f}", .{srf.fmt(CandleMeta, &items, opts)});
return aw.toOwnedSlice();
}
fn deserializeCandleMeta(allocator: std.mem.Allocator, data: []const u8) !CandleMeta {
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return error.InvalidData;
defer it.deinit();
const fields = (try it.next()) orelse return error.InvalidData;
return fields.to(CandleMeta) catch error.InvalidData;
return fields.to(CandleMeta, .{}) catch error.InvalidData;
}
// Private serialization: options (bespoke)
@ -1407,7 +1403,7 @@ pub const Store = struct {
errdefer aw.deinit();
var opts = options;
opts.created = std.Io.Timestamp.now(io, .real).toSeconds();
try aw.writer.print("{f}", .{srf.fmtFrom(OptionsRecord, allocator, records.items, opts)});
try aw.writer.print("{f}", .{srf.fmt(OptionsRecord, records.items, opts)});
return aw.toOwnedSlice();
}
@ -1439,7 +1435,7 @@ pub const Store = struct {
}
while (try it.next()) |fields| {
const opt_rec = fields.to(OptionsRecord) catch continue;
const opt_rec = fields.to(OptionsRecord, .{}) catch continue;
switch (opt_rec) {
.chain => |ch| {
const idx = chains.items.len;
@ -1506,7 +1502,7 @@ pub const Store = struct {
errdefer aw.deinit();
var opts = options;
opts.created = std.Io.Timestamp.now(io, .real).toSeconds();
try aw.writer.print("{f}", .{srf.fmtFrom(EtfRecord, allocator, records.items, opts)});
try aw.writer.print("{f}", .{srf.fmt(EtfRecord, records.items, opts)});
return aw.toOwnedSlice();
}
@ -1527,7 +1523,7 @@ pub const Store = struct {
}
while (try it.next()) |fields| {
const etf_rec = fields.to(EtfRecord) catch continue;
const etf_rec = fields.to(EtfRecord, .{}) catch continue;
switch (etf_rec) {
.meta => |m| {
profile = m;
@ -1563,7 +1559,7 @@ pub const Store = struct {
pub fn serializePortfolio(allocator: std.mem.Allocator, lots: []const Lot) ![]const u8 {
var aw: std.Io.Writer.Allocating = .init(allocator);
errdefer aw.deinit();
try aw.writer.print("{f}", .{srf.fmtFrom(Lot, allocator, lots, .{})});
try aw.writer.print("{f}", .{srf.fmt(Lot, lots, .{})});
return aw.toOwnedSlice();
}
@ -1582,13 +1578,13 @@ pub fn deserializePortfolio(allocator: std.mem.Allocator, data: []const u8) !Por
}
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData;
defer it.deinit();
var skipped: usize = 0;
while (try it.next()) |fields| {
const line = it.state.line;
var lot = fields.to(Lot) catch {
var lot = fields.to(Lot, .{}) catch {
std.log.warn("portfolio: could not parse record at line {d}", .{line});
skipped += 1;
continue;

View file

@ -1219,7 +1219,7 @@ const srf = @import("srf");
/// field added to Lot is automatically included.
fn lotToString(allocator: std.mem.Allocator, lot: portfolio_mod.Lot) ![]const u8 {
const lots = [_]portfolio_mod.Lot{lot};
return std.fmt.allocPrint(allocator, "{f}", .{srf.fmtFrom(portfolio_mod.Lot, allocator, &lots, .{ .emit_directives = false })});
return std.fmt.allocPrint(allocator, "{f}", .{srf.fmt(portfolio_mod.Lot, &lots, .{ .emit_directives = false })});
}
/// Staleness color based on age vs threshold.

View file

@ -279,7 +279,7 @@ fn getFileInfo(io: std.Io, allocator: std.mem.Allocator, cache_dir: []const u8,
defer allocator.free(data);
var reader = std.Io.Reader.fixed(data);
const it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch
const it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch
return .{ .exists = true, .size = stat.size };
defer it.deinit();

View file

@ -796,12 +796,12 @@ pub fn loadWatchlist(io: std.Io, allocator: std.mem.Allocator, path: []const u8)
const WatchEntry = struct { symbol: []const u8 };
var reader = std.Io.Reader.fixed(file_data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return null;
var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return null;
defer it.deinit();
var syms: std.ArrayList([]const u8) = .empty;
while (it.next() catch null) |fields| {
const entry = fields.to(WatchEntry) catch continue;
const entry = fields.to(WatchEntry, .{}) catch continue;
const duped = allocator.dupe(u8, entry.symbol) catch continue;
syms.append(allocator, duped) catch {
allocator.free(duped);

View file

@ -305,7 +305,7 @@ fn runPortfolio(
}
/// Regenerate `history/rollup.srf` from `snapshots`. Uses
/// `timeline.buildRollupRecords` + `srf.fmtFrom` + atomic write.
/// `timeline.buildRollupRecords` + `srf.fmt` + atomic write.
fn rebuildRollup(
io: std.Io,
allocator: std.mem.Allocator,
@ -322,7 +322,7 @@ fn rebuildRollup(
var aw: std.Io.Writer.Allocating = .init(allocator);
defer aw.deinit();
try aw.writer.print("{f}", .{srf.fmtFrom(timeline.RollupRow, allocator, rows, .{
try aw.writer.print("{f}", .{srf.fmt(timeline.RollupRow, rows, .{
.emit_directives = true,
.created = now_s,
})});

View file

@ -941,7 +941,7 @@ fn runAnalysis(
/// Render a snapshot to SRF bytes. Caller owns result.
///
/// Each section is emitted as a homogeneous record slice via
/// `srf.fmtFrom`. The first section (meta) carries `emit_directives =
/// `srf.fmt`. The first section (meta) carries `emit_directives =
/// true` so the `#!srfv1` header and `#!created=...` line are written
/// once at the top; subsequent sections set `emit_directives = false`
/// to suppress a duplicate header.
@ -954,17 +954,17 @@ pub fn renderSnapshot(allocator: std.mem.Allocator, snap: Snapshot) ![]const u8
// same `srf.fmt` pipeline as the rest of the sections. This also
// puts the `#!created=...` header at the top of the file.
const meta_rows: [1]MetaRow = .{snap.meta};
try w.print("{f}", .{srf.fmtFrom(MetaRow, allocator, &meta_rows, .{
try w.print("{f}", .{srf.fmt(MetaRow, &meta_rows, .{
.emit_directives = true,
.created = snap.meta.captured_at,
})});
// Subsequent sections: records only (no header).
const tail_opts: srf.FormatOptions = .{ .emit_directives = false };
try w.print("{f}", .{srf.fmtFrom(TotalRow, allocator, snap.totals, tail_opts)});
try w.print("{f}", .{srf.fmtFrom(TaxTypeRow, allocator, snap.tax_types, tail_opts)});
try w.print("{f}", .{srf.fmtFrom(AccountRow, allocator, snap.accounts, tail_opts)});
try w.print("{f}", .{srf.fmtFrom(LotRow, allocator, snap.lots, tail_opts)});
try w.print("{f}", .{srf.fmt(TotalRow, snap.totals, tail_opts)});
try w.print("{f}", .{srf.fmt(TaxTypeRow, snap.tax_types, tail_opts)});
try w.print("{f}", .{srf.fmt(AccountRow, snap.accounts, tail_opts)});
try w.print("{f}", .{srf.fmt(LotRow, snap.lots, tail_opts)});
return aw.toOwnedSlice();
}

View file

@ -53,28 +53,26 @@ pub const ProjectedRetirement = union(enum) {
date: Date,
/// SRF parser hook. Accepts `reached` (case-insensitive) or
/// `YYYY-MM-DD`. Any other shape is rejected.
pub fn srfParse(str: []const u8) !ProjectedRetirement {
if (std.ascii.eqlIgnoreCase(str, "reached")) return .reached;
/// `YYYY-MM-DD`. Any other shape is rejected. Returns
/// `CoercionResult(...).initFree(...)` so SRF frees the consumed
/// source string after parsing.
pub fn srfParse(str: []const u8) !srf.CoercionResult(ProjectedRetirement) {
if (std.ascii.eqlIgnoreCase(str, "reached")) return .initFree(.reached);
const d = Date.parse(str) catch return error.InvalidProjectedRetirement;
return .{ .date = d };
return .initFree(.{ .date = d });
}
/// SRF serializer hook. Emits `reached` or `YYYY-MM-DD`.
/// SRF serializer hook. Emits `reached` or `YYYY-MM-DD` directly
/// to the writer using the "string" type (untyped).
pub fn srfFormat(
self: ProjectedRetirement,
allocator: std.mem.Allocator,
comptime field_name: []const u8,
) !srf.Value {
_ = field_name;
return switch (self) {
.reached => .{ .string = try allocator.dupe(u8, "reached") },
.date => |d| blk: {
const buf = try allocator.alloc(u8, 10);
_ = d.format(buf[0..10]);
break :blk .{ .string = buf };
},
};
writer: *std.Io.Writer,
) std.Io.Writer.Error!void {
switch (self) {
.reached => try writer.print("{s}::reached", .{field_name}),
.date => |d| try writer.print("{s}::{f}", .{ field_name, d }),
}
}
pub fn eql(a: ProjectedRetirement, b: ProjectedRetirement) bool {
@ -171,14 +169,14 @@ pub fn parseImportedValues(
bytes: []const u8,
) !ImportedValues {
var reader = std.Io.Reader.fixed(bytes);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidSrf;
var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return error.InvalidSrf;
defer it.deinit();
var points: std.ArrayList(HistoryPoint) = .empty;
errdefer points.deinit(allocator);
while (it.next() catch return error.InvalidSrf) |fields| {
const point = fields.to(HistoryPoint) catch return error.InvalidSrf;
const point = fields.to(HistoryPoint, .{}) catch return error.InvalidSrf;
try points.append(allocator, point);
}
@ -199,20 +197,20 @@ pub fn parseImportedValues(
// Tests
test "ProjectedRetirement.srfParse: reached" {
const v = try ProjectedRetirement.srfParse("reached");
try std.testing.expect(v == .reached);
const r = try ProjectedRetirement.srfParse("reached");
try std.testing.expect(r.value == .reached);
// Case-insensitive.
const v2 = try ProjectedRetirement.srfParse("REACHED");
try std.testing.expect(v2 == .reached);
const r2 = try ProjectedRetirement.srfParse("REACHED");
try std.testing.expect(r2.value == .reached);
}
test "ProjectedRetirement.srfParse: date" {
const v = try ProjectedRetirement.srfParse("2030-01-15");
try std.testing.expect(v == .date);
try std.testing.expectEqual(@as(i16, 2030), v.date.year());
try std.testing.expectEqual(@as(u8, 1), v.date.month());
try std.testing.expectEqual(@as(u8, 15), v.date.day());
const r = try ProjectedRetirement.srfParse("2030-01-15");
try std.testing.expect(r.value == .date);
try std.testing.expectEqual(@as(i16, 2030), r.value.date.year());
try std.testing.expectEqual(@as(u8, 1), r.value.date.month());
try std.testing.expectEqual(@as(u8, 15), r.value.date.day());
}
test "ProjectedRetirement.srfParse: invalid" {

View file

@ -71,9 +71,14 @@ pub fn parseSnapshotBytes(
bytes: []const u8,
) Error!snapshot.Snapshot {
var reader = std.Io.Reader.fixed(bytes);
// `alloc_strings = false` tells srf to return string values as
// slices into `bytes` rather than duping into its own arena.
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidSrf;
// Default `parse_allocator` (`.none_with_fallback`): short
// strings borrow from `bytes`; multi-line or comma-bearing
// values (e.g. an account name with a comma, which `srf.fmt`
// encodes with a length prefix) land in the iterator's
// fallback arena and are freed by `it.deinit()`. Snapshot
// consumers dupe what they need into the caller's allocator
// before the iterator dies.
var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidSrf;
defer it.deinit();
var meta_opt: ?snapshot.MetaRow = null;
@ -95,7 +100,7 @@ pub fn parseSnapshotBytes(
// record kind we don't know about). Every other srf error
// indicates malformed data in a record we SHOULD understand, so
// we propagate it up rather than silently losing rows.
const rec = field_it.to(SnapshotRecord) catch |err| switch (err) {
const rec = field_it.to(SnapshotRecord, .{}) catch |err| switch (err) {
error.ActiveTagDoesNotExist => continue,
else => return error.InvalidSrf,
};

View file

@ -721,10 +721,4 @@ test "looksLikeUnquotedGlob: empty arg returns false" {
test {
std.testing.refAllDecls(@This());
// Wikidata and EDGAR providers aren't yet imported via
// `service.zig`; pull them in here for test discovery in the
// meantime. Drop these once the providers are wired through
// the data service.
_ = @import("providers/Wikidata.zig");
_ = @import("providers/Edgar.zig");
}

View file

@ -57,11 +57,11 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
}
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData;
defer it.deinit();
while (try it.next()) |fields| {
const entry = fields.to(ClassificationEntry) catch continue;
const entry = fields.to(ClassificationEntry, .{}) catch continue;
try entries.append(allocator, .{
.symbol = try allocator.dupe(u8, entry.symbol),
.sector = if (entry.sector) |s| try allocator.dupe(u8, s) else null,

View file

@ -1,6 +1,6 @@
//! Snapshot record types — the wire format for `history/<date>-portfolio.srf`.
//!
//! Each record kind below is a plain struct suitable for `srf.fmtFrom`
//! Each record kind below is a plain struct suitable for `srf.fmt`
//! on the write side and `srf.iterator` + `FieldIterator.to(Union)` on
//! the read side (see `src/history.zig`, which demuxes via a tagged
//! `SnapshotRecord` union whose `srf_tag_field = "kind"`). Field order

View file

@ -87,38 +87,35 @@ pub const DestLot = union(enum) {
/// srf parser hook. Accepts `cash` (case-insensitive) or
/// `SYMBOL@YYYY-MM-DD`. Any other shape is rejected.
pub fn srfParse(str: []const u8) !DestLot {
if (std.ascii.eqlIgnoreCase(str, "cash")) return .{ .cash = {} };
///
/// The `cash` variant returns `.initFree(...)` since no slice
/// of `str` is retained. The `lot` variant returns `.init(...)`
/// because `DestLot.lot.symbol` borrows a slice of `str`;
/// freeing `str` would dangle the symbol. Callers consuming
/// the resulting `DestLot.lot` are responsible for duping
/// `symbol` if it must outlive the source buffer.
pub fn srfParse(str: []const u8) !srf.CoercionResult(DestLot) {
if (std.ascii.eqlIgnoreCase(str, "cash")) return .initFree(.{ .cash = {} });
const at = std.mem.indexOfScalar(u8, str, '@') orelse return error.InvalidDestLot;
if (at == 0) return error.InvalidDestLot;
if (at + 1 >= str.len) return error.InvalidDestLot;
const sym = str[0..at];
const date_str = str[at + 1 ..];
const date = Date.parse(date_str) catch return error.InvalidDestLot;
return .{ .lot = .{ .symbol = sym, .open_date = date } };
return .init(.{ .lot = .{ .symbol = sym, .open_date = date } });
}
/// srf serializer hook. Emits `cash` or `SYMBOL@YYYY-MM-DD`.
/// Allocates the output buffer via `allocator` caller (the SRF
/// OwnedRecord machinery) manages the lifetime.
/// srf serializer hook. Emits `cash` or `SYMBOL@YYYY-MM-DD`
/// directly to the writer using the "string" type (untyped).
pub fn srfFormat(
self: DestLot,
allocator: std.mem.Allocator,
comptime field_name: []const u8,
) !srf.Value {
_ = field_name;
return switch (self) {
.cash => .{ .string = try allocator.dupe(u8, "cash") },
.lot => |l| blk: {
// SYMBOL up to ~20 chars + '@' + 10-char date.
const buf = try std.fmt.allocPrint(allocator, "{s}@", .{l.symbol});
defer allocator.free(buf);
var out = try allocator.alloc(u8, buf.len + 10);
@memcpy(out[0..buf.len], buf);
_ = try std.fmt.bufPrint(out[buf.len..][0..10], "{f}", .{l.open_date});
break :blk .{ .string = out };
},
};
writer: *std.Io.Writer,
) std.Io.Writer.Error!void {
switch (self) {
.cash => try writer.print("{s}::cash", .{field_name}),
.lot => |l| try writer.print("{s}::{s}@{f}", .{ field_name, l.symbol, l.open_date }),
}
}
/// Equality for tests and duplicate-dest_lot detection in the matcher.
@ -263,11 +260,11 @@ pub fn parseTransactionLogFile(
}
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData;
defer it.deinit();
while (try it.next()) |fields| {
const parsed = fields.to(TransferRecord) catch |err| {
const parsed = fields.to(TransferRecord, .{}) catch |err| {
// Tests intentionally feed malformed records to exercise the
// skip path; real parse failures stay visible outside tests.
if (!builtin.is_test) {
@ -310,28 +307,28 @@ pub fn parseTransactionLogFile(
const testing = std.testing;
test "DestLot.srfParse: cash token (lowercase)" {
const d = try DestLot.srfParse("cash");
try testing.expect(d == .cash);
const r = try DestLot.srfParse("cash");
try testing.expect(r.value == .cash);
}
test "DestLot.srfParse: cash token (mixed case)" {
const d = try DestLot.srfParse("Cash");
try testing.expect(d == .cash);
const d2 = try DestLot.srfParse("CASH");
try testing.expect(d2 == .cash);
const r = try DestLot.srfParse("Cash");
try testing.expect(r.value == .cash);
const r2 = try DestLot.srfParse("CASH");
try testing.expect(r2.value == .cash);
}
test "DestLot.srfParse: SYMBOL@DATE" {
const d = try DestLot.srfParse("SYM@2026-05-03");
try testing.expect(d == .lot);
try testing.expectEqualStrings("SYM", d.lot.symbol);
try testing.expectEqual(Date.fromYmd(2026, 5, 3).days, d.lot.open_date.days);
const r = try DestLot.srfParse("SYM@2026-05-03");
try testing.expect(r.value == .lot);
try testing.expectEqualStrings("SYM", r.value.lot.symbol);
try testing.expectEqual(Date.fromYmd(2026, 5, 3).days, r.value.lot.open_date.days);
}
test "DestLot.srfParse: multi-char symbol with hyphen" {
const d = try DestLot.srfParse("SYM-ABC@2026-05-03");
try testing.expect(d == .lot);
try testing.expectEqualStrings("SYM-ABC", d.lot.symbol);
const r = try DestLot.srfParse("SYM-ABC@2026-05-03");
try testing.expect(r.value == .lot);
try testing.expectEqualStrings("SYM-ABC", r.value.lot.symbol);
}
test "DestLot.srfParse: missing @ rejected (non-cash)" {
@ -353,20 +350,22 @@ test "DestLot.srfParse: malformed date rejected" {
test "DestLot.srfFormat: cash" {
var buf: [64]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&buf);
const v = try (DestLot{ .cash = {} }).srfFormat(fba.allocator(), "dest_lot");
try testing.expectEqualStrings("cash", v.string);
var w = std.Io.Writer.fixed(&buf);
try (DestLot{ .cash = {} }).srfFormat("dest_lot", &w);
try testing.expectEqualStrings("dest_lot::cash", w.buffered());
}
test "DestLot.srfFormat: lot round-trip" {
var buf: [64]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&buf);
var w = std.Io.Writer.fixed(&buf);
const orig: DestLot = .{ .lot = .{ .symbol = "SYM", .open_date = Date.fromYmd(2026, 5, 3) } };
const v = try orig.srfFormat(fba.allocator(), "dest_lot");
try testing.expectEqualStrings("SYM@2026-05-03", v.string);
// Round-trip back through parse
const parsed = try DestLot.srfParse(v.string);
try testing.expect(orig.eql(parsed));
try orig.srfFormat("dest_lot", &w);
try testing.expectEqualStrings("dest_lot::SYM@2026-05-03", w.buffered());
// Round-trip back through parse — strip the "dest_lot::" prefix first.
const written = w.buffered();
const value_str = written[std.mem.indexOfScalar(u8, written, ':').? + 2 ..];
const parsed = try DestLot.srfParse(value_str);
try testing.expect(orig.eql(parsed.value));
}
test "DestLot.eql: cash vs cash" {

View file

@ -429,7 +429,7 @@ pub const EtfMetrics = struct {
holdings: []Holding, // owned
sectors: []SectorWeight, // owned
pub fn deinit(self: *EtfMetrics, allocator: std.mem.Allocator) void {
pub fn deinit(self: EtfMetrics, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
if (self.series_name) |s| allocator.free(s);
allocator.free(self.cik);
@ -665,13 +665,13 @@ pub fn parseStockTickerMap(allocator: std.mem.Allocator, json_bytes: []const u8)
/// re-parsing the full JSON. All owned strings allocated by the
/// caller's allocator; caller must free via `deinit`.
pub const SubmissionsSummary = struct {
entity_name: ?[]u8 = null,
entity_type: ?[]u8 = null,
sic_description: ?[]u8 = null,
entity_name: ?[]const u8 = null,
entity_type: ?[]const u8 = null,
sic_description: ?[]const u8 = null,
/// URL to the most-recent NPORT-P primary_doc.xml, if any.
latest_nport_p_url: ?[]u8 = null,
latest_nport_p_url: ?[]const u8 = null,
pub fn deinit(self: *SubmissionsSummary, allocator: std.mem.Allocator) void {
pub fn deinit(self: SubmissionsSummary, allocator: std.mem.Allocator) void {
if (self.entity_name) |s| allocator.free(s);
if (self.entity_type) |s| allocator.free(s);
if (self.sic_description) |s| allocator.free(s);
@ -731,10 +731,10 @@ fn parseSubmissionsFeed(
/// reasoning ("a 10-Q is 3 months stale, a 40-F is 12 months stale").
pub const SharesOutstanding = struct {
value: u64,
period_end: []u8, // owned
form: []u8, // owned
period_end: []const u8, // owned
form: []const u8, // owned
pub fn deinit(self: *SharesOutstanding, allocator: std.mem.Allocator) void {
pub fn deinit(self: SharesOutstanding, allocator: std.mem.Allocator) void {
allocator.free(self.period_end);
allocator.free(self.form);
}
@ -749,20 +749,43 @@ pub const SharesOutstanding = struct {
/// (per the project's source-pure invariant: every row in a shared
/// classification file must self-identify which source produced it).
pub const SharesRecord = struct {
symbol: []u8, // owned
symbol: []const u8, // owned
shares_outstanding: u64,
period_end: []u8, // owned, YYYY-MM-DD
form: ?[]u8 = null, // owned (e.g. "10-Q", "40-F")
cik: []u8, // owned
as_of: []u8, // owned (date scraper ran)
period_end: []const u8, // owned, YYYY-MM-DD
form: ?[]const u8 = null, // owned (e.g. "10-Q", "40-F")
cik: []const u8, // owned
as_of: []const u8, // owned (date scraper ran)
source: []const u8, // no default
pub fn deinit(self: *SharesRecord, allocator: std.mem.Allocator) void {
pub fn deinit(self: SharesRecord, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
allocator.free(self.period_end);
if (self.form) |f| allocator.free(f);
allocator.free(self.cik);
allocator.free(self.as_of);
allocator.free(self.source);
}
};
/// XBRL-derived facts about a single entity, modelled as a tagged
/// union. Records of this type are what the per-CIK
/// `entity_facts.srf` cache file stores. `shares_outstanding` is the
/// only variant today; further variants (revenue, net income, EPS,
/// ...) are expected to be added as `Edgar` learns to extract them.
/// No `srf_tag_field` override is declared because SRF's default
/// `type` discriminator is the desired one.
pub const EntityFactRecord = union(enum) {
    shares_outstanding: SharesRecord,

    /// Release every allocation owned by the active variant's payload.
    pub fn deinit(self: EntityFactRecord, allocator: std.mem.Allocator) void {
        switch (self) {
            // Every payload type exposes `deinit(allocator)`, so one
            // inline prong dispatches to whichever variant is active.
            inline else => |payload| payload.deinit(allocator),
        }
    }

    /// Deinit each element of `recs`, then free the slice itself.
    pub fn freeSlice(allocator: std.mem.Allocator, recs: []const EntityFactRecord) void {
        var i: usize = 0;
        while (i < recs.len) : (i += 1) {
            recs[i].deinit(allocator);
        }
        allocator.free(recs);
    }
};
@ -771,22 +794,23 @@ pub const SharesRecord = struct {
/// holds the whole fund's data (profile + N sectors + M holdings) in
/// nested arrays for parsing convenience.
pub const EtfProfileRecord = struct {
symbol: []u8, // owned
series_name: ?[]u8 = null, // owned
cik: []u8, // owned
series_id: ?[]u8 = null, // owned
symbol: []const u8, // owned
series_name: ?[]const u8 = null, // owned
cik: []const u8, // owned
series_id: ?[]const u8 = null, // owned
net_assets: ?f64 = null,
period_end: ?[]u8 = null, // owned, YYYY-MM-DD
as_of: []u8, // owned
period_end: ?[]const u8 = null, // owned, YYYY-MM-DD
as_of: []const u8, // owned
source: []const u8, // no default
pub fn deinit(self: *EtfProfileRecord, allocator: std.mem.Allocator) void {
pub fn deinit(self: EtfProfileRecord, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
if (self.series_name) |s| allocator.free(s);
allocator.free(self.cik);
if (self.series_id) |s| allocator.free(s);
if (self.period_end) |s| allocator.free(s);
allocator.free(self.as_of);
allocator.free(self.source);
}
};
@ -795,18 +819,19 @@ pub const EtfProfileRecord = struct {
/// NPORT-P abbreviation; `description` is the human-readable
/// translation per `sectorDescription`.
pub const EtfSectorRecord = struct {
symbol: []u8, // owned
code: []u8, // owned, e.g. "EC/CORP"
description: []u8, // owned, e.g. "Equity / Corporate"
symbol: []const u8, // owned
code: []const u8, // owned, e.g. "EC/CORP"
description: []const u8, // owned, e.g. "Equity / Corporate"
pct_of_portfolio: f64,
as_of: []u8, // owned
as_of: []const u8, // owned
source: []const u8, // no default
pub fn deinit(self: *EtfSectorRecord, allocator: std.mem.Allocator) void {
pub fn deinit(self: EtfSectorRecord, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
allocator.free(self.code);
allocator.free(self.description);
allocator.free(self.as_of);
allocator.free(self.source);
}
};
@ -815,17 +840,17 @@ pub const EtfSectorRecord = struct {
/// downstream display can prefer ticker > cusip > lei without
/// refetching.
pub const EtfHoldingRecord = struct {
symbol: []u8, // owned; the FUND's symbol
name: []u8, // owned; holding's company / instrument name
ticker: ?[]u8 = null, // owned
cusip: ?[]u8 = null, // owned
lei: ?[]u8 = null, // owned
country: ?[]u8 = null, // owned, ISO-3166 alpha-2
symbol: []const u8, // owned; the FUND's symbol
name: []const u8, // owned; holding's company / instrument name
ticker: ?[]const u8 = null, // owned
cusip: ?[]const u8 = null, // owned
lei: ?[]const u8 = null, // owned
country: ?[]const u8 = null, // owned, ISO-3166 alpha-2
pct_of_portfolio: f64,
as_of: []u8, // owned
as_of: []const u8, // owned
source: []const u8, // no default
pub fn deinit(self: *EtfHoldingRecord, allocator: std.mem.Allocator) void {
pub fn deinit(self: EtfHoldingRecord, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
allocator.free(self.name);
if (self.ticker) |s| allocator.free(s);
@ -833,6 +858,7 @@ pub const EtfHoldingRecord = struct {
if (self.lei) |s| allocator.free(s);
if (self.country) |s| allocator.free(s);
allocator.free(self.as_of);
allocator.free(self.source);
}
};
@ -846,13 +872,19 @@ pub const EtfMetricRecord = union(enum) {
sector: EtfSectorRecord,
holding: EtfHoldingRecord,
pub fn deinit(self: *EtfMetricRecord, allocator: std.mem.Allocator) void {
switch (self.*) {
.profile => |*r| r.deinit(allocator),
.sector => |*r| r.deinit(allocator),
.holding => |*r| r.deinit(allocator),
pub fn deinit(self: EtfMetricRecord, allocator: std.mem.Allocator) void {
switch (self) {
.profile => |r| r.deinit(allocator),
.sector => |r| r.deinit(allocator),
.holding => |r| r.deinit(allocator),
}
}
/// Free a slice of records, calling deinit on each element first.
pub fn freeSlice(allocator: std.mem.Allocator, recs: []const EtfMetricRecord) void {
for (recs) |r| r.deinit(allocator);
allocator.free(recs);
}
};
/// Decompose one fund's internal `EtfMetrics` struct into the SRF-
@ -874,7 +906,7 @@ pub fn appendEtfMetricRecords(
.net_assets = metrics.net_assets,
.period_end = if (metrics.period_end) |s| try allocator.dupe(u8, s) else null,
.as_of = try allocator.dupe(u8, metrics.as_of),
.source = "edgar",
.source = try allocator.dupe(u8, "edgar"),
} });
for (metrics.sectors) |s| {
try out.append(allocator, .{ .sector = .{
@ -883,7 +915,7 @@ pub fn appendEtfMetricRecords(
.description = try allocator.dupe(u8, s.description),
.pct_of_portfolio = s.pct_of_portfolio,
.as_of = try allocator.dupe(u8, metrics.as_of),
.source = "edgar",
.source = try allocator.dupe(u8, "edgar"),
} });
}
for (metrics.holdings) |h| {
@ -896,7 +928,7 @@ pub fn appendEtfMetricRecords(
.country = if (h.country) |c| try allocator.dupe(u8, c) else null,
.pct_of_portfolio = h.pct_of_portfolio,
.as_of = try allocator.dupe(u8, metrics.as_of),
.source = "edgar",
.source = try allocator.dupe(u8, "edgar"),
} });
}
}

View file

@ -77,7 +77,7 @@ pub const ClassificationRecord = struct {
as_of: []const u8, // owned
source: []const u8, // no default provenance always emitted
pub fn deinit(self: *ClassificationRecord, allocator: std.mem.Allocator) void {
pub fn deinit(self: ClassificationRecord, allocator: std.mem.Allocator) void {
allocator.free(self.symbol);
if (self.name) |s| allocator.free(s);
if (self.sector) |s| allocator.free(s);
@ -87,6 +87,13 @@ pub const ClassificationRecord = struct {
if (self.inception_date) |s| allocator.free(s);
if (self.cik) |s| allocator.free(s);
allocator.free(self.as_of);
allocator.free(self.source);
}
/// Free a slice of records, calling deinit on each element first.
pub fn freeSlice(allocator: std.mem.Allocator, recs: []const ClassificationRecord) void {
for (recs) |r| r.deinit(allocator);
allocator.free(recs);
}
};
@ -391,7 +398,7 @@ fn parse(
existing_or_new.value_ptr.* = .{
.symbol = try allocator.dupe(u8, ticker),
.as_of = try allocator.dupe(u8, as_of),
.source = "wikidata",
.source = try allocator.dupe(u8, "wikidata"),
};
}
const rec = existing_or_new.value_ptr;

View file

@ -21,35 +21,6 @@ pub const FigiResult = struct {
found: bool,
};
/// Look up a single CUSIP via OpenFIGI. Caller must free returned strings.
/// Returns null ticker if not found.
pub fn lookupCusip(
io: std.Io,
allocator: std.mem.Allocator,
cusip: []const u8,
api_key: ?[]const u8,
) !FigiResult {
const results = try lookupCusips(io, allocator, &.{cusip}, api_key);
defer {
for (results) |r| {
if (r.ticker) |t| allocator.free(t);
if (r.name) |n| allocator.free(n);
if (r.security_type) |s| allocator.free(s);
}
allocator.free(results);
}
if (results.len == 0) return .{ .ticker = null, .name = null, .security_type = null, .found = false };
// Copy results since we're freeing the batch
const r = results[0];
return .{
.ticker = if (r.ticker) |t| try allocator.dupe(u8, t) else null,
.name = if (r.name) |n| try allocator.dupe(u8, n) else null,
.security_type = if (r.security_type) |s| try allocator.dupe(u8, s) else null,
.found = r.found,
};
}
/// Look up multiple CUSIPs in a single batch request. Caller owns all returned slices.
/// Results array is parallel to the input cusips array (same length, same order).
pub fn lookupCusips(

View file

@ -33,6 +33,8 @@ const alphavantage = @import("providers/alphavantage.zig");
const OpenFigi = @import("providers/openfigi.zig");
const Yahoo = @import("providers/yahoo.zig").Yahoo;
const Tiingo = @import("providers/tiingo.zig").Tiingo;
const Wikidata = @import("providers/Wikidata.zig");
const Edgar = @import("providers/Edgar.zig");
const fmt = @import("format.zig");
const performance = @import("analytics/performance.zig");
const http = @import("net/http.zig");
@ -176,15 +178,11 @@ pub fn FetchResult(comptime T: type) type {
}
// PostProcess callbacks
// These are passed to Store.read to handle type-specific
// concerns: string duping (serialization plumbing) and domain transforms.
/// Dupe the currency string so it outlives the SRF iterator's backing buffer.
fn dividendPostProcess(div: *Dividend, allocator: std.mem.Allocator) anyerror!void {
if (div.currency) |c| {
div.currency = try allocator.dupe(u8, c);
}
}
// `Store.read` parses with `parse_allocator = .{ .allocator = ... }`,
// so SRF dupes every owned string into the caller's allocator
// automatically. PostProcess callbacks remain only for non-trivial
// post-parse logic (e.g. recomputing derived fields). String duping
// is NOT a valid reason to add a postProcess.
/// Recompute surprise/surprise_percent from actual and estimate fields.
/// SRF only stores actual and estimate; surprise is derived.
@ -244,6 +242,8 @@ pub const DataService = struct {
av: ?AlphaVantage = null,
yh: ?Yahoo = null,
tg: ?Tiingo = null,
wikidata: ?Wikidata = null,
edgar: ?Edgar = null,
/// Test-only guard: when true, any code path that would touch
/// the network panics with a clear message. Used by offline-mode
@ -301,6 +301,8 @@ pub const DataService = struct {
if (self.av) |*av| av.deinit();
if (self.yh) |*yh| yh.deinit();
if (self.tg) |*tg| tg.deinit();
if (self.wikidata) |*w| w.deinit();
if (self.edgar) |*e| e.deinit();
}
// Provider accessor
@ -311,6 +313,12 @@ pub const DataService = struct {
if (T == Cboe or T == Yahoo) {
// CBOE and Yahoo have no API key
@field(self, field_name) = T.init(self.io, self.allocator);
} else if (T == Wikidata or T == Edgar) {
// Open-data providers identified by contact email rather
// than an API key. The email goes in User-Agent + From
// headers per each provider's politeness contract.
const email = self.config.user_email orelse return DataError.NoApiKey;
@field(self, field_name) = T.init(self.io, self.allocator, email);
} else {
// All we're doing here is lower casing the type name, then
// appending _key to it, so AlphaVantage -> alphavantage_key
@ -801,7 +809,7 @@ pub const DataService = struct {
/// Fetch dividend history for a symbol.
pub fn getDividends(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Dividend) {
return self.fetchCached(Dividend, symbol, dividendPostProcess, opts);
return self.fetchCached(Dividend, symbol, null, opts);
}
/// Fetch split history for a symbol.
@ -931,6 +939,422 @@ pub const DataService = struct {
return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
// Wikidata + EDGAR providers
/// Fetch the Wikidata classification record for a single symbol
/// (name, sector, industry, country, inception date, CIK,
/// instance-of). Cache-first; on miss, runs a 1-symbol batched
/// SPARQL query.
///
/// Resolution order:
///   1. fresh local cache (skipped when `opts.force_refresh`),
///   2. when `opts.skip_network`: any cached copy, even stale,
///      otherwise `FetchFailed`,
///   3. server sync followed by a fresh-cache re-read (skipped when
///      `opts.force_refresh`),
///   4. Wikidata itself, with a single retry after a rate-limit
///      backoff.
///
/// Returns `NotFound` (and writes a negative cache entry) when
/// Wikidata answers with zero rows, whether on the first attempt or
/// on the rate-limit retry. Returns `FetchFailed` when the request
/// itself fails.
///
/// Callers fetching classifications for many symbols should use
/// `getClassifications(symbols)` instead — Wikidata's SPARQL API
/// is naturally batched, and one query for N symbols is much
/// cheaper than N queries for 1 symbol each.
pub fn getClassification(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Wikidata.ClassificationRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: classification fresh in local cache", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        if (s.read(Wikidata.ClassificationRecord, symbol, null, .any)) |cached| {
            log.info("{s}: classification stale-cached returned (skip_network)", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    // Try server sync before hitting Wikidata.
    if (!opts.force_refresh and self.syncFromServer(symbol, .classification)) {
        if (s.read(Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: classification synced from server", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("{s}: fetching classification from Wikidata", .{symbol});
    self.assertNetworkAllowed("getClassification wikidata.fetch");
    var wd = try self.getProvider(Wikidata);
    const symbols = [_][]const u8{symbol};

    // One retry after a rate-limit backoff. A successful retry rejoins
    // the normal path below, so an empty retry result is treated the
    // same as an empty first result (negative cache + NotFound) rather
    // than being reported as a fetch failure.
    const fetched = wd.fetch(self.allocator, &symbols) catch |first_err| retry: {
        if (first_err == error.RateLimited) {
            self.rateLimitBackoff();
            if (wd.fetch(self.allocator, &symbols)) |retried| {
                break :retry retried;
            } else |retry_err| {
                log.warn("{s}: wikidata fetch failed: {s}", .{ symbol, @errorName(retry_err) });
                return DataError.FetchFailed;
            }
        }
        log.warn("{s}: wikidata fetch failed: {s}", .{ symbol, @errorName(first_err) });
        return DataError.FetchFailed;
    };

    if (fetched.len == 0) {
        self.allocator.free(fetched);
        // Wikidata had no row for this symbol. Negative-cache to
        // suppress retries until the user explicitly refreshes.
        s.writeNegative(symbol, .classification);
        return DataError.NotFound;
    }

    s.write(Wikidata.ClassificationRecord, symbol, fetched, .{ .seconds = cache.Ttl.classification, .jitter_pct = 8 });
    return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
/// Batched classification fetch. Wikidata's SPARQL API takes a
/// `VALUES ?ticker { ... }` set in one query; this method runs
/// that query for the requested set, writes each returned record to
/// its per-symbol cache file, and returns the combined slice.
/// Symbols not in Wikidata are silently dropped from the result.
///
/// NOTE(review): unlike `getClassification`, this method does not
/// write a negative cache entry for symbols Wikidata did not return.
///
/// The cache is consulted first per-symbol; only the symbols
/// that miss the cache (or are stale) are passed to the SPARQL
/// query. This minimizes the upstream load when most symbols
/// were already classified in a prior run. With
/// `opts.skip_network`, only the cached subset is returned.
///
/// Ownership: the returned slice is allocated with
/// `result_allocator`; the strings inside each record are allocated
/// with the service's own allocator (both the cache read and the
/// Wikidata fetch use it).
pub fn getClassifications(
    self: *DataService,
    result_allocator: std.mem.Allocator,
    symbols: []const []const u8,
    opts: FetchOptions,
) DataError![]Wikidata.ClassificationRecord {
    if (symbols.len == 0) return &.{};
    var s = self.store();

    // Identify cache misses.
    var to_fetch: std.ArrayList([]const u8) = .empty;
    defer to_fetch.deinit(self.allocator);

    var cached_records: std.ArrayList(Wikidata.ClassificationRecord) = .empty;
    // The list's backing buffer is scratch space and is released on
    // every exit path. The records it holds are only freed on error;
    // on success their ownership has moved into the returned slice.
    defer cached_records.deinit(self.allocator);
    errdefer for (cached_records.items) |r| r.deinit(self.allocator);

    for (symbols) |sym| {
        if (!opts.force_refresh) {
            if (s.read(Wikidata.ClassificationRecord, sym, null, .fresh_only)) |cached| {
                defer self.allocator.free(cached.data);
                // The on-disk shape is a length-1 slice.
                if (cached.data.len > 0) {
                    cached_records.append(self.allocator, cached.data[0]) catch |err| {
                        // Nothing was moved out yet; free every record
                        // that was just read before propagating.
                        for (cached.data) |r| r.deinit(self.allocator);
                        return err;
                    };
                    // Free the rest if any (shouldn't happen for
                    // per-symbol classification, but defensive).
                    for (cached.data[1..]) |r| r.deinit(self.allocator);
                    continue;
                }
            }
        }
        try to_fetch.append(self.allocator, sym);
    }

    // Either everything was cached, or we are offline: in both cases
    // the result is exactly what the cache produced.
    if (to_fetch.items.len == 0 or opts.skip_network) {
        const out = try result_allocator.alloc(Wikidata.ClassificationRecord, cached_records.items.len);
        @memcpy(out, cached_records.items);
        return out;
    }

    log.debug("fetching {d} classifications from Wikidata", .{to_fetch.items.len});
    self.assertNetworkAllowed("getClassifications wikidata.fetch");
    var wd = try self.getProvider(Wikidata);
    const fetched = wd.fetch(self.allocator, to_fetch.items) catch |err| {
        log.warn("wikidata batch fetch failed: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    // The fetched slice is only a container; its records are moved
    // into `out` on success and freed here on error.
    defer self.allocator.free(fetched);
    errdefer for (fetched) |r| r.deinit(self.allocator);

    // Write each fetched record to its per-symbol cache file.
    for (fetched) |rec| {
        const single = [_]Wikidata.ClassificationRecord{rec};
        s.write(Wikidata.ClassificationRecord, rec.symbol, &single, .{ .seconds = cache.Ttl.classification, .jitter_pct = 8 });
    }

    // Combine cached + fetched into the result.
    const cached_count = cached_records.items.len;
    const out = try result_allocator.alloc(Wikidata.ClassificationRecord, cached_count + fetched.len);
    @memcpy(out[0..cached_count], cached_records.items);
    @memcpy(out[cached_count..], fetched);
    return out;
}
/// Fetch XBRL-derived entity facts for a CIK (currently
/// shares-outstanding; extensible to revenue / net income / EPS
/// as new variants are added to `Edgar.EntityFactRecord`).
///
/// CIK is the cache key — the file lives at
/// `<cache_dir>/<cik>/entity_facts.srf`. A single dual-class
/// issuer (BRK.A / BRK.B) shares one entity_facts file because
/// both class symbols resolve to the same CIK.
///
/// Resolution order matches the other cache-first getters: fresh
/// cache, then (with `opts.skip_network`) any cached copy or
/// `FetchFailed`, then server sync, then EDGAR. Returns `NotFound`
/// and writes a negative cache entry when EDGAR has no
/// shares-outstanding value for the CIK.
///
/// Every string in the returned record is heap-allocated with the
/// service allocator — including `source` — so the record can be
/// released with `SharesRecord.deinit`, which frees all of them.
pub fn getEntityFacts(self: *DataService, cik: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EntityFactRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts fresh in local cache", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        if (s.read(Edgar.EntityFactRecord, cik, null, .any)) |cached| {
            log.info("CIK {s}: entity_facts stale-cached returned (skip_network)", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    if (!opts.force_refresh and self.syncFromServer(cik, .entity_facts)) {
        if (s.read(Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
            log.debug("CIK {s}: entity_facts synced from server", .{cik});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("CIK {s}: fetching entity facts from EDGAR", .{cik});
    self.assertNetworkAllowed("getEntityFacts edgar.fetchSharesOutstanding");
    var edgar = try self.getProvider(Edgar);
    const so_opt = edgar.fetchSharesOutstanding(self.allocator, cik) catch |err| {
        log.warn("CIK {s}: shares fetch failed: {s}", .{ cik, @errorName(err) });
        return DataError.FetchFailed;
    };

    const so = so_opt orelse {
        // No shares-outstanding data for this CIK (e.g. 20-F-only
        // filers like BP, XBRL-light filers like META). Negative-
        // cache so we don't keep retrying.
        s.writeNegative(cik, .entity_facts);
        return DataError.NotFound;
    };
    defer so.deinit(self.allocator);

    const today = fmt.todayDate(self.io);
    var as_of_buf: [10]u8 = undefined;
    // [10]u8 always fits "YYYY-MM-DD" (10 chars exactly).
    const as_of = std.fmt.bufPrint(&as_of_buf, "{f}", .{today}) catch
        @panic("getEntityFacts: 10-byte buffer cannot hold YYYY-MM-DD — unreachable");

    // Build the record field by field so that a failed allocation
    // part-way through releases everything duplicated before it.
    const gpa = self.allocator;
    const symbol_dup = try gpa.dupe(u8, "");
    errdefer gpa.free(symbol_dup);
    const period_end_dup = try gpa.dupe(u8, so.period_end);
    errdefer gpa.free(period_end_dup);
    const form_dup: ?[]const u8 = if (so.form.len > 0) try gpa.dupe(u8, so.form) else null;
    errdefer if (form_dup) |f| gpa.free(f);
    const cik_dup = try gpa.dupe(u8, cik);
    errdefer gpa.free(cik_dup);
    const as_of_dup = try gpa.dupe(u8, as_of);
    errdefer gpa.free(as_of_dup);
    // `SharesRecord.deinit` frees `source`, so it must be a heap copy
    // rather than a string literal.
    const source_dup = try gpa.dupe(u8, "edgar_xbrl");
    errdefer gpa.free(source_dup);

    const records = try gpa.alloc(Edgar.EntityFactRecord, 1);
    records[0] = .{ .shares_outstanding = .{
        .symbol = symbol_dup,
        .shares_outstanding = so.value,
        .period_end = period_end_dup,
        .form = form_dup,
        .cik = cik_dup,
        .as_of = as_of_dup,
        .source = source_dup,
    } };

    s.write(Edgar.EntityFactRecord, cik, records, .{ .seconds = cache.Ttl.entity_facts, .jitter_pct = 8 });
    return .{ .data = records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
}
/// Fetch ETF metrics (NPORT-P profile + sectors + holdings) for
/// a fund symbol. Cache-first via `<symbol>/etf_metrics.srf`.
///
/// On cache miss, loads both EDGAR ticker maps through
/// `loadMutualFundTickerMap` / `loadCompanyTickerMap`, then runs the
/// full `Edgar.fetchEtfMetrics` cascade.
///
/// Returns `NotFound` (and writes a negative cache entry) when the
/// symbol is not a fund or is absent from both ticker maps, and
/// `FetchFailed` when a ticker map or the metrics fetch fails.
pub fn getEtfMetrics(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EtfMetricRecord) {
    var s = self.store();

    if (!opts.force_refresh) {
        if (s.read(Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: etf_metrics fresh in local cache", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    if (opts.skip_network) {
        if (s.read(Edgar.EtfMetricRecord, symbol, null, .any)) |cached| {
            log.info("{s}: etf_metrics stale-cached returned (skip_network)", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
        return DataError.FetchFailed;
    }

    if (!opts.force_refresh and self.syncFromServer(symbol, .etf_metrics)) {
        if (s.read(Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
            log.debug("{s}: etf_metrics synced from server", .{symbol});
            return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
        }
    }

    log.debug("{s}: fetching ETF metrics from EDGAR", .{symbol});
    self.assertNetworkAllowed("getEtfMetrics edgar.fetchEtfMetrics");

    // Load the ticker maps. These are big (3-5 MB each). The raw JSON
    // goes through the Store-backed cache inside the loaders, but the
    // parsed maps are built for this call only and released by the
    // defers below when it returns.
    var mf_map = self.loadMutualFundTickerMap(opts) catch |err| {
        log.warn("failed to load mutual-fund ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer mf_map.deinit();
    var co_map = self.loadCompanyTickerMap(opts) catch |err| {
        log.warn("failed to load company ticker map: {s}", .{@errorName(err)});
        return DataError.FetchFailed;
    };
    defer co_map.deinit();

    var edgar = try self.getProvider(Edgar);
    const result = edgar.fetchEtfMetrics(self.io, self.allocator, &mf_map, &co_map, symbol, 20) catch |err| {
        log.warn("{s}: etf_metrics fetch failed: {s}", .{ symbol, @errorName(err) });
        return DataError.FetchFailed;
    };

    switch (result) {
        // Both outcomes carry metrics that are flattened into records,
        // cached, and returned in exactly the same way.
        .full, .profile_only => |m| {
            defer m.deinit(self.allocator);
            var records: std.ArrayList(Edgar.EtfMetricRecord) = .empty;
            errdefer {
                for (records.items) |r| r.deinit(self.allocator);
                records.deinit(self.allocator);
            }
            try Edgar.appendEtfMetricRecords(self.allocator, &records, m);
            const owned = try records.toOwnedSlice(self.allocator);
            s.write(Edgar.EtfMetricRecord, symbol, owned, .{ .seconds = cache.Ttl.etf_metrics, .jitter_pct = 8 });
            return .{ .data = owned, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
        },
        // `.not_a_fund`: the user can ask `getEntityFacts(cik)`
        // separately for stock-level facts. `.not_in_edgar`: the symbol
        // isn't in either ticker map, so no EDGAR data is available.
        // Either way, write a negative entry to suppress retries.
        .not_a_fund, .not_in_edgar => {
            s.writeNegative(symbol, .etf_metrics);
            return DataError.NotFound;
        },
    }
}
/// Produce the parsed EDGAR mutual-fund ticker map. A fresh cached
/// JSON blob under the `"_edgar"` key is used when present (unless
/// `opts.force_refresh`); otherwise the JSON is downloaded from
/// sec.gov, written to the cache, and parsed. Caller deinits the
/// result.
fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap {
    const Blob = Edgar.MutualFundTickerMapBlob;
    var s = self.store();

    from_cache: {
        if (opts.force_refresh) break :from_cache;
        const cached = s.read(Blob, "_edgar", null, .fresh_only) orelse break :from_cache;
        defer self.allocator.free(cached.data);
        // An empty cache file falls through to the network path.
        if (cached.data.len == 0) break :from_cache;
        const cached_json = cached.data[0].json;
        defer self.allocator.free(cached_json);
        return Edgar.parseTickerMap(self.allocator, cached_json);
    }

    log.debug("fetching EDGAR mutual-fund ticker map", .{});
    self.assertNetworkAllowed("loadMutualFundTickerMap edgar.fetchMutualFundTickerMap");
    var edgar = try self.getProvider(Edgar);

    // The raw JSON is requested directly so the unparsed bytes can be
    // cached; only the parsed map is handed back to the caller.
    const url = "https://www.sec.gov/files/company_tickers_mf.json";
    var resp = try edgar.client.request(.GET, url, null, &.{
        .{ .name = "User-Agent", .value = "zfin/0.1" },
        .{ .name = "From", .value = self.config.user_email orelse "" },
    });
    defer resp.deinit();

    const json = try self.allocator.dupe(u8, resp.body);
    defer self.allocator.free(json);

    var blob = [_]Blob{.{ .json = json }};
    s.write(Blob, "_edgar", blob[0..], .{ .seconds = cache.Ttl.tickers_funds, .jitter_pct = 8 });
    return Edgar.parseTickerMap(self.allocator, json);
}
/// Produce the parsed EDGAR company ticker map (stocks + UITs). A
/// fresh cached JSON blob under the `"_edgar"` key is used when
/// present (unless `opts.force_refresh`); otherwise the JSON is
/// downloaded from sec.gov, written to the cache, and parsed. The
/// returned map is handed to the caller, who releases it.
fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap {
    const Blob = Edgar.CompanyTickerMapBlob;
    var s = self.store();

    from_cache: {
        if (opts.force_refresh) break :from_cache;
        const cached = s.read(Blob, "_edgar", null, .fresh_only) orelse break :from_cache;
        defer self.allocator.free(cached.data);
        // An empty cache file falls through to the network path.
        if (cached.data.len == 0) break :from_cache;
        const cached_json = cached.data[0].json;
        defer self.allocator.free(cached_json);
        return Edgar.parseStockTickerMap(self.allocator, cached_json);
    }

    log.debug("fetching EDGAR company ticker map", .{});
    self.assertNetworkAllowed("loadCompanyTickerMap edgar.fetchCompanyTickerMap");
    var edgar = try self.getProvider(Edgar);

    const url = "https://www.sec.gov/files/company_tickers.json";
    var resp = try edgar.client.request(.GET, url, null, &.{
        .{ .name = "User-Agent", .value = "zfin/0.1" },
        .{ .name = "From", .value = self.config.user_email orelse "" },
    });
    defer resp.deinit();

    const json = try self.allocator.dupe(u8, resp.body);
    defer self.allocator.free(json);

    var blob = [_]Blob{.{ .json = json }};
    s.write(Blob, "_edgar", blob[0..], .{ .seconds = cache.Ttl.tickers_companies, .jitter_pct = 8 });
    return Edgar.parseStockTickerMap(self.allocator, json);
}
//
/// Fetch a real-time quote for a symbol.
/// Yahoo Finance is primary (free, no API key, no 15-min delay).
/// Falls back to TwelveData if Yahoo fails.
@ -1144,7 +1568,7 @@ pub const DataService = struct {
/// Read dividends from cache only (no network fetch).
pub fn getCachedDividends(self: *DataService, symbol: []const u8) ?[]Dividend {
var s = self.store();
const result = s.read(Dividend, symbol, dividendPostProcess, .any) orelse return null;
const result = s.read(Dividend, symbol, null, .any) orelse return null;
return result.data;
}
@ -1665,57 +2089,12 @@ pub const DataService = struct {
return DataError.FetchFailed;
}
/// Look up a CUSIP via OpenFIGI API. Returns the ticker if found, null otherwise.
/// Results are cached in {cache_dir}/cusip_tickers.srf.
/// Caller owns the returned string.
pub fn lookupCusip(self: *DataService, cusip: []const u8) ?[]const u8 {
// Check local cache first
if (self.getCachedCusipTicker(cusip)) |t| return t;
// Try OpenFIGI
const result = OpenFigi.lookupCusip(self.allocator, cusip, self.config.openfigi_key) catch return null;
defer {
if (result.name) |n| self.allocator.free(n);
if (result.security_type) |s| self.allocator.free(s);
}
if (result.ticker) |ticker| {
// Cache the mapping
self.cacheCusipTicker(cusip, ticker);
return ticker; // caller takes ownership
}
return null;
}
/// A single CUSIP-to-ticker mapping record in the cache file.
const CusipEntry = struct {
cusip: []const u8 = "",
ticker: []const u8 = "",
};
/// Search `{cache_dir}/cusip_tickers.srf` for a ticker mapped to `cusip`.
///
/// Returns a freshly allocated copy of the first matching non-empty
/// ticker, or `null` when there is no match or when any step fails
/// (path join, file read, iterator setup, iteration, or the final copy).
/// At most 64 KiB of the file is read; a larger file makes the read fail
/// and is therefore reported as "not cached".
/// Caller owns the returned string.
fn getCachedCusipTicker(self: *DataService, cusip: []const u8) ?[]const u8 {
    const cache_path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return null;
    defer self.allocator.free(cache_path);

    const contents = std.fs.cwd().readFileAlloc(self.allocator, cache_path, 64 * 1024) catch return null;
    defer self.allocator.free(contents);

    var fixed_reader = std.Io.Reader.fixed(contents);
    // NOTE(review): with `.alloc_strings = false` the parsed strings
    // presumably borrow from `contents` — confirm against the srf docs.
    var rec_iter = srf.iterator(&fixed_reader, self.allocator, .{ .alloc_strings = false }) catch return null;
    defer rec_iter.deinit();

    while (rec_iter.next() catch return null) |fields| {
        // Records that do not coerce to a CusipEntry are skipped.
        const entry = fields.to(CusipEntry) catch continue;
        if (!std.mem.eql(u8, entry.cusip, cusip)) continue;
        if (entry.ticker.len == 0) continue;
        // `contents` is freed on return, so hand back an owned copy.
        return self.allocator.dupe(u8, entry.ticker) catch null;
    }
    return null;
}
/// Append a CUSIP->ticker mapping to the cache file.
///
/// Implemented as read-append-atomic-write (rather than a direct
@ -1745,7 +2124,7 @@ pub const DataService = struct {
const entry = [_]CusipEntry{.{ .cusip = cusip, .ticker = ticker }};
var aw: std.Io.Writer.Allocating = .init(self.allocator);
defer aw.deinit();
aw.writer.print("{f}", .{srf.fmtFrom(CusipEntry, self.allocator, &entry, .{ .emit_directives = emit_directives })}) catch return;
aw.writer.print("{f}", .{srf.fmt(CusipEntry, &entry, .{ .emit_directives = emit_directives })}) catch return;
const encoded = aw.writer.buffered();
if (encoded.len == 0) return;
@ -1796,13 +2175,11 @@ pub const DataService = struct {
.earnings => "/earnings",
.options => "/options",
.splits => "/splits",
.etf_profile => return false, // not served
.etf_profile => return false, // not served (replaced by etf_metrics)
.meta => return false,
// Endpoint mapping for these will be wired when the
// corresponding `getClassification` / `getEntityFacts` /
// `getEtfMetrics` service methods land. Until then,
// server sync is a no-op for them.
.classification, .etf_metrics, .entity_facts => return false,
.classification => "/classification",
.etf_metrics => "/etf_metrics",
.entity_facts => "/entity_facts",
// Provider-internal cache files (ticker-map indexes)
// are not served; clients fetch them directly from
// the SEC. The DataService caches the JSON via
@ -2362,3 +2739,206 @@ test "loadAllPrices offline mode skips network and returns cached" {
// failed_count should reflect MISSING.
try std.testing.expectEqual(@as(usize, 1), result.failed_count);
}
test "getClassification: skip_network with no cache returns FetchFailed" {
    const io = std.testing.io;
    const test_alloc = std.testing.allocator;

    // A fresh temporary directory acts as an empty cache root.
    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", test_alloc);
    defer test_alloc.free(cache_root);

    const config: Config = .{ .cache_dir = cache_root };
    var data_service = DataService.init(io, test_alloc, config);
    defer data_service.deinit();
    // Network guard on for the duration of the lookup.
    data_service.panic_on_network_attempt = true;

    // Nothing cached and the network skipped: the lookup must fail.
    try std.testing.expectError(
        DataError.FetchFailed,
        data_service.getClassification("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}
test "getClassification: cache hit returns cached data without network" {
    const io = std.testing.io;
    const test_alloc = std.testing.allocator;

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", test_alloc);
    defer test_alloc.free(cache_root);

    const config: Config = .{ .cache_dir = cache_root };
    var data_service = DataService.init(io, test_alloc, config);
    defer data_service.deinit();

    // Seed the classification cache with one AAPL record.
    var cache_store = data_service.store();
    var seed_records = [_]Wikidata.ClassificationRecord{.{
        .symbol = "AAPL",
        .name = "Apple Inc.",
        .country = "US",
        .as_of = "2026-05-25",
        .source = "wikidata",
    }};
    cache_store.write(
        Wikidata.ClassificationRecord,
        "AAPL",
        &seed_records,
        .{ .seconds = cache.Ttl.classification },
    );

    // With the network guard on, the lookup has to be answered from the
    // seeded cache entry.
    data_service.panic_on_network_attempt = true;
    const lookup = try data_service.getClassification("AAPL", .{});
    defer lookup.deinit();

    try std.testing.expectEqual(@as(usize, 1), lookup.data.len);
    const first_record = lookup.data[0];
    try std.testing.expectEqualStrings("AAPL", first_record.symbol);
    try std.testing.expectEqualStrings("Apple Inc.", first_record.name.?);
    try std.testing.expectEqual(Source.cached, lookup.source);
}
test "getEntityFacts: skip_network with no cache returns FetchFailed" {
    const io = std.testing.io;
    const test_alloc = std.testing.allocator;

    // A fresh temporary directory acts as an empty cache root.
    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", test_alloc);
    defer test_alloc.free(cache_root);

    const config: Config = .{ .cache_dir = cache_root };
    var data_service = DataService.init(io, test_alloc, config);
    defer data_service.deinit();
    // Network guard on for the duration of the lookup.
    data_service.panic_on_network_attempt = true;

    // Nothing cached for this key and the network skipped: must fail.
    try std.testing.expectError(
        DataError.FetchFailed,
        data_service.getEntityFacts("0000999999", .{ .skip_network = true }),
    );
}
test "getEntityFacts: cache hit returns cached shares-outstanding" {
    const io = std.testing.io;
    const test_alloc = std.testing.allocator;

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", test_alloc);
    defer test_alloc.free(cache_root);

    const config: Config = .{ .cache_dir = cache_root };
    var data_service = DataService.init(io, test_alloc, config);
    defer data_service.deinit();

    // Seed the entity-facts cache under key "0000320193" with a single
    // shares-outstanding fact.
    var cache_store = data_service.store();
    var seed_records = [_]Edgar.EntityFactRecord{
        .{ .shares_outstanding = .{
            .symbol = "",
            .shares_outstanding = 14687356000,
            .period_end = "2026-04-17",
            .form = "10-Q",
            .cik = "0000320193",
            .as_of = "2026-05-25",
            .source = "edgar_xbrl",
        } },
    };
    cache_store.write(
        Edgar.EntityFactRecord,
        "0000320193",
        &seed_records,
        .{ .seconds = cache.Ttl.entity_facts },
    );

    // Network guard on: the lookup has to come from the seeded entry.
    data_service.panic_on_network_attempt = true;
    const lookup = try data_service.getEntityFacts("0000320193", .{});
    defer lookup.deinit();

    try std.testing.expectEqual(@as(usize, 1), lookup.data.len);
    switch (lookup.data[0]) {
        .shares_outstanding => |fact| {
            try std.testing.expectEqual(@as(u64, 14687356000), fact.shares_outstanding);
            try std.testing.expectEqualStrings("0000320193", fact.cik);
        },
    }
    try std.testing.expectEqual(Source.cached, lookup.source);
}
test "getEtfMetrics: skip_network with no cache returns FetchFailed" {
    const io = std.testing.io;
    const test_alloc = std.testing.allocator;

    // A fresh temporary directory acts as an empty cache root.
    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", test_alloc);
    defer test_alloc.free(cache_root);

    const config: Config = .{ .cache_dir = cache_root };
    var data_service = DataService.init(io, test_alloc, config);
    defer data_service.deinit();
    // Network guard on for the duration of the lookup.
    data_service.panic_on_network_attempt = true;

    // Nothing cached and the network skipped: the lookup must fail.
    try std.testing.expectError(
        DataError.FetchFailed,
        data_service.getEtfMetrics("NEVERHEARDOFIT", .{ .skip_network = true }),
    );
}
test "getEtfMetrics: cache hit returns cached profile + sectors + holdings" {
    const io = std.testing.io;
    const test_alloc = std.testing.allocator;

    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    const cache_root = try tmp_dir.dir.realPathFileAlloc(io, ".", test_alloc);
    defer test_alloc.free(cache_root);

    const config: Config = .{ .cache_dir = cache_root };
    var data_service = DataService.init(io, test_alloc, config);
    defer data_service.deinit();

    // Seed the ETF-metrics cache for VTI with one record of each variant,
    // in the order the assertions below expect.
    var cache_store = data_service.store();
    var seed_records = [_]Edgar.EtfMetricRecord{
        .{ .profile = .{
            .symbol = "VTI",
            .cik = "0000036405",
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .sector = .{
            .symbol = "VTI",
            .code = "EC/CORP",
            .description = "Equity / Corporate",
            .pct_of_portfolio = 99.7,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
        .{ .holding = .{
            .symbol = "VTI",
            .name = "NVIDIA Corp",
            .pct_of_portfolio = 6.57,
            .as_of = "2026-05-25",
            .source = "edgar",
        } },
    };
    cache_store.write(
        Edgar.EtfMetricRecord,
        "VTI",
        &seed_records,
        .{ .seconds = cache.Ttl.etf_metrics },
    );

    // Network guard on: the lookup has to come from the seeded entry.
    data_service.panic_on_network_attempt = true;
    const lookup = try data_service.getEtfMetrics("VTI", .{});
    defer lookup.deinit();

    // All three records come back, in the order they were written.
    try std.testing.expectEqual(@as(usize, 3), lookup.data.len);
    try std.testing.expect(lookup.data[0] == .profile);
    try std.testing.expect(lookup.data[1] == .sector);
    try std.testing.expect(lookup.data[2] == .holding);
    try std.testing.expectEqualStrings("VTI", lookup.data[0].profile.symbol);
    try std.testing.expectEqual(Source.cached, lookup.source);
}
test "DataService getProvider initializes Wikidata with user_email" {
    const test_alloc = std.testing.allocator;
    const config: Config = .{
        .cache_dir = "/tmp/zfin-test-cache",
        .user_email = "test@example.com",
    };
    var data_service = DataService.init(std.testing.io, test_alloc, config);
    defer data_service.deinit();

    // First request populates the service's `wikidata` slot and carries
    // the configured email through to the provider.
    const first_provider = try data_service.getProvider(Wikidata);
    try std.testing.expect(data_service.wikidata != null);
    try std.testing.expectEqualStrings("test@example.com", first_provider.user_email);

    // A repeat request hands back the very same instance.
    const second_provider = try data_service.getProvider(Wikidata);
    try std.testing.expect(first_provider == second_provider);
}
test "DataService getProvider returns NoApiKey for Wikidata without user_email" {
    const test_alloc = std.testing.allocator;
    // No `user_email` is set in this configuration.
    const config: Config = .{ .cache_dir = "/tmp/zfin-test-cache" };
    var data_service = DataService.init(std.testing.io, test_alloc, config);
    defer data_service.deinit();

    // Wikidata is checked first, then Edgar; both must report NoApiKey.
    try std.testing.expectError(DataError.NoApiKey, data_service.getProvider(Wikidata));
    try std.testing.expectError(DataError.NoApiKey, data_service.getProvider(Edgar));
}

View file

@ -32,12 +32,13 @@ pub const KeyCombo = struct {
codepoint: u21,
mods: vaxis.Key.Modifiers = .{},
/// SRF custom parser. Used by `srf.FieldIterator.to(...)` to coerce
/// a `key::ctrl+c` field into a `KeyCombo` value.
///
/// `val` is the raw field text, handed to `parseKeyCombo`. When that
/// yields nothing this returns `error.InvalidKeyCombo`, which callers
/// see as `error.CustomParseFailed` (via the srf-level wrapping).
///
/// The value is returned as `CoercionResult(...).initFree(...)` so
/// SRF frees the consumed source string after parsing.
///
/// Only the current signature is kept: the superseded
/// `!KeyCombo`-returning definition sat beside this one, and two
/// functions with the same name in one container do not compile.
pub fn srfParse(val: []const u8) !srf.CoercionResult(KeyCombo) {
    return .initFree(parseKeyCombo(val) orelse return error.InvalidKeyCombo);
}
};
@ -451,15 +452,15 @@ pub fn loadFromData(allocator: std.mem.Allocator, data: []const u8) ?KeyMap {
pub fn loadFromDataChecked(allocator: std.mem.Allocator, data: []const u8) LoadOutcome {
var reader = std.Io.Reader.fixed(data);
const parsed = srf.parse(&reader, allocator, .{}) catch return .fallback;
// Don't deinit `parsed` until the end its arena owns the
var ri = srf.iterator(&reader, allocator, .{}) catch return .fallback;
// Don't deinit `ri` until the end its arena owns the
// string slices we'll borrow into the returned KeyMap. We
// transfer ownership to the KeyMap's arena instead.
// Move parsed.arena into our own KeyMap so it outlives this
// call. The `Parsed` struct holds the arena by pointer; we
// claim it directly.
const arena = parsed.arena;
//
// Move ri.arena into our own KeyMap so it outlives this
// call. The `RecordIterator` holds the arena by pointer; we
// claim it directly and skip `ri.deinit()`.
const arena = ri.arena;
errdefer {
arena.deinit();
allocator.destroy(arena);
@ -470,8 +471,9 @@ pub fn loadFromDataChecked(allocator: std.mem.Allocator, data: []const u8) LoadO
var scopes = std.ArrayList(ScopeBuilder).empty;
var warnings = std.ArrayList([]const u8).empty;
for (parsed.records, 0..) |record, idx| {
const raw = record.to(RawRecord) catch |err| {
var idx: usize = 0;
while (ri.next() catch return .fallback) |fields| : (idx += 1) {
const raw = fields.to(RawRecord, .{}) catch |err| {
// Per-record parse failure (missing field, bad key
// string, unknown action). Don't drop the whole file
// skip the record and warn the user. Record index is

View file

@ -225,7 +225,8 @@ fn colorPtrConst(theme: *const Theme, offset: usize) *const Color {
/// Format the first three components of `c` as a `#rrggbb` string
/// (lowercase hex, each component zero-padded to two digits) and return
/// it by value as a 7-byte array.
///
/// NOTE(review): assumes each `Color` component is 8 bits wide, so the
/// output is always exactly 7 bytes — confirm against `Color`.
fn formatHex(c: Color) [7]u8 {
    var buf: [7]u8 = undefined;
    // A formatting failure would leave `buf` partly uninitialized, so it
    // is treated as a programming error and panics rather than being
    // swallowed. The earlier `catch {}` variant of this statement is
    // removed; it hid the failure and duplicated the write.
    _ = std.fmt.bufPrint(&buf, "#{x:0>2}{x:0>2}{x:0>2}", .{ c[0], c[1], c[2] }) catch
        @panic("formatHex: 7-byte buffer cannot hold #RRGGBB — unreachable");
    return buf;
}
@ -273,7 +274,7 @@ pub fn loadFromData(data: []const u8) ?Theme {
const alloc = fba.allocator();
var reader = std.Io.Reader.fixed(data);
var it = srf.iterator(&reader, alloc, .{ .alloc_strings = false }) catch return null;
var it = srf.iterator(&reader, alloc, .{ .parse_allocator = .none }) catch return null;
// Don't deinit -- fba owns everything
var theme = default_theme;