From 52afd7569617db7a2c670fbf53b9d23ba3b34ba4 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Fri, 29 May 2026 12:23:43 -0700 Subject: [PATCH] update srf/wire edgar and wikidata into service --- .pre-commit-config.yaml | 2 +- AGENTS.md | 18 +- build.zig.zon | 4 +- src/Date.zig | 18 +- src/analytics/analysis.zig | 4 +- src/analytics/projections.zig | 19 +- src/analytics/timeline.zig | 4 +- src/cache/store.zig | 76 ++-- src/commands/audit.zig | 2 +- src/commands/cache.zig | 2 +- src/commands/common.zig | 4 +- src/commands/history.zig | 4 +- src/commands/snapshot.zig | 12 +- src/data/imported_values.zig | 52 ++- src/history.zig | 13 +- src/main.zig | 6 - src/models/classification.zig | 4 +- src/models/snapshot.zig | 2 +- src/models/transaction_log.zig | 89 ++--- src/providers/Edgar.zig | 118 ++++-- src/providers/Wikidata.zig | 11 +- src/providers/openfigi.zig | 29 -- src/service.zig | 706 ++++++++++++++++++++++++++++++--- src/tui/keybinds.zig | 30 +- src/tui/theme.zig | 5 +- 25 files changed, 913 insertions(+), 321 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82ea20b..cc5fc74 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: - id: test name: Run zig build test entry: zig - args: ["build", "coverage", "-Dcoverage-threshold=71"] + args: ["build", "coverage", "-Dcoverage-threshold=72"] language: system types: [file] pass_filenames: false diff --git a/AGENTS.md b/AGENTS.md index 6fac8b5..65e07a7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -589,7 +589,7 @@ zig build test # run all tests (single binary, discovers all tests zig build run -- # build and run CLI zig build docs # generate library documentation zig build coverage # run tests with kcov coverage (Linux only). See "Coverage" section. 
-zig build coverage -Dcoverage-threshold=65 # fail build if coverage < N% (pre-commit uses 65) +zig build coverage -Dcoverage-threshold=72 # fail build if coverage < N% (see .pre-commit-config.yaml for current floor) ``` **Tooling** (managed via `.mise.toml`): @@ -729,13 +729,15 @@ stdout: Total test coverage: 65.15% (15399/23638) ``` -**The pre-commit hook enforces a coverage floor.** The current -floor is **65%** (set in `.pre-commit-config.yaml`). The hook runs -`zig build coverage -Dcoverage-threshold=65` and fails the commit -if coverage drops below that threshold. Bumping the floor over time -is encouraged — every time we push the actual coverage materially -higher, raise the floor in the pre-commit config in the same commit -so the gain is locked in. +**The pre-commit hook enforces a coverage floor.** The exact +threshold lives in `.pre-commit-config.yaml` as the +`-Dcoverage-threshold=N` flag on the `test` hook — that's the +source of truth, always. The hook runs +`zig build coverage -Dcoverage-threshold=N` and fails the commit +if coverage drops below `N`. Bumping the floor over time is +encouraged — every time we push the actual coverage materially +higher, raise the threshold in the pre-commit config in the same +commit so the gain is locked in. 
**Coverage expectations for new work:** diff --git a/build.zig.zon b/build.zig.zon index b4b08cb..3718df8 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -13,8 +13,8 @@ .hash = "z2d-0.11.0-j5P_HtLzDwBGyQt49DrT0v4BuVqI_SRs6CXsuj7eBVhR", }, .srf = .{ - .url = "git+https://git.lerch.org/lobo/srf#12b755660e96ed65c645975110214fcc9c66ca4d", - .hash = "srf-0.0.0-qZj5743KAQAykeIHzFJdRDwgAA-Yy1RLaj0Lw4W5Rphx", + .url = "git+https://git.lerch.org/lobo/srf#4a3e5f00f15b0e0ba79d06ffe69dbcfa052baa5b", + .hash = "srf-0.0.0-qZj572nkAQAAz3zEg6fdD8A7PJnQ9je3zCeAOJS5PoZj", }, }, .paths = .{ diff --git a/src/Date.zig b/src/Date.zig index b165a54..ba32219 100644 --- a/src/Date.zig +++ b/src/Date.zig @@ -67,18 +67,20 @@ pub fn parse(str: []const u8) !Date { return fromYmd(y, m, d); } -/// Hook for srf Record.to(T) coercion. -pub fn srfParse(str: []const u8) !Date { - return parse(str); +/// Hook for srf coercion via `FieldIterator.to(T, ...)`. Returns a +/// `CoercionResult(Date)` with `require_free_original = true` so SRF +/// frees the consumed source string after parsing. +pub fn srfParse(str: []const u8) !srf.CoercionResult(Date) { + return .initFree(try parse(str)); } -/// Hook for srf Record.from(T) serialization. -pub fn srfFormat(self: Date, allocator: std.mem.Allocator, comptime field_name: []const u8) !srf.Value { - _ = field_name; +/// Hook for srf serialization. Writes "YYYY-MM-DD" to the writer +/// using the "string" type (untyped) so the value parses cleanly +/// back through `Date.srfParse` on read. +pub fn srfFormat(self: Date, comptime field_name: []const u8, writer: *std.Io.Writer) std.Io.Writer.Error!void { const ymd = epochDaysToYmd(self.days); const y: u16 = @intCast(ymd.year); - const buf = try std.fmt.allocPrint(allocator, "{d:0>4}-{d:0>2}-{d:0>2}", .{ y, ymd.month, ymd.day }); - return .{ .string = buf }; + try writer.print("{s}::{d:0>4}-{d:0>2}-{d:0>2}", .{ field_name, y, ymd.month, ymd.day }); } /// Zig 0.15+ format method: writes "YYYY-MM-DD" to the writer. 
diff --git a/src/analytics/analysis.zig b/src/analytics/analysis.zig index 7357535..be3863c 100644 --- a/src/analytics/analysis.zig +++ b/src/analytics/analysis.zig @@ -193,11 +193,11 @@ pub fn parseAccountsFile(allocator: std.mem.Allocator, data: []const u8) !Accoun } var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData; + var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData; defer it.deinit(); while (try it.next()) |fields| { - const entry = fields.to(AccountTaxEntry) catch continue; + const entry = fields.to(AccountTaxEntry, .{}) catch continue; try entries.append(allocator, .{ .account = try allocator.dupe(u8, entry.account), .tax_type = entry.tax_type, diff --git a/src/analytics/projections.zig b/src/analytics/projections.zig index e7d3ee3..c9d0cff 100644 --- a/src/analytics/projections.zig +++ b/src/analytics/projections.zig @@ -469,12 +469,15 @@ const SrfProjection = union(enum) { /// Returns default config if data is null or unparseable. /// /// Uses an internal stack-backed FixedBufferAllocator for the SRF -/// iterator's scratch (`alloc_strings = false` keeps strings borrowing -/// from `data`, so the iterator only needs scratch for field-row -/// bookkeeping). The 8 KB buffer comfortably fits any realistic -/// projections.srf — a handful of config + birthdate + event records. -/// On overflow the parse aborts and we return the default config, -/// matching the existing "unparseable → defaults" contract. +/// iterator's scratch. The default `parse_allocator` keeps short +/// string values borrowing from `data` (no copy) and transparently +/// allocates from the iterator's fallback arena for any +/// multi-line/binary values (e.g. an event `name` containing a +/// comma, which `srf.fmt` encodes with a length prefix). The 8 KB +/// buffer comfortably fits any realistic projections.srf — a +/// handful of config + birthdate + event records. 
On overflow the +/// parse aborts and we return the default config, matching the +/// existing "unparseable → defaults" contract. /// /// Format (union-tagged SRF records): /// type::config,target_stock_pct:num:80 @@ -491,7 +494,7 @@ pub fn parseProjectionsConfig(data: ?[]const u8) UserConfig { const scratch = fba.allocator(); var reader = std.Io.Reader.fixed(raw); - var it = srf.iterator(&reader, scratch, .{ .alloc_strings = false }) catch return config; + var it = srf.iterator(&reader, scratch, .{}) catch return config; defer it.deinit(); var saw_horizon = false; @@ -505,7 +508,7 @@ pub fn parseProjectionsConfig(data: ?[]const u8) UserConfig { var annotation_count: u8 = 0; while (it.next() catch null) |field_it| { - const rec = field_it.to(SrfProjection) catch continue; + const rec = field_it.to(SrfProjection, .{}) catch continue; switch (rec) { .config => |c| { config.target_stock_pct = c.target_stock_pct orelse config.target_stock_pct; diff --git a/src/analytics/timeline.zig b/src/analytics/timeline.zig index 713fa87..1215530 100644 --- a/src/analytics/timeline.zig +++ b/src/analytics/timeline.zig @@ -19,7 +19,7 @@ //! extractMetric(series, .net_worth) -> []MetricPoint for rendering //! //! For rollup generation, `buildRollupRecords` emits a flat slice suitable -//! for `srf.fmtFrom` without any of the per-lot detail — the rollup is a +//! for `srf.fmt` without any of the per-lot detail — the rollup is a //! summary cache, not a replacement for the per-day snapshot files. const std = @import("std"); @@ -392,7 +392,7 @@ pub const RollupRow = struct { }; /// Produce a rollup-row slice from a TimelineSeries. Pure function — -/// caller owns the result, ready to hand to `srf.fmtFrom`. +/// caller owns the result, ready to hand to `srf.fmt`. 
pub fn buildRollupRecords( allocator: std.mem.Allocator, points: []const TimelinePoint, diff --git a/src/cache/store.zig b/src/cache/store.zig index 2c91041..72a0411 100644 --- a/src/cache/store.zig +++ b/src/cache/store.zig @@ -12,6 +12,8 @@ const EarningsEvent = @import("../models/earnings.zig").EarningsEvent; const EtfProfile = @import("../models/etf_profile.zig").EtfProfile; const Holding = @import("../models/etf_profile.zig").Holding; const SectorWeight = @import("../models/etf_profile.zig").SectorWeight; +const Wikidata = @import("../providers/Wikidata.zig"); +const Edgar = @import("../providers/Edgar.zig"); // ── Wall-clock policy ──────────────────────────────────────── // @@ -264,6 +266,11 @@ pub const Store = struct { EarningsEvent => .earnings, OptionsChain => .options, EtfProfile => .etf_profile, + Wikidata.ClassificationRecord => .classification, + Edgar.EtfMetricRecord => .etf_metrics, + Edgar.EntityFactRecord => .entity_facts, + Edgar.MutualFundTickerMapBlob => .tickers_funds, + Edgar.CompanyTickerMapBlob => .tickers_companies, else => @compileError("unsupported type for Store"), }; } @@ -318,7 +325,7 @@ pub const Store = struct { } var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, self.allocator, .{ .alloc_strings = false }) catch return null; + var it = srf.iterator(&reader, self.allocator, .{}) catch return null; defer it.deinit(); if (freshness == .fresh_only) { @@ -465,7 +472,7 @@ pub const Store = struct { // below frees these duped strings after we're done with the // merged list. Keep the post-process logic in lockstep with // the deinit handling — they're a pair. 
- const existing_result = self.read(T, symbol, mergePostProcess(T), .any); + const existing_result = self.read(T, symbol, null, .any); const existing: []const T = if (existing_result) |r| r.data else &.{}; defer if (existing_result != null) { if (comptime @hasDecl(T, "deinit")) { @@ -544,26 +551,6 @@ pub const Store = struct { @compileError("mergeKey only defined for Dividend and Split"); } - /// Post-process callback for the merge primitive's `read` call. - /// Dupes any heap-allocated string fields into stable memory so - /// the `existing` slice's records survive past the SRF - /// iterator's backing buffer being freed. Paired with each - /// type's `deinit` to release the duped strings after merge. - /// Splits have no string fields so the callback is null. - fn mergePostProcess(comptime T: type) ?*const fn (*T, std.mem.Allocator) anyerror!void { - return switch (T) { - Dividend => &struct { - fn pp(div: *Dividend, allocator: std.mem.Allocator) anyerror!void { - if (div.currency) |c| { - div.currency = try allocator.dupe(u8, c); - } - } - }.pp, - Split => null, - else => @compileError("mergePostProcess only defined for Dividend and Split"), - }; - } - fn findKeyIndex(comptime T: type, items: []const T, key: i32) ?usize { for (items, 0..) |it, i| { if (mergeKey(T, it) == key) return i; @@ -872,7 +859,7 @@ pub const Store = struct { /// Schema for the SRF `.meta` sidecar emitted by `archiveTornBody`. /// Each field becomes a `key:type:value` entry in a single record /// under a `#!srfv1` header. Optional fields with `null` defaults - /// are silently skipped by `srf.fmtFrom` when unset — which is the + /// are silently skipped by `srf.fmt` when unset — which is the /// behavior we want for the http_*, server_*, and `?[]const u8` /// fields that only some detection paths populate. 
const TearRecord = struct { @@ -1018,7 +1005,7 @@ pub const Store = struct { var aw: std.Io.Writer.Allocating = .init(allocator); defer aw.deinit(); const records = [_]TearRecord{record}; - try aw.writer.print("{f}", .{srf.fmtFrom(TearRecord, allocator, &records, .{})}); + try aw.writer.print("{f}", .{srf.fmt(TearRecord, &records, .{})}); try atomic.writeFileAtomic(io, allocator, meta_path, aw.writer.buffered()); } @@ -1096,12 +1083,12 @@ pub const Store = struct { defer self.allocator.free(data); var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, self.allocator, .{ .alloc_strings = false }) catch return null; + var it = srf.iterator(&reader, self.allocator, .{ .parse_allocator = .none }) catch return null; defer it.deinit(); const created = it.created orelse std.Io.Timestamp.now(self.io, .real).toSeconds(); const fields = (it.next() catch return null) orelse return null; - const meta = fields.to(CandleMeta) catch return null; + const meta = fields.to(CandleMeta, .{}) catch return null; return .{ .meta = meta, .created = created }; } @@ -1280,7 +1267,16 @@ pub const Store = struct { comptime freshness: Freshness, ) ?CacheResult(T) { var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return null; + // `.parse_allocator = .{ .custom = .initTo(allocator) }` tells SRF + // to dupe field values (the data we keep) into the caller's + // allocator while letting field keys borrow from `data` (we only + // need them long enough for `fields.to(T, .{})` to match against + // compile-time field names). Records returned from `it.next()` + // then own their value strings via the caller's allocator, + // ready to outlive the iterator without any further duping. 
+ var it = srf.iterator(&reader, allocator, .{ + .parse_allocator = .{ .custom = .initTo(allocator) }, + }) catch return null; defer it.deinit(); if (freshness == .fresh_only) { @@ -1305,7 +1301,7 @@ pub const Store = struct { } while (it.next() catch return null) |fields| { - var item = fields.to(T) catch continue; + var item = fields.to(T, .{}) catch continue; if (comptime postProcess) |pp| { pp(&item, allocator) catch { if (comptime @hasDecl(T, "deinit")) item.deinit(allocator); @@ -1335,7 +1331,7 @@ pub const Store = struct { errdefer aw.deinit(); var opts = options; opts.created = std.Io.Timestamp.now(io, .real).toSeconds(); - try aw.writer.print("{f}", .{srf.fmtFrom(T, allocator, items, opts)}); + try aw.writer.print("{f}", .{srf.fmt(T, items, opts)}); return aw.toOwnedSlice(); } @@ -1344,7 +1340,7 @@ pub const Store = struct { fn serializeCandles(allocator: std.mem.Allocator, candles: []const Candle, options: srf.FormatOptions) ![]const u8 { var aw: std.Io.Writer.Allocating = .init(allocator); errdefer aw.deinit(); - try aw.writer.print("{f}", .{srf.fmtFrom(Candle, allocator, candles, options)}); + try aw.writer.print("{f}", .{srf.fmt(Candle, candles, options)}); return aw.toOwnedSlice(); } @@ -1359,17 +1355,17 @@ pub const Store = struct { const items = [_]CandleMeta{meta}; var opts = options; opts.created = std.Io.Timestamp.now(io, .real).toSeconds(); - try aw.writer.print("{f}", .{srf.fmtFrom(CandleMeta, allocator, &items, opts)}); + try aw.writer.print("{f}", .{srf.fmt(CandleMeta, &items, opts)}); return aw.toOwnedSlice(); } fn deserializeCandleMeta(allocator: std.mem.Allocator, data: []const u8) !CandleMeta { var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData; + var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return error.InvalidData; defer it.deinit(); const fields = (try it.next()) orelse return error.InvalidData; - return 
fields.to(CandleMeta) catch error.InvalidData; + return fields.to(CandleMeta, .{}) catch error.InvalidData; } // ── Private serialization: options (bespoke) ───────────────── @@ -1407,7 +1403,7 @@ pub const Store = struct { errdefer aw.deinit(); var opts = options; opts.created = std.Io.Timestamp.now(io, .real).toSeconds(); - try aw.writer.print("{f}", .{srf.fmtFrom(OptionsRecord, allocator, records.items, opts)}); + try aw.writer.print("{f}", .{srf.fmt(OptionsRecord, records.items, opts)}); return aw.toOwnedSlice(); } @@ -1439,7 +1435,7 @@ pub const Store = struct { } while (try it.next()) |fields| { - const opt_rec = fields.to(OptionsRecord) catch continue; + const opt_rec = fields.to(OptionsRecord, .{}) catch continue; switch (opt_rec) { .chain => |ch| { const idx = chains.items.len; @@ -1506,7 +1502,7 @@ pub const Store = struct { errdefer aw.deinit(); var opts = options; opts.created = std.Io.Timestamp.now(io, .real).toSeconds(); - try aw.writer.print("{f}", .{srf.fmtFrom(EtfRecord, allocator, records.items, opts)}); + try aw.writer.print("{f}", .{srf.fmt(EtfRecord, records.items, opts)}); return aw.toOwnedSlice(); } @@ -1527,7 +1523,7 @@ pub const Store = struct { } while (try it.next()) |fields| { - const etf_rec = fields.to(EtfRecord) catch continue; + const etf_rec = fields.to(EtfRecord, .{}) catch continue; switch (etf_rec) { .meta => |m| { profile = m; @@ -1563,7 +1559,7 @@ pub const Store = struct { pub fn serializePortfolio(allocator: std.mem.Allocator, lots: []const Lot) ![]const u8 { var aw: std.Io.Writer.Allocating = .init(allocator); errdefer aw.deinit(); - try aw.writer.print("{f}", .{srf.fmtFrom(Lot, allocator, lots, .{})}); + try aw.writer.print("{f}", .{srf.fmt(Lot, lots, .{})}); return aw.toOwnedSlice(); } @@ -1582,13 +1578,13 @@ pub fn deserializePortfolio(allocator: std.mem.Allocator, data: []const u8) !Por } var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return 
error.InvalidData; + var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData; defer it.deinit(); var skipped: usize = 0; while (try it.next()) |fields| { const line = it.state.line; - var lot = fields.to(Lot) catch { + var lot = fields.to(Lot, .{}) catch { std.log.warn("portfolio: could not parse record at line {d}", .{line}); skipped += 1; continue; diff --git a/src/commands/audit.zig b/src/commands/audit.zig index f3b6511..49e2db5 100644 --- a/src/commands/audit.zig +++ b/src/commands/audit.zig @@ -1219,7 +1219,7 @@ const srf = @import("srf"); /// field added to Lot is automatically included. fn lotToString(allocator: std.mem.Allocator, lot: portfolio_mod.Lot) ![]const u8 { const lots = [_]portfolio_mod.Lot{lot}; - return std.fmt.allocPrint(allocator, "{f}", .{srf.fmtFrom(portfolio_mod.Lot, allocator, &lots, .{ .emit_directives = false })}); + return std.fmt.allocPrint(allocator, "{f}", .{srf.fmt(portfolio_mod.Lot, &lots, .{ .emit_directives = false })}); } /// Staleness color based on age vs threshold. 
diff --git a/src/commands/cache.zig b/src/commands/cache.zig index a0cb675..063f1ca 100644 --- a/src/commands/cache.zig +++ b/src/commands/cache.zig @@ -279,7 +279,7 @@ fn getFileInfo(io: std.Io, allocator: std.mem.Allocator, cache_dir: []const u8, defer allocator.free(data); var reader = std.Io.Reader.fixed(data); - const it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch + const it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return .{ .exists = true, .size = stat.size }; defer it.deinit(); diff --git a/src/commands/common.zig b/src/commands/common.zig index 26a0a7f..0f8dddc 100644 --- a/src/commands/common.zig +++ b/src/commands/common.zig @@ -796,12 +796,12 @@ pub fn loadWatchlist(io: std.Io, allocator: std.mem.Allocator, path: []const u8) const WatchEntry = struct { symbol: []const u8 }; var reader = std.Io.Reader.fixed(file_data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return null; + var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return null; defer it.deinit(); var syms: std.ArrayList([]const u8) = .empty; while (it.next() catch null) |fields| { - const entry = fields.to(WatchEntry) catch continue; + const entry = fields.to(WatchEntry, .{}) catch continue; const duped = allocator.dupe(u8, entry.symbol) catch continue; syms.append(allocator, duped) catch { allocator.free(duped); diff --git a/src/commands/history.zig b/src/commands/history.zig index def9e68..3b1cf86 100644 --- a/src/commands/history.zig +++ b/src/commands/history.zig @@ -305,7 +305,7 @@ fn runPortfolio( } /// Regenerate `history/rollup.srf` from `snapshots`. Uses -/// `timeline.buildRollupRecords` + `srf.fmtFrom` + atomic write. +/// `timeline.buildRollupRecords` + `srf.fmt` + atomic write. 
fn rebuildRollup( io: std.Io, allocator: std.mem.Allocator, @@ -322,7 +322,7 @@ fn rebuildRollup( var aw: std.Io.Writer.Allocating = .init(allocator); defer aw.deinit(); - try aw.writer.print("{f}", .{srf.fmtFrom(timeline.RollupRow, allocator, rows, .{ + try aw.writer.print("{f}", .{srf.fmt(timeline.RollupRow, rows, .{ .emit_directives = true, .created = now_s, })}); diff --git a/src/commands/snapshot.zig b/src/commands/snapshot.zig index 7ff4190..a8e4df4 100644 --- a/src/commands/snapshot.zig +++ b/src/commands/snapshot.zig @@ -941,7 +941,7 @@ fn runAnalysis( /// Render a snapshot to SRF bytes. Caller owns result. /// /// Each section is emitted as a homogeneous record slice via -/// `srf.fmtFrom`. The first section (meta) carries `emit_directives = +/// `srf.fmt`. The first section (meta) carries `emit_directives = /// true` so the `#!srfv1` header and `#!created=...` line are written /// once at the top; subsequent sections set `emit_directives = false` /// to suppress a duplicate header. @@ -954,17 +954,17 @@ pub fn renderSnapshot(allocator: std.mem.Allocator, snap: Snapshot) ![]const u8 // same `fmtFrom` pipeline as the rest of the sections. This also // puts the `#!created=...` header at the top of the file. const meta_rows: [1]MetaRow = .{snap.meta}; - try w.print("{f}", .{srf.fmtFrom(MetaRow, allocator, &meta_rows, .{ + try w.print("{f}", .{srf.fmt(MetaRow, &meta_rows, .{ .emit_directives = true, .created = snap.meta.captured_at, })}); // Subsequent sections: records only (no header). 
const tail_opts: srf.FormatOptions = .{ .emit_directives = false }; - try w.print("{f}", .{srf.fmtFrom(TotalRow, allocator, snap.totals, tail_opts)}); - try w.print("{f}", .{srf.fmtFrom(TaxTypeRow, allocator, snap.tax_types, tail_opts)}); - try w.print("{f}", .{srf.fmtFrom(AccountRow, allocator, snap.accounts, tail_opts)}); - try w.print("{f}", .{srf.fmtFrom(LotRow, allocator, snap.lots, tail_opts)}); + try w.print("{f}", .{srf.fmt(TotalRow, snap.totals, tail_opts)}); + try w.print("{f}", .{srf.fmt(TaxTypeRow, snap.tax_types, tail_opts)}); + try w.print("{f}", .{srf.fmt(AccountRow, snap.accounts, tail_opts)}); + try w.print("{f}", .{srf.fmt(LotRow, snap.lots, tail_opts)}); return aw.toOwnedSlice(); } diff --git a/src/data/imported_values.zig b/src/data/imported_values.zig index e3fda9f..8cd44a8 100644 --- a/src/data/imported_values.zig +++ b/src/data/imported_values.zig @@ -53,28 +53,26 @@ pub const ProjectedRetirement = union(enum) { date: Date, /// SRF parser hook. Accepts `reached` (case-insensitive) or - /// `YYYY-MM-DD`. Any other shape is rejected. - pub fn srfParse(str: []const u8) !ProjectedRetirement { - if (std.ascii.eqlIgnoreCase(str, "reached")) return .reached; + /// `YYYY-MM-DD`. Any other shape is rejected. Returns + /// `CoercionResult(...).initFree(...)` so SRF frees the consumed + /// source string after parsing. + pub fn srfParse(str: []const u8) !srf.CoercionResult(ProjectedRetirement) { + if (std.ascii.eqlIgnoreCase(str, "reached")) return .initFree(.reached); const d = Date.parse(str) catch return error.InvalidProjectedRetirement; - return .{ .date = d }; + return .initFree(.{ .date = d }); } - /// SRF serializer hook. Emits `reached` or `YYYY-MM-DD`. + /// SRF serializer hook. Emits `reached` or `YYYY-MM-DD` directly + /// to the writer using the "string" type (untyped). 
pub fn srfFormat( self: ProjectedRetirement, - allocator: std.mem.Allocator, comptime field_name: []const u8, - ) !srf.Value { - _ = field_name; - return switch (self) { - .reached => .{ .string = try allocator.dupe(u8, "reached") }, - .date => |d| blk: { - const buf = try allocator.alloc(u8, 10); - _ = d.format(buf[0..10]); - break :blk .{ .string = buf }; - }, - }; + writer: *std.Io.Writer, + ) std.Io.Writer.Error!void { + switch (self) { + .reached => try writer.print("{s}::reached", .{field_name}), + .date => |d| try writer.print("{s}::{f}", .{ field_name, d }), + } } pub fn eql(a: ProjectedRetirement, b: ProjectedRetirement) bool { @@ -171,14 +169,14 @@ pub fn parseImportedValues( bytes: []const u8, ) !ImportedValues { var reader = std.Io.Reader.fixed(bytes); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidSrf; + var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return error.InvalidSrf; defer it.deinit(); var points: std.ArrayList(HistoryPoint) = .empty; errdefer points.deinit(allocator); while (it.next() catch return error.InvalidSrf) |fields| { - const point = fields.to(HistoryPoint) catch return error.InvalidSrf; + const point = fields.to(HistoryPoint, .{}) catch return error.InvalidSrf; try points.append(allocator, point); } @@ -199,20 +197,20 @@ pub fn parseImportedValues( // ── Tests ──────────────────────────────────────────────────── test "ProjectedRetirement.srfParse: reached" { - const v = try ProjectedRetirement.srfParse("reached"); - try std.testing.expect(v == .reached); + const r = try ProjectedRetirement.srfParse("reached"); + try std.testing.expect(r.value == .reached); // Case-insensitive. 
- const v2 = try ProjectedRetirement.srfParse("REACHED"); - try std.testing.expect(v2 == .reached); + const r2 = try ProjectedRetirement.srfParse("REACHED"); + try std.testing.expect(r2.value == .reached); } test "ProjectedRetirement.srfParse: date" { - const v = try ProjectedRetirement.srfParse("2030-01-15"); - try std.testing.expect(v == .date); - try std.testing.expectEqual(@as(i16, 2030), v.date.year()); - try std.testing.expectEqual(@as(u8, 1), v.date.month()); - try std.testing.expectEqual(@as(u8, 15), v.date.day()); + const r = try ProjectedRetirement.srfParse("2030-01-15"); + try std.testing.expect(r.value == .date); + try std.testing.expectEqual(@as(i16, 2030), r.value.date.year()); + try std.testing.expectEqual(@as(u8, 1), r.value.date.month()); + try std.testing.expectEqual(@as(u8, 15), r.value.date.day()); } test "ProjectedRetirement.srfParse: invalid" { diff --git a/src/history.zig b/src/history.zig index 599a480..3f62518 100644 --- a/src/history.zig +++ b/src/history.zig @@ -71,9 +71,14 @@ pub fn parseSnapshotBytes( bytes: []const u8, ) Error!snapshot.Snapshot { var reader = std.Io.Reader.fixed(bytes); - // `alloc_strings = false` tells srf to return string values as - // slices into `bytes` rather than duping into its own arena. - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidSrf; + // Default `parse_allocator` (`.none_with_fallback`): short + // strings borrow from `bytes`; multi-line or comma-bearing + // values (e.g. an account name with a comma, which `srf.fmt` + // encodes with a length prefix) land in the iterator's + // fallback arena and are freed by `it.deinit()`. Snapshot + // consumers dupe what they need into the caller's allocator + // before the iterator dies. + var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidSrf; defer it.deinit(); var meta_opt: ?snapshot.MetaRow = null; @@ -95,7 +100,7 @@ pub fn parseSnapshotBytes( // record kind we don't know about). 
Every other srf error // indicates malformed data in a record we SHOULD understand, so // we propagate it up rather than silently losing rows. - const rec = field_it.to(SnapshotRecord) catch |err| switch (err) { + const rec = field_it.to(SnapshotRecord, .{}) catch |err| switch (err) { error.ActiveTagDoesNotExist => continue, else => return error.InvalidSrf, }; diff --git a/src/main.zig b/src/main.zig index 2b3df0f..966b9f1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -721,10 +721,4 @@ test "looksLikeUnquotedGlob: empty arg returns false" { test { std.testing.refAllDecls(@This()); - // Wikidata and EDGAR providers aren't yet imported via - // `service.zig`; pull them in here for test discovery in the - // meantime. Drop these once the providers are wired through - // the data service. - _ = @import("providers/Wikidata.zig"); - _ = @import("providers/Edgar.zig"); } diff --git a/src/models/classification.zig b/src/models/classification.zig index 1a2b917..ac0af70 100644 --- a/src/models/classification.zig +++ b/src/models/classification.zig @@ -57,11 +57,11 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) ! } var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData; + var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData; defer it.deinit(); while (try it.next()) |fields| { - const entry = fields.to(ClassificationEntry) catch continue; + const entry = fields.to(ClassificationEntry, .{}) catch continue; try entries.append(allocator, .{ .symbol = try allocator.dupe(u8, entry.symbol), .sector = if (entry.sector) |s| try allocator.dupe(u8, s) else null, diff --git a/src/models/snapshot.zig b/src/models/snapshot.zig index aa691c7..0b8bd93 100644 --- a/src/models/snapshot.zig +++ b/src/models/snapshot.zig @@ -1,6 +1,6 @@ //! Snapshot record types — the wire format for `history/-portfolio.srf`. //! -//! 
Each record kind below is a plain struct suitable for `srf.fmtFrom` +//! Each record kind below is a plain struct suitable for `srf.fmt` //! on the write side and `srf.iterator` + `FieldIterator.to(Union)` on //! the read side (see `src/history.zig`, which demuxes via a tagged //! `SnapshotRecord` union whose `srf_tag_field = "kind"`). Field order diff --git a/src/models/transaction_log.zig b/src/models/transaction_log.zig index 7bd2fa3..0b517ec 100644 --- a/src/models/transaction_log.zig +++ b/src/models/transaction_log.zig @@ -87,38 +87,35 @@ pub const DestLot = union(enum) { /// srf parser hook. Accepts `cash` (case-insensitive) or /// `SYMBOL@YYYY-MM-DD`. Any other shape is rejected. - pub fn srfParse(str: []const u8) !DestLot { - if (std.ascii.eqlIgnoreCase(str, "cash")) return .{ .cash = {} }; + /// + /// The `cash` variant returns `.initFree(...)` since no slice + /// of `str` is retained. The `lot` variant returns `.init(...)` + /// because `DestLot.lot.symbol` borrows a slice of `str`; + /// freeing `str` would dangle the symbol. Callers consuming + /// the resulting `DestLot.lot` are responsible for duping + /// `symbol` if it must outlive the source buffer. + pub fn srfParse(str: []const u8) !srf.CoercionResult(DestLot) { + if (std.ascii.eqlIgnoreCase(str, "cash")) return .initFree(.{ .cash = {} }); const at = std.mem.indexOfScalar(u8, str, '@') orelse return error.InvalidDestLot; if (at == 0) return error.InvalidDestLot; if (at + 1 >= str.len) return error.InvalidDestLot; const sym = str[0..at]; const date_str = str[at + 1 ..]; const date = Date.parse(date_str) catch return error.InvalidDestLot; - return .{ .lot = .{ .symbol = sym, .open_date = date } }; + return .init(.{ .lot = .{ .symbol = sym, .open_date = date } }); } - /// srf serializer hook. Emits `cash` or `SYMBOL@YYYY-MM-DD`. - /// Allocates the output buffer via `allocator` — caller (the SRF - /// OwnedRecord machinery) manages the lifetime. + /// srf serializer hook. 
Emits `cash` or `SYMBOL@YYYY-MM-DD` + /// directly to the writer using the "string" type (untyped). pub fn srfFormat( self: DestLot, - allocator: std.mem.Allocator, comptime field_name: []const u8, - ) !srf.Value { - _ = field_name; - return switch (self) { - .cash => .{ .string = try allocator.dupe(u8, "cash") }, - .lot => |l| blk: { - // SYMBOL up to ~20 chars + '@' + 10-char date. - const buf = try std.fmt.allocPrint(allocator, "{s}@", .{l.symbol}); - defer allocator.free(buf); - var out = try allocator.alloc(u8, buf.len + 10); - @memcpy(out[0..buf.len], buf); - _ = try std.fmt.bufPrint(out[buf.len..][0..10], "{f}", .{l.open_date}); - break :blk .{ .string = out }; - }, - }; + writer: *std.Io.Writer, + ) std.Io.Writer.Error!void { + switch (self) { + .cash => try writer.print("{s}::cash", .{field_name}), + .lot => |l| try writer.print("{s}::{s}@{f}", .{ field_name, l.symbol, l.open_date }), + } } /// Equality for tests and duplicate-dest_lot detection in the matcher. @@ -263,11 +260,11 @@ pub fn parseTransactionLogFile( } var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData; + var it = srf.iterator(&reader, allocator, .{}) catch return error.InvalidData; defer it.deinit(); while (try it.next()) |fields| { - const parsed = fields.to(TransferRecord) catch |err| { + const parsed = fields.to(TransferRecord, .{}) catch |err| { // Tests intentionally feed malformed records to exercise the // skip path; real parse failures stay visible outside tests. 
if (!builtin.is_test) { @@ -310,28 +307,28 @@ pub fn parseTransactionLogFile( const testing = std.testing; test "DestLot.srfParse: cash token (lowercase)" { - const d = try DestLot.srfParse("cash"); - try testing.expect(d == .cash); + const r = try DestLot.srfParse("cash"); + try testing.expect(r.value == .cash); } test "DestLot.srfParse: cash token (mixed case)" { - const d = try DestLot.srfParse("Cash"); - try testing.expect(d == .cash); - const d2 = try DestLot.srfParse("CASH"); - try testing.expect(d2 == .cash); + const r = try DestLot.srfParse("Cash"); + try testing.expect(r.value == .cash); + const r2 = try DestLot.srfParse("CASH"); + try testing.expect(r2.value == .cash); } test "DestLot.srfParse: SYMBOL@DATE" { - const d = try DestLot.srfParse("SYM@2026-05-03"); - try testing.expect(d == .lot); - try testing.expectEqualStrings("SYM", d.lot.symbol); - try testing.expectEqual(Date.fromYmd(2026, 5, 3).days, d.lot.open_date.days); + const r = try DestLot.srfParse("SYM@2026-05-03"); + try testing.expect(r.value == .lot); + try testing.expectEqualStrings("SYM", r.value.lot.symbol); + try testing.expectEqual(Date.fromYmd(2026, 5, 3).days, r.value.lot.open_date.days); } test "DestLot.srfParse: multi-char symbol with hyphen" { - const d = try DestLot.srfParse("SYM-ABC@2026-05-03"); - try testing.expect(d == .lot); - try testing.expectEqualStrings("SYM-ABC", d.lot.symbol); + const r = try DestLot.srfParse("SYM-ABC@2026-05-03"); + try testing.expect(r.value == .lot); + try testing.expectEqualStrings("SYM-ABC", r.value.lot.symbol); } test "DestLot.srfParse: missing @ rejected (non-cash)" { @@ -353,20 +350,22 @@ test "DestLot.srfParse: malformed date rejected" { test "DestLot.srfFormat: cash" { var buf: [64]u8 = undefined; - var fba = std.heap.FixedBufferAllocator.init(&buf); - const v = try (DestLot{ .cash = {} }).srfFormat(fba.allocator(), "dest_lot"); - try testing.expectEqualStrings("cash", v.string); + var w = std.Io.Writer.fixed(&buf); + try (DestLot{ .cash = {} 
}).srfFormat("dest_lot", &w); + try testing.expectEqualStrings("dest_lot::cash", w.buffered()); } test "DestLot.srfFormat: lot round-trip" { var buf: [64]u8 = undefined; - var fba = std.heap.FixedBufferAllocator.init(&buf); + var w = std.Io.Writer.fixed(&buf); const orig: DestLot = .{ .lot = .{ .symbol = "SYM", .open_date = Date.fromYmd(2026, 5, 3) } }; - const v = try orig.srfFormat(fba.allocator(), "dest_lot"); - try testing.expectEqualStrings("SYM@2026-05-03", v.string); - // Round-trip back through parse - const parsed = try DestLot.srfParse(v.string); - try testing.expect(orig.eql(parsed)); + try orig.srfFormat("dest_lot", &w); + try testing.expectEqualStrings("dest_lot::SYM@2026-05-03", w.buffered()); + // Round-trip back through parse — strip the "dest_lot::" prefix. + const written = w.buffered(); + const value_str = written[std.mem.indexOfScalar(u8, written, ':').? + 2 ..]; + const parsed = try DestLot.srfParse(value_str); + try testing.expect(orig.eql(parsed.value)); } test "DestLot.eql: cash vs cash" { diff --git a/src/providers/Edgar.zig b/src/providers/Edgar.zig index f0aa602..15d01c9 100644 --- a/src/providers/Edgar.zig +++ b/src/providers/Edgar.zig @@ -429,7 +429,7 @@ pub const EtfMetrics = struct { holdings: []Holding, // owned sectors: []SectorWeight, // owned - pub fn deinit(self: *EtfMetrics, allocator: std.mem.Allocator) void { + pub fn deinit(self: EtfMetrics, allocator: std.mem.Allocator) void { allocator.free(self.symbol); if (self.series_name) |s| allocator.free(s); allocator.free(self.cik); @@ -665,13 +665,13 @@ pub fn parseStockTickerMap(allocator: std.mem.Allocator, json_bytes: []const u8) /// re-parsing the full JSON. All owned strings allocated by the /// caller's allocator; caller must free via `deinit`. 
pub const SubmissionsSummary = struct { - entity_name: ?[]u8 = null, - entity_type: ?[]u8 = null, - sic_description: ?[]u8 = null, + entity_name: ?[]const u8 = null, + entity_type: ?[]const u8 = null, + sic_description: ?[]const u8 = null, /// URL to the most-recent NPORT-P primary_doc.xml, if any. - latest_nport_p_url: ?[]u8 = null, + latest_nport_p_url: ?[]const u8 = null, - pub fn deinit(self: *SubmissionsSummary, allocator: std.mem.Allocator) void { + pub fn deinit(self: SubmissionsSummary, allocator: std.mem.Allocator) void { if (self.entity_name) |s| allocator.free(s); if (self.entity_type) |s| allocator.free(s); if (self.sic_description) |s| allocator.free(s); @@ -731,10 +731,10 @@ fn parseSubmissionsFeed( /// reasoning ("a 10-Q is 3 months stale, a 40-F is 12 months stale"). pub const SharesOutstanding = struct { value: u64, - period_end: []u8, // owned - form: []u8, // owned + period_end: []const u8, // owned + form: []const u8, // owned - pub fn deinit(self: *SharesOutstanding, allocator: std.mem.Allocator) void { + pub fn deinit(self: SharesOutstanding, allocator: std.mem.Allocator) void { allocator.free(self.period_end); allocator.free(self.form); } @@ -749,20 +749,43 @@ pub const SharesOutstanding = struct { /// (per the project's source-pure invariant: every row in a shared /// classification file must self-identify which source produced it). pub const SharesRecord = struct { - symbol: []u8, // owned + symbol: []const u8, // owned shares_outstanding: u64, - period_end: []u8, // owned, YYYY-MM-DD - form: ?[]u8 = null, // owned (e.g. "10-Q", "40-F") - cik: []u8, // owned - as_of: []u8, // owned (date scraper ran) + period_end: []const u8, // owned, YYYY-MM-DD + form: ?[]const u8 = null, // owned (e.g. 
"10-Q", "40-F") + cik: []const u8, // owned + as_of: []const u8, // owned (date scraper ran) source: []const u8, // no default - pub fn deinit(self: *SharesRecord, allocator: std.mem.Allocator) void { + pub fn deinit(self: SharesRecord, allocator: std.mem.Allocator) void { allocator.free(self.symbol); allocator.free(self.period_end); if (self.form) |f| allocator.free(f); allocator.free(self.cik); allocator.free(self.as_of); + allocator.free(self.source); + } +}; + +/// Tagged union of XBRL-derived per-entity facts. Stored in the +/// per-CIK `entity_facts.srf` cache file. Currently only carries +/// `shares_outstanding`; new variants (revenue, net income, EPS, +/// etc.) get added here as new methods on `Edgar` extract them. +/// SRF's default `type` discriminator is what we want, so no +/// `srf_tag_field` override is declared. +pub const EntityFactRecord = union(enum) { + shares_outstanding: SharesRecord, + + pub fn deinit(self: EntityFactRecord, allocator: std.mem.Allocator) void { + switch (self) { + .shares_outstanding => |r| r.deinit(allocator), + } + } + + /// Free a slice of records, calling deinit on each element first. + pub fn freeSlice(allocator: std.mem.Allocator, recs: []const EntityFactRecord) void { + for (recs) |r| r.deinit(allocator); + allocator.free(recs); } }; @@ -771,22 +794,23 @@ pub const SharesRecord = struct { /// holds the whole fund's data (profile + N sectors + M holdings) in /// nested arrays for parsing convenience. 
pub const EtfProfileRecord = struct { - symbol: []u8, // owned - series_name: ?[]u8 = null, // owned - cik: []u8, // owned - series_id: ?[]u8 = null, // owned + symbol: []const u8, // owned + series_name: ?[]const u8 = null, // owned + cik: []const u8, // owned + series_id: ?[]const u8 = null, // owned net_assets: ?f64 = null, - period_end: ?[]u8 = null, // owned, YYYY-MM-DD - as_of: []u8, // owned + period_end: ?[]const u8 = null, // owned, YYYY-MM-DD + as_of: []const u8, // owned source: []const u8, // no default - pub fn deinit(self: *EtfProfileRecord, allocator: std.mem.Allocator) void { + pub fn deinit(self: EtfProfileRecord, allocator: std.mem.Allocator) void { allocator.free(self.symbol); if (self.series_name) |s| allocator.free(s); allocator.free(self.cik); if (self.series_id) |s| allocator.free(s); if (self.period_end) |s| allocator.free(s); allocator.free(self.as_of); + allocator.free(self.source); } }; @@ -795,18 +819,19 @@ pub const EtfProfileRecord = struct { /// NPORT-P abbreviation; `description` is the human-readable /// translation per `sectorDescription`. pub const EtfSectorRecord = struct { - symbol: []u8, // owned - code: []u8, // owned, e.g. "EC/CORP" - description: []u8, // owned, e.g. "Equity / Corporate" + symbol: []const u8, // owned + code: []const u8, // owned, e.g. "EC/CORP" + description: []const u8, // owned, e.g. "Equity / Corporate" pct_of_portfolio: f64, - as_of: []u8, // owned + as_of: []const u8, // owned source: []const u8, // no default - pub fn deinit(self: *EtfSectorRecord, allocator: std.mem.Allocator) void { + pub fn deinit(self: EtfSectorRecord, allocator: std.mem.Allocator) void { allocator.free(self.symbol); allocator.free(self.code); allocator.free(self.description); allocator.free(self.as_of); + allocator.free(self.source); } }; @@ -815,17 +840,17 @@ pub const EtfSectorRecord = struct { /// downstream display can prefer ticker > cusip > lei without /// refetching. 
pub const EtfHoldingRecord = struct { - symbol: []u8, // owned; the FUND's symbol - name: []u8, // owned; holding's company / instrument name - ticker: ?[]u8 = null, // owned - cusip: ?[]u8 = null, // owned - lei: ?[]u8 = null, // owned - country: ?[]u8 = null, // owned, ISO-3166 alpha-2 + symbol: []const u8, // owned; the FUND's symbol + name: []const u8, // owned; holding's company / instrument name + ticker: ?[]const u8 = null, // owned + cusip: ?[]const u8 = null, // owned + lei: ?[]const u8 = null, // owned + country: ?[]const u8 = null, // owned, ISO-3166 alpha-2 pct_of_portfolio: f64, - as_of: []u8, // owned + as_of: []const u8, // owned source: []const u8, // no default - pub fn deinit(self: *EtfHoldingRecord, allocator: std.mem.Allocator) void { + pub fn deinit(self: EtfHoldingRecord, allocator: std.mem.Allocator) void { allocator.free(self.symbol); allocator.free(self.name); if (self.ticker) |s| allocator.free(s); @@ -833,6 +858,7 @@ pub const EtfHoldingRecord = struct { if (self.lei) |s| allocator.free(s); if (self.country) |s| allocator.free(s); allocator.free(self.as_of); + allocator.free(self.source); } }; @@ -846,13 +872,19 @@ pub const EtfMetricRecord = union(enum) { sector: EtfSectorRecord, holding: EtfHoldingRecord, - pub fn deinit(self: *EtfMetricRecord, allocator: std.mem.Allocator) void { - switch (self.*) { - .profile => |*r| r.deinit(allocator), - .sector => |*r| r.deinit(allocator), - .holding => |*r| r.deinit(allocator), + pub fn deinit(self: EtfMetricRecord, allocator: std.mem.Allocator) void { + switch (self) { + .profile => |r| r.deinit(allocator), + .sector => |r| r.deinit(allocator), + .holding => |r| r.deinit(allocator), } } + + /// Free a slice of records, calling deinit on each element first. 
+ pub fn freeSlice(allocator: std.mem.Allocator, recs: []const EtfMetricRecord) void { + for (recs) |r| r.deinit(allocator); + allocator.free(recs); + } }; /// Decompose one fund's internal `EtfMetrics` struct into the SRF- @@ -874,7 +906,7 @@ pub fn appendEtfMetricRecords( .net_assets = metrics.net_assets, .period_end = if (metrics.period_end) |s| try allocator.dupe(u8, s) else null, .as_of = try allocator.dupe(u8, metrics.as_of), - .source = "edgar", + .source = try allocator.dupe(u8, "edgar"), } }); for (metrics.sectors) |s| { try out.append(allocator, .{ .sector = .{ @@ -883,7 +915,7 @@ pub fn appendEtfMetricRecords( .description = try allocator.dupe(u8, s.description), .pct_of_portfolio = s.pct_of_portfolio, .as_of = try allocator.dupe(u8, metrics.as_of), - .source = "edgar", + .source = try allocator.dupe(u8, "edgar"), } }); } for (metrics.holdings) |h| { @@ -896,7 +928,7 @@ pub fn appendEtfMetricRecords( .country = if (h.country) |c| try allocator.dupe(u8, c) else null, .pct_of_portfolio = h.pct_of_portfolio, .as_of = try allocator.dupe(u8, metrics.as_of), - .source = "edgar", + .source = try allocator.dupe(u8, "edgar"), } }); } } diff --git a/src/providers/Wikidata.zig b/src/providers/Wikidata.zig index e8eb3f0..8c6fead 100644 --- a/src/providers/Wikidata.zig +++ b/src/providers/Wikidata.zig @@ -77,7 +77,7 @@ pub const ClassificationRecord = struct { as_of: []const u8, // owned source: []const u8, // no default — provenance always emitted - pub fn deinit(self: *ClassificationRecord, allocator: std.mem.Allocator) void { + pub fn deinit(self: ClassificationRecord, allocator: std.mem.Allocator) void { allocator.free(self.symbol); if (self.name) |s| allocator.free(s); if (self.sector) |s| allocator.free(s); @@ -87,6 +87,13 @@ pub const ClassificationRecord = struct { if (self.inception_date) |s| allocator.free(s); if (self.cik) |s| allocator.free(s); allocator.free(self.as_of); + allocator.free(self.source); + } + + /// Free a slice of records, calling deinit 
on each element first. + pub fn freeSlice(allocator: std.mem.Allocator, recs: []const ClassificationRecord) void { + for (recs) |r| r.deinit(allocator); + allocator.free(recs); } }; @@ -391,7 +398,7 @@ fn parse( existing_or_new.value_ptr.* = .{ .symbol = try allocator.dupe(u8, ticker), .as_of = try allocator.dupe(u8, as_of), - .source = "wikidata", + .source = try allocator.dupe(u8, "wikidata"), }; } const rec = existing_or_new.value_ptr; diff --git a/src/providers/openfigi.zig b/src/providers/openfigi.zig index d57aa96..d36c757 100644 --- a/src/providers/openfigi.zig +++ b/src/providers/openfigi.zig @@ -21,35 +21,6 @@ pub const FigiResult = struct { found: bool, }; -/// Look up a single CUSIP via OpenFIGI. Caller must free returned strings. -/// Returns null ticker if not found. -pub fn lookupCusip( - io: std.Io, - allocator: std.mem.Allocator, - cusip: []const u8, - api_key: ?[]const u8, -) !FigiResult { - const results = try lookupCusips(io, allocator, &.{cusip}, api_key); - defer { - for (results) |r| { - if (r.ticker) |t| allocator.free(t); - if (r.name) |n| allocator.free(n); - if (r.security_type) |s| allocator.free(s); - } - allocator.free(results); - } - if (results.len == 0) return .{ .ticker = null, .name = null, .security_type = null, .found = false }; - - // Copy results since we're freeing the batch - const r = results[0]; - return .{ - .ticker = if (r.ticker) |t| try allocator.dupe(u8, t) else null, - .name = if (r.name) |n| try allocator.dupe(u8, n) else null, - .security_type = if (r.security_type) |s| try allocator.dupe(u8, s) else null, - .found = r.found, - }; -} - /// Look up multiple CUSIPs in a single batch request. Caller owns all returned slices. /// Results array is parallel to the input cusips array (same length, same order). 
pub fn lookupCusips( diff --git a/src/service.zig b/src/service.zig index ef7beba..1415676 100644 --- a/src/service.zig +++ b/src/service.zig @@ -33,6 +33,8 @@ const alphavantage = @import("providers/alphavantage.zig"); const OpenFigi = @import("providers/openfigi.zig"); const Yahoo = @import("providers/yahoo.zig").Yahoo; const Tiingo = @import("providers/tiingo.zig").Tiingo; +const Wikidata = @import("providers/Wikidata.zig"); +const Edgar = @import("providers/Edgar.zig"); const fmt = @import("format.zig"); const performance = @import("analytics/performance.zig"); const http = @import("net/http.zig"); @@ -176,15 +178,11 @@ pub fn FetchResult(comptime T: type) type { } // ── PostProcess callbacks ──────────────────────────────────── -// These are passed to Store.read to handle type-specific -// concerns: string duping (serialization plumbing) and domain transforms. - -/// Dupe the currency string so it outlives the SRF iterator's backing buffer. -fn dividendPostProcess(div: *Dividend, allocator: std.mem.Allocator) anyerror!void { - if (div.currency) |c| { - div.currency = try allocator.dupe(u8, c); - } -} +// `Store.read` parses with `parse_allocator = .{ .allocator = ... }`, +// so SRF dupes every owned string into the caller's allocator +// automatically. PostProcess callbacks remain only for non-trivial +// post-parse logic (e.g. recomputing derived fields). String duping +// is NOT a valid reason to add a postProcess. /// Recompute surprise/surprise_percent from actual and estimate fields. /// SRF only stores actual and estimate; surprise is derived. @@ -244,6 +242,8 @@ pub const DataService = struct { av: ?AlphaVantage = null, yh: ?Yahoo = null, tg: ?Tiingo = null, + wikidata: ?Wikidata = null, + edgar: ?Edgar = null, /// Test-only guard: when true, any code path that would touch /// the network panics with a clear message. 
Used by offline-mode @@ -301,6 +301,8 @@ pub const DataService = struct { if (self.av) |*av| av.deinit(); if (self.yh) |*yh| yh.deinit(); if (self.tg) |*tg| tg.deinit(); + if (self.wikidata) |*w| w.deinit(); + if (self.edgar) |*e| e.deinit(); } // ── Provider accessor ────────────────────────────────────────── @@ -311,6 +313,12 @@ pub const DataService = struct { if (T == Cboe or T == Yahoo) { // CBOE and Yahoo have no API key @field(self, field_name) = T.init(self.io, self.allocator); + } else if (T == Wikidata or T == Edgar) { + // Open-data providers identified by contact email rather + // than an API key. The email goes in User-Agent + From + // headers per each provider's politeness contract. + const email = self.config.user_email orelse return DataError.NoApiKey; + @field(self, field_name) = T.init(self.io, self.allocator, email); } else { // All we're doing here is lower casing the type name, then // appending _key to it, so AlphaVantage -> alphavantage_key @@ -801,7 +809,7 @@ pub const DataService = struct { /// Fetch dividend history for a symbol. pub fn getDividends(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Dividend) { - return self.fetchCached(Dividend, symbol, dividendPostProcess, opts); + return self.fetchCached(Dividend, symbol, null, opts); } /// Fetch split history for a symbol. @@ -931,6 +939,422 @@ pub const DataService = struct { return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator }; } + // ── Wikidata + EDGAR providers ───────────────────────────────── + + /// Fetch the Wikidata classification record for a single symbol + /// (name, sector, industry, country, inception date, CIK, + /// instance-of). Cache-first; on miss, runs a 1-symbol batched + /// SPARQL query. + /// + /// `opts.skip_network = true` returns cached data even if stale, + /// `FetchFailed` on cache miss. 
`opts.force_refresh = true`
+    /// ignores the cache and re-fetches.
+    ///
+    /// Callers fetching classifications for many symbols should use
+    /// `getClassifications(symbols)` instead — Wikidata's SPARQL API
+    /// is naturally batched, and one query for N symbols is much
+    /// cheaper than N queries for 1 symbol each.
+    ///
+    /// Errors: `FetchFailed` when the provider call fails (including
+    /// a failed or empty retry after a rate limit) or when
+    /// `skip_network` is set and nothing is cached; `NotFound` when
+    /// Wikidata answers with zero rows; whatever `getProvider`
+    /// returns when the Wikidata provider cannot be constructed.
+    /// On success the returned `data` slice was produced with
+    /// `self.allocator`.
+    pub fn getClassification(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Wikidata.ClassificationRecord) {
+        var s = self.store();
+
+        // 1. Fresh local cache wins unless the caller forces a refresh.
+        if (!opts.force_refresh) {
+            if (s.read(Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
+                log.debug("{s}: classification fresh in local cache", .{symbol});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+        }
+
+        // 2. Offline mode: accept a cache entry of any age, otherwise give up.
+        if (opts.skip_network) {
+            if (s.read(Wikidata.ClassificationRecord, symbol, null, .any)) |cached| {
+                log.info("{s}: classification stale-cached returned (skip_network)", .{symbol});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+            return DataError.FetchFailed;
+        }
+
+        // Try server sync before hitting Wikidata.
+        if (!opts.force_refresh and self.syncFromServer(symbol, .classification)) {
+            if (s.read(Wikidata.ClassificationRecord, symbol, null, .fresh_only)) |cached| {
+                log.debug("{s}: classification synced from server", .{symbol});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+        }
+
+        log.debug("{s}: fetching classification from Wikidata", .{symbol});
+        self.assertNetworkAllowed("getClassification wikidata.fetch");
+        var wd = try self.getProvider(Wikidata);
+
+        const symbols = [_][]const u8{symbol};
+        const fetched = wd.fetch(self.allocator, &symbols) catch |err| {
+            // A rate limit earns exactly one retry after backing off.
+            // A retry that errors, or that comes back empty, falls
+            // through to the generic failure below.
+            if (err == error.RateLimited) {
+                self.rateLimitBackoff();
+                break_blk: {
+                    const retried = wd.fetch(self.allocator, &symbols) catch break :break_blk;
+                    if (retried.len > 0) {
+                        s.write(Wikidata.ClassificationRecord, symbol, retried, .{ .seconds = cache.Ttl.classification, .jitter_pct = 8 });
+                        return .{ .data = retried, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
+                    }
+                    self.allocator.free(retried);
+                }
+            }
+            log.warn("{s}: wikidata fetch failed: {s}", .{ symbol, @errorName(err) });
+            return DataError.FetchFailed;
+        };
+
+        if (fetched.len == 0) {
+            self.allocator.free(fetched);
+            // Wikidata had no row for this symbol. Negative-cache to
+            // suppress retries until the user explicitly refreshes.
+            s.writeNegative(symbol, .classification);
+            return DataError.NotFound;
+        }
+
+        // Persist before returning; ownership of `fetched` then moves
+        // to the caller through the FetchResult.
+        s.write(Wikidata.ClassificationRecord, symbol, fetched, .{ .seconds = cache.Ttl.classification, .jitter_pct = 8 });
+
+        return .{ .data = fetched, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
+    }
+
+    /// Batched classification fetch. Wikidata's SPARQL API takes a
+    /// `VALUES ?ticker { ...
}` set in one query; this method runs
+    /// that query for the requested set, splits the response into
+    /// per-symbol cache writes, and returns the slice. Symbols not
+    /// in Wikidata are silently dropped from the result.
+    /// NOTE(review): unlike `getClassification`, this path does not
+    /// currently write a negative cache entry for dropped symbols.
+    ///
+    /// The cache is consulted first per-symbol; only the symbols
+    /// that miss the cache (or are stale) are passed to the SPARQL
+    /// query. This minimizes the upstream load when most symbols
+    /// were already classified in a prior run.
+    ///
+    /// With `opts.skip_network = true` no query is made and only
+    /// the fresh cache hits are returned (stale entries are not
+    /// consulted here).
+    ///
+    /// Ownership: the returned outer slice is allocated with
+    /// `result_allocator`; the strings inside each record were
+    /// allocated with the service allocator (`self.allocator`).
+    /// Returns `FetchFailed` when the batched query fails.
+    pub fn getClassifications(
+        self: *DataService,
+        result_allocator: std.mem.Allocator,
+        symbols: []const []const u8,
+        opts: FetchOptions,
+    ) DataError![]Wikidata.ClassificationRecord {
+        if (symbols.len == 0) return &.{};
+        var s = self.store();
+
+        // Identify cache misses.
+        var to_fetch: std.ArrayList([]const u8) = .empty;
+        defer to_fetch.deinit(self.allocator);
+
+        // `cached_records` is scratch storage: its backing buffer is
+        // released on every exit path. The records it holds are moved
+        // into the result on success and destroyed only on error.
+        var cached_records: std.ArrayList(Wikidata.ClassificationRecord) = .empty;
+        defer cached_records.deinit(self.allocator);
+        errdefer {
+            for (cached_records.items) |*r| r.deinit(self.allocator);
+        }
+
+        for (symbols) |sym| {
+            if (!opts.force_refresh) {
+                if (s.read(Wikidata.ClassificationRecord, sym, null, .fresh_only)) |cached| {
+                    // The outer slice is always ours to free; only the
+                    // first record's ownership moves into `cached_records`.
+                    defer self.allocator.free(cached.data);
+                    // The on-disk shape is a length-1 slice.
+                    if (cached.data.len > 0) {
+                        // Free the rest if any (shouldn't happen for
+                        // per-symbol classification, but defensive).
+                        for (cached.data[1..]) |*r| r.deinit(self.allocator);
+                        // If the append fails the record has no owner yet.
+                        errdefer cached.data[0].deinit(self.allocator);
+                        try cached_records.append(self.allocator, cached.data[0]);
+                        continue;
+                    }
+                }
+            }
+            try to_fetch.append(self.allocator, sym);
+        }
+
+        // Everything was served from cache, or we are offline: return
+        // what the cache had.
+        if (to_fetch.items.len == 0 or opts.skip_network) {
+            return try result_allocator.dupe(Wikidata.ClassificationRecord, cached_records.items);
+        }
+
+        log.debug("fetching {d} classifications from Wikidata", .{to_fetch.items.len});
+        self.assertNetworkAllowed("getClassifications wikidata.fetch");
+        var wd = try self.getProvider(Wikidata);
+
+        const fetched = wd.fetch(self.allocator, to_fetch.items) catch |err| {
+            log.warn("wikidata batch fetch failed: {s}", .{@errorName(err)});
+            return DataError.FetchFailed;
+        };
+        // The fetched outer slice is always freed here. Its records
+        // move into `out` on success and are destroyed on error.
+        defer self.allocator.free(fetched);
+        errdefer {
+            for (fetched) |*r| r.deinit(self.allocator);
+        }
+
+        // Write each fetched record to its per-symbol cache file.
+        for (fetched) |rec| {
+            const single = [_]Wikidata.ClassificationRecord{rec};
+            s.write(Wikidata.ClassificationRecord, rec.symbol, &single, .{ .seconds = cache.Ttl.classification, .jitter_pct = 8 });
+        }
+
+        // Combine cached + fetched into the result. This is the last
+        // fallible step, so ownership transfers cleanly afterwards.
+        const n_cached = cached_records.items.len;
+        const out = try result_allocator.alloc(Wikidata.ClassificationRecord, n_cached + fetched.len);
+        @memcpy(out[0..n_cached], cached_records.items);
+        @memcpy(out[n_cached..], fetched);
+        return out;
+    }
+
+    /// Fetch XBRL-derived entity facts for a CIK (currently
+    /// shares-outstanding; extensible to revenue / net income / EPS
+    /// as new variants are added to `Edgar.EntityFactRecord`).
+    ///
+    /// CIK is the cache key — the file lives at
+    /// `<cache_dir>/<CIK>/entity_facts.srf`. A single dual-class
+    /// issuer (BRK.A / BRK.B) shares one entity_facts file because
+    /// both class symbols resolve to the same CIK.
+    pub fn getEntityFacts(self: *DataService, cik: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EntityFactRecord) {
+        var s = self.store();
+
+        // Fresh local cache wins unless the caller forces a refresh.
+        if (!opts.force_refresh) {
+            if (s.read(Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
+                log.debug("CIK {s}: entity_facts fresh in local cache", .{cik});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+        }
+
+        // Offline mode: accept a cache entry of any age, otherwise give up.
+        if (opts.skip_network) {
+            if (s.read(Edgar.EntityFactRecord, cik, null, .any)) |cached| {
+                log.info("CIK {s}: entity_facts stale-cached returned (skip_network)", .{cik});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+            return DataError.FetchFailed;
+        }
+
+        // Try server sync before hitting EDGAR.
+        if (!opts.force_refresh and self.syncFromServer(cik, .entity_facts)) {
+            if (s.read(Edgar.EntityFactRecord, cik, null, .fresh_only)) |cached| {
+                log.debug("CIK {s}: entity_facts synced from server", .{cik});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+        }
+
+        log.debug("CIK {s}: fetching entity facts from EDGAR", .{cik});
+        self.assertNetworkAllowed("getEntityFacts edgar.fetchSharesOutstanding");
+        var edgar = try self.getProvider(Edgar);
+
+        const so_opt = edgar.fetchSharesOutstanding(self.allocator, cik) catch |err| {
+            log.warn("CIK {s}: shares fetch failed: {s}", .{ cik, @errorName(err) });
+            return DataError.FetchFailed;
+        };
+
+        if (so_opt) |so_in| {
+            var so = so_in;
+            defer so.deinit(self.allocator);
+            const today = fmt.todayDate(self.io);
+            var as_of_buf: [10]u8 = undefined;
+            // [10]u8 always fits "YYYY-MM-DD" (10 chars exactly).
+            const as_of = std.fmt.bufPrint(&as_of_buf, "{f}", .{today}) catch
+                @panic("getEntityFacts: 10-byte buffer cannot hold YYYY-MM-DD — unreachable");
+
+            // Every string in the record must be heap-owned by
+            // `self.allocator`: `SharesRecord.deinit` frees all of
+            // them, `source` included, so a string literal here would
+            // be an invalid free. Each allocation gets its own
+            // errdefer so a later failure cannot leak earlier ones.
+            const symbol_dup = try self.allocator.dupe(u8, "");
+            errdefer self.allocator.free(symbol_dup);
+            const period_end_dup = try self.allocator.dupe(u8, so.period_end);
+            errdefer self.allocator.free(period_end_dup);
+            const form_dup: ?[]u8 = if (so.form.len > 0) try self.allocator.dupe(u8, so.form) else null;
+            errdefer if (form_dup) |f| self.allocator.free(f);
+            const cik_dup = try self.allocator.dupe(u8, cik);
+            errdefer self.allocator.free(cik_dup);
+            const as_of_dup = try self.allocator.dupe(u8, as_of);
+            errdefer self.allocator.free(as_of_dup);
+            const source_dup = try self.allocator.dupe(u8, "edgar_xbrl");
+            errdefer self.allocator.free(source_dup);
+
+            // Last fallible step; after this the record owns the strings.
+            const records = try self.allocator.alloc(Edgar.EntityFactRecord, 1);
+            records[0] = .{ .shares_outstanding = .{
+                // The lookup is keyed by CIK, so no ticker is known here.
+                .symbol = symbol_dup,
+                .shares_outstanding = so.value,
+                .period_end = period_end_dup,
+                .form = form_dup,
+                .cik = cik_dup,
+                .as_of = as_of_dup,
+                .source = source_dup,
+            } };
+            s.write(Edgar.EntityFactRecord, cik, records, .{ .seconds = cache.Ttl.entity_facts, .jitter_pct = 8 });
+
+            return .{ .data = records, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
+        }
+
+        // No shares-outstanding data for this CIK (e.g. 20-F-only
+        // filers like BP, XBRL-light filers like META). Negative-
+        // cache so we don't keep retrying.
+        s.writeNegative(cik, .entity_facts);
+        return DataError.NotFound;
+    }
+
+    /// Fetch ETF metrics (NPORT-P profile + sectors + holdings) for
+    /// a fund symbol. Cache-first via `<symbol>/etf_metrics.srf`.
+    ///
+    /// On cache miss, looks up the symbol in the EDGAR ticker maps
+    /// (loaded on demand via `loadMutualFundTickerMap` and
+    /// `loadCompanyTickerMap`), then runs the full
+    /// `Edgar.fetchEtfMetrics` cascade.
+    pub fn getEtfMetrics(self: *DataService, symbol: []const u8, opts: FetchOptions) DataError!FetchResult(Edgar.EtfMetricRecord) {
+        var s = self.store();
+
+        // Fresh local cache wins unless the caller forces a refresh.
+        if (!opts.force_refresh) {
+            if (s.read(Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
+                log.debug("{s}: etf_metrics fresh in local cache", .{symbol});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+        }
+
+        // Offline mode: accept a cache entry of any age, otherwise give up.
+        if (opts.skip_network) {
+            if (s.read(Edgar.EtfMetricRecord, symbol, null, .any)) |cached| {
+                log.info("{s}: etf_metrics stale-cached returned (skip_network)", .{symbol});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+            return DataError.FetchFailed;
+        }
+
+        // Try server sync before hitting EDGAR.
+        if (!opts.force_refresh and self.syncFromServer(symbol, .etf_metrics)) {
+            if (s.read(Edgar.EtfMetricRecord, symbol, null, .fresh_only)) |cached| {
+                log.debug("{s}: etf_metrics synced from server", .{symbol});
+                return .{ .data = cached.data, .source = .cached, .timestamp = cached.timestamp, .allocator = self.allocator };
+            }
+        }
+
+        log.debug("{s}: fetching ETF metrics from EDGAR", .{symbol});
+        self.assertNetworkAllowed("getEtfMetrics edgar.fetchEtfMetrics");
+
+        // Load the ticker maps. These are big (3-5 MB each). The raw
+        // JSON is cached through the Store, but the parsed TickerMap
+        // is rebuilt and released on every call to this function
+        // (see the `defer ...deinit()` lines below).
+        var mf_map = self.loadMutualFundTickerMap(opts) catch |err| {
+            log.warn("failed to load mutual-fund ticker map: {s}", .{@errorName(err)});
+            return DataError.FetchFailed;
+        };
+        defer mf_map.deinit();
+        var co_map = self.loadCompanyTickerMap(opts) catch |err| {
+            log.warn("failed to load company ticker map: {s}", .{@errorName(err)});
+            return DataError.FetchFailed;
+        };
+        defer co_map.deinit();
+
+        var edgar = try self.getProvider(Edgar);
+        const result = edgar.fetchEtfMetrics(self.io, self.allocator, &mf_map, &co_map, symbol, 20) catch |err| {
+            log.warn("{s}: etf_metrics fetch failed: {s}", .{ symbol, @errorName(err) });
+            return DataError.FetchFailed;
+        };
+
+        switch (result) {
+            // Both variants carry metrics that are flattened, cached
+            // and returned the same way. `inline` expands one prong
+            // per tag, so this is equivalent to two separate arms.
+            inline .full, .profile_only => |m_in| {
+                var m = m_in;
+                defer m.deinit(self.allocator);
+
+                var records: std.ArrayList(Edgar.EtfMetricRecord) = .empty;
+                errdefer {
+                    for (records.items) |*r| r.deinit(self.allocator);
+                    records.deinit(self.allocator);
+                }
+                try Edgar.appendEtfMetricRecords(self.allocator, &records, m);
+                const owned = try records.toOwnedSlice(self.allocator);
+                s.write(Edgar.EtfMetricRecord, symbol, owned, .{ .seconds = cache.Ttl.etf_metrics, .jitter_pct = 8 });
+                return .{ .data = owned, .source = .fetched, .timestamp = std.Io.Timestamp.now(self.io, .real).toSeconds(), .allocator = self.allocator };
+            },
+            .not_a_fund => {
+                // Not a fund — write a negative entry to suppress
+                // retries. The user can ask `getEntityFacts(cik)`
+                // separately for stock-level facts.
+                s.writeNegative(symbol, .etf_metrics);
+                return DataError.NotFound;
+            },
+            .not_in_edgar => {
+                // Symbol isn't in either ticker map. No EDGAR data
+                // available; negative-cache.
+                s.writeNegative(symbol, .etf_metrics);
+                return DataError.NotFound;
+            },
+        }
+    }
+
+    /// Load and parse the EDGAR mutual-fund ticker map, going
+    /// through the `Store`-backed cache. Caller deinits the result.
+    fn loadMutualFundTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap {
+        var s = self.store();
+
+        if (!opts.force_refresh) {
+            if (s.read(Edgar.MutualFundTickerMapBlob, "_edgar", null, .fresh_only)) |cached| {
+                // Release every cached blob, not just the first, plus
+                // the outer slice. Runs after the return value below
+                // has been computed.
+                defer {
+                    for (cached.data) |blob| self.allocator.free(blob.json);
+                    self.allocator.free(cached.data);
+                }
+                if (cached.data.len > 0) {
+                    return Edgar.parseTickerMap(self.allocator, cached.data[0].json);
+                }
+                // An empty cache entry falls through to a network fetch.
+            }
+        }
+
+        log.debug("fetching EDGAR mutual-fund ticker map", .{});
+        self.assertNetworkAllowed("loadMutualFundTickerMap edgar.fetchMutualFundTickerMap");
+        var edgar = try self.getProvider(Edgar);
+
+        // Fetch the raw JSON via a separate call so we can write
+        // the blob to cache; the parsed map gets returned to the
+        // caller.
+        // NOTE(review): the User-Agent here carries no contact
+        // address (only `From` does) — confirm this satisfies SEC's
+        // fair-access policy.
+        var resp = try edgar.client.request(.GET, "https://www.sec.gov/files/company_tickers_mf.json", null, &.{
+            .{ .name = "User-Agent", .value = "zfin/0.1" },
+            .{ .name = "From", .value = self.config.user_email orelse "" },
+        });
+        defer resp.deinit();
+
+        const json = try self.allocator.dupe(u8, resp.body);
+        defer self.allocator.free(json);
+        var blob = [_]Edgar.MutualFundTickerMapBlob{.{ .json = json }};
+        s.write(Edgar.MutualFundTickerMapBlob, "_edgar", blob[0..], .{ .seconds = cache.Ttl.tickers_funds, .jitter_pct = 8 });
+
+        return Edgar.parseTickerMap(self.allocator, json);
+    }
+
+    /// Load and parse the EDGAR company ticker map (stocks + UITs).
+ fn loadCompanyTickerMap(self: *DataService, opts: FetchOptions) !Edgar.TickerMap { + var s = self.store(); + + if (!opts.force_refresh) { + if (s.read(Edgar.CompanyTickerMapBlob, "_edgar", null, .fresh_only)) |cached| { + defer self.allocator.free(cached.data); + if (cached.data.len > 0) { + const blob = cached.data[0]; + defer self.allocator.free(blob.json); + return Edgar.parseStockTickerMap(self.allocator, blob.json); + } + } + } + + log.debug("fetching EDGAR company ticker map", .{}); + self.assertNetworkAllowed("loadCompanyTickerMap edgar.fetchCompanyTickerMap"); + var edgar = try self.getProvider(Edgar); + + var resp = try edgar.client.request(.GET, "https://www.sec.gov/files/company_tickers.json", null, &.{ + .{ .name = "User-Agent", .value = "zfin/0.1" }, + .{ .name = "From", .value = self.config.user_email orelse "" }, + }); + defer resp.deinit(); + + const json = try self.allocator.dupe(u8, resp.body); + var blob = [_]Edgar.CompanyTickerMapBlob{.{ .json = json }}; + s.write(Edgar.CompanyTickerMapBlob, "_edgar", blob[0..], .{ .seconds = cache.Ttl.tickers_companies, .jitter_pct = 8 }); + defer self.allocator.free(json); + + return Edgar.parseStockTickerMap(self.allocator, json); + } + + // ────────────────────────────────────────────────────────────── /// Fetch a real-time quote for a symbol. /// Yahoo Finance is primary (free, no API key, no 15-min delay). /// Falls back to TwelveData if Yahoo fails. @@ -1144,7 +1568,7 @@ pub const DataService = struct { /// Read dividends from cache only (no network fetch). pub fn getCachedDividends(self: *DataService, symbol: []const u8) ?[]Dividend { var s = self.store(); - const result = s.read(Dividend, symbol, dividendPostProcess, .any) orelse return null; + const result = s.read(Dividend, symbol, null, .any) orelse return null; return result.data; } @@ -1665,57 +2089,12 @@ pub const DataService = struct { return DataError.FetchFailed; } - /// Look up a CUSIP via OpenFIGI API. 
Returns the ticker if found, null otherwise. - /// Results are cached in {cache_dir}/cusip_tickers.srf. - /// Caller owns the returned string. - pub fn lookupCusip(self: *DataService, cusip: []const u8) ?[]const u8 { - // Check local cache first - if (self.getCachedCusipTicker(cusip)) |t| return t; - - // Try OpenFIGI - const result = OpenFigi.lookupCusip(self.allocator, cusip, self.config.openfigi_key) catch return null; - defer { - if (result.name) |n| self.allocator.free(n); - if (result.security_type) |s| self.allocator.free(s); - } - - if (result.ticker) |ticker| { - // Cache the mapping - self.cacheCusipTicker(cusip, ticker); - return ticker; // caller takes ownership - } - - return null; - } - /// A single CUSIP-to-ticker mapping record in the cache file. const CusipEntry = struct { cusip: []const u8 = "", ticker: []const u8 = "", }; - /// Read a cached CUSIP->ticker mapping. Returns null if not cached. - /// Caller owns the returned string. - fn getCachedCusipTicker(self: *DataService, cusip: []const u8) ?[]const u8 { - const path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return null; - defer self.allocator.free(path); - - const data = std.fs.cwd().readFileAlloc(self.allocator, path, 64 * 1024) catch return null; - defer self.allocator.free(data); - - var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, self.allocator, .{ .alloc_strings = false }) catch return null; - defer it.deinit(); - - while (it.next() catch return null) |fields| { - const entry = fields.to(CusipEntry) catch continue; - if (std.mem.eql(u8, entry.cusip, cusip) and entry.ticker.len > 0) { - return self.allocator.dupe(u8, entry.ticker) catch null; - } - } - return null; - } - /// Append a CUSIP->ticker mapping to the cache file. 
/// /// Implemented as read-append-atomic-write (rather than a direct @@ -1745,7 +2124,7 @@ pub const DataService = struct { const entry = [_]CusipEntry{.{ .cusip = cusip, .ticker = ticker }}; var aw: std.Io.Writer.Allocating = .init(self.allocator); defer aw.deinit(); - aw.writer.print("{f}", .{srf.fmtFrom(CusipEntry, self.allocator, &entry, .{ .emit_directives = emit_directives })}) catch return; + aw.writer.print("{f}", .{srf.fmt(CusipEntry, &entry, .{ .emit_directives = emit_directives })}) catch return; const encoded = aw.writer.buffered(); if (encoded.len == 0) return; @@ -1796,13 +2175,11 @@ pub const DataService = struct { .earnings => "/earnings", .options => "/options", .splits => "/splits", - .etf_profile => return false, // not served + .etf_profile => return false, // not served (replaced by etf_metrics) .meta => return false, - // Endpoint mapping for these will be wired when the - // corresponding `getClassification` / `getEntityFacts` / - // `getEtfMetrics` service methods land. Until then, - // server sync is a no-op for them. - .classification, .etf_metrics, .entity_facts => return false, + .classification => "/classification", + .etf_metrics => "/etf_metrics", + .entity_facts => "/entity_facts", // Provider-internal cache files (ticker-map indexes) // are not served — clients fetch them directly from // the SEC. The DataService caches the JSON via @@ -2362,3 +2739,206 @@ test "loadAllPrices offline mode skips network and returns cached" { // failed_count should reflect MISSING. 
try std.testing.expectEqual(@as(usize, 1), result.failed_count); } + +test "getClassification: skip_network with no cache returns FetchFailed" { + const allocator = std.testing.allocator; + const io = std.testing.io; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); + defer allocator.free(dir_path); + + const config = Config{ .cache_dir = dir_path }; + var svc = DataService.init(io, allocator, config); + defer svc.deinit(); + + svc.panic_on_network_attempt = true; + const err = svc.getClassification("NEVERHEARDOFIT", .{ .skip_network = true }); + try std.testing.expectError(DataError.FetchFailed, err); +} + +test "getClassification: cache hit returns cached data without network" { + const allocator = std.testing.allocator; + const io = std.testing.io; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); + defer allocator.free(dir_path); + + const config = Config{ .cache_dir = dir_path }; + var svc = DataService.init(io, allocator, config); + defer svc.deinit(); + + // Pre-populate the classification cache. + var s = svc.store(); + var records = [_]Wikidata.ClassificationRecord{.{ + .symbol = "AAPL", + .name = "Apple Inc.", + .country = "US", + .as_of = "2026-05-25", + .source = "wikidata", + }}; + s.write(Wikidata.ClassificationRecord, "AAPL", records[0..], .{ .seconds = cache.Ttl.classification }); + + // Network guard on — must return from cache without touching network. 
+ svc.panic_on_network_attempt = true; + const result = try svc.getClassification("AAPL", .{}); + defer result.deinit(); + try std.testing.expectEqual(@as(usize, 1), result.data.len); + try std.testing.expectEqualStrings("AAPL", result.data[0].symbol); + try std.testing.expectEqualStrings("Apple Inc.", result.data[0].name.?); + try std.testing.expectEqual(Source.cached, result.source); +} + +test "getEntityFacts: skip_network with no cache returns FetchFailed" { + const allocator = std.testing.allocator; + const io = std.testing.io; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); + defer allocator.free(dir_path); + + const config = Config{ .cache_dir = dir_path }; + var svc = DataService.init(io, allocator, config); + defer svc.deinit(); + + svc.panic_on_network_attempt = true; + const err = svc.getEntityFacts("0000999999", .{ .skip_network = true }); + try std.testing.expectError(DataError.FetchFailed, err); +} + +test "getEntityFacts: cache hit returns cached shares-outstanding" { + const allocator = std.testing.allocator; + const io = std.testing.io; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); + defer allocator.free(dir_path); + + const config = Config{ .cache_dir = dir_path }; + var svc = DataService.init(io, allocator, config); + defer svc.deinit(); + + var s = svc.store(); + var records = [_]Edgar.EntityFactRecord{ + .{ .shares_outstanding = .{ + .symbol = "", + .shares_outstanding = 14687356000, + .period_end = "2026-04-17", + .form = "10-Q", + .cik = "0000320193", + .as_of = "2026-05-25", + .source = "edgar_xbrl", + } }, + }; + s.write(Edgar.EntityFactRecord, "0000320193", records[0..], .{ .seconds = cache.Ttl.entity_facts }); + + svc.panic_on_network_attempt = true; + const result = try svc.getEntityFacts("0000320193", .{}); + defer result.deinit(); + try std.testing.expectEqual(@as(usize, 
1), result.data.len); + switch (result.data[0]) { + .shares_outstanding => |so| { + try std.testing.expectEqual(@as(u64, 14687356000), so.shares_outstanding); + try std.testing.expectEqualStrings("0000320193", so.cik); + }, + } + try std.testing.expectEqual(Source.cached, result.source); +} + +test "getEtfMetrics: skip_network with no cache returns FetchFailed" { + const allocator = std.testing.allocator; + const io = std.testing.io; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); + defer allocator.free(dir_path); + + const config = Config{ .cache_dir = dir_path }; + var svc = DataService.init(io, allocator, config); + defer svc.deinit(); + + svc.panic_on_network_attempt = true; + const err = svc.getEtfMetrics("NEVERHEARDOFIT", .{ .skip_network = true }); + try std.testing.expectError(DataError.FetchFailed, err); +} + +test "getEtfMetrics: cache hit returns cached profile + sectors + holdings" { + const allocator = std.testing.allocator; + const io = std.testing.io; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator); + defer allocator.free(dir_path); + + const config = Config{ .cache_dir = dir_path }; + var svc = DataService.init(io, allocator, config); + defer svc.deinit(); + + var s = svc.store(); + var records = [_]Edgar.EtfMetricRecord{ + .{ .profile = .{ + .symbol = "VTI", + .cik = "0000036405", + .as_of = "2026-05-25", + .source = "edgar", + } }, + .{ .sector = .{ + .symbol = "VTI", + .code = "EC/CORP", + .description = "Equity / Corporate", + .pct_of_portfolio = 99.7, + .as_of = "2026-05-25", + .source = "edgar", + } }, + .{ .holding = .{ + .symbol = "VTI", + .name = "NVIDIA Corp", + .pct_of_portfolio = 6.57, + .as_of = "2026-05-25", + .source = "edgar", + } }, + }; + s.write(Edgar.EtfMetricRecord, "VTI", records[0..], .{ .seconds = cache.Ttl.etf_metrics }); + + svc.panic_on_network_attempt = true; + 
const result = try svc.getEtfMetrics("VTI", .{}); + defer result.deinit(); + try std.testing.expectEqual(@as(usize, 3), result.data.len); + try std.testing.expect(result.data[0] == .profile); + try std.testing.expect(result.data[1] == .sector); + try std.testing.expect(result.data[2] == .holding); + try std.testing.expectEqualStrings("VTI", result.data[0].profile.symbol); + try std.testing.expectEqual(Source.cached, result.source); +} + +test "DataService getProvider initializes Wikidata with user_email" { + const allocator = std.testing.allocator; + const config = Config{ + .cache_dir = "/tmp/zfin-test-cache", + .user_email = "test@example.com", + }; + var svc = DataService.init(std.testing.io, allocator, config); + defer svc.deinit(); + + const wd1 = try svc.getProvider(Wikidata); + try std.testing.expect(svc.wikidata != null); + try std.testing.expectEqualStrings("test@example.com", wd1.user_email); + + // Second call returns same instance. + const wd2 = try svc.getProvider(Wikidata); + try std.testing.expect(wd1 == wd2); +} + +test "DataService getProvider returns NoApiKey for Wikidata without user_email" { + const allocator = std.testing.allocator; + const config = Config{ .cache_dir = "/tmp/zfin-test-cache" }; + var svc = DataService.init(std.testing.io, allocator, config); + defer svc.deinit(); + + const wd_result = svc.getProvider(Wikidata); + try std.testing.expectError(DataError.NoApiKey, wd_result); + + const ed_result = svc.getProvider(Edgar); + try std.testing.expectError(DataError.NoApiKey, ed_result); +} diff --git a/src/tui/keybinds.zig b/src/tui/keybinds.zig index 2f0f206..9fa808d 100644 --- a/src/tui/keybinds.zig +++ b/src/tui/keybinds.zig @@ -32,12 +32,13 @@ pub const KeyCombo = struct { codepoint: u21, mods: vaxis.Key.Modifiers = .{}, - /// SRF custom parser. Used by `srf.Record.to(...)` to coerce a - /// `key::ctrl+c` field into a `KeyCombo` value. Returns + /// SRF custom parser. 
Used by `srf.FieldIterator.to(...)` to coerce + /// a `key::ctrl+c` field into a `KeyCombo` value. Returns /// `error.CustomParseFailed` (via the srf-level wrapping) on - /// invalid input. - pub fn srfParse(val: []const u8) !KeyCombo { - return parseKeyCombo(val) orelse error.InvalidKeyCombo; + /// invalid input. Returns `CoercionResult(...).initFree(...)` so + /// SRF frees the consumed source string after parsing. + pub fn srfParse(val: []const u8) !srf.CoercionResult(KeyCombo) { + return .initFree(parseKeyCombo(val) orelse return error.InvalidKeyCombo); } }; @@ -451,15 +452,15 @@ pub fn loadFromData(allocator: std.mem.Allocator, data: []const u8) ?KeyMap { pub fn loadFromDataChecked(allocator: std.mem.Allocator, data: []const u8) LoadOutcome { var reader = std.Io.Reader.fixed(data); - const parsed = srf.parse(&reader, allocator, .{}) catch return .fallback; - // Don't deinit `parsed` until the end — its arena owns the + var ri = srf.iterator(&reader, allocator, .{}) catch return .fallback; + // Don't deinit `ri` until the end — its arena owns the // string slices we'll borrow into the returned KeyMap. We // transfer ownership to the KeyMap's arena instead. - - // Move parsed.arena into our own KeyMap so it outlives this - // call. The `Parsed` struct holds the arena by pointer; we - // claim it directly. - const arena = parsed.arena; + // + // Move ri.arena into our own KeyMap so it outlives this + // call. The `RecordIterator` holds the arena by pointer; we + // claim it directly and skip `ri.deinit()`. + const arena = ri.arena; errdefer { arena.deinit(); allocator.destroy(arena); @@ -470,8 +471,9 @@ pub fn loadFromDataChecked(allocator: std.mem.Allocator, data: []const u8) LoadO var scopes = std.ArrayList(ScopeBuilder).empty; var warnings = std.ArrayList([]const u8).empty; - for (parsed.records, 0..) 
|record, idx| { - const raw = record.to(RawRecord) catch |err| { + var idx: usize = 0; + while (ri.next() catch return .fallback) |fields| : (idx += 1) { + const raw = fields.to(RawRecord, .{}) catch |err| { // Per-record parse failure (missing field, bad key // string, unknown action). Don't drop the whole file — // skip the record and warn the user. Record index is diff --git a/src/tui/theme.zig b/src/tui/theme.zig index 4c1f8f9..24c0ed9 100644 --- a/src/tui/theme.zig +++ b/src/tui/theme.zig @@ -225,7 +225,8 @@ fn colorPtrConst(theme: *const Theme, offset: usize) *const Color { fn formatHex(c: Color) [7]u8 { var buf: [7]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "#{x:0>2}{x:0>2}{x:0>2}", .{ c[0], c[1], c[2] }) catch {}; + _ = std.fmt.bufPrint(&buf, "#{x:0>2}{x:0>2}{x:0>2}", .{ c[0], c[1], c[2] }) catch + @panic("formatHex: 7-byte buffer cannot hold #RRGGBB — unreachable"); return buf; } @@ -273,7 +274,7 @@ pub fn loadFromData(data: []const u8) ?Theme { const alloc = fba.allocator(); var reader = std.Io.Reader.fixed(data); - var it = srf.iterator(&reader, alloc, .{ .alloc_strings = false }) catch return null; + var it = srf.iterator(&reader, alloc, .{ .parse_allocator = .none }) catch return null; // Don't deinit -- fba owns everything var theme = default_theme;