diff --git a/src/commands/lookup.zig b/src/commands/lookup.zig
index 22af36b..f248639 100644
--- a/src/commands/lookup.zig
+++ b/src/commands/lookup.zig
@@ -53,6 +53,23 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
         try cli.printFg(out, color, cli.CLR_MUTED, "Note: '{s}' doesn't look like a CUSIP (expected 9 alphanumeric chars with digits)\n", .{parsed.cusip});
     }
 
+    // L1: check the local cache before any network call. Hits are
+    // permanent (CUSIP->ticker mappings don't change), so a cached
+    // answer is authoritative and skips OpenFIGI entirely.
+    var cache_map = svc.loadCusipTickerMap(allocator);
+    defer cache_map.deinit();
+    if (cache_map.get(parsed.cusip)) |ticker| {
+        const cached: zfin.CusipResult = .{
+            .ticker = ticker,
+            .name = null,
+            .security_type = null,
+            .found = true,
+        };
+        try display(cached, parsed.cusip, color, out);
+        try cli.printFg(out, color, cli.CLR_MUTED, " (from local cache)\n", .{});
+        return;
+    }
+
     cli.stderrPrint(ctx.io, "Looking up via OpenFIGI...\n");
 
     // Try full batch lookup for richer output
diff --git a/src/service.zig b/src/service.zig
index cfa2c0b..927f5b5 100644
--- a/src/service.zig
+++ b/src/service.zig
@@ -2398,13 +2398,104 @@ pub const DataService = struct {
         ticker: []const u8 = "",
     };
 
+    /// CUSIP->ticker lookup table loaded from `cusip_tickers.srf`.
+    ///
+    /// Zero-copy: keys and values are slices into `backing` (the raw
+    /// file bytes parsed with `parse_allocator = .none`), so nothing is
+    /// duped per entry and the buffer lives until `deinit` frees it with
+    /// the map table. Tickers returned by `get` dangle after `deinit`;
+    /// dupe any that must outlive the table.
+    ///
+    /// This is the L1 tier of CUSIP resolution: callers consult it
+    /// before reaching for the server or OpenFIGI.
+    pub const CusipTickerMap = struct {
+        map: std.StringHashMap([]const u8),
+        /// Raw bytes of `cusip_tickers.srf`; every map key and value
+        /// points into this buffer. `&.{}` when the file was missing
+        /// or unreadable (freeing a zero-length slice is a no-op).
+        backing: []const u8,
+
+        pub fn get(self: CusipTickerMap, cusip: []const u8) ?[]const u8 {
+            return self.map.get(cusip);
+        }
+
+        pub fn contains(self: CusipTickerMap, cusip: []const u8) bool {
+            return self.map.contains(cusip);
+        }
+
+        pub fn count(self: CusipTickerMap) u32 {
+            return self.map.count();
+        }
+
+        /// Release the map table and the backing buffer. Both were
+        /// allocated with the map's allocator at load time, so we
+        /// reuse it here — the two lifetimes are bound together by
+        /// construction, which is the whole point of the wrapper.
+        pub fn deinit(self: *CusipTickerMap) void {
+            const allocator = self.map.allocator;
+            self.map.deinit();
+            allocator.free(self.backing);
+        }
+    };
+
+    /// Load the CUSIP->ticker cache file into a `CusipTickerMap`. The
+    /// returned table owns the file bytes; release it with
+    /// `CusipTickerMap.deinit`.
+    ///
+    /// Never fails: a missing (the common first-run case) or unreadable
+    /// file yields an empty table, a parse error mid-file returns the
+    /// entries read so far, and undecodable or empty-field rows are
+    /// skipped. First occurrence wins on duplicate CUSIPs, tolerating the
+    /// historical double-append bug from before `cacheCusipTicker` deduped.
+    /// The on-disk format is CUSIP-keyed (`cusip::X,ticker::Y`); the
+    /// returned map is keyed the same way for O(1) forward lookup.
+    pub fn loadCusipTickerMap(self: *DataService, allocator: std.mem.Allocator) CusipTickerMap {
+        const map = std.StringHashMap([]const u8).init(allocator);
+        const path = std.fs.path.join(allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch
+            return .{ .map = map, .backing = &.{} };
+        defer allocator.free(path);
+
+        const data = std.Io.Dir.cwd().readFileAlloc(self.io, path, allocator, .limited(4 * 1024 * 1024)) catch
+            return .{ .map = map, .backing = &.{} };
+        // From here `data` is the table's backing store: keys and
+        // values are slices into it (parse_allocator = .none, so the
+        // parser borrows rather than copies). Freed by
+        // `CusipTickerMap.deinit`, never here — that's the lifetime
+        // contract that lets us skip per-entry dupes entirely.
+        var result: CusipTickerMap = .{ .map = map, .backing = data };
+
+        var reader = std.Io.Reader.fixed(data);
+        var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return result;
+        defer it.deinit();
+
+        while (it.next() catch return result) |fields| {
+            const entry = fields.to(CusipEntry, .{}) catch continue;
+            if (entry.cusip.len == 0 or entry.ticker.len == 0) continue;
+            // First occurrence wins; getOrPut stores the borrowed
+            // slices directly — they live in `backing`, no dupe.
+            const gop = result.map.getOrPut(entry.cusip) catch continue;
+            if (!gop.found_existing) gop.value_ptr.* = entry.ticker;
+        }
+        return result;
+    }
+
     /// Append a CUSIP->ticker mapping to the cache file.
     ///
     /// Implemented as read-append-atomic-write (rather than a direct
     /// open-for-append) so a concurrent reader never sees a file with a
     /// valid header plus partial trailing record. See `cache/store.zig
     /// appendRaw` for the same pattern and rationale.
+    ///
+    /// Dedups: if the CUSIP is already cached, this is a no-op. That
+    /// keeps the file from accumulating duplicate rows when the same
+    /// CUSIP is looked up repeatedly (the historical bug — the writer
+    /// never checked the file before appending).
     pub fn cacheCusipTicker(self: *DataService, cusip: []const u8, ticker: []const u8) void {
+        // Dedup against what's already cached.
+        var existing_map = self.loadCusipTickerMap(self.allocator);
+        defer existing_map.deinit();
+        if (existing_map.contains(cusip)) return;
+
         const path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return;
         defer self.allocator.free(path);
 
@@ -3778,3 +3869,106 @@ test "freeEdgarLookup: handles all three union variants without leak" {
     // testing.allocator panics on leak — passing this test means
     // the title was freed.
 }
+
+// ── CUSIP->ticker cache (loadCusipTickerMap / cacheCusipTicker) ──
+
+test "loadCusipTickerMap: missing file returns empty map" {
+    const allocator = std.testing.allocator;
+    const io = std.testing.io;
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
+    defer allocator.free(dir_path);
+
+    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
+    defer svc.deinit();
+
+    var map = svc.loadCusipTickerMap(allocator);
+    defer map.deinit();
+    try std.testing.expectEqual(@as(usize, 0), map.count());
+}
+
+test "cacheCusipTicker + loadCusipTickerMap: write/read round-trip" {
+    const allocator = std.testing.allocator;
+    const io = std.testing.io;
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
+    defer allocator.free(dir_path);
+
+    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
+    defer svc.deinit();
+
+    // Placeholder CUSIPs/tickers — never real PII.
+    svc.cacheCusipTicker("111111111", "AAA");
+    svc.cacheCusipTicker("222222222", "BBB");
+
+    var map = svc.loadCusipTickerMap(allocator);
+    defer map.deinit();
+    try std.testing.expectEqual(@as(usize, 2), map.count());
+    try std.testing.expectEqualStrings("AAA", map.get("111111111").?);
+    try std.testing.expectEqualStrings("BBB", map.get("222222222").?);
+}
+
+test "cacheCusipTicker: dedups repeated CUSIP (the historical bug)" {
+    const allocator = std.testing.allocator;
+    const io = std.testing.io;
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
+    defer allocator.free(dir_path);
+
+    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
+    defer svc.deinit();
+
+    // Write the same CUSIP three times — must collapse to one row.
+    svc.cacheCusipTicker("111111111", "AAA");
+    svc.cacheCusipTicker("111111111", "AAA");
+    svc.cacheCusipTicker("111111111", "AAA");
+
+    var map = svc.loadCusipTickerMap(allocator);
+    defer map.deinit();
+    try std.testing.expectEqual(@as(usize, 1), map.count());
+    try std.testing.expectEqualStrings("AAA", map.get("111111111").?);
+
+    // The on-disk file should physically contain exactly one data
+    // row (plus the directive header), proving dedup at the writer.
+    const path = try std.fs.path.join(allocator, &.{ dir_path, "cusip_tickers.srf" });
+    defer allocator.free(path);
+    const data = try std.Io.Dir.cwd().readFileAlloc(io, path, allocator, .limited(64 * 1024));
+    defer allocator.free(data);
+    var row_count: usize = 0;
+    var lines = std.mem.splitScalar(u8, data, '\n');
+    while (lines.next()) |line| {
+        if (std.mem.indexOf(u8, line, "cusip::") != null) row_count += 1;
+    }
+    try std.testing.expectEqual(@as(usize, 1), row_count);
+}
+
+test "loadCusipTickerMap: first occurrence wins on duplicate rows" {
+    // Tolerate a pre-existing file written by the buggy appender
+    // (duplicate rows). The reader must not crash and must keep the
+    // first mapping.
+    const allocator = std.testing.allocator;
+    const io = std.testing.io;
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    const dir_path = try tmp.dir.realPathFileAlloc(io, ".", allocator);
+    defer allocator.free(dir_path);
+
+    // Hand-write a file with a duplicate row (as the old bug did).
+    const path = try std.fs.path.join(allocator, &.{ dir_path, "cusip_tickers.srf" });
+    defer allocator.free(path);
+    try std.Io.Dir.cwd().writeFile(io, .{
+        .sub_path = path,
+        .data = "#!srfv1\ncusip::111111111,ticker::AAA\ncusip::111111111,ticker::AAA\n",
+    });
+
+    var svc = DataService.init(io, allocator, Config{ .cache_dir = dir_path });
+    defer svc.deinit();
+
+    var map = svc.loadCusipTickerMap(allocator);
+    defer map.deinit();
+    try std.testing.expectEqual(@as(usize, 1), map.count());
+    try std.testing.expectEqualStrings("AAA", map.get("111111111").?);
+}