dedup cusip cache and actually use the cache
This commit is contained in:
parent
d9fd5d5b97
commit
415071b955
2 changed files with 211 additions and 0 deletions
|
|
@ -53,6 +53,23 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
|
|||
try cli.printFg(out, color, cli.CLR_MUTED, "Note: '{s}' doesn't look like a CUSIP (expected 9 alphanumeric chars with digits)\n", .{parsed.cusip});
|
||||
}
|
||||
|
||||
// L1: check the local cache before any network call. Hits are
|
||||
// permanent (CUSIP->ticker mappings don't change), so a cached
|
||||
// answer is authoritative and skips OpenFIGI entirely.
|
||||
var cache_map = svc.loadCusipTickerMap(allocator);
|
||||
defer cache_map.deinit();
|
||||
if (cache_map.get(parsed.cusip)) |ticker| {
|
||||
const cached: zfin.CusipResult = .{
|
||||
.ticker = ticker,
|
||||
.name = null,
|
||||
.security_type = null,
|
||||
.found = true,
|
||||
};
|
||||
try display(cached, parsed.cusip, color, out);
|
||||
try cli.printFg(out, color, cli.CLR_MUTED, " (from local cache)\n", .{});
|
||||
return;
|
||||
}
|
||||
|
||||
cli.stderrPrint(ctx.io, "Looking up via OpenFIGI...\n");
|
||||
|
||||
// Try full batch lookup for richer output
|
||||
|
|
|
|||
194
src/service.zig
194
src/service.zig
|
|
@ -2398,13 +2398,104 @@ pub const DataService = struct {
|
|||
ticker: []const u8 = "",
|
||||
};
|
||||
|
||||
/// In-memory CUSIP->ticker table built from `cusip_tickers.srf`.
///
/// Nothing is copied per entry: every key and every value is a slice
/// of `backing`, the raw file bytes (parsed with
/// `parse_allocator = .none`). The whole-file buffer is the storage,
/// so it must stay alive as long as the table does; `deinit` releases
/// the buffer and the map table together.
///
/// This is the L1 tier of CUSIP resolution: consult it before
/// reaching for the server or OpenFIGI.
pub const CusipTickerMap = struct {
    const Self = @This();

    map: std.StringHashMap([]const u8),
    /// Raw contents of `cusip_tickers.srf`. All map keys and values
    /// point into this buffer. It is `&.{}` when the file was missing
    /// or unreadable; freeing a zero-length slice is a no-op.
    backing: []const u8,

    /// Release the backing buffer and the map table. Both were
    /// allocated with the map's allocator at load time, so that
    /// allocator is reused here. It is read out before the map is
    /// torn down.
    pub fn deinit(self: *Self) void {
        const gpa = self.map.allocator;
        const bytes = self.backing;
        self.map.deinit();
        gpa.free(bytes);
    }

    /// Number of distinct CUSIPs in the table.
    pub fn count(self: Self) u32 {
        return self.map.count();
    }

    /// Whether `cusip` has a cached ticker.
    pub fn contains(self: Self, cusip: []const u8) bool {
        return self.map.contains(cusip);
    }

    /// Ticker cached for `cusip`, or `null` when there is none. The
    /// returned slice borrows from `backing`.
    pub fn get(self: Self, cusip: []const u8) ?[]const u8 {
        return self.map.get(cusip);
    }
};
|
||||
|
||||
/// Read `cusip_tickers.srf` from the cache directory into a
/// `CusipTickerMap`. The returned table owns the file bytes; the
/// caller releases everything with `CusipTickerMap.deinit`.
///
/// A missing or unreadable file yields an empty table (the common
/// first-run case). A parse failure part-way through returns whatever
/// was collected up to that point.
///
/// On duplicate CUSIPs the first occurrence wins, which tolerates
/// cache files written before `cacheCusipTicker` learned to dedup.
///
/// The on-disk format is CUSIP-keyed (`cusip::X,ticker::Y`) and the
/// returned map is keyed the same way for O(1) forward lookup.
pub fn loadCusipTickerMap(self: *DataService, allocator: std.mem.Allocator) CusipTickerMap {
    // Start with an empty table; every early exit below returns it
    // in whatever state it has reached.
    var table: CusipTickerMap = .{
        .map = std.StringHashMap([]const u8).init(allocator),
        .backing = &.{},
    };

    const cache_path = std.fs.path.join(
        allocator,
        &.{ self.config.cache_dir, "cusip_tickers.srf" },
    ) catch return table;
    defer allocator.free(cache_path);

    // Once read, the file bytes become the table's backing store.
    // The parser runs with `parse_allocator = .none`, so it borrows
    // from this buffer instead of copying. It is freed by
    // `CusipTickerMap.deinit`, never here.
    table.backing = std.Io.Dir.cwd().readFileAlloc(
        self.io,
        cache_path,
        allocator,
        .limited(4 * 1024 * 1024),
    ) catch return table;

    var reader = std.Io.Reader.fixed(table.backing);
    var records = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return table;
    defer records.deinit();

    while (true) {
        const maybe_fields = records.next() catch break;
        const fields = maybe_fields orelse break;

        const entry = fields.to(CusipEntry, .{}) catch continue;
        const usable = entry.cusip.len != 0 and entry.ticker.len != 0;
        if (!usable) continue;

        // First occurrence wins. The borrowed slices are stored
        // directly because they live in `backing`.
        const slot = table.map.getOrPut(entry.cusip) catch continue;
        if (slot.found_existing) continue;
        slot.value_ptr.* = entry.ticker;
    }
    return table;
}
|
||||
|
||||
/// Append a CUSIP->ticker mapping to the cache file.
|
||||
///
|
||||
/// Implemented as read-append-atomic-write (rather than a direct
|
||||
/// open-for-append) so a concurrent reader never sees a file with a
|
||||
/// valid header plus partial trailing record. See `cache/store.zig
|
||||
/// appendRaw` for the same pattern and rationale.
|
||||
///
|
||||
/// Dedups: if the CUSIP is already cached, this is a no-op. That
|
||||
/// keeps the file from accumulating duplicate rows when the same
|
||||
/// CUSIP is looked up repeatedly (the historical bug — the writer
|
||||
/// never checked the file before appending).
|
||||
pub fn cacheCusipTicker(self: *DataService, cusip: []const u8, ticker: []const u8) void {
|
||||
// Dedup against what's already cached.
|
||||
var existing_map = self.loadCusipTickerMap(self.allocator);
|
||||
defer existing_map.deinit();
|
||||
if (existing_map.contains(cusip)) return;
|
||||
|
||||
const path = std.fs.path.join(self.allocator, &.{ self.config.cache_dir, "cusip_tickers.srf" }) catch return;
|
||||
defer self.allocator.free(path);
|
||||
|
||||
|
|
@ -3778,3 +3869,106 @@ test "freeEdgarLookup: handles all three union variants without leak" {
|
|||
// testing.allocator panics on leak — passing this test means
|
||||
// the title was freed.
|
||||
}
|
||||
|
||||
// ── CUSIP->ticker cache (loadCusipTickerMap / cacheCusipTicker) ──
|
||||
|
||||
test "loadCusipTickerMap: missing file returns empty map" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    // Fresh temp directory with no cache file in it.
    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_dir = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_dir);

    var service = DataService.init(io, gpa, Config{ .cache_dir = cache_dir });
    defer service.deinit();

    var table = service.loadCusipTickerMap(gpa);
    defer table.deinit();

    const expected_entries: usize = 0;
    try std.testing.expectEqual(expected_entries, table.count());
}
|
||||
|
||||
test "cacheCusipTicker + loadCusipTickerMap: write/read round-trip" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_dir = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_dir);

    var service = DataService.init(io, gpa, Config{ .cache_dir = cache_dir });
    defer service.deinit();

    // Placeholder CUSIPs/tickers only, never real PII.
    service.cacheCusipTicker("111111111", "AAA");
    service.cacheCusipTicker("222222222", "BBB");

    // Both mappings written above must come back from a fresh load.
    var table = service.loadCusipTickerMap(gpa);
    defer table.deinit();

    const expected_entries: usize = 2;
    try std.testing.expectEqual(expected_entries, table.count());
    try std.testing.expectEqualStrings("AAA", table.get("111111111").?);
    try std.testing.expectEqualStrings("BBB", table.get("222222222").?);
}
|
||||
|
||||
test "cacheCusipTicker: dedups repeated CUSIP (the historical bug)" {
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_dir = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_dir);

    var service = DataService.init(io, gpa, Config{ .cache_dir = cache_dir });
    defer service.deinit();

    // Cache the same CUSIP three times; it must collapse to one row.
    for (0..3) |_| service.cacheCusipTicker("111111111", "AAA");

    var table = service.loadCusipTickerMap(gpa);
    defer table.deinit();
    try std.testing.expectEqual(@as(usize, 1), table.count());
    try std.testing.expectEqualStrings("AAA", table.get("111111111").?);

    // Dedup must happen at the writer, not just in the reader: the
    // file itself should hold exactly one data row (plus the
    // directive header).
    const cache_file = try std.fs.path.join(gpa, &.{ cache_dir, "cusip_tickers.srf" });
    defer gpa.free(cache_file);
    const contents = try std.Io.Dir.cwd().readFileAlloc(io, cache_file, gpa, .limited(64 * 1024));
    defer gpa.free(contents);

    var data_rows: usize = 0;
    var line_it = std.mem.splitScalar(u8, contents, '\n');
    while (line_it.next()) |line| {
        if (std.mem.indexOf(u8, line, "cusip::") == null) continue;
        data_rows += 1;
    }
    try std.testing.expectEqual(@as(usize, 1), data_rows);
}
|
||||
|
||||
test "loadCusipTickerMap: first occurrence wins on duplicate rows" {
    // A file left behind by the old buggy appender contains duplicate
    // rows. The reader must not crash on it and must end up with a
    // single mapping for the CUSIP.
    const gpa = std.testing.allocator;
    const io = std.testing.io;

    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();
    const cache_dir = try scratch.dir.realPathFileAlloc(io, ".", gpa);
    defer gpa.free(cache_dir);

    // Seed the cache by hand with the same row twice, as the old bug did.
    const cache_file = try std.fs.path.join(gpa, &.{ cache_dir, "cusip_tickers.srf" });
    defer gpa.free(cache_file);
    const seeded = "#!srfv1\ncusip::111111111,ticker::AAA\ncusip::111111111,ticker::AAA\n";
    try std.Io.Dir.cwd().writeFile(io, .{
        .sub_path = cache_file,
        .data = seeded,
    });

    var service = DataService.init(io, gpa, Config{ .cache_dir = cache_dir });
    defer service.deinit();

    var table = service.loadCusipTickerMap(gpa);
    defer table.deinit();

    const expected_entries: usize = 1;
    try std.testing.expectEqual(expected_entries, table.count());
    try std.testing.expectEqualStrings("AAA", table.get("111111111").?);
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue