From 0897eb850f21ef72771cc0898368b3bc71bc5830 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Mon, 1 Jun 2026 08:12:51 -0700 Subject: [PATCH] add edgar/wikidata endpoints/refresh, zfin upgrade --- README.md | 9 ++- build.zig.zon | 4 +- src/main.zig | 181 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 176 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 92e97aa..da15e55 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,11 @@ curl http://localhost:8080/AAPL/returns?fmt=xml | `GET /:symbol/dividends` | `application/x-srf` | Raw SRF cache file | | `GET /:symbol/earnings` | `application/x-srf` | Raw SRF cache file | | `GET /:symbol/options` | `application/x-srf` | Raw SRF cache file | +| `GET /:symbol/classification` | `application/x-srf` | Wikidata classification (SRF) | +| `GET /:symbol/etf_metrics` | `application/x-srf` | EDGAR NPORT-P fund metrics; 404 for non-funds | +| `GET /:cik/entity_facts` | `application/x-srf` | EDGAR XBRL entity facts (CIK-keyed) | +| `GET /_edgar/tickers_funds` | `application/x-srf` | EDGAR mutual-fund ticker map (~3 MB) | +| `GET /_edgar/tickers_companies` | `application/x-srf` | EDGAR company ticker map (~5 MB) | ## LibreCalc usage @@ -75,7 +80,7 @@ watch symbols, and fetches candles, dividends, and earnings for each. ## Building -Requires [Zig 0.15.2](https://ziglang.org/download/). +Requires [Zig 0.16.0](https://ziglang.org/download/). 
```sh zig build # build @@ -92,7 +97,7 @@ All configuration is via environment variables: | `TWELVEDATA_API_KEY` | Yes | TwelveData API key | | `POLYGON_API_KEY` | No | Polygon API key | | `FINNHUB_API_KEY` | No | Finnhub API key | -| `ALPHAVANTAGE_API_KEY` | No | Alpha Vantage API key | +| `ZFIN_USER_EMAIL` | Yes | Contact email for SEC EDGAR User-Agent header | | `ZFIN_PORTFOLIO` | No | Path to portfolio SRF (default: `portfolio.srf`) | | `ZFIN_CACHE_DIR` | No | Cache directory (default: `~/.cache/zfin`) | diff --git a/build.zig.zon b/build.zig.zon index 63fd36d..51917ab 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -14,8 +14,8 @@ .hash = "httpz-0.0.0-PNVzrLjJCAD37S0CcrXpsjSqr86hVjK0rsALTDJ98AAJ", }, .zfin = .{ - .url = "git+https://git.lerch.org/lobo/zfin#16048489ddfc12b27c5f5fb53e8189d68b7fbfe4", - .hash = "zfin-0.0.0-J-B21qlXMQAmuNwl3EpsYiJy5E8jcsImfjk0k99rlahP", + .url = "git+https://git.lerch.org/lobo/zfin#641a88b0b768769a33e3f95fa6f1736fa58e4a6f", + .hash = "zfin-0.0.0-J-B21mEEPACavg9MPzB402B3DIzfIxXhNv3UOAyFHBxK", }, }, } diff --git a/src/main.zig b/src/main.zig index 30c94fd..51bc063 100644 --- a/src/main.zig +++ b/src/main.zig @@ -71,6 +71,11 @@ fn handleHelp(_: *App, _: *httpz.Request, res: *httpz.Response) !void { \\ GET /{SYMBOL}/splits Raw SRF cache file \\ GET /{SYMBOL}/earnings Raw SRF cache file \\ GET /{SYMBOL}/options Raw SRF cache file + \\ GET /{SYMBOL}/classification Wikidata classification (SRF) + \\ GET /{SYMBOL}/etf_metrics EDGAR NPORT-P fund metrics (SRF; 404 for non-funds) + \\ GET /{CIK}/entity_facts EDGAR XBRL entity facts (SRF; CIK-keyed) + \\ GET /_edgar/tickers_funds EDGAR mutual-fund ticker map (SRF) + \\ GET /_edgar/tickers_companies EDGAR company ticker map (SRF) \\ GET /symbols List of tracked symbols \\ \\Returns fields: @@ -146,7 +151,7 @@ fn handleReturns(app: *App, req: *httpz.Request, res: *httpz.Response) !void { } } - const result = app.svc.getTrailingReturns(symbol) catch { + const result = 
app.svc.getTrailingReturns(symbol, .{}) catch { res.status = 404; res.body = "Symbol not found or fetch failed"; return; @@ -279,7 +284,7 @@ fn handleQuote(app: *App, req: *httpz.Request, res: *httpz.Response) !void { const arena = res.arena; const symbol = try upperDupe(arena, raw_symbol); - const q = app.svc.getQuote(symbol) catch { + const q = app.svc.getQuote(symbol, .{}) catch { res.status = 404; res.body = "Quote not available"; return; @@ -292,15 +297,41 @@ fn handleQuote(app: *App, req: *httpz.Request, res: *httpz.Response) !void { } fn handleSrfFile(app: *App, req: *httpz.Request, res: *httpz.Response, filename: []const u8) !void { - const raw_symbol = req.param("symbol") orelse { + return handleSrfFileByKey(app, req, res, "symbol", filename); +} + +/// Generalized SRF cache-file passthrough: reads +/// `<cache_dir>/<key>/<filename>` where `<key>` is whatever URL +/// parameter `key_param` resolves to. The default `handleSrfFile` +/// uses `"symbol"`; CIK-keyed routes (e.g. `/:cik/entity_facts`) +/// pass `"cik"` instead. The cache-key segment is uppercased +/// (safe for both symbols and zero-padded CIK digit strings). +fn handleSrfFileByKey(app: *App, req: *httpz.Request, res: *httpz.Response, key_param: []const u8, filename: []const u8) !void { + const raw_key = req.param(key_param) orelse { res.status = 400; - res.body = "Missing symbol"; + res.body = "Missing key"; return; }; const arena = res.arena; - const symbol = try upperDupe(arena, raw_symbol); + const key = try upperDupe(arena, raw_key); + return serveSrfFile(app, res, key, filename); +} - const path = try std.fs.path.join(arena, &.{ app.config.cache_dir, symbol, filename }); +/// Static-key SRF cache-file passthrough for routes that don't +/// take a path parameter (e.g. `/_edgar/tickers_funds` reads +/// `<cache_dir>/_edgar/tickers_funds.srf` directly). The `key` +/// is a literal directory name; not uppercased because the +/// cache uses `_edgar` as-is. 
+fn handleStaticSrfFile(app: *App, res: *httpz.Response, key: []const u8, filename: []const u8) !void { + return serveSrfFile(app, res, key, filename); +} + +/// Inner shared helper. Reads the file, computes etag, sets +/// headers, sends. Caller has already resolved the cache-key +/// segment (per-request param or static literal). +fn serveSrfFile(app: *App, res: *httpz.Response, key: []const u8, filename: []const u8) !void { + const arena = res.arena; + const path = try std.fs.path.join(arena, &.{ app.config.cache_dir, key, filename }); const content = std.Io.Dir.cwd().readFileAlloc(app.io, path, arena, .limited(10 * 1024 * 1024)) catch { res.status = 404; res.body = "Cache file not found"; @@ -351,6 +382,32 @@ fn handleOptions(app: *App, req: *httpz.Request, res: *httpz.Response) !void { return handleSrfFile(app, req, res, "options.srf"); } +fn handleClassification(app: *App, req: *httpz.Request, res: *httpz.Response) !void { + return handleSrfFile(app, req, res, "classification.srf"); +} + +fn handleEtfMetrics(app: *App, req: *httpz.Request, res: *httpz.Response) !void { + return handleSrfFile(app, req, res, "etf_metrics.srf"); +} + +fn handleEntityFacts(app: *App, req: *httpz.Request, res: *httpz.Response) !void { + // CIK-keyed route: cache layout is + // `<cache_dir>/<cik>/entity_facts.srf` (the CIK is the + // zero-padded 10-digit string Wikidata's P5531 emits). + return handleSrfFileByKey(app, req, res, "cik", "entity_facts.srf"); +} + +fn handleTickersFunds(app: *App, _: *httpz.Request, res: *httpz.Response) !void { + // Static-key route: `<cache_dir>/_edgar/tickers_funds.srf` + // is a single file shared across all symbol lookups, not a + // per-symbol cache. 
+ return handleStaticSrfFile(app, res, "_edgar", "tickers_funds.srf"); +} + +fn handleTickersCompanies(app: *App, _: *httpz.Request, res: *httpz.Response) !void { + return handleStaticSrfFile(app, res, "_edgar", "tickers_companies.srf"); +} + // ── Helpers ────────────────────────────────────────────────── fn upperDupe(allocator: std.mem.Allocator, s: []const u8) ![]u8 { @@ -494,6 +551,32 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process var success_count: u32 = 0; var fail_count: u32 = 0; + // Warm the EDGAR ticker maps once per refresh run. They're + // ~3-5 MB each, cached for 30 days; warming guarantees the + // shared `/_edgar/tickers_funds.srf` and + // `tickers_companies.srf` files exist for the static-route + // handlers to serve. Per-symbol `getEtfMetrics` calls below + // also rely on these maps being loaded. + { + try printRateLimitWait(&svc, stdout); + if (svc.loadMutualFundTickerMap(.{})) |mut_map| { + var m = mut_map; + m.deinit(); + try stdout.print("EDGAR mutual-fund ticker map ok\n", .{}); + } else |err| { + try stdout.print("EDGAR mutual-fund ticker map FAILED ({t})\n", .{err}); + } + try printRateLimitWait(&svc, stdout); + if (svc.loadCompanyTickerMap(.{})) |co_map| { + var m = co_map; + m.deinit(); + try stdout.print("EDGAR company ticker map ok\n", .{}); + } else |err| { + try stdout.print("EDGAR company ticker map FAILED ({t})\n", .{err}); + } + try stdout.flush(); + } + var it = symbols.iterator(); while (it.next()) |entry| { const sym = entry.key_ptr.*; @@ -504,8 +587,8 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process // Candles try printRateLimitWait(&svc, stdout); - if (svc.getCandles(sym)) |result| { - allocator.free(result.data); + if (svc.getCandles(sym, .{})) |result| { + result.deinit(); try stdout.print("candles ok", .{}); } else |err| { try stdout.print("candles FAILED ({s})", .{@errorName(err)}); @@ -522,8 +605,8 @@ fn refresh(io: std.Io, allocator: 
std.mem.Allocator, environ: *const std.process // Dividends try printRateLimitWait(&svc, stdout); - if (svc.getDividends(sym)) |result| { - zfin.Dividend.freeSlice(allocator, result.data); + if (svc.getDividends(sym, .{})) |result| { + result.deinit(); try stdout.print(", dividends ok", .{}); } else |err| { try stdout.print(", dividends FAILED ({s})", .{@errorName(err)}); @@ -532,8 +615,8 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process // Splits try printRateLimitWait(&svc, stdout); - if (svc.getSplits(sym)) |result| { - allocator.free(result.data); + if (svc.getSplits(sym, .{})) |result| { + result.deinit(); try stdout.print(", splits ok", .{}); } else |err| { try stdout.print(", splits FAILED ({s})", .{@errorName(err)}); @@ -542,14 +625,73 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process // Earnings try printRateLimitWait(&svc, stdout); - if (svc.getEarnings(sym)) |result| { - allocator.free(result.data); + if (svc.getEarnings(sym, .{})) |result| { + result.deinit(); try stdout.print(", earnings ok", .{}); } else |err| { try stdout.print(", earnings FAILED ({s})", .{@errorName(err)}); sym_ok = false; } + // Classification (Wikidata). Captures CIK if Wikidata + // had it — used to chain into entity_facts below. + // NotFound is logged as `n/a` (symbol genuinely has no + // Wikidata entry) and doesn't flip sym_ok. + var cik_buf: ?[]u8 = null; + defer if (cik_buf) |b| allocator.free(b); + try printRateLimitWait(&svc, stdout); + if (svc.getClassification(sym, .{})) |result| { + defer result.deinit(); + if (result.data.len > 0) { + if (result.data[0].cik) |cik| { + cik_buf = allocator.dupe(u8, cik) catch null; + } + } + try stdout.print(", classification ok", .{}); + } else |err| switch (err) { + zfin.DataError.NotFound => try stdout.print(", classification n/a", .{}), + else => { + try stdout.print(", classification FAILED ({t})", .{err}); + sym_ok = false; + }, + } + + // ETF metrics. 
NotFound is the expected outcome for + // non-funds (NPORT-P only exists for funds + UITs); a + // negative-cache entry suppresses retries. Logged as + // `n/a` and doesn't flip sym_ok. + try printRateLimitWait(&svc, stdout); + if (svc.getEtfMetrics(sym, .{})) |result| { + result.deinit(); + try stdout.print(", etf_metrics ok", .{}); + } else |err| switch (err) { + zfin.DataError.NotFound => try stdout.print(", etf_metrics n/a", .{}), + else => { + try stdout.print(", etf_metrics FAILED ({t})", .{err}); + sym_ok = false; + }, + } + + // Entity facts (XBRL). Only attempted when the + // classification step yielded a CIK — funds without + // Wikidata entries don't reach here even though they + // have an EDGAR CIK from the ticker map (production + // zfin chains entity_facts off Wikidata's CIK, so the + // server warms the cache the same way). + if (cik_buf) |cik| { + try printRateLimitWait(&svc, stdout); + if (svc.getEntityFacts(cik, .{})) |result| { + result.deinit(); + try stdout.print(", entity_facts ok", .{}); + } else |err| switch (err) { + zfin.DataError.NotFound => try stdout.print(", entity_facts n/a", .{}), + else => { + try stdout.print(", entity_facts FAILED ({t})", .{err}); + sym_ok = false; + }, + } + } + try stdout.print("\n", .{}); try stdout.flush(); @@ -615,6 +757,17 @@ pub fn main(init: std.process.Init) !void { router.get("/:symbol/earnings", handleEarnings, .{}); router.get("/:symbol/options", handleOptions, .{}); + // Wikidata + EDGAR derived data — populated by `refresh`. + router.get("/:symbol/classification", handleClassification, .{}); + router.get("/:symbol/etf_metrics", handleEtfMetrics, .{}); + router.get("/:cik/entity_facts", handleEntityFacts, .{}); + + // EDGAR shared ticker maps (~3-5 MB each, refreshed + // every 30 days). Static-key routes — single file + // shared across every symbol lookup. 
+ router.get("/_edgar/tickers_funds", handleTickersFunds, .{}); + router.get("/_edgar/tickers_companies", handleTickersCompanies, .{}); + log.info("zfin-server {s}", .{version}); log.info("listening on port {d}", .{port}); try server.listen();