add edgar/wikidata endpoints/refresh, zfin upgrade
All checks were successful
Generic zig build / build (push) Successful in 3m39s
Generic zig build / deploy (push) Successful in 16s

This commit is contained in:
Emil Lerch 2026-06-01 08:12:51 -07:00
parent 7c08f28ecd
commit 0897eb850f
Signed by: lobo
GPG key ID: A7B62D657EF764F8
3 changed files with 176 additions and 18 deletions

View file

@ -35,6 +35,11 @@ curl http://localhost:8080/AAPL/returns?fmt=xml
| `GET /:symbol/dividends` | `application/x-srf` | Raw SRF cache file |
| `GET /:symbol/earnings` | `application/x-srf` | Raw SRF cache file |
| `GET /:symbol/options` | `application/x-srf` | Raw SRF cache file |
| `GET /:symbol/classification` | `application/x-srf` | Wikidata classification (SRF) |
| `GET /:symbol/etf_metrics` | `application/x-srf` | EDGAR NPORT-P fund metrics; 404 for non-funds |
| `GET /:cik/entity_facts` | `application/x-srf` | EDGAR XBRL entity facts (CIK-keyed) |
| `GET /_edgar/tickers_funds` | `application/x-srf` | EDGAR mutual-fund ticker map (~3 MB) |
| `GET /_edgar/tickers_companies` | `application/x-srf` | EDGAR company ticker map (~5 MB) |
## LibreCalc usage
@ -75,7 +80,7 @@ watch symbols, and fetches candles, dividends, and earnings for each.
## Building
Requires [Zig 0.15.2](https://ziglang.org/download/).
Requires [Zig 0.16.0](https://ziglang.org/download/).
```sh
zig build # build
@ -92,7 +97,7 @@ All configuration is via environment variables:
| `TWELVEDATA_API_KEY` | Yes | TwelveData API key |
| `POLYGON_API_KEY` | No | Polygon API key |
| `FINNHUB_API_KEY` | No | Finnhub API key |
| `ALPHAVANTAGE_API_KEY` | No | Alpha Vantage API key |
| `ZFIN_USER_EMAIL` | Yes | Contact email for SEC EDGAR User-Agent header |
| `ZFIN_PORTFOLIO` | No | Path to portfolio SRF (default: `portfolio.srf`) |
| `ZFIN_CACHE_DIR` | No | Cache directory (default: `~/.cache/zfin`) |

View file

@ -14,8 +14,8 @@
.hash = "httpz-0.0.0-PNVzrLjJCAD37S0CcrXpsjSqr86hVjK0rsALTDJ98AAJ",
},
.zfin = .{
.url = "git+https://git.lerch.org/lobo/zfin#16048489ddfc12b27c5f5fb53e8189d68b7fbfe4",
.hash = "zfin-0.0.0-J-B21qlXMQAmuNwl3EpsYiJy5E8jcsImfjk0k99rlahP",
.url = "git+https://git.lerch.org/lobo/zfin#641a88b0b768769a33e3f95fa6f1736fa58e4a6f",
.hash = "zfin-0.0.0-J-B21mEEPACavg9MPzB402B3DIzfIxXhNv3UOAyFHBxK",
},
},
}

View file

@ -71,6 +71,11 @@ fn handleHelp(_: *App, _: *httpz.Request, res: *httpz.Response) !void {
\\ GET /{SYMBOL}/splits Raw SRF cache file
\\ GET /{SYMBOL}/earnings Raw SRF cache file
\\ GET /{SYMBOL}/options Raw SRF cache file
\\ GET /{SYMBOL}/classification Wikidata classification (SRF)
\\ GET /{SYMBOL}/etf_metrics EDGAR NPORT-P fund metrics (SRF; 404 for non-funds)
\\ GET /{CIK}/entity_facts EDGAR XBRL entity facts (SRF; CIK-keyed)
\\ GET /_edgar/tickers_funds EDGAR mutual-fund ticker map (SRF)
\\ GET /_edgar/tickers_companies EDGAR company ticker map (SRF)
\\ GET /symbols List of tracked symbols
\\
\\Returns fields:
@ -146,7 +151,7 @@ fn handleReturns(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
}
}
const result = app.svc.getTrailingReturns(symbol) catch {
const result = app.svc.getTrailingReturns(symbol, .{}) catch {
res.status = 404;
res.body = "Symbol not found or fetch failed";
return;
@ -279,7 +284,7 @@ fn handleQuote(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
const arena = res.arena;
const symbol = try upperDupe(arena, raw_symbol);
const q = app.svc.getQuote(symbol) catch {
const q = app.svc.getQuote(symbol, .{}) catch {
res.status = 404;
res.body = "Quote not available";
return;
@ -292,15 +297,41 @@ fn handleQuote(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
}
fn handleSrfFile(app: *App, req: *httpz.Request, res: *httpz.Response, filename: []const u8) !void {
const raw_symbol = req.param("symbol") orelse {
return handleSrfFileByKey(app, req, res, "symbol", filename);
}
/// Generalized SRF cache-file passthrough: reads
/// `<cache_dir>/<key>/<filename>` where `<key>` is whatever URL
/// parameter `key_param` resolves to. The default `handleSrfFile`
/// uses `"symbol"`; CIK-keyed routes (e.g. `/:cik/entity_facts`)
/// pass `"cik"` instead. The cache-key segment is uppercased
/// (safe for both symbols and zero-padded CIK digit strings).
fn handleSrfFileByKey(app: *App, req: *httpz.Request, res: *httpz.Response, key_param: []const u8, filename: []const u8) !void {
const raw_key = req.param(key_param) orelse {
res.status = 400;
res.body = "Missing symbol";
res.body = "Missing key";
return;
};
const arena = res.arena;
const symbol = try upperDupe(arena, raw_symbol);
const key = try upperDupe(arena, raw_key);
return serveSrfFile(app, res, key, filename);
}
const path = try std.fs.path.join(arena, &.{ app.config.cache_dir, symbol, filename });
/// Serves `<cache_dir>/<key>/<filename>` for routes that have no
/// path parameter to resolve, such as `/_edgar/tickers_funds`
/// (which reads `<cache_dir>/_edgar/tickers_funds.srf`).
///
/// `key` is a literal cache directory name and is forwarded
/// verbatim: unlike the parameter-keyed handlers it is never
/// uppercased, because the cache stores `_edgar` exactly as
/// written. Reading the file and building the response is left
/// entirely to `serveSrfFile`.
fn handleStaticSrfFile(app: *App, res: *httpz.Response, key: []const u8, filename: []const u8) !void {
    // Nothing to normalize here; any error from the shared reader
    // propagates to the caller unchanged.
    try serveSrfFile(app, res, key, filename);
}
/// Inner shared helper. Reads the file, computes etag, sets
/// headers, sends. Caller has already resolved the cache-key
/// segment (per-request param or static literal).
fn serveSrfFile(app: *App, res: *httpz.Response, key: []const u8, filename: []const u8) !void {
const arena = res.arena;
const path = try std.fs.path.join(arena, &.{ app.config.cache_dir, key, filename });
const content = std.Io.Dir.cwd().readFileAlloc(app.io, path, arena, .limited(10 * 1024 * 1024)) catch {
res.status = 404;
res.body = "Cache file not found";
@ -351,6 +382,32 @@ fn handleOptions(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
return handleSrfFile(app, req, res, "options.srf");
}
/// Handler for `GET /:symbol/classification`.
///
/// Passes through the symbol's cached Wikidata classification
/// file. Resolving the `symbol` route parameter and reading the
/// file are delegated to `handleSrfFile`.
fn handleClassification(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
    const cache_file = "classification.srf";
    return handleSrfFile(app, req, res, cache_file);
}
/// Handler for `GET /:symbol/etf_metrics`.
///
/// Passes through the symbol's cached EDGAR NPORT-P fund metrics
/// file. Resolving the `symbol` route parameter and reading the
/// file are delegated to `handleSrfFile`.
fn handleEtfMetrics(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
    const cache_file = "etf_metrics.srf";
    return handleSrfFile(app, req, res, cache_file);
}
/// Handler for `GET /:cik/entity_facts`.
///
/// This route is keyed by CIK rather than by symbol: the cache
/// layout is `<cache_dir>/<CIK>/entity_facts.srf`, where the CIK
/// is the zero-padded 10-digit string Wikidata's P5531 emits.
/// The lookup therefore goes through `handleSrfFileByKey` with
/// the `cik` route parameter instead of the default `symbol`.
fn handleEntityFacts(app: *App, req: *httpz.Request, res: *httpz.Response) !void {
    const key_param = "cik";
    const cache_file = "entity_facts.srf";
    return handleSrfFileByKey(app, req, res, key_param, cache_file);
}
/// Handler for `GET /_edgar/tickers_funds`.
///
/// Static-key route: `<cache_dir>/_edgar/tickers_funds.srf` is a
/// single file shared across all symbol lookups, not a per-symbol
/// cache, so the request itself is not consulted.
fn handleTickersFunds(app: *App, _: *httpz.Request, res: *httpz.Response) !void {
    const edgar_dir = "_edgar";
    const cache_file = "tickers_funds.srf";
    return handleStaticSrfFile(app, res, edgar_dir, cache_file);
}
/// Handler for `GET /_edgar/tickers_companies`.
///
/// Static-key route serving `<cache_dir>/_edgar/tickers_companies.srf`;
/// the request carries no parameters and is not consulted.
fn handleTickersCompanies(app: *App, _: *httpz.Request, res: *httpz.Response) !void {
    const edgar_dir = "_edgar";
    const cache_file = "tickers_companies.srf";
    return handleStaticSrfFile(app, res, edgar_dir, cache_file);
}
// Helpers
fn upperDupe(allocator: std.mem.Allocator, s: []const u8) ![]u8 {
@ -494,6 +551,32 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process
var success_count: u32 = 0;
var fail_count: u32 = 0;
// Warm the EDGAR ticker maps once per refresh run. They're
// ~3-5 MB each, cached for 30 days; warming guarantees the
// shared `<cache>/_edgar/tickers_funds.srf` and
// `tickers_companies.srf` files exist for the static-route
// handlers to serve. Per-symbol `getEtfMetrics` calls below
// also rely on these maps being loaded.
{
try printRateLimitWait(&svc, stdout);
if (svc.loadMutualFundTickerMap(.{})) |mut_map| {
var m = mut_map;
m.deinit();
try stdout.print("EDGAR mutual-fund ticker map ok\n", .{});
} else |err| {
try stdout.print("EDGAR mutual-fund ticker map FAILED ({t})\n", .{err});
}
try printRateLimitWait(&svc, stdout);
if (svc.loadCompanyTickerMap(.{})) |co_map| {
var m = co_map;
m.deinit();
try stdout.print("EDGAR company ticker map ok\n", .{});
} else |err| {
try stdout.print("EDGAR company ticker map FAILED ({t})\n", .{err});
}
try stdout.flush();
}
var it = symbols.iterator();
while (it.next()) |entry| {
const sym = entry.key_ptr.*;
@ -504,8 +587,8 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process
// Candles
try printRateLimitWait(&svc, stdout);
if (svc.getCandles(sym)) |result| {
allocator.free(result.data);
if (svc.getCandles(sym, .{})) |result| {
result.deinit();
try stdout.print("candles ok", .{});
} else |err| {
try stdout.print("candles FAILED ({s})", .{@errorName(err)});
@ -522,8 +605,8 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process
// Dividends
try printRateLimitWait(&svc, stdout);
if (svc.getDividends(sym)) |result| {
zfin.Dividend.freeSlice(allocator, result.data);
if (svc.getDividends(sym, .{})) |result| {
result.deinit();
try stdout.print(", dividends ok", .{});
} else |err| {
try stdout.print(", dividends FAILED ({s})", .{@errorName(err)});
@ -532,8 +615,8 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process
// Splits
try printRateLimitWait(&svc, stdout);
if (svc.getSplits(sym)) |result| {
allocator.free(result.data);
if (svc.getSplits(sym, .{})) |result| {
result.deinit();
try stdout.print(", splits ok", .{});
} else |err| {
try stdout.print(", splits FAILED ({s})", .{@errorName(err)});
@ -542,14 +625,73 @@ fn refresh(io: std.Io, allocator: std.mem.Allocator, environ: *const std.process
// Earnings
try printRateLimitWait(&svc, stdout);
if (svc.getEarnings(sym)) |result| {
allocator.free(result.data);
if (svc.getEarnings(sym, .{})) |result| {
result.deinit();
try stdout.print(", earnings ok", .{});
} else |err| {
try stdout.print(", earnings FAILED ({s})", .{@errorName(err)});
sym_ok = false;
}
// Classification (Wikidata). Captures the CIK if Wikidata
// had it; the CIK is used to chain into entity_facts below.
// NotFound is logged as `n/a` (symbol genuinely has no
// Wikidata entry) and doesn't flip sym_ok.
var cik_buf: ?[]u8 = null;
defer if (cik_buf) |b| allocator.free(b);
try printRateLimitWait(&svc, stdout);
if (svc.getClassification(sym, .{})) |result| {
defer result.deinit();
if (result.data.len > 0) {
if (result.data[0].cik) |cik| {
cik_buf = allocator.dupe(u8, cik) catch null;
}
}
try stdout.print(", classification ok", .{});
} else |err| switch (err) {
zfin.DataError.NotFound => try stdout.print(", classification n/a", .{}),
else => {
try stdout.print(", classification FAILED ({t})", .{err});
sym_ok = false;
},
}
// ETF metrics. NotFound is the expected outcome for
// non-funds (NPORT-P only exists for funds + UITs); a
// negative-cache entry suppresses retries. Logged as
// `n/a` and doesn't flip sym_ok.
try printRateLimitWait(&svc, stdout);
if (svc.getEtfMetrics(sym, .{})) |result| {
result.deinit();
try stdout.print(", etf_metrics ok", .{});
} else |err| switch (err) {
zfin.DataError.NotFound => try stdout.print(", etf_metrics n/a", .{}),
else => {
try stdout.print(", etf_metrics FAILED ({t})", .{err});
sym_ok = false;
},
}
// Entity facts (XBRL). Only attempted when the
// classification step yielded a CIK; funds without
// Wikidata entries don't reach here even though they
// have an EDGAR CIK from the ticker map (production
// zfin chains entity_facts off Wikidata's CIK, so the
// server warms the cache the same way).
if (cik_buf) |cik| {
try printRateLimitWait(&svc, stdout);
if (svc.getEntityFacts(cik, .{})) |result| {
result.deinit();
try stdout.print(", entity_facts ok", .{});
} else |err| switch (err) {
zfin.DataError.NotFound => try stdout.print(", entity_facts n/a", .{}),
else => {
try stdout.print(", entity_facts FAILED ({t})", .{err});
sym_ok = false;
},
}
}
try stdout.print("\n", .{});
try stdout.flush();
@ -615,6 +757,17 @@ pub fn main(init: std.process.Init) !void {
router.get("/:symbol/earnings", handleEarnings, .{});
router.get("/:symbol/options", handleOptions, .{});
// Wikidata + EDGAR derived data populated by `refresh`.
router.get("/:symbol/classification", handleClassification, .{});
router.get("/:symbol/etf_metrics", handleEtfMetrics, .{});
router.get("/:cik/entity_facts", handleEntityFacts, .{});
// EDGAR shared ticker maps (~3-5 MB each, refreshed
// every 30 days). Static-key routes: a single file
// shared across every symbol lookup.
router.get("/_edgar/tickers_funds", handleTickersFunds, .{});
router.get("/_edgar/tickers_companies", handleTickersCompanies, .{});
log.info("zfin-server {s}", .{version});
log.info("listening on port {d}", .{port});
try server.listen();