surface data gaps in alphavantage through enrich

This commit is contained in:
Emil Lerch 2026-05-24 10:28:54 -07:00
parent 4d6060552a
commit 56e462fae9
4 changed files with 235 additions and 33 deletions

View file

@ -439,6 +439,49 @@ look nice in prose but they create real problems:
When in doubt, **ask**. A one-line "I'm about to use `—` here for X, OK?"
is much cheaper than reverting after the user notices.
### Errors carry information — never throw it away
When you catch an error, the caller's first question is **"why did
this fail?"** A user-facing error message that says only
`"FetchFailed"` or `"could not parse portfolio file"` is failing
the user — they have to read source code to figure out what
happened. Three habits cause this:
**(1) Bare `catch {}` and `catch return error.X`.** The error
value carries the upstream's classification (`RateLimited`,
`Unauthorized`, `NotFound`, `ParseError`, etc.). Discarding it
reduces N distinct conditions to one opaque "something went
wrong." Fix: capture as `catch |err|` and at minimum include
`@errorName(err)` (or `{t}` format) in any user-visible message.
Better: re-raise a more specific error variant from the caller's
error set so the next layer up can decide what to do.
**(2) Generic `error.X` collapse points in service-layer code.**
When `service.zig:getX` does `provider.fetchX(...) catch return
DataError.FetchFailed`, the provider's specific error
(`RateLimited` vs `Unauthorized` vs `NotFound`) is permanently
erased. Fix: either expand the service-layer error set
(`DataError`) to mirror the provider distinctions, OR pass the
inner error through via `catch |err| { log.warn(...
@errorName(err) ...); return ...; }` so the stderr path tells
the user something useful even if the typed return value is
collapsed.
**(3) User-facing stderr messages that don't say what the actual
error name was.** A CLI command that catches an error MUST print
`@errorName(err)` (or a deliberate human translation per error
kind) in the stderr message. `"Error: Failed to fetch data for
symbol"` is unhelpful. `"Error fetching AAPL: RateLimited (free
tier 5/min — wait 60s)"` is helpful. The error name alone is
enough for the user to look it up; the human translation is a
nice-to-have on top.
**The grep test:** if you're catching an error in user-facing
code, the message should mention either `@errorName` / `{t}` or
reference a specific error variant by name. `cli.stderrPrint`
calls with hardcoded "Error: failed to ..." strings and no error
context fail this test.
### NEVER put PII in tests, fixtures, comments, or docs
This codebase is a **personal finance tool**. Real account names,

View file

@ -106,6 +106,72 @@ pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
try enrichPortfolio(ctx.io, ctx.allocator, svc, parsed.arg, ctx.today, ctx.out);
}
/// Whether the caller should continue with the next symbol or
/// abort the whole batch after a fetch failure. Hard-stop errors
/// (no API key, auth rejected, rate-limited) will recur on every
/// subsequent symbol; soft-skip errors are per-symbol and other
/// symbols may still succeed.
const FetchErrorAction = enum { hard_stop, soft_skip };
/// Print a user-facing stderr message describing why the fetch
/// for `sym` failed, and tell the caller whether to continue
/// (`soft_skip`) or stop the whole batch (`hard_stop`).
///
/// This is the single dispatch point for translating a
/// `DataError` into actionable user output. Per AGENTS.md "Errors
/// carry information": the message names the specific error
/// variant never just "fetch failed" so the user can act on
/// it without reading source code.
fn reportFetchError(io: std.Io, sym: []const u8, err: anyerror) FetchErrorAction {
var msg_buf: [256]u8 = undefined;
switch (err) {
zfin.DataError.NoApiKey => {
cli.stderrPrint(io, "Error: ALPHAVANTAGE_API_KEY not set. Add it to .env\n");
return .hard_stop;
},
zfin.DataError.AuthError => {
cli.stderrPrint(io, "Error: AlphaVantage rejected the API key. Check ALPHAVANTAGE_API_KEY in .env\n");
return .hard_stop;
},
zfin.DataError.RateLimited => {
const msg = std.fmt.bufPrint(
&msg_buf,
"Error: rate-limited by AlphaVantage on {s} (free tier 5 calls/min, 25/day). Try again later.\n",
.{sym},
) catch "Error: rate-limited by AlphaVantage. Try again later.\n";
cli.stderrPrint(io, msg);
return .hard_stop;
},
zfin.DataError.NotFound => {
const msg = std.fmt.bufPrint(
&msg_buf,
" {s}: not in AlphaVantage; mark sector/geo/asset_class manually\n",
.{sym},
) catch " not in AlphaVantage; mark manually\n";
cli.stderrPrint(io, msg);
return .soft_skip;
},
zfin.DataError.TransientError => {
const msg = std.fmt.bufPrint(
&msg_buf,
" {s}: transient AlphaVantage failure (server error); will need re-run\n",
.{sym},
) catch " transient AlphaVantage failure; will need re-run\n";
cli.stderrPrint(io, msg);
return .soft_skip;
},
else => {
const msg = std.fmt.bufPrint(
&msg_buf,
" {s}: fetch failed ({t})\n",
.{ sym, err },
) catch " fetch failed\n";
cli.stderrPrint(io, msg);
return .soft_skip;
},
}
}
/// Enrich a single symbol and output appendable SRF lines to stdout.
fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, sym: []const u8, out: *std.Io.Writer) !void {
{
@ -115,14 +181,20 @@ fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService
}
const overview = svc.getCompanyOverview(sym) catch |err| {
if (err == zfin.DataError.NoApiKey) {
cli.stderrPrint(io, "Error: ALPHAVANTAGE_API_KEY not set. Add it to .env\n");
return;
// Specific user-facing message per error variant. See
// `reportFetchError` for the dispatch; on hard-stop errors
// (NoApiKey / AuthError / RateLimited) we don't even emit
// a TODO line the user can't do anything with this single
// symbol until they fix the underlying issue.
const action = reportFetchError(io, sym, err);
switch (action) {
.hard_stop => return,
.soft_skip => {
try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
return;
},
}
cli.stderrPrint(io, "Error: Failed to fetch data for symbol\n");
try out.print("# {s} -- fetch failed\n", .{sym});
try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
return;
};
defer {
if (overview.name) |n| allocator.free(n);
@ -209,11 +281,31 @@ fn enrichPortfolio(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataServ
cli.stderrPrint(io, msg);
}
const overview = svc.getCompanyOverview(sym) catch {
try out.print("# {s} -- fetch failed\n", .{sym});
const overview = svc.getCompanyOverview(sym) catch |err| {
const action = reportFetchError(io, sym, err);
try out.print("# {s} -- fetch failed ({t})\n", .{ sym, err });
try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
failed += 1;
continue;
switch (action) {
.hard_stop => {
// Every remaining symbol will hit the same
// condition (no API key / auth fail / rate
// limit). Stop the batch with a clear note
// about how many symbols were skipped, so the
// user knows to rerun rather than wonder why
// the SRF stops short.
var rem_buf: [256]u8 = undefined;
const remaining = syms.len - i - 1;
const rem_msg = std.fmt.bufPrint(
&rem_buf,
"Stopping enrichment: {d} symbol(s) not yet fetched. Rerun once the issue is resolved.\n",
.{remaining},
) catch "Stopping enrichment.\n";
cli.stderrPrint(io, rem_msg);
break;
},
.soft_skip => continue,
}
};
// Free allocated strings from overview when done
defer {

View file

@ -175,6 +175,19 @@ test "parseCompanyOverview missing fields" {
try std.testing.expect(overview.industry == null);
}
test "parseCompanyOverview empty body returns NotFound" {
// AlphaVantage replies HTTP 200 with `{}` for symbols it
// doesn't recognize (no "Error Message" key, no anything).
// The parser must surface that as NotFound, not silently
// succeed with an all-null overview that downstream code
// would render as "Sector: Unknown, Geo: US, Asset class:
// US Large Cap" — wrong on every axis for a nonexistent
// ticker.
const body = "{}";
const allocator = std.testing.allocator;
try std.testing.expectError(error.NotFound, parseCompanyOverview(allocator, body, "ZZQQXX99"));
}
pub const AlphaVantage = struct {
api_key: []const u8,
client: http.Client,
@ -372,6 +385,14 @@ fn parseCompanyOverview(
if (root.get("Note")) |_| return error.RateLimited;
if (root.get("Information")) |_| return error.RateLimited;
// AlphaVantage returns an empty `{}` body (HTTP 200) for
// symbols it doesn't recognize. There's no `Error Message`
// key in this case just nothing. Detect by checking for the
// canonical "this is a real overview" key (`Symbol`); if
// absent, the response carries no useful data and we should
// surface that as NotFound.
if (root.get("Symbol") == null) return error.NotFound;
return .{
.symbol = symbol,
.name = if (jsonStr(root.get("Name"))) |s| allocator.dupe(u8, s) catch null else null,

View file

@ -60,6 +60,17 @@ pub const DataError = error{
TransientError,
/// Provider auth failure (bad API key). Entire refresh should stop.
AuthError,
/// Provider returned a rate-limit response (e.g. AlphaVantage's
/// free-tier 5-calls/min or 25-calls/day). Caller should stop
/// the current batch and surface a "try again later" message;
/// retrying immediately will just hit the same limit.
RateLimited,
/// Provider responded but doesn't have data for the requested
/// symbol (404, "Error Message" body, or equivalent). Distinct
/// from `FetchFailed` so callers (e.g. `enrich`) can tell the
/// user "this symbol isn't in the provider's catalog; mark it
/// manually" instead of an opaque "fetch failed."
NotFound,
};
/// Per-call options controlling cache vs network behavior. Drives
@ -953,10 +964,45 @@ pub const DataService = struct {
/// Fetch company overview (sector, industry, country, market cap) from Alpha Vantage.
/// No cache -- always fetches fresh. Caller must free the returned string fields.
///
/// Maps the provider's specific error to a `DataError` variant so
/// callers (notably `enrich`) can distinguish "AlphaVantage
/// doesn't have this symbol" from "rate-limited" from "auth
/// failed" from generic transport errors. Logs the upstream
/// error name on every failure so the stderr log carries the
/// detail even when the typed return value is collapsed.
pub fn getCompanyOverview(self: *DataService, symbol: []const u8) DataError!CompanyOverview {
var av = try self.getProvider(AlphaVantage);
return av.fetchCompanyOverview(self.allocator, symbol) catch
return DataError.FetchFailed;
return av.fetchCompanyOverview(self.allocator, symbol) catch |err| {
log.warn("{s}: getCompanyOverview failed: {s}", .{ symbol, @errorName(err) });
return mapAlphaVantageError(err);
};
}
/// Translate an AlphaVantage provider error into the broader
/// `DataError` set. Keeps the rate-limit / not-found / auth
/// distinctions visible to callers so user-facing CLI messages
/// can be specific instead of generic "FetchFailed".
fn mapAlphaVantageError(err: anyerror) DataError {
return switch (err) {
error.RateLimited => DataError.RateLimited,
error.Unauthorized => DataError.AuthError,
error.NotFound => DataError.NotFound,
// The AlphaVantage parser throws `RequestFailed` when
// the response body contains an `"Error Message"` key,
// which AV sends for unknown / malformed symbols. The
// HTTP layer also uses `RequestFailed` as a last-resort
// transport collapse rare in practice. Treat both as
// NotFound; the user-facing semantic ("AlphaVantage
// doesn't recognize this symbol") is what's wanted in
// the common case, and the log line above carries the
// raw error name for the rare transport-failure case.
error.RequestFailed => DataError.NotFound,
error.ServerError => DataError.TransientError,
error.OutOfMemory => DataError.OutOfMemory,
error.ParseError => DataError.ParseError,
else => DataError.FetchFailed,
};
}
/// Compute trailing returns for a symbol (fetches candles + dividends).
@ -1190,7 +1236,7 @@ pub const DataService = struct {
// 1. Fresh cache fast path (no full deserialization)
if (!force_refresh and self.isCandleCacheFresh(sym)) {
if (self.getCachedLastClose(sym)) |close| {
prices.put(sym, close) catch {};
prices.put(sym, close) catch |err| log.warn("loadPrices cache-hit put({s}): {t}", .{ sym, err });
}
result.cached_count += 1;
if (progress) |p| p.emit(i, total, sym, .cached);
@ -1205,7 +1251,7 @@ pub const DataService = struct {
defer self.allocator.free(candle_result.data);
if (candle_result.data.len > 0) {
const last = candle_result.data[candle_result.data.len - 1];
prices.put(sym, last.close) catch {};
prices.put(sym, last.close) catch |err| log.warn("loadPrices candle-close put({s}): {t}", .{ sym, err });
if (result.latest_date == null or last.date.days > result.latest_date.?.days) {
result.latest_date = last.date;
}
@ -1218,7 +1264,7 @@ pub const DataService = struct {
// 3. Fetch failed fall back to stale cache
result.fail_count += 1;
if (self.getCachedLastClose(sym)) |close| {
prices.put(sym, close) catch {};
prices.put(sym, close) catch |err| log.warn("loadPrices stale-fallback put({s}): {t}", .{ sym, err });
result.stale_count += 1;
if (progress) |p| p.emit(i, total, sym, .failed_used_stale);
} else {
@ -1355,9 +1401,9 @@ pub const DataService = struct {
defer all_symbols.deinit(self.allocator);
if (portfolio_syms) |ps| {
for (ps) |sym| all_symbols.append(self.allocator, sym) catch {};
for (ps) |sym| all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append portfolio sym({s}): {t}", .{ sym, err });
}
for (watch_syms) |sym| all_symbols.append(self.allocator, sym) catch {};
for (watch_syms) |sym| all_symbols.append(self.allocator, sym) catch |err| log.warn("loadAllPrices append watch sym({s}): {t}", .{ sym, err });
// Invalidate cache if force refresh
if (config.force_refresh) {
@ -1375,12 +1421,12 @@ pub const DataService = struct {
for (all_symbols.items) |sym| {
if (!config.force_refresh and self.isCandleCacheFresh(sym)) {
if (self.getCachedLastClose(sym)) |close| {
result.prices.put(sym, close) catch {};
result.prices.put(sym, close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
self.updateLatestDate(&result, sym);
}
result.cached_count += 1;
} else {
needs_fetch.append(self.allocator, sym) catch {};
needs_fetch.append(self.allocator, sym) catch |err| log.warn("loadAllPrices needs_fetch append({s}): {t}", .{ sym, err });
}
}
@ -1397,7 +1443,7 @@ pub const DataService = struct {
if (config.skip_network) {
for (needs_fetch.items) |sym| {
if (self.getCachedLastClose(sym)) |close| {
result.prices.put(sym, close) catch {};
result.prices.put(sym, close) catch |err| log.warn("loadAllPrices cache-hit put({s}): {t}", .{ sym, err });
self.updateLatestDate(&result, sym);
result.stale_count += 1;
} else {
@ -1424,7 +1470,7 @@ pub const DataService = struct {
} else {
// No server all need provider fetch
for (needs_fetch.items) |sym| {
server_failures.append(self.allocator, sym) catch {};
server_failures.append(self.allocator, sym) catch |err| log.warn("loadAllPrices server_failures append({s}): {t}", .{ sym, err });
}
}
@ -1469,7 +1515,7 @@ pub const DataService = struct {
var next_index = AtomicCounter{};
const sync_results = self.allocator.alloc(ServerSyncResult, symbols.len) catch {
// Allocation failed fall back to marking all as failures
for (symbols) |sym| failures.append(self.allocator, sym) catch {};
for (symbols) |sym| failures.append(self.allocator, sym) catch |err| log.warn("parallelFetch slots-alloc-fallback failures append({s}): {t}", .{ sym, err });
return;
};
defer self.allocator.free(sync_results);
@ -1481,7 +1527,7 @@ pub const DataService = struct {
// Spawn worker threads
var threads = self.allocator.alloc(std.Thread, thread_count) catch {
for (symbols) |sym| failures.append(self.allocator, sym) catch {};
for (symbols) |sym| failures.append(self.allocator, sym) catch |err| log.warn("parallelFetch threads-alloc-fallback failures append({s}): {t}", .{ sym, err });
return;
};
defer self.allocator.free(threads);
@ -1526,7 +1572,7 @@ pub const DataService = struct {
// Progress reporting while waiting
if (aggregate_progress) |p| {
while (completed.load() < symbols.len) {
std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(50), .awake) catch {};
std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(50), .awake) catch |err| log.debug("parallelFetch progress-poll sleep interrupted: {t}", .{err});
p.emit(result.cached_count + completed.load(), total_count, .server_sync);
}
}
@ -1541,15 +1587,15 @@ pub const DataService = struct {
if (sr.success) {
// Server sync succeeded read from cache
if (self.getCachedLastClose(sr.symbol)) |close| {
result.prices.put(sr.symbol, close) catch {};
result.prices.put(sr.symbol, close) catch |err| log.warn("syncFromServer cache-after-sync put({s}): {t}", .{ sr.symbol, err });
self.updateLatestDate(result, sr.symbol);
result.server_synced_count += 1;
} else {
// Sync said success but can't read cache treat as failure
failures.append(self.allocator, sr.symbol) catch {};
failures.append(self.allocator, sr.symbol) catch |err| log.warn("syncFromServer success-but-no-cache failures append({s}): {t}", .{ sr.symbol, err });
}
} else {
failures.append(self.allocator, sr.symbol) catch {};
failures.append(self.allocator, sr.symbol) catch |err| log.warn("syncFromServer fail-result failures append({s}): {t}", .{ sr.symbol, err });
}
}
}
@ -1576,7 +1622,7 @@ pub const DataService = struct {
defer self.allocator.free(candle_result.data);
if (candle_result.data.len > 0) {
const last = candle_result.data[candle_result.data.len - 1];
result.prices.put(sym, last.close) catch {};
result.prices.put(sym, last.close) catch |err| log.warn("loadAllPrices candle-close put({s}): {t}", .{ sym, err });
if (result.latest_date == null or last.date.days > result.latest_date.?.days) {
result.latest_date = last.date;
}
@ -1589,7 +1635,7 @@ pub const DataService = struct {
// Provider failed try stale cache
result.failed_count += 1;
if (self.getCachedLastClose(sym)) |close| {
result.prices.put(sym, close) catch {};
result.prices.put(sym, close) catch |err| log.warn("loadAllPrices stale-fallback put({s}): {t}", .{ sym, err });
result.stale_count += 1;
if (progress) |p| p.emit(display_idx, total, sym, .failed_used_stale);
} else {
@ -1682,7 +1728,7 @@ pub const DataService = struct {
// Ensure cache dir exists
if (std.fs.path.dirnamePosix(path)) |dir| {
std.Io.Dir.cwd().createDirPath(self.io, dir) catch {};
std.Io.Dir.cwd().createDirPath(self.io, dir) catch |err| log.warn("audit-log createDirPath({s}): {t}", .{ dir, err });
}
// Read existing cache if present.
@ -1709,7 +1755,7 @@ pub const DataService = struct {
@memcpy(combined[0..existing.len], existing);
@memcpy(combined[existing.len..], encoded);
atomic.writeFileAtomic(self.io, self.allocator, path, combined) catch {};
atomic.writeFileAtomic(self.io, self.allocator, path, combined) catch |err| log.warn("audit-log writeFileAtomic({s}): {t}", .{ path, err });
}
// Utility
@ -1720,7 +1766,7 @@ pub const DataService = struct {
if (self.td) |*td| {
td.rate_limiter.backoff();
} else {
std.Io.sleep(self.io, std.Io.Duration.fromSeconds(10), .awake) catch {};
std.Io.sleep(self.io, std.Io.Duration.fromSeconds(10), .awake) catch |err| log.debug("rate-limit backoff sleep interrupted: {t}", .{err});
}
}
@ -1767,7 +1813,7 @@ pub const DataService = struct {
"{s}: retrying {s} server sync (attempt {d}/{d}) after {d}ms delay",
.{ symbol, @tagName(data_type), attempt + 1, max_attempts, retry_delay_ms },
);
std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(retry_delay_ms), .awake) catch {};
std.Io.sleep(self.io, std.Io.Duration.fromMilliseconds(retry_delay_ms), .awake) catch |err| log.debug("syncFromServer retry-delay sleep interrupted: {t}", .{err});
}
switch (self.tryOneSync(symbol, data_type, full_url)) {
.ok => return true,