better reporting on http related issues
This commit is contained in:
parent
d3b1c04a3a
commit
e17e76905f
2 changed files with 191 additions and 33 deletions
194
src/net/http.zig
194
src/net/http.zig
|
|
@ -1,19 +1,55 @@
|
|||
const std = @import("std");
|
||||
|
||||
pub const HttpError = error{
|
||||
RequestFailed,
|
||||
RateLimited,
|
||||
Unauthorized,
|
||||
NotFound,
|
||||
/// HTTP 402 Payment Required — used by FMP to mark symbols (mainly ETFs,
|
||||
/// mutual funds, CUSIPs, and some dual-class shares) that aren't covered
|
||||
/// by the caller's current plan. Providers should translate this into
|
||||
/// "no data" rather than a hard failure.
|
||||
PaymentRequired,
|
||||
ServerError,
|
||||
InvalidResponse,
|
||||
OutOfMemory,
|
||||
};
|
||||
const log = std.log.scoped(.http);
|
||||
|
||||
/// Public error set for the HTTP client wrapper. Merges the stdlib
|
||||
/// transport error sets directly so that callers see meaningful Zig
|
||||
/// error names (`NoAddressReturned`, `ConnectionRefused`,
|
||||
/// `EndOfStream`, `TlsInitializationFailed`, ...) at the boundary
|
||||
/// instead of a single opaque `RequestFailed`.
|
||||
///
|
||||
/// Why this shape, not `anyerror` and not a hand-rolled flat set:
|
||||
/// - Callers get exhaustive-switch checking from the compiler when
|
||||
/// they need to discriminate between, say, DNS failures and
|
||||
/// connection failures.
|
||||
/// - The error names propagate through `@errorName(err)` to log
|
||||
/// lines without per-stage translation tables. An operator
|
||||
/// reading "DnsLookupFailed" or "ConnectFailed" learns less
|
||||
/// than one reading "NoAddressReturned" — the stdlib variant
|
||||
/// is the truth, ours would be a lossy summary.
|
||||
/// - The set is the same shape as stdlib's own
|
||||
/// `http.Client.ConnectError = ConnectTcpError || RequestError`
|
||||
/// pattern. Zig dedupes shared variants automatically.
|
||||
///
|
||||
/// HTTP-status classifications (`RateLimited`, `NotFound`, etc.) live
|
||||
/// alongside the transport variants because callers consume both via
|
||||
/// the same `try` sites and the same error switches.
|
||||
pub const HttpError = std.Uri.ParseError ||
|
||||
std.http.Client.RequestError ||
|
||||
std.http.Client.Request.ReceiveHeadError ||
|
||||
std.Io.Writer.Error ||
|
||||
std.Io.Reader.Error ||
|
||||
std.mem.Allocator.Error ||
|
||||
error{
|
||||
/// Last-resort transport failure. Used only when we cannot
|
||||
/// recover the underlying stdlib error (e.g., across the
|
||||
/// retry boundary in `request()` after multiple distinct
|
||||
/// failures collapsed into a single retry exhaustion).
|
||||
/// Adding new uses of this variant is a smell — prefer
|
||||
/// preserving the real error.
|
||||
RequestFailed,
|
||||
// ── HTTP status classifications (no stdlib equivalent) ──
|
||||
RateLimited,
|
||||
Unauthorized,
|
||||
NotFound,
|
||||
/// HTTP 402 Payment Required — used by FMP to mark symbols (mainly ETFs,
|
||||
/// mutual funds, CUSIPs, and some dual-class shares) that aren't covered
|
||||
/// by the caller's current plan. Providers should translate this into
|
||||
/// "no data" rather than a hard failure.
|
||||
PaymentRequired,
|
||||
ServerError,
|
||||
InvalidResponse,
|
||||
};
|
||||
|
||||
pub const Response = struct {
|
||||
status: std.http.Status,
|
||||
|
|
@ -127,15 +163,26 @@ pub const Client = struct {
|
|||
}
|
||||
|
||||
pub fn request(self: *Client, method: std.http.Method, url: []const u8, body: ?[]const u8, extra_headers: []const std.http.Header) HttpError!Response {
|
||||
// Preserves the last underlying error across retry attempts.
|
||||
// Without this, the retry loop used to collapse every attempt's
|
||||
// distinct failure into a single `HttpError.RequestFailed`,
|
||||
// which is exactly the kind of opaque error that hides root
|
||||
// causes. A DNS-truncation bug surfaced for hours as bare
|
||||
// "RequestFailed" because the per-attempt error was discarded
|
||||
// by `catch {}`. Now the caller's `@errorName(err)` reports
|
||||
// the real cause (e.g., `NoAddressReturned`).
|
||||
var attempt: u8 = 0;
|
||||
var last_err: HttpError = HttpError.RequestFailed;
|
||||
while (true) : (attempt += 1) {
|
||||
const response = self.doRequest(method, url, body, extra_headers) catch {
|
||||
if (attempt >= self.max_retries) return HttpError.RequestFailed;
|
||||
const response = self.doRequest(method, url, body, extra_headers) catch |err| {
|
||||
last_err = err;
|
||||
if (attempt >= self.max_retries) return last_err;
|
||||
self.backoffSleep(attempt);
|
||||
continue;
|
||||
};
|
||||
return classifyResponse(response) catch |err| {
|
||||
if (err == HttpError.ServerError and attempt < self.max_retries) {
|
||||
last_err = err;
|
||||
self.backoffSleep(attempt);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -150,28 +197,97 @@ pub const Client = struct {
|
|||
}
|
||||
|
||||
fn doRequest(self: *Client, method: std.http.Method, url: []const u8, body: ?[]const u8, extra_headers: []const std.http.Header) HttpError!Response {
|
||||
const uri = std.Uri.parse(url) catch return HttpError.RequestFailed;
|
||||
// Per-stage timing for diagnosing where transport-level stalls
|
||||
// occur. Zig 0.16's `std.http.Client` exposes no per-request
|
||||
// receive timeout (only `connectTcpOptions.timeout`), so a
|
||||
// request can hang forever in `receiveHead` or `streamRemaining`
|
||||
// with no upstream visibility. The per-stage logging lets the
|
||||
// operator pinpoint exactly which network operation wedged
|
||||
// once they Ctrl-C, even though no `catch` will fire on a
|
||||
// truly infinite stall.
|
||||
//
|
||||
// Each stage that may fail logs the underlying stdlib error
|
||||
// name verbatim before propagating it through `HttpError`.
|
||||
// The error returned to the caller IS the underlying stdlib
|
||||
// error (`NoAddressReturned`, `ConnectionRefused`,
|
||||
// `EndOfStream`, `TlsInitializationFailed`, ...) — `HttpError`
|
||||
// is a merged superset of the relevant stdlib error sets, so
|
||||
// `try` here propagates the original error verbatim. Earlier
|
||||
// versions of this function caught every error and rethrew as
|
||||
// a single `HttpError.RequestFailed`, which collapsed six
|
||||
// distinct failure modes into one and made operator
|
||||
// diagnosis impossible (this cost hours of debugging at
|
||||
// least once when an intermittent DNS-truncation bug
|
||||
// surfaced as bare "RequestFailed").
|
||||
//
|
||||
// wall-clock required: per-stage transport timing. Uses
|
||||
// `.awake` (monotonic) so a system clock jump mid-request
|
||||
// doesn't produce nonsense elapsed values.
|
||||
const t_start = std.Io.Timestamp.now(self.io, .awake).nanoseconds;
|
||||
var t_stage = t_start;
|
||||
const stageElapsedMs = struct {
|
||||
fn f(prev: *i96, io: std.Io) i64 {
|
||||
const now = std.Io.Timestamp.now(io, .awake).nanoseconds;
|
||||
const delta_ns = now - prev.*;
|
||||
prev.* = now;
|
||||
return @intCast(@divTrunc(delta_ns, std.time.ns_per_ms));
|
||||
}
|
||||
}.f;
|
||||
|
||||
const uri = std.Uri.parse(url) catch |err| {
|
||||
log.warn("http {s}: stage=uri_parse err={s} url={s}", .{ @tagName(method), @errorName(err), url });
|
||||
return err;
|
||||
};
|
||||
const ms_uri_parse = stageElapsedMs(&t_stage, self.io);
|
||||
|
||||
var req = self.http_client.request(method, uri, .{
|
||||
.redirect_behavior = @enumFromInt(3),
|
||||
.extra_headers = extra_headers,
|
||||
}) catch return HttpError.RequestFailed;
|
||||
}) catch |err| {
|
||||
// The connect stage covers DNS lookup, TCP connect, and
|
||||
// TLS handshake. Logging at warn level (rather than debug)
|
||||
// because DNS / connectivity failures are exactly what
|
||||
// operators need to see immediately.
|
||||
log.warn("http {s}: stage=connect err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
defer req.deinit();
|
||||
const ms_connect = stageElapsedMs(&t_stage, self.io);
|
||||
|
||||
if (body) |payload| {
|
||||
var send_buf: [4096]u8 = undefined;
|
||||
req.transfer_encoding = .{ .content_length = payload.len };
|
||||
var bw = req.sendBodyUnflushed(&send_buf) catch return HttpError.RequestFailed;
|
||||
bw.writer.writeAll(payload) catch return HttpError.RequestFailed;
|
||||
bw.end() catch return HttpError.RequestFailed;
|
||||
req.connection.?.flush() catch return HttpError.RequestFailed;
|
||||
var bw = req.sendBodyUnflushed(&send_buf) catch |err| {
|
||||
log.warn("http {s}: stage=send_body_open err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
bw.writer.writeAll(payload) catch |err| {
|
||||
log.warn("http {s}: stage=send_body_write err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
bw.end() catch |err| {
|
||||
log.warn("http {s}: stage=send_body_end err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
req.connection.?.flush() catch |err| {
|
||||
log.warn("http {s}: stage=send_body_flush err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
} else {
|
||||
req.sendBodiless() catch return HttpError.RequestFailed;
|
||||
req.sendBodiless() catch |err| {
|
||||
log.warn("http {s}: stage=send_bodiless err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
}
|
||||
const ms_send = stageElapsedMs(&t_stage, self.io);
|
||||
|
||||
// Matches the default redirect capacity in std.http.Client.fetch.
|
||||
var redirect_buffer: [8 * 1024]u8 = undefined;
|
||||
var response = req.receiveHead(&redirect_buffer) catch return HttpError.RequestFailed;
|
||||
var response = req.receiveHead(&redirect_buffer) catch |err| {
|
||||
log.warn("http {s}: stage=receive_head err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
const ms_receive_head = stageElapsedMs(&t_stage, self.io);
|
||||
|
||||
// Capture the ETag (if any) from the response head BEFORE
|
||||
// draining the body. `Response.reader()` invalidates
|
||||
|
|
@ -181,8 +297,7 @@ pub const Client = struct {
|
|||
var it = response.head.iterateHeaders();
|
||||
while (it.next()) |h| {
|
||||
if (std.ascii.eqlIgnoreCase(h.name, "etag")) {
|
||||
const dup = self.allocator.dupe(u8, h.value) catch
|
||||
return HttpError.OutOfMemory;
|
||||
const dup = self.allocator.dupe(u8, h.value) catch |err| return err;
|
||||
break :blk dup;
|
||||
}
|
||||
}
|
||||
|
|
@ -205,13 +320,30 @@ pub const Client = struct {
|
|||
var decompress: std.http.Decompress = undefined;
|
||||
var decompress_buffer: [64 * 1024]u8 = undefined;
|
||||
const reader = response.readerDecompressing(&transfer_buffer, &decompress, &decompress_buffer);
|
||||
_ = reader.streamRemaining(&aw.writer) catch {
|
||||
return HttpError.RequestFailed;
|
||||
_ = reader.streamRemaining(&aw.writer) catch |err| {
|
||||
log.warn("http {s}: stage=stream_body err={s} elapsed_ms={d} url={s}", .{ @tagName(method), @errorName(err), stageElapsedMs(&t_stage, self.io), url });
|
||||
return err;
|
||||
};
|
||||
const ms_body = stageElapsedMs(&t_stage, self.io);
|
||||
|
||||
const resp_body = aw.toOwnedSlice() catch {
|
||||
return HttpError.OutOfMemory;
|
||||
};
|
||||
const resp_body = aw.toOwnedSlice() catch |err| return err;
|
||||
|
||||
const total_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
|
||||
log.debug(
|
||||
"http {s}: ok status={d} bytes={d} total_ms={d} (uri_parse={d} connect={d} send={d} receive_head={d} body={d}) url={s}",
|
||||
.{
|
||||
@tagName(method),
|
||||
@intFromEnum(response.head.status),
|
||||
resp_body.len,
|
||||
total_ms,
|
||||
ms_uri_parse,
|
||||
ms_connect,
|
||||
ms_send,
|
||||
ms_receive_head,
|
||||
ms_body,
|
||||
url,
|
||||
},
|
||||
);
|
||||
|
||||
return .{
|
||||
.status = response.head.status,
|
||||
|
|
|
|||
|
|
@ -1489,13 +1489,35 @@ pub const DataService = struct {
|
|||
/// One attempt at syncing a file from the server. Archives a torn
|
||||
/// body when detected but does NOT retry — the caller decides that.
|
||||
fn tryOneSync(self: *DataService, symbol: []const u8, data_type: cache.DataType, full_url: []const u8) SyncAttempt {
|
||||
log.debug("{s}: syncing {s} from server", .{ symbol, @tagName(data_type) });
|
||||
// Per-attempt start/finish trace. The "started" line emits
|
||||
// before any blocking call; the "finished" line emits on every
|
||||
// exit path. If a sync wedges in `client.get`, you'll see the
|
||||
// started line with no matching finished line — the missing
|
||||
// finished entries identify which symbols are stuck. Pair this
|
||||
// with the per-stage `http: stage=...` lines from `net/http.zig`
|
||||
// to pinpoint which transport stage stalled.
|
||||
//
|
||||
// wall-clock required: per-attempt elapsed for diagnosing
|
||||
// partial-success/stall patterns under parallel fan-out.
|
||||
// `.awake` (monotonic) avoids spurious negatives on clock skew.
|
||||
const t_start = std.Io.Timestamp.now(self.io, .awake).nanoseconds;
|
||||
log.debug("{s}: tryOneSync started ({s})", .{ symbol, @tagName(data_type) });
|
||||
|
||||
var client = http.Client.init(self.io, self.allocator);
|
||||
defer client.deinit();
|
||||
|
||||
var response = client.get(full_url) catch |err| {
|
||||
log.debug("{s}: server sync failed for {s}: {s}", .{ symbol, @tagName(data_type), @errorName(err) });
|
||||
const elapsed_ms = @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms);
|
||||
// Operator-visible: surfaces meaningful failures
|
||||
// (`NoAddressReturned`, `ConnectionRefused`,
|
||||
// `TlsInitializationFailed`, etc.) instead of swallowing
|
||||
// them. Network-shaped errors are exactly what the user
|
||||
// needs to see when sync stops working — keeping this at
|
||||
// debug level meant a DNS-truncation bug was visible only
|
||||
// to anyone running with debug logging on, which cost
|
||||
// hours of diagnosis time.
|
||||
log.warn("{s}: server sync failed for {s}: {s} (elapsed_ms={d})", .{ symbol, @tagName(data_type), @errorName(err), elapsed_ms });
|
||||
log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, @tagName(data_type), elapsed_ms });
|
||||
return .net_err;
|
||||
};
|
||||
defer response.deinit();
|
||||
|
|
@ -1535,6 +1557,7 @@ pub const DataService = struct {
|
|||
"{s}: {s} server response failed integrity check ({d} bytes, expected sha256={s}, actual={s}) — archived under _torn/, not writing to cache",
|
||||
.{ symbol, @tagName(data_type), response.body.len, m.expected_hex, m.actual_hex },
|
||||
);
|
||||
log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, @tagName(data_type), @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms) });
|
||||
return .torn;
|
||||
},
|
||||
.ok, .not_applicable => {},
|
||||
|
|
@ -1578,6 +1601,7 @@ pub const DataService = struct {
|
|||
"{s}: rejecting torn {s} server response ({d} bytes) — archived under _torn/, not writing to cache",
|
||||
.{ symbol, @tagName(data_type), response.body.len },
|
||||
);
|
||||
log.debug("{s}: tryOneSync finished ({s}) result=torn elapsed_ms={d}", .{ symbol, @tagName(data_type), @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms) });
|
||||
return .torn;
|
||||
}
|
||||
|
||||
|
|
@ -1585,9 +1609,11 @@ pub const DataService = struct {
|
|||
var s = self.store();
|
||||
s.writeRaw(symbol, data_type, response.body) catch |err| {
|
||||
log.debug("{s}: failed to write synced {s} to cache: {s}", .{ symbol, @tagName(data_type), @errorName(err) });
|
||||
log.debug("{s}: tryOneSync finished ({s}) result=net_err elapsed_ms={d}", .{ symbol, @tagName(data_type), @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms) });
|
||||
return .net_err;
|
||||
};
|
||||
log.debug("{s}: synced {s} from server ({d} bytes)", .{ symbol, @tagName(data_type), response.body.len });
|
||||
log.debug("{s}: tryOneSync finished ({s}) result=ok elapsed_ms={d}", .{ symbol, @tagName(data_type), @divTrunc(std.Io.Timestamp.now(self.io, .awake).nanoseconds - t_start, std.time.ns_per_ms) });
|
||||
return .ok;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue