zfin/src/commands/enrich.zig

283 lines
11 KiB
Zig

const std = @import("std");
const zfin = @import("../root.zig");
const cli = @import("common.zig");
const framework = @import("framework.zig");
const fmt = @import("../format.zig");
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
/// Parsed command-line arguments for `enrich`: exactly one positional
/// argument, as enforced by `parseArgs`.
pub const ParsedArgs = struct {
/// Either a symbol (e.g. "AAPL") or a path to a portfolio file
/// (e.g. "portfolio.srf"). Distinguished by suffix / path-separator
/// heuristic at run time so the user can pass either form.
/// The slice is taken directly from the argument list given to
/// `parseArgs`; it is not copied.
arg: []const u8,
};
/// Command descriptor for `enrich`: name, group, one-line synopsis and the
/// full help text shown to the user.
/// NOTE(review): the help text describes file mode as "path or `*.srf`
/// suffix", but `run` also selects file mode for any argument containing
/// a '.' — confirm which of the two is intended.
pub const meta: framework.Meta = .{
.name = "enrich",
.group = .hygiene,
.synopsis = "Bootstrap metadata.srf from Alpha Vantage (25 req/day limit)",
.help =
\\Usage: zfin enrich <FILE|SYMBOL>
\\
\\Bootstrap a `metadata.srf` classification file from Alpha
\\Vantage's OVERVIEW endpoint. Two modes:
\\
\\ - File mode (path or `*.srf` suffix): enrich every stock
\\ symbol in the portfolio. Output is a complete SRF file
\\ written to stdout — redirect into metadata.srf and
\\ edit by hand for accuracy.
\\ - Symbol mode (anything else): enrich a single symbol and
\\ emit one appendable SRF line. Useful for adding to an
\\ existing metadata.srf without rerunning the whole file.
\\
\\Caveats: Alpha Vantage's free tier is 25 requests/day. The
\\OVERVIEW data is US-domicile-biased — international ETFs
\\classify as `geo::US`. Always review the output before
\\saving as `metadata.srf`. Requires ALPHAVANTAGE_API_KEY.
\\
\\Examples:
\\ zfin enrich portfolio.srf > metadata.srf # whole portfolio
\\ zfin enrich AAPL >> metadata.srf # single symbol append
\\
,
// Left off because the single argument may be a file path rather than a
// symbol (see `ParsedArgs.arg`).
// NOTE(review): presumably the framework would otherwise uppercase the
// first argument — confirm in framework.zig. Symbol mode therefore
// receives the argument exactly as typed.
.uppercase_first_arg = false,
};
// Compile-time hook: hands this module's type to
// `framework.validateCommandModule`.
// NOTE(review): presumably this checks that the module exposes the
// declarations the framework expects (`meta`, `parseArgs`, `run`) — confirm
// in framework.zig.
comptime {
framework.validateCommandModule(@This());
}
/// Validate the positional arguments for `enrich` and wrap the single
/// accepted argument in a `ParsedArgs`.
///
/// Exactly one argument is required:
///   - none  -> message on stderr, `error.MissingArg`
///   - two+  -> message on stderr, `error.UnexpectedArg`
///
/// The returned `arg` is `cmd_args[0]` itself (no copy is made). Errors from
/// `cli.stderrPrint` are propagated.
pub fn parseArgs(ctx: *framework.RunCtx, cmd_args: []const []const u8) !ParsedArgs {
    switch (cmd_args.len) {
        0 => {
            try cli.stderrPrint(ctx.io, "Error: 'enrich' requires a portfolio file path or symbol\n");
            return error.MissingArg;
        },
        1 => return .{ .arg = cmd_args[0] },
        else => {
            try cli.stderrPrint(ctx.io, "Error: 'enrich' takes a single argument (file path or symbol)\n");
            return error.UnexpectedArg;
        },
    }
}
/// Classification fields produced by `deriveMetadata` and printed as the
/// `sector::`, `geo::` and `asset_class::` values of an SRF line.
const OverviewMeta = struct {
/// Result of `cli.fmt.toTitleCase` applied to the overview's sector
/// ("Unknown" is used as input when the overview has no sector).
sector: []const u8,
/// The overview's country, with "USA" rewritten to "US"; "US" when the
/// overview has no country.
geo: []const u8,
/// One of "ETF", "Mutual Fund", "US Large Cap", "US Mid Cap" or
/// "US Small Cap".
asset_class: []const u8,
};
/// Map an Alpha Vantage company overview onto the three classification
/// fields written to metadata.srf.
///
///   - sector: the overview's sector ("Unknown" if absent) passed through
///     `cli.fmt.toTitleCase` together with `sector_buf`.
///   - geo: the overview's country ("US" if absent); "USA" becomes "US".
///   - asset_class: "ETF" / "Mutual Fund" when `asset_type` says so,
///     otherwise a market-cap tier: >= 10_000_000_000 is "US Large Cap",
///     >= 2_000_000_000 is "US Mid Cap", anything lower is "US Small Cap".
///     A market cap that does not parse as an integer counts as 0 (so
///     "US Small Cap"); a missing market cap yields "US Large Cap".
///
/// `geo` can be the overview's own `country` slice, so the result must be
/// consumed before the overview's strings are freed.
/// NOTE(review): `sector` presumably points into `sector_buf` — confirm
/// against `toTitleCase`.
fn deriveMetadata(overview: zfin.CompanyOverview, sector_buf: []u8) OverviewMeta {
    const country = overview.country orelse "US";

    const asset_class: []const u8 = class: {
        // Fund types take precedence over any market-cap tiering.
        if (overview.asset_type) |kind| {
            if (std.mem.eql(u8, kind, "ETF")) break :class "ETF";
            if (std.mem.eql(u8, kind, "Mutual Fund")) break :class "Mutual Fund";
        }

        const cap_text = overview.market_cap orelse break :class "US Large Cap";
        const cap = std.fmt.parseInt(u64, cap_text, 10) catch 0;
        break :class switch (cap) {
            0...1_999_999_999 => "US Small Cap",
            2_000_000_000...9_999_999_999 => "US Mid Cap",
            else => "US Large Cap",
        };
    };

    return .{
        .sector = cli.fmt.toTitleCase(sector_buf, overview.sector orelse "Unknown"),
        .geo = if (std.mem.eql(u8, country, "USA")) "US" else country,
        .asset_class = asset_class,
    };
}
/// CLI `enrich` entry point: pick file mode or symbol mode from the shape of
/// the argument and delegate.
///
/// Requires `ctx.svc`; returns `error.MissingDataService` when it is null.
///
/// Mode selection: an argument containing '/' or '.' is treated as a
/// portfolio file and handed to `enrichPortfolio`; anything else is treated
/// as a single symbol and handed to `enrichSymbol`. (A `.srf` suffix is
/// covered by the '.' rule.)
/// NOTE(review): dotted tickers such as "BRK.B" therefore land in file mode,
/// and '\\' is not recognized as a path separator — confirm this is intended.
pub fn run(ctx: *framework.RunCtx, parsed: ParsedArgs) !void {
    const svc = ctx.svc orelse return error.MissingDataService;
    const target = parsed.arg;

    const looks_like_path = std.mem.indexOfAny(u8, target, "/.") != null;

    if (looks_like_path) {
        // Portfolio file mode: full SRF document covering every symbol.
        return enrichPortfolio(ctx.io, ctx.allocator, svc, target, ctx.today, ctx.out);
    }

    // Single symbol mode: appendable SRF output without a header.
    return enrichSymbol(ctx.io, ctx.allocator, svc, target, ctx.out);
}
/// Fetch the company overview for one symbol and write appendable SRF output
/// (no file header) to `out`.
///
/// A progress line goes to stderr first. Outcomes:
///   - success: an optional `# <name>` comment followed by one
///     `symbol::...` line.
///   - missing API key: message on stderr, nothing written to `out`,
///     returns without error.
///   - any other fetch failure: message on stderr plus a commented TODO stub
///     on `out`, returns without error.
///
/// Errors from writing to stderr or `out` are propagated.
/// NOTE(review): the overview's strings are released with `allocator`, which
/// assumes the data service allocated them with the same allocator — confirm.
fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, sym: []const u8, out: *std.Io.Writer) !void {
    // Falls back to a bare ellipsis when the symbol does not fit the buffer.
    var progress_buf: [128]u8 = undefined;
    const progress = std.fmt.bufPrint(&progress_buf, " Fetching {s}...\n", .{sym}) catch " ...\n";
    try cli.stderrPrint(io, progress);

    const overview = svc.getCompanyOverview(sym) catch |err| {
        const missing_key = err == zfin.DataError.NoApiKey;
        if (!missing_key) {
            try cli.stderrPrint(io, "Error: Failed to fetch data for symbol\n");
            try out.print("# {s} -- fetch failed\n", .{sym});
            try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n", .{sym});
        } else {
            try cli.stderrPrint(io, "Error: ALPHAVANTAGE_API_KEY not set. Add it to .env\n");
        }
        return;
    };
    defer {
        if (overview.name) |text| allocator.free(text);
        if (overview.sector) |text| allocator.free(text);
        if (overview.industry) |text| allocator.free(text);
        if (overview.country) |text| allocator.free(text);
        if (overview.market_cap) |text| allocator.free(text);
        if (overview.asset_type) |text| allocator.free(text);
    }

    var sector_storage: [64]u8 = undefined;
    const classification = deriveMetadata(overview, &sector_storage);

    // Company name as a leading comment, for readability of the output.
    if (overview.name) |company| try out.print("# {s}\n", .{company});

    try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
        sym,
        classification.sector,
        classification.geo,
        classification.asset_class,
    });
}
/// Enrich every stock symbol of the portfolio at `file_path` and write a
/// complete metadata SRF document to `out`.
///
/// Output layout: a commented header, one entry per symbol returned by
/// `portfolio.stockSymbols`, then a commented summary with counts.
///   - CUSIP-like symbols are not fetched; a commented TODO stub is written,
///     annotated with the matching position's note when it has one.
///   - A failed fetch writes a commented TODO stub and processing continues.
///   - A missing API key stops the run with an explicit message on stderr
///     (the same wording `enrichSymbol` uses), because every remaining fetch
///     would fail the same way. Whatever was already written to `out` stays;
///     no summary is written in that case.
///
/// Progress lines go to stderr. An unreadable or unparsable portfolio file is
/// reported on stderr and the function returns without error. Failures while
/// writing to stderr or `out`, and errors from the portfolio queries, are
/// propagated.
/// NOTE(review): the overview's strings are released with `allocator`, which
/// assumes the data service allocated them with the same allocator — confirm.
fn enrichPortfolio(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService, file_path: []const u8, as_of: zfin.Date, out: *std.Io.Writer) !void {
    // Load the portfolio file (capped at 10 MiB).
    const file_data = std.Io.Dir.cwd().readFileAlloc(io, file_path, allocator, .limited(10 * 1024 * 1024)) catch {
        try cli.stderrPrint(io, "Error: Cannot read portfolio file\n");
        return;
    };
    defer allocator.free(file_data);

    var portfolio = zfin.cache.deserializePortfolio(allocator, file_data) catch {
        try cli.stderrPrint(io, "Error: Cannot parse portfolio file\n");
        return;
    };
    defer portfolio.deinit();

    // Positions are consulted only for the notes attached to CUSIP holdings.
    const positions = try portfolio.positions(as_of, allocator);
    defer allocator.free(positions);

    // Symbols to enrich, one output entry each.
    const syms = try portfolio.stockSymbols(allocator);
    defer allocator.free(syms);

    try out.print("#!srfv1\n", .{});
    try out.print("# Portfolio classification metadata\n", .{});
    try out.print("# Generated from Alpha Vantage OVERVIEW data\n", .{});
    try out.print("# Edit as needed: sector, geo, asset_class, pct:num:N\n", .{});
    try out.print("#\n", .{});
    try out.print("# For ETFs/funds with multi-class exposure, add multiple lines\n", .{});
    try out.print("# with pct:num: values that sum to ~100\n\n", .{});

    var success: usize = 0;
    var skipped: usize = 0;
    var failed: usize = 0;

    for (syms, 0..) |sym, i| {
        // CUSIPs are not looked up; leave a stub for manual completion.
        if (isCusipLike(sym)) {
            // Note of the first position carrying this symbol, if any.
            const note: ?[]const u8 = for (positions) |pos| {
                if (std.mem.eql(u8, pos.symbol, sym)) {
                    if (pos.note) |text| break text;
                    break null;
                }
            } else null;

            try out.print("# CUSIP {s}", .{sym});
            if (note) |n| try out.print(" ({s})", .{n});
            try out.print(" -- fill in manually\n", .{});
            try out.print("# symbol::{s},asset_class::TODO,geo::TODO\n\n", .{sym});
            skipped += 1;
            continue;
        }

        // Progress to stderr; falls back to a bare ellipsis when the line
        // does not fit the buffer.
        {
            var msg_buf: [128]u8 = undefined;
            const msg = std.fmt.bufPrint(&msg_buf, " [{d}/{d}] {s}...\n", .{ i + 1, syms.len, sym }) catch " ...\n";
            try cli.stderrPrint(io, msg);
        }

        const overview = svc.getCompanyOverview(sym) catch |err| {
            // A missing key is a configuration problem, not a per-symbol
            // failure: say so once and stop instead of emitting a
            // "fetch failed" stub for every remaining symbol.
            if (err == zfin.DataError.NoApiKey) {
                try cli.stderrPrint(io, "Error: ALPHAVANTAGE_API_KEY not set. Add it to .env\n");
                return;
            }
            try out.print("# {s} -- fetch failed\n", .{sym});
            try out.print("# symbol::{s},sector::TODO,geo::TODO,asset_class::TODO\n\n", .{sym});
            failed += 1;
            continue;
        };
        // Runs at the end of each iteration, after the entry is written.
        defer {
            if (overview.name) |n| allocator.free(n);
            if (overview.sector) |s| allocator.free(s);
            if (overview.industry) |ind| allocator.free(ind);
            if (overview.country) |c| allocator.free(c);
            if (overview.market_cap) |mc| allocator.free(mc);
            if (overview.asset_type) |at| allocator.free(at);
        }

        var sector_buf: [64]u8 = undefined;
        const derived = deriveMetadata(overview, &sector_buf);

        // Company name as a leading comment, for readability of the output.
        if (overview.name) |name| {
            try out.print("# {s}\n", .{name});
        }
        try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
            sym, derived.sector, derived.geo, derived.asset_class,
        });
        success += 1;
    }

    // Summary comment
    try out.print("# ---\n", .{});
    try out.print("# Enriched {d} symbols ({d} success, {d} skipped, {d} failed)\n", .{
        syms.len, success, skipped, failed,
    });
    try out.print("# Review and edit this file, then save as metadata.srf\n", .{});
}
// ── Tests ────────────────────────────────────────────────────
test "parseArgs: accepts a symbol argument" {
    // `parseArgs` only reads `ctx.io`, so the rest of the context can stay
    // undefined.
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const result = try parseArgs(&run_ctx, &[_][]const u8{"AAPL"});
    try std.testing.expectEqualStrings("AAPL", result.arg);
}
test "parseArgs: accepts a file path argument" {
    // `parseArgs` only reads `ctx.io`, so the rest of the context can stay
    // undefined.
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    // The argument is passed through untouched; mode detection happens in `run`.
    const result = try parseArgs(&run_ctx, &[_][]const u8{"portfolio.srf"});
    try std.testing.expectEqualStrings("portfolio.srf", result.arg);
}
test "parseArgs: missing arg errors" {
    // `io` must be set: the error path prints to stderr through it.
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const outcome = parseArgs(&run_ctx, &[_][]const u8{});
    try std.testing.expectError(error.MissingArg, outcome);
}
test "parseArgs: extra args error" {
    // `io` must be set: the error path prints to stderr through it.
    var run_ctx: framework.RunCtx = undefined;
    run_ctx.io = std.testing.io;

    const outcome = parseArgs(&run_ctx, &[_][]const u8{ "AAPL", "extra" });
    try std.testing.expectError(error.UnexpectedArg, outcome);
}