update enrich to output names

This commit is contained in:
Emil Lerch 2026-06-10 11:09:57 -07:00
parent 14f55afb28
commit cd6e22f5ba
Signed by: lobo
GPG key ID: A7B62D657EF764F8
2 changed files with 125 additions and 22 deletions

View file

@ -1,9 +1,11 @@
const std = @import("std");
const srf = @import("srf");
const zfin = @import("../root.zig");
const cli = @import("common.zig");
const framework = @import("framework.zig");
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
const ClassificationRecord = zfin.classification.ClassificationRecord;
const ClassificationEntry = zfin.classification.ClassificationEntry;
pub const ParsedArgs = struct {
/// Optional symbol (e.g. "AAPL"). Null = portfolio mode (uses
@ -356,9 +358,7 @@ fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService
if (c.name) |name| {
try out.print("# {s}\n", .{name});
}
try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
sym, derived.sector, derived.geo, derived.asset_class,
});
try emitRecordLine(out, sym, c.name, derived.sector, derived.geo, derived.asset_class, null);
}
stderrSymbolProvenance(io, sym, kindFromSource(c.source), null);
@ -405,7 +405,7 @@ fn emitEtfRows(
} else {
try out.print("# {s}\n", .{sym});
}
try emitFundLines(sym, asset_class, sectors, c.sector, geo, out);
try emitFundLines(sym, c.name, asset_class, sectors, c.sector, geo, out);
}
/// Wikidata didn't return a classification for `sym` (either the
@ -478,6 +478,7 @@ fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool {
/// available.
fn emitFundLines(
sym: []const u8,
name: ?[]const u8,
asset_class: []const u8,
fund_sectors: ?[]const FundSector,
inferred_sector: ?[]const u8,
@ -501,10 +502,7 @@ fn emitFundLines(
inferred_sector.?
else
s.description;
try out.print(
"symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n",
.{ sym, sector_str, geo_str, asset_class, s.pct },
);
try emitRecordLine(out, sym, name, sector_str, geo_str, asset_class, s.pct);
}
return;
}
@ -513,7 +511,42 @@ fn emitFundLines(
// one TODO line but if title-keyword inference returned
// a sector, use it instead of "TODO".
const sector_str = inferred_sector orelse "TODO";
try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, sector_str, geo_str, asset_class });
try emitRecordLine(out, sym, name, sector_str, geo_str, asset_class, null);
}
/// Write a single classification record for `sym` to `out`.
///
/// The row is rendered by `srf.fmt` rather than by a hand-written
/// format string, so field order, escaping, and the omission of
/// null or default-valued fields are whatever the SRF formatter
/// does for `ClassificationEntry`. The tests in this file pin the
/// observable result: a null `name` produces no `name::` field,
/// and a null `pct` produces no `pct:num:` field.
///
/// `emit_directives = false` is passed so the formatter does not
/// write its directive header for every record; callers interleave
/// these rows with their own `#` comment lines.
///
/// Returns any error raised while writing to `out`.
fn emitRecordLine(
    out: *std.Io.Writer,
    sym: []const u8,
    name: ?[]const u8,
    sector: []const u8,
    geo: []const u8,
    asset_class: []const u8,
    pct: ?f64,
) !void {
    // A missing percentage is stored as 100.0. Per the tests in
    // this file, a row built this way is emitted with no
    // `pct:num:` field at all.
    const effective_pct: f64 = if (pct) |p| p else 100.0;

    // The formatter is handed a one-element array holding just
    // this record.
    const records = [_]ClassificationEntry{.{
        .symbol = sym,
        .name = name,
        .sector = sector,
        .geo = geo,
        .asset_class = asset_class,
        .pct = effective_pct,
    }};

    try out.print("{f}", .{srf.fmt(ClassificationEntry, &records, .{ .emit_directives = false })});
}
/// What `getEtfMetrics` provides that `enrich` actually uses:
@ -743,9 +776,8 @@ fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void {
if (c.name) |name| {
try out.print("# {s}\n", .{name});
}
try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
sym, derived.sector, derived.geo, derived.asset_class,
});
try emitRecordLine(out, sym, c.name, derived.sector, derived.geo, derived.asset_class, null);
try out.print("\n", .{});
}
switch (kindFromSource(c.source)) {
@ -1209,13 +1241,56 @@ test "hasDominantEquitySector: null and empty -> false" {
test "emitFundLines: null sectors -> single TODO line" {
var out_buf: [256]u8 = undefined;
var out: std.Io.Writer = .fixed(&out_buf);
try emitFundLines("VTI", "ETF", null, null, null, &out);
try emitFundLines("VTI", null, "ETF", null, null, null, &out);
try std.testing.expectEqualStrings(
"symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n",
out.buffered(),
);
}
test "emitFundLines: name field is emitted as `name::Foo` between symbol and sector" {
    // One populated sector, so exactly one record line is expected.
    const fund_sectors = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.5 },
    };

    var storage: [512]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);
    try emitFundLines("SPY", "SPDR S&P 500 ETF Trust", "ETF", fund_sectors[0..], null, null, &writer);

    // `name::` must sit directly after `symbol::` and before `sector::`.
    const expected = "symbol::SPY,name::SPDR S&P 500 ETF Trust,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.5\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
test "emitRecordLine: name=null and pct=null produces shortest form" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // With both optional inputs absent, only the four mandatory
    // fields should appear on the line.
    try emitRecordLine(&writer, "AAPL", null, "Technology", "US", "US Large Cap", null);

    const expected = "symbol::AAPL,sector::Technology,geo::US,asset_class::US Large Cap\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
test "emitRecordLine: name set, pct null" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // A name is supplied but no percentage: `name::` appears,
    // `pct:num:` does not.
    try emitRecordLine(&writer, "AAPL", "Apple Inc", "Technology", "US", "US Large Cap", null);

    const expected = "symbol::AAPL,name::Apple Inc,sector::Technology,geo::US,asset_class::US Large Cap\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
test "emitRecordLine: name set, pct set (multi-class fund row)" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // Every field is populated, including a non-default percentage,
    // so the full-length row is expected.
    try emitRecordLine(&writer, "FAGIX", "Fidelity Capital and Income Fund", "Debt / Corporate", "US", "Fund", 47.69);

    const expected = "symbol::FAGIX,name::Fidelity Capital and Income Fund,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
test "emitFundLines: populated sectors -> one line per sector with pct" {
var out_buf: [512]u8 = undefined;
var out: std.Io.Writer = .fixed(&out_buf);
@ -1223,7 +1298,7 @@ test "emitFundLines: populated sectors -> one line per sector with pct" {
.{ .description = "Debt / Corporate", .pct = 47.69 },
.{ .description = "Equity / Corporate", .pct = 22.49 },
};
try emitFundLines("FAGIX", "Fund", sectors[0..], null, null, &out);
try emitFundLines("FAGIX", null, "Fund", sectors[0..], null, null, &out);
const written = out.buffered();
try std.testing.expect(std.mem.indexOf(u8, written, "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69") != null);
@ -1235,7 +1310,7 @@ test "emitFundLines: empty slice -> single TODO line (treats empty as null)" {
var out_buf: [256]u8 = undefined;
var out: std.Io.Writer = .fixed(&out_buf);
const empty: [0]FundSector = .{};
try emitFundLines("VTI", "ETF", empty[0..], null, null, &out);
try emitFundLines("VTI", null, "ETF", empty[0..], null, null, &out);
try std.testing.expectEqualStrings(
"symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n",
out.buffered(),
@ -1251,7 +1326,7 @@ test "emitFundLines: negative pct values render correctly" {
.{ .description = "Repurchase Agreement / Other", .pct = -29.72 },
.{ .description = "Derivative-FX / Other", .pct = -0.84 },
};
try emitFundLines("PTY", "Fund", sectors[0..], null, null, &out);
try emitFundLines("PTY", null, "Fund", sectors[0..], null, null, &out);
const written = out.buffered();
try std.testing.expect(std.mem.indexOf(u8, written, "pct:num:-29.72") != null);
@ -1264,7 +1339,7 @@ test "emitFundLines: ETF asset_class flows through" {
const sectors = [_]FundSector{
.{ .description = "Equity / Corporate", .pct = 99.86 },
};
try emitFundLines("SOXX", "ETF", sectors[0..], null, null, &out);
try emitFundLines("SOXX", null, "ETF", sectors[0..], null, null, &out);
try std.testing.expectEqualStrings(
"symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n",
out.buffered(),

View file

@ -15,6 +15,11 @@ const srf = @import("srf");
/// A single classification entry for a symbol.
pub const ClassificationEntry = struct {
symbol: []const u8,
/// Human-readable security name (e.g., "Amazon", "SPDR S&P 500
/// ETF Trust"). Optional — older metadata.srf files may not
/// have this field. Renderers fall back to `symbol` /
/// `display_symbol` when null.
name: ?[]const u8 = null,
/// Sector (e.g., "Technology", "Healthcare", "Financials")
sector: ?[]const u8 = null,
/// Geographic region (e.g., "US", "International Developed", "Emerging Markets")
@ -33,6 +38,7 @@ pub const ClassificationMap = struct {
pub fn deinit(self: *ClassificationMap) void {
for (self.entries) |e| {
self.allocator.free(e.symbol);
if (e.name) |n| self.allocator.free(n);
if (e.sector) |s| self.allocator.free(s);
if (e.geo) |g| self.allocator.free(g);
if (e.asset_class) |a| self.allocator.free(a);
@ -42,13 +48,14 @@ pub const ClassificationMap = struct {
};
/// Parse a metadata SRF file into a ClassificationMap.
/// Each record has: symbol::<SYM>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
/// Each record has: symbol::<SYM>,name::<N>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
/// All fields except symbol are optional. pct defaults to 100.
pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap {
var entries = std.ArrayList(ClassificationEntry).empty;
errdefer {
for (entries.items) |e| {
allocator.free(e.symbol);
if (e.name) |n| allocator.free(n);
if (e.sector) |s| allocator.free(s);
if (e.geo) |g| allocator.free(g);
if (e.asset_class) |a| allocator.free(a);
@ -64,6 +71,7 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
const entry = fields.to(ClassificationEntry, .{}) catch continue;
try entries.append(allocator, .{
.symbol = try allocator.dupe(u8, entry.symbol),
.name = if (entry.name) |n| try allocator.dupe(u8, n) else null,
.sector = if (entry.sector) |s| try allocator.dupe(u8, s) else null,
.geo = if (entry.geo) |g| try allocator.dupe(u8, g) else null,
.asset_class = if (entry.asset_class) |a| try allocator.dupe(u8, a) else null,
@ -81,12 +89,12 @@ test "parse classification file" {
const data =
\\#!srfv1
\\# Stock: single sector
\\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap
\\symbol::AMZN,name::Amazon,sector::Technology,geo::US,asset_class::US Large Cap
\\
\\# Target date fund: blended
\\symbol::TGT2035,asset_class::US Large Cap,pct:num:55
\\symbol::TGT2035,asset_class::Bonds,pct:num:15
\\symbol::TGT2035,asset_class::International Developed,pct:num:20
\\symbol::TGT2035,name::Target Retirement 2035,asset_class::US Large Cap,pct:num:55
\\symbol::TGT2035,name::Target Retirement 2035,asset_class::Bonds,pct:num:15
\\symbol::TGT2035,name::Target Retirement 2035,asset_class::International Developed,pct:num:20
;
const allocator = std.testing.allocator;
var cm = try parseClassificationFile(allocator, data);
@ -94,15 +102,35 @@ test "parse classification file" {
try std.testing.expectEqual(@as(usize, 4), cm.entries.len);
try std.testing.expectEqualStrings("AMZN", cm.entries[0].symbol);
try std.testing.expectEqualStrings("Amazon", cm.entries[0].name.?);
try std.testing.expectEqualStrings("Technology", cm.entries[0].sector.?);
try std.testing.expectEqualStrings("US", cm.entries[0].geo.?);
try std.testing.expectApproxEqAbs(@as(f64, 100.0), cm.entries[0].pct, 0.01);
try std.testing.expectEqualStrings("TGT2035", cm.entries[1].symbol);
try std.testing.expectEqualStrings("Target Retirement 2035", cm.entries[1].name.?);
try std.testing.expectEqualStrings("US Large Cap", cm.entries[1].asset_class.?);
try std.testing.expectApproxEqAbs(@as(f64, 55.0), cm.entries[1].pct, 0.01);
}
test "parse classification file: missing name field stays null (backwards compat)" {
    // Metadata files written before the `name::` field existed
    // carry no name. They must still parse, leaving `name` null so
    // consumers can fall back to symbol / display_symbol.
    const legacy_srf =
        \\#!srfv1
        \\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap
    ;

    var parsed = try parseClassificationFile(std.testing.allocator, legacy_srf);
    defer parsed.deinit();

    // Check the count first so indexing below cannot go out of bounds.
    try std.testing.expectEqual(@as(usize, 1), parsed.entries.len);

    const only = parsed.entries[0];
    try std.testing.expectEqualStrings("AMZN", only.symbol);
    try std.testing.expect(only.name == null);
    try std.testing.expectEqualStrings("Technology", only.sector.?);
}
// ClassificationRecord
//
// Distinct from `ClassificationEntry` above: that one represents