From cd6e22f5ba2cbdc2251244e79159ae9ca55bf06b Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Wed, 10 Jun 2026 11:09:57 -0700 Subject: [PATCH] update enrich to output names --- src/commands/enrich.zig | 109 ++++++++++++++++++++++++++++------ src/models/classification.zig | 38 ++++++++++-- 2 files changed, 125 insertions(+), 22 deletions(-) diff --git a/src/commands/enrich.zig b/src/commands/enrich.zig index 10ea8e8..6d726a9 100644 --- a/src/commands/enrich.zig +++ b/src/commands/enrich.zig @@ -1,9 +1,11 @@ const std = @import("std"); +const srf = @import("srf"); const zfin = @import("../root.zig"); const cli = @import("common.zig"); const framework = @import("framework.zig"); const isCusipLike = @import("../models/portfolio.zig").isCusipLike; const ClassificationRecord = zfin.classification.ClassificationRecord; +const ClassificationEntry = zfin.classification.ClassificationEntry; pub const ParsedArgs = struct { /// Optional symbol (e.g. "AAPL"). Null = portfolio mode (uses @@ -356,9 +358,7 @@ fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService if (c.name) |name| { try out.print("# {s}\n", .{name}); } - try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ - sym, derived.sector, derived.geo, derived.asset_class, - }); + try emitRecordLine(out, sym, c.name, derived.sector, derived.geo, derived.asset_class, null); } stderrSymbolProvenance(io, sym, kindFromSource(c.source), null); @@ -405,7 +405,7 @@ fn emitEtfRows( } else { try out.print("# {s}\n", .{sym}); } - try emitFundLines(sym, asset_class, sectors, c.sector, geo, out); + try emitFundLines(sym, c.name, asset_class, sectors, c.sector, geo, out); } /// Wikidata didn't return a classification for `sym` (either the @@ -478,6 +478,7 @@ fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool { /// available. 
fn emitFundLines( sym: []const u8, + name: ?[]const u8, asset_class: []const u8, fund_sectors: ?[]const FundSector, inferred_sector: ?[]const u8, @@ -501,10 +502,7 @@ fn emitFundLines( inferred_sector.? else s.description; - try out.print( - "symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n", - .{ sym, sector_str, geo_str, asset_class, s.pct }, - ); + try emitRecordLine(out, sym, name, sector_str, geo_str, asset_class, s.pct); } return; } @@ -513,7 +511,42 @@ fn emitFundLines( // one TODO line — but if title-keyword inference returned // a sector, use it instead of "TODO". const sector_str = inferred_sector orelse "TODO"; - try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, sector_str, geo_str, asset_class }); + try emitRecordLine(out, sym, name, sector_str, geo_str, asset_class, null); +} + +/// Emit one classification record line. Delegates to the SRF +/// library's writer-side formatter — that handles field ordering +/// (driven by `ClassificationEntry`'s field declaration order), +/// escaping for values containing commas/newlines, and default- +/// value elision (e.g. an entry with `pct = 100.0` omits the +/// `pct:num:` field; null-valued optional fields are omitted +/// entirely). +/// +/// `emit_directives = false` suppresses the `#!srfv1` header so +/// this can be called per-record interspersed with the comment +/// scaffold the enrich output uses. +fn emitRecordLine( + out: *std.Io.Writer, + sym: []const u8, + name: ?[]const u8, + sector: []const u8, + geo: []const u8, + asset_class: []const u8, + pct: ?f64, +) !void { + const entry: ClassificationEntry = .{ + .symbol = sym, + .name = name, + .sector = sector, + .geo = geo, + .asset_class = asset_class, + // The default is 100.0; setting it explicitly here + // (not via `if (pct) |p| p else 100.0`) so the formatter's + // default-elision rule keeps single-class rows lean. 
+ .pct = pct orelse 100.0, + }; + const items = [_]ClassificationEntry{entry}; + try out.print("{f}", .{srf.fmt(ClassificationEntry, &items, .{ .emit_directives = false })}); } /// What `getEtfMetrics` provides that `enrich` actually uses: @@ -743,9 +776,8 @@ fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void { if (c.name) |name| { try out.print("# {s}\n", .{name}); } - try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{ - sym, derived.sector, derived.geo, derived.asset_class, - }); + try emitRecordLine(out, sym, c.name, derived.sector, derived.geo, derived.asset_class, null); + try out.print("\n", .{}); } switch (kindFromSource(c.source)) { @@ -1209,13 +1241,56 @@ test "hasDominantEquitySector: null and empty -> false" { test "emitFundLines: null sectors -> single TODO line" { var out_buf: [256]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); - try emitFundLines("VTI", "ETF", null, null, null, &out); + try emitFundLines("VTI", null, "ETF", null, null, null, &out); try std.testing.expectEqualStrings( "symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n", out.buffered(), ); } +test "emitFundLines: name field is emitted as `name::Foo` between symbol and sector" { + var out_buf: [512]u8 = undefined; + var out: std.Io.Writer = .fixed(&out_buf); + const sectors = [_]FundSector{ + .{ .description = "Equity / Corporate", .pct = 99.5 }, + }; + try emitFundLines("SPY", "SPDR S&P 500 ETF Trust", "ETF", sectors[0..], null, null, &out); + try std.testing.expectEqualStrings( + "symbol::SPY,name::SPDR S&P 500 ETF Trust,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.5\n", + out.buffered(), + ); +} + +test "emitRecordLine: name=null and pct=null produces shortest form" { + var out_buf: [256]u8 = undefined; + var out: std.Io.Writer = .fixed(&out_buf); + try emitRecordLine(&out, "AAPL", null, "Technology", "US", "US Large Cap", null); + try std.testing.expectEqualStrings( + 
"symbol::AAPL,sector::Technology,geo::US,asset_class::US Large Cap\n", + out.buffered(), + ); +} + +test "emitRecordLine: name set, pct null" { + var out_buf: [256]u8 = undefined; + var out: std.Io.Writer = .fixed(&out_buf); + try emitRecordLine(&out, "AAPL", "Apple Inc", "Technology", "US", "US Large Cap", null); + try std.testing.expectEqualStrings( + "symbol::AAPL,name::Apple Inc,sector::Technology,geo::US,asset_class::US Large Cap\n", + out.buffered(), + ); +} + +test "emitRecordLine: name set, pct set (multi-class fund row)" { + var out_buf: [256]u8 = undefined; + var out: std.Io.Writer = .fixed(&out_buf); + try emitRecordLine(&out, "FAGIX", "Fidelity Capital and Income Fund", "Debt / Corporate", "US", "Fund", 47.69); + try std.testing.expectEqualStrings( + "symbol::FAGIX,name::Fidelity Capital and Income Fund,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69\n", + out.buffered(), + ); +} + test "emitFundLines: populated sectors -> one line per sector with pct" { var out_buf: [512]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); @@ -1223,7 +1298,7 @@ test "emitFundLines: populated sectors -> one line per sector with pct" { .{ .description = "Debt / Corporate", .pct = 47.69 }, .{ .description = "Equity / Corporate", .pct = 22.49 }, }; - try emitFundLines("FAGIX", "Fund", sectors[0..], null, null, &out); + try emitFundLines("FAGIX", null, "Fund", sectors[0..], null, null, &out); const written = out.buffered(); try std.testing.expect(std.mem.indexOf(u8, written, "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69") != null); @@ -1235,7 +1310,7 @@ test "emitFundLines: empty slice -> single TODO line (treats empty as null)" { var out_buf: [256]u8 = undefined; var out: std.Io.Writer = .fixed(&out_buf); const empty: [0]FundSector = .{}; - try emitFundLines("VTI", "ETF", empty[0..], null, null, &out); + try emitFundLines("VTI", null, "ETF", empty[0..], null, null, &out); try std.testing.expectEqualStrings( 
"symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n", out.buffered(), @@ -1251,7 +1326,7 @@ test "emitFundLines: negative pct values render correctly" { .{ .description = "Repurchase Agreement / Other", .pct = -29.72 }, .{ .description = "Derivative-FX / Other", .pct = -0.84 }, }; - try emitFundLines("PTY", "Fund", sectors[0..], null, null, &out); + try emitFundLines("PTY", null, "Fund", sectors[0..], null, null, &out); const written = out.buffered(); try std.testing.expect(std.mem.indexOf(u8, written, "pct:num:-29.72") != null); @@ -1264,7 +1339,7 @@ test "emitFundLines: ETF asset_class flows through" { const sectors = [_]FundSector{ .{ .description = "Equity / Corporate", .pct = 99.86 }, }; - try emitFundLines("SOXX", "ETF", sectors[0..], null, null, &out); + try emitFundLines("SOXX", null, "ETF", sectors[0..], null, null, &out); try std.testing.expectEqualStrings( "symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n", out.buffered(), diff --git a/src/models/classification.zig b/src/models/classification.zig index 9c58888..fa113df 100644 --- a/src/models/classification.zig +++ b/src/models/classification.zig @@ -15,6 +15,11 @@ const srf = @import("srf"); /// A single classification entry for a symbol. pub const ClassificationEntry = struct { symbol: []const u8, + /// Human-readable security name (e.g., "Amazon", "SPDR S&P 500 + /// ETF Trust"). Optional — older metadata.srf files may not + /// have this field. Renderers fall back to `symbol` / + /// `display_symbol` when null. 
+ name: ?[]const u8 = null, /// Sector (e.g., "Technology", "Healthcare", "Financials") sector: ?[]const u8 = null, /// Geographic region (e.g., "US", "International Developed", "Emerging Markets") @@ -33,6 +38,7 @@ pub const ClassificationMap = struct { pub fn deinit(self: *ClassificationMap) void { for (self.entries) |e| { self.allocator.free(e.symbol); + if (e.name) |n| self.allocator.free(n); if (e.sector) |s| self.allocator.free(s); if (e.geo) |g| self.allocator.free(g); if (e.asset_class) |a| self.allocator.free(a); @@ -42,13 +48,14 @@ pub const ClassificationMap = struct { }; /// Parse a metadata SRF file into a ClassificationMap. -/// Each record has: symbol::,sector::,geo::,asset_class::,pct:num:

+/// Each record has: symbol::<SYM>,name::<NAME>,sector::<SECTOR>,geo::<GEO>,asset_class::<CLASS>,pct:num:<PCT>

/// All fields except symbol are optional. pct defaults to 100. pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap { var entries = std.ArrayList(ClassificationEntry).empty; errdefer { for (entries.items) |e| { allocator.free(e.symbol); + if (e.name) |n| allocator.free(n); if (e.sector) |s| allocator.free(s); if (e.geo) |g| allocator.free(g); if (e.asset_class) |a| allocator.free(a); @@ -64,6 +71,7 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) ! const entry = fields.to(ClassificationEntry, .{}) catch continue; try entries.append(allocator, .{ .symbol = try allocator.dupe(u8, entry.symbol), + .name = if (entry.name) |n| try allocator.dupe(u8, n) else null, .sector = if (entry.sector) |s| try allocator.dupe(u8, s) else null, .geo = if (entry.geo) |g| try allocator.dupe(u8, g) else null, .asset_class = if (entry.asset_class) |a| try allocator.dupe(u8, a) else null, @@ -81,12 +89,12 @@ test "parse classification file" { const data = \\#!srfv1 \\# Stock: single sector - \\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap + \\symbol::AMZN,name::Amazon,sector::Technology,geo::US,asset_class::US Large Cap \\ \\# Target date fund: blended - \\symbol::TGT2035,asset_class::US Large Cap,pct:num:55 - \\symbol::TGT2035,asset_class::Bonds,pct:num:15 - \\symbol::TGT2035,asset_class::International Developed,pct:num:20 + \\symbol::TGT2035,name::Target Retirement 2035,asset_class::US Large Cap,pct:num:55 + \\symbol::TGT2035,name::Target Retirement 2035,asset_class::Bonds,pct:num:15 + \\symbol::TGT2035,name::Target Retirement 2035,asset_class::International Developed,pct:num:20 ; const allocator = std.testing.allocator; var cm = try parseClassificationFile(allocator, data); @@ -94,15 +102,35 @@ test "parse classification file" { try std.testing.expectEqual(@as(usize, 4), cm.entries.len); try std.testing.expectEqualStrings("AMZN", cm.entries[0].symbol); + try 
std.testing.expectEqualStrings("Amazon", cm.entries[0].name.?); try std.testing.expectEqualStrings("Technology", cm.entries[0].sector.?); try std.testing.expectEqualStrings("US", cm.entries[0].geo.?); try std.testing.expectApproxEqAbs(@as(f64, 100.0), cm.entries[0].pct, 0.01); try std.testing.expectEqualStrings("TGT2035", cm.entries[1].symbol); + try std.testing.expectEqualStrings("Target Retirement 2035", cm.entries[1].name.?); try std.testing.expectEqualStrings("US Large Cap", cm.entries[1].asset_class.?); try std.testing.expectApproxEqAbs(@as(f64, 55.0), cm.entries[1].pct, 0.01); } +test "parse classification file: missing name field stays null (backwards compat)" { + // Older metadata.srf files predate the name:: field. Parsing + // must still succeed; consumers fall back to symbol / + // display_symbol when name is null. + const data = + \\#!srfv1 + \\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap + ; + const allocator = std.testing.allocator; + var cm = try parseClassificationFile(allocator, data); + defer cm.deinit(); + + try std.testing.expectEqual(@as(usize, 1), cm.entries.len); + try std.testing.expectEqualStrings("AMZN", cm.entries[0].symbol); + try std.testing.expect(cm.entries[0].name == null); + try std.testing.expectEqualStrings("Technology", cm.entries[0].sector.?); +} + // ── ClassificationRecord ───────────────────────────────────── // // Distinct from `ClassificationEntry` above: that one represents