update enrich to output names
This commit is contained in:
parent
14f55afb28
commit
cd6e22f5ba
2 changed files with 125 additions and 22 deletions
|
|
@ -1,9 +1,11 @@
|
|||
const std = @import("std");
|
||||
const srf = @import("srf");
|
||||
const zfin = @import("../root.zig");
|
||||
const cli = @import("common.zig");
|
||||
const framework = @import("framework.zig");
|
||||
const isCusipLike = @import("../models/portfolio.zig").isCusipLike;
|
||||
const ClassificationRecord = zfin.classification.ClassificationRecord;
|
||||
const ClassificationEntry = zfin.classification.ClassificationEntry;
|
||||
|
||||
pub const ParsedArgs = struct {
|
||||
/// Optional symbol (e.g. "AAPL"). Null = portfolio mode (uses
|
||||
|
|
@ -356,9 +358,7 @@ fn enrichSymbol(io: std.Io, allocator: std.mem.Allocator, svc: *zfin.DataService
|
|||
if (c.name) |name| {
|
||||
try out.print("# {s}\n", .{name});
|
||||
}
|
||||
try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{
|
||||
sym, derived.sector, derived.geo, derived.asset_class,
|
||||
});
|
||||
try emitRecordLine(out, sym, c.name, derived.sector, derived.geo, derived.asset_class, null);
|
||||
}
|
||||
|
||||
stderrSymbolProvenance(io, sym, kindFromSource(c.source), null);
|
||||
|
|
@ -405,7 +405,7 @@ fn emitEtfRows(
|
|||
} else {
|
||||
try out.print("# {s}\n", .{sym});
|
||||
}
|
||||
try emitFundLines(sym, asset_class, sectors, c.sector, geo, out);
|
||||
try emitFundLines(sym, c.name, asset_class, sectors, c.sector, geo, out);
|
||||
}
|
||||
|
||||
/// Wikidata didn't return a classification for `sym` (either the
|
||||
|
|
@ -478,6 +478,7 @@ fn hasDominantEquitySector(fund_sectors: ?[]const FundSector) bool {
|
|||
/// available.
|
||||
fn emitFundLines(
|
||||
sym: []const u8,
|
||||
name: ?[]const u8,
|
||||
asset_class: []const u8,
|
||||
fund_sectors: ?[]const FundSector,
|
||||
inferred_sector: ?[]const u8,
|
||||
|
|
@ -501,10 +502,7 @@ fn emitFundLines(
|
|||
inferred_sector.?
|
||||
else
|
||||
s.description;
|
||||
try out.print(
|
||||
"symbol::{s},sector::{s},geo::{s},asset_class::{s},pct:num:{d:.2}\n",
|
||||
.{ sym, sector_str, geo_str, asset_class, s.pct },
|
||||
);
|
||||
try emitRecordLine(out, sym, name, sector_str, geo_str, asset_class, s.pct);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -513,7 +511,42 @@ fn emitFundLines(
|
|||
// one TODO line — but if title-keyword inference returned
|
||||
// a sector, use it instead of "TODO".
|
||||
const sector_str = inferred_sector orelse "TODO";
|
||||
try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n", .{ sym, sector_str, geo_str, asset_class });
|
||||
try emitRecordLine(out, sym, name, sector_str, geo_str, asset_class, null);
|
||||
}
|
||||
|
||||
/// Write a single classification record for `sym` to `out`.
///
/// The row is not assembled with a hand-written format string.
/// It is packed into a `ClassificationEntry` and rendered by the
/// SRF library's formatter (`srf.fmt`), which is responsible for:
///   - field ordering (follows `ClassificationEntry`'s field
///     declaration order),
///   - escaping of values containing commas/newlines,
///   - default-value elision (a `pct` of 100.0 drops the
///     `pct:num:` field; null optional fields are left out).
///
/// `emit_directives = false` keeps the `#!srfv1` header out of the
/// output, so the function can be called once per record in between
/// the comment scaffold lines the enrich output writes.
///
/// Parameters:
///   - `out`: destination writer.
///   - `sym`, `sector`, `geo`, `asset_class`: required field values.
///   - `name`: optional security name; omitted from the row when null.
///   - `pct`: optional percentage; null is written as the default.
///
/// Errors: propagates whatever `out.print` returns.
fn emitRecordLine(
    out: *std.Io.Writer,
    sym: []const u8,
    name: ?[]const u8,
    sector: []const u8,
    geo: []const u8,
    asset_class: []const u8,
    pct: ?f64,
) !void {
    // A missing percentage is stored as 100.0, the field's default,
    // so the formatter's default-elision rule drops `pct:num:` and
    // single-class rows stay lean.
    const weight = pct orelse 100.0;

    // The formatter takes a slice of entries; wrap the one row we
    // have in a single-element array.
    const records = [1]ClassificationEntry{.{
        .symbol = sym,
        .name = name,
        .sector = sector,
        .geo = geo,
        .asset_class = asset_class,
        .pct = weight,
    }};

    try out.print("{f}", .{srf.fmt(ClassificationEntry, &records, .{ .emit_directives = false })});
}
|
||||
|
||||
/// What `getEtfMetrics` provides that `enrich` actually uses:
|
||||
|
|
@ -743,9 +776,8 @@ fn enrichPortfolio(ctx: *framework.RunCtx, svc: *zfin.DataService) !void {
|
|||
if (c.name) |name| {
|
||||
try out.print("# {s}\n", .{name});
|
||||
}
|
||||
try out.print("symbol::{s},sector::{s},geo::{s},asset_class::{s}\n\n", .{
|
||||
sym, derived.sector, derived.geo, derived.asset_class,
|
||||
});
|
||||
try emitRecordLine(out, sym, c.name, derived.sector, derived.geo, derived.asset_class, null);
|
||||
try out.print("\n", .{});
|
||||
}
|
||||
|
||||
switch (kindFromSource(c.source)) {
|
||||
|
|
@ -1209,13 +1241,56 @@ test "hasDominantEquitySector: null and empty -> false" {
|
|||
test "emitFundLines: null sectors -> single TODO line" {
|
||||
var out_buf: [256]u8 = undefined;
|
||||
var out: std.Io.Writer = .fixed(&out_buf);
|
||||
try emitFundLines("VTI", "ETF", null, null, null, &out);
|
||||
try emitFundLines("VTI", null, "ETF", null, null, null, &out);
|
||||
try std.testing.expectEqualStrings(
|
||||
"symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n",
|
||||
out.buffered(),
|
||||
);
|
||||
}
|
||||
|
||||
test "emitFundLines: name field is emitted as `name::Foo` between symbol and sector" {
    // One sector row with an explicit percentage, plus a fund name.
    const holdings = [_]FundSector{
        .{ .description = "Equity / Corporate", .pct = 99.5 },
    };

    var storage: [512]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);
    try emitFundLines("SPY", "SPDR S&P 500 ETF Trust", "ETF", &holdings, null, null, &writer);

    // `name::` must sit directly after `symbol::` and before `sector::`.
    const expected = "symbol::SPY,name::SPDR S&P 500 ETF Trust,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.5\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||||
|
||||
test "emitRecordLine: name=null and pct=null produces shortest form" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // No name and no percentage: neither `name::` nor `pct:num:`
    // may appear in the rendered row.
    try emitRecordLine(&writer, "AAPL", null, "Technology", "US", "US Large Cap", null);

    const expected = "symbol::AAPL,sector::Technology,geo::US,asset_class::US Large Cap\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||||
|
||||
test "emitRecordLine: name set, pct null" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // A name is supplied but no percentage: `name::` is written,
    // `pct:num:` is not.
    try emitRecordLine(&writer, "AAPL", "Apple Inc", "Technology", "US", "US Large Cap", null);

    const expected = "symbol::AAPL,name::Apple Inc,sector::Technology,geo::US,asset_class::US Large Cap\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||||
|
||||
test "emitRecordLine: name set, pct set (multi-class fund row)" {
    var storage: [256]u8 = undefined;
    var writer: std.Io.Writer = .fixed(&storage);

    // Both optional values are present, so the row carries every
    // field, ending with the explicit percentage.
    try emitRecordLine(&writer, "FAGIX", "Fidelity Capital and Income Fund", "Debt / Corporate", "US", "Fund", 47.69);

    const expected = "symbol::FAGIX,name::Fidelity Capital and Income Fund,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69\n";
    try std.testing.expectEqualStrings(expected, writer.buffered());
}
|
||||
|
||||
test "emitFundLines: populated sectors -> one line per sector with pct" {
|
||||
var out_buf: [512]u8 = undefined;
|
||||
var out: std.Io.Writer = .fixed(&out_buf);
|
||||
|
|
@ -1223,7 +1298,7 @@ test "emitFundLines: populated sectors -> one line per sector with pct" {
|
|||
.{ .description = "Debt / Corporate", .pct = 47.69 },
|
||||
.{ .description = "Equity / Corporate", .pct = 22.49 },
|
||||
};
|
||||
try emitFundLines("FAGIX", "Fund", sectors[0..], null, null, &out);
|
||||
try emitFundLines("FAGIX", null, "Fund", sectors[0..], null, null, &out);
|
||||
|
||||
const written = out.buffered();
|
||||
try std.testing.expect(std.mem.indexOf(u8, written, "symbol::FAGIX,sector::Debt / Corporate,geo::US,asset_class::Fund,pct:num:47.69") != null);
|
||||
|
|
@ -1235,7 +1310,7 @@ test "emitFundLines: empty slice -> single TODO line (treats empty as null)" {
|
|||
var out_buf: [256]u8 = undefined;
|
||||
var out: std.Io.Writer = .fixed(&out_buf);
|
||||
const empty: [0]FundSector = .{};
|
||||
try emitFundLines("VTI", "ETF", empty[0..], null, null, &out);
|
||||
try emitFundLines("VTI", null, "ETF", empty[0..], null, null, &out);
|
||||
try std.testing.expectEqualStrings(
|
||||
"symbol::VTI,sector::TODO,geo::US,asset_class::ETF\n",
|
||||
out.buffered(),
|
||||
|
|
@ -1251,7 +1326,7 @@ test "emitFundLines: negative pct values render correctly" {
|
|||
.{ .description = "Repurchase Agreement / Other", .pct = -29.72 },
|
||||
.{ .description = "Derivative-FX / Other", .pct = -0.84 },
|
||||
};
|
||||
try emitFundLines("PTY", "Fund", sectors[0..], null, null, &out);
|
||||
try emitFundLines("PTY", null, "Fund", sectors[0..], null, null, &out);
|
||||
|
||||
const written = out.buffered();
|
||||
try std.testing.expect(std.mem.indexOf(u8, written, "pct:num:-29.72") != null);
|
||||
|
|
@ -1264,7 +1339,7 @@ test "emitFundLines: ETF asset_class flows through" {
|
|||
const sectors = [_]FundSector{
|
||||
.{ .description = "Equity / Corporate", .pct = 99.86 },
|
||||
};
|
||||
try emitFundLines("SOXX", "ETF", sectors[0..], null, null, &out);
|
||||
try emitFundLines("SOXX", null, "ETF", sectors[0..], null, null, &out);
|
||||
try std.testing.expectEqualStrings(
|
||||
"symbol::SOXX,sector::Equity / Corporate,geo::US,asset_class::ETF,pct:num:99.86\n",
|
||||
out.buffered(),
|
||||
|
|
|
|||
|
|
@ -15,6 +15,11 @@ const srf = @import("srf");
|
|||
/// A single classification entry for a symbol.
|
||||
pub const ClassificationEntry = struct {
|
||||
symbol: []const u8,
|
||||
/// Human-readable security name (e.g., "Amazon", "SPDR S&P 500
|
||||
/// ETF Trust"). Optional — older metadata.srf files may not
|
||||
/// have this field. Renderers fall back to `symbol` /
|
||||
/// `display_symbol` when null.
|
||||
name: ?[]const u8 = null,
|
||||
/// Sector (e.g., "Technology", "Healthcare", "Financials")
|
||||
sector: ?[]const u8 = null,
|
||||
/// Geographic region (e.g., "US", "International Developed", "Emerging Markets")
|
||||
|
|
@ -33,6 +38,7 @@ pub const ClassificationMap = struct {
|
|||
pub fn deinit(self: *ClassificationMap) void {
|
||||
for (self.entries) |e| {
|
||||
self.allocator.free(e.symbol);
|
||||
if (e.name) |n| self.allocator.free(n);
|
||||
if (e.sector) |s| self.allocator.free(s);
|
||||
if (e.geo) |g| self.allocator.free(g);
|
||||
if (e.asset_class) |a| self.allocator.free(a);
|
||||
|
|
@ -42,13 +48,14 @@ pub const ClassificationMap = struct {
|
|||
};
|
||||
|
||||
/// Parse a metadata SRF file into a ClassificationMap.
|
||||
/// Each record has: symbol::<SYM>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
|
||||
/// Each record has: symbol::<SYM>,name::<N>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
|
||||
/// All fields except symbol are optional. pct defaults to 100.
|
||||
pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap {
|
||||
var entries = std.ArrayList(ClassificationEntry).empty;
|
||||
errdefer {
|
||||
for (entries.items) |e| {
|
||||
allocator.free(e.symbol);
|
||||
if (e.name) |n| allocator.free(n);
|
||||
if (e.sector) |s| allocator.free(s);
|
||||
if (e.geo) |g| allocator.free(g);
|
||||
if (e.asset_class) |a| allocator.free(a);
|
||||
|
|
@ -64,6 +71,7 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
|
|||
const entry = fields.to(ClassificationEntry, .{}) catch continue;
|
||||
try entries.append(allocator, .{
|
||||
.symbol = try allocator.dupe(u8, entry.symbol),
|
||||
.name = if (entry.name) |n| try allocator.dupe(u8, n) else null,
|
||||
.sector = if (entry.sector) |s| try allocator.dupe(u8, s) else null,
|
||||
.geo = if (entry.geo) |g| try allocator.dupe(u8, g) else null,
|
||||
.asset_class = if (entry.asset_class) |a| try allocator.dupe(u8, a) else null,
|
||||
|
|
@ -81,12 +89,12 @@ test "parse classification file" {
|
|||
const data =
|
||||
\\#!srfv1
|
||||
\\# Stock: single sector
|
||||
\\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap
|
||||
\\symbol::AMZN,name::Amazon,sector::Technology,geo::US,asset_class::US Large Cap
|
||||
\\
|
||||
\\# Target date fund: blended
|
||||
\\symbol::TGT2035,asset_class::US Large Cap,pct:num:55
|
||||
\\symbol::TGT2035,asset_class::Bonds,pct:num:15
|
||||
\\symbol::TGT2035,asset_class::International Developed,pct:num:20
|
||||
\\symbol::TGT2035,name::Target Retirement 2035,asset_class::US Large Cap,pct:num:55
|
||||
\\symbol::TGT2035,name::Target Retirement 2035,asset_class::Bonds,pct:num:15
|
||||
\\symbol::TGT2035,name::Target Retirement 2035,asset_class::International Developed,pct:num:20
|
||||
;
|
||||
const allocator = std.testing.allocator;
|
||||
var cm = try parseClassificationFile(allocator, data);
|
||||
|
|
@ -94,15 +102,35 @@ test "parse classification file" {
|
|||
|
||||
try std.testing.expectEqual(@as(usize, 4), cm.entries.len);
|
||||
try std.testing.expectEqualStrings("AMZN", cm.entries[0].symbol);
|
||||
try std.testing.expectEqualStrings("Amazon", cm.entries[0].name.?);
|
||||
try std.testing.expectEqualStrings("Technology", cm.entries[0].sector.?);
|
||||
try std.testing.expectEqualStrings("US", cm.entries[0].geo.?);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 100.0), cm.entries[0].pct, 0.01);
|
||||
|
||||
try std.testing.expectEqualStrings("TGT2035", cm.entries[1].symbol);
|
||||
try std.testing.expectEqualStrings("Target Retirement 2035", cm.entries[1].name.?);
|
||||
try std.testing.expectEqualStrings("US Large Cap", cm.entries[1].asset_class.?);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 55.0), cm.entries[1].pct, 0.01);
|
||||
}
|
||||
|
||||
test "parse classification file: missing name field stays null (backwards compat)" {
    // metadata.srf files written before the name:: field existed
    // have no such column. They must still parse, leaving `name`
    // null so consumers can fall back to symbol / display_symbol.
    const legacy_input =
        \\#!srfv1
        \\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap
    ;

    var cm = try parseClassificationFile(std.testing.allocator, legacy_input);
    defer cm.deinit();

    try std.testing.expectEqual(@as(usize, 1), cm.entries.len);

    const first = cm.entries[0];
    try std.testing.expectEqualStrings("AMZN", first.symbol);
    try std.testing.expect(first.name == null);
    try std.testing.expectEqualStrings("Technology", first.sector.?);
}
|
||||
|
||||
// ── ClassificationRecord ─────────────────────────────────────
|
||||
//
|
||||
// Distinct from `ClassificationEntry` above: that one represents
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue