/// Classification metadata for portfolio analysis.
///
/// Each entry maps a symbol to one or more asset class / sector / geographic allocations.
/// For individual stocks, there's typically one entry at 100%.
/// For blended funds (e.g., target date), there can be multiple entries that sum to ~100%.
///
/// Loaded from a metadata SRF file like `metadata.srf`:
///   symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap
///   symbol::02315N600,asset_class::US Large Cap,pct:num:55
///   symbol::02315N600,asset_class::International Developed,pct:num:20
///   symbol::02315N600,asset_class::Bonds,pct:num:15
const std = @import("std");
const srf = @import("srf");

/// A single classification entry for a symbol.
pub const ClassificationEntry = struct {
    symbol: []const u8,
    /// Human-readable security name (e.g., "Amazon", "SPDR S&P 500
    /// ETF Trust"). Optional — older metadata.srf files may not
    /// have this field. Renderers fall back to `symbol` /
    /// `display_symbol` when null.
    name: ?[]const u8 = null,
    /// User-curated grouping label that overrides the auto-derived
    /// bucket for concentration / dominance checks and the
    /// analysis tab's Sector breakdown. Use this when the upstream
    /// `sector` field is the NPORT-P "Equity / Corporate" mush
    /// that doesn't actually distinguish your holdings (e.g. SPY
    /// vs FRDM vs HFXI all tagged the same way). When null,
    /// `deriveBucket` falls back to a sensible default.
    bucket: ?[]const u8 = null,
    /// Sector (e.g., "Technology", "Healthcare", "Financials")
    sector: ?[]const u8 = null,
    /// Geographic region (e.g., "US", "International Developed", "Emerging Markets")
    geo: ?[]const u8 = null,
    /// Asset class (e.g., "US Large Cap", "Bonds", "Cash")
    asset_class: ?[]const u8 = null,
    /// Percentage weight for this entry (0-100). Default 100 for single-class assets.
    pct: f64 = 100.0,
};

/// Free every string owned by one entry. `symbol` is always freed;
/// the optional fields are freed only when set. Shared by
/// `ClassificationMap.deinit` and the parser's error paths so the
/// two cannot drift apart when a field is added.
fn freeEntryStrings(allocator: std.mem.Allocator, entry: ClassificationEntry) void {
    allocator.free(entry.symbol);
    const optional_strings = [_]?[]const u8{
        entry.name,
        entry.bucket,
        entry.sector,
        entry.geo,
        entry.asset_class,
    };
    for (optional_strings) |maybe| {
        if (maybe) |bytes| allocator.free(bytes);
    }
}

/// Duplicate an optional string; null stays null.
fn dupeOptional(allocator: std.mem.Allocator, value: ?[]const u8) !?[]const u8 {
    const bytes = value orelse return null;
    return try allocator.dupe(u8, bytes);
}

/// Build a copy of `parsed` in which every string is owned by
/// `allocator`. On failure nothing is leaked: whatever was already
/// duplicated is released before the error propagates.
fn cloneEntry(allocator: std.mem.Allocator, parsed: ClassificationEntry) !ClassificationEntry {
    var owned: ClassificationEntry = .{
        .symbol = try allocator.dupe(u8, parsed.symbol),
        .pct = parsed.pct,
    };
    // Runs with the fields as they stand at the point of failure;
    // fields not yet assigned are still null and are skipped.
    errdefer freeEntryStrings(allocator, owned);

    // Pre-fill `bucket` if the user didn't curate one. This
    // shifts the cost of `deriveBucket` to parse time and
    // makes downstream code free to read `entry.bucket`
    // directly without juggling allocator parameters.
    // (`deriveBucket` itself returns a copy of a curated bucket.)
    owned.bucket = try deriveBucket(parsed, allocator);
    owned.name = try dupeOptional(allocator, parsed.name);
    owned.sector = try dupeOptional(allocator, parsed.sector);
    owned.geo = try dupeOptional(allocator, parsed.geo);
    owned.asset_class = try dupeOptional(allocator, parsed.asset_class);
    return owned;
}

/// Parsed classification data for the entire portfolio.
///
/// `deinit` releases `entries` and every string inside them through
/// `allocator`.
pub const ClassificationMap = struct {
    entries: []ClassificationEntry,
    allocator: std.mem.Allocator,

    /// Free each entry's strings, then the entry slice itself.
    pub fn deinit(self: *ClassificationMap) void {
        for (self.entries) |e| freeEntryStrings(self.allocator, e);
        self.allocator.free(self.entries);
    }
};

/// Parse a metadata SRF file into a ClassificationMap.
/// Each record has: symbol::<s>,name::<s>,bucket::<s>,sector::<s>,geo::<s>,asset_class::<s>,pct:num:<n>

/// All fields except symbol are optional. pct defaults to 100.
///
/// Every string in the result is duplicated with `allocator`; the
/// caller releases the map with `ClassificationMap.deinit`. In the
/// result `bucket` is never null: it is the curated value when
/// present, otherwise whatever `deriveBucket` computes.
///
/// Records that cannot be converted to a `ClassificationEntry` are
/// skipped (deliberate best-effort). Returns `error.InvalidData`
/// when the SRF iterator cannot be created; errors from the
/// iterator's `next` and allocation failures propagate.
pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap {
    var entries = std.ArrayList(ClassificationEntry).empty;
    errdefer {
        for (entries.items) |e| freeEntryStrings(allocator, e);
        entries.deinit(allocator);
    }

    var reader = std.Io.Reader.fixed(data);
    var it = srf.iterator(&reader, allocator, .{ .parse_allocator = .none }) catch return error.InvalidData;
    defer it.deinit();

    while (try it.next()) |fields| {
        const parsed = fields.to(ClassificationEntry, .{}) catch continue;

        const owned = try cloneEntry(allocator, parsed);
        // Until the append succeeds, `owned` is not reachable from
        // `entries`, so the function-level errdefer would miss it.
        errdefer freeEntryStrings(allocator, owned);
        try entries.append(allocator, owned);
    }

    return .{
        .entries = try entries.toOwnedSlice(allocator),
        .allocator = allocator,
    };
}

/// Resolve a classification entry to its display bucket. Used by
/// the review tab's Sector column, by `analyzePortfolio`'s sector
/// rollup, and by the observation engine's concentration /
/// dominance checks.
///
/// Four-tier fallback (caller owns the returned slice; allocated
/// via `allocator`):
///   1. `entry.bucket` if set — user-curated, always wins.
///   2. `entry.sector` if set AND doesn't contain '/' — GICS-style
///      sector ("Technology", "Healthcare"). The '/' rules out
///      NPORT-P fund-decomp categories ("Equity / Corporate")
///      that are noise rather than meaningful sectors.
///   3. Composite "{geo} {asset_class}" if both are set. For
///      funds without a curated bucket, this gives a meaningful
///      grouping like "International Developed Fund" or "US ETF".
///      When the asset class already names its region, the asset
///      class is used alone (see the comment in the body).
///   4. Literal "Unclassified".
///
/// Fails only when allocation fails.
pub fn deriveBucket(entry: ClassificationEntry, allocator: std.mem.Allocator) ![]const u8 {
    if (entry.bucket) |curated| return try allocator.dupe(u8, curated);

    if (entry.sector) |s| {
        if (std.mem.indexOfScalar(u8, s, '/') == null) return try allocator.dupe(u8, s);
    }

    if (entry.geo) |g| {
        if (entry.asset_class) |ac| {
            // Avoid duplicate-geo composites like "US US Large Cap".
            // If the asset_class starts with the geo prefix (followed
            // by a space or end-of-string), use it alone. Same for
            // common geographic-noun asset classes that already imply
            // their region ("International Developed", "Emerging
            // Markets") — these don't need a geo prefix.
            const ac_starts_with_geo = std.mem.startsWith(u8, ac, g) and
                (ac.len == g.len or ac[g.len] == ' ');
            const ac_has_implicit_geo = std.mem.startsWith(u8, ac, "International") or
                std.mem.startsWith(u8, ac, "Emerging");
            if (ac_starts_with_geo or ac_has_implicit_geo) {
                return try allocator.dupe(u8, ac);
            }
            return try std.fmt.allocPrint(allocator, "{s} {s}", .{ g, ac });
        }
    }

    return try allocator.dupe(u8, "Unclassified");
}

test "parse classification file" {
    const data =
        \\#!srfv1
        \\# Stock: single sector
        \\symbol::AMZN,name::Amazon,sector::Technology,geo::US,asset_class::US Large Cap
        \\
        \\# Target date fund: blended
        \\symbol::TGT2035,name::Target Retirement 2035,asset_class::US Large Cap,pct:num:55
        \\symbol::TGT2035,name::Target Retirement 2035,asset_class::Bonds,pct:num:15
        \\symbol::TGT2035,name::Target Retirement 2035,asset_class::International Developed,pct:num:20
    ;
    const allocator = std.testing.allocator;
    var cm = try parseClassificationFile(allocator, data);
    defer cm.deinit();

    try std.testing.expectEqual(@as(usize, 4), cm.entries.len);

    try std.testing.expectEqualStrings("AMZN", cm.entries[0].symbol);
    try std.testing.expectEqualStrings("Amazon", cm.entries[0].name.?);
    try std.testing.expectEqualStrings("Technology", cm.entries[0].sector.?);
    try std.testing.expectEqualStrings("US", cm.entries[0].geo.?);
    try std.testing.expectApproxEqAbs(@as(f64, 100.0), cm.entries[0].pct, 0.01);

    try std.testing.expectEqualStrings("TGT2035", cm.entries[1].symbol);
    try std.testing.expectEqualStrings("Target Retirement 2035", cm.entries[1].name.?);
    try std.testing.expectEqualStrings("US Large Cap", cm.entries[1].asset_class.?);
    try std.testing.expectApproxEqAbs(@as(f64, 55.0), cm.entries[1].pct, 0.01);
}

test "parse classification file: missing name field stays null (backwards compat)" {
    // Older metadata.srf files predate the name:: field. Parsing
    // must still succeed; consumers fall back to symbol /
    // display_symbol when name is null.
    const data =
        \\#!srfv1
        \\symbol::AMZN,sector::Technology,geo::US,asset_class::US Large Cap
    ;
    const allocator = std.testing.allocator;
    var cm = try parseClassificationFile(allocator, data);
    defer cm.deinit();

    try std.testing.expectEqual(@as(usize, 1), cm.entries.len);
    try std.testing.expectEqualStrings("AMZN", cm.entries[0].symbol);
    try std.testing.expect(cm.entries[0].name == null);
    // `bucket` is pre-filled by the parser via deriveBucket. For
    // a GICS-style sector ("Technology"), it equals the sector.
    try std.testing.expectEqualStrings("Technology", cm.entries[0].bucket.?);
    try std.testing.expectEqualStrings("Technology", cm.entries[0].sector.?);
}

test "parse classification file: bucket round-trips" {
    const data =
        \\#!srfv1
        \\symbol::SPY,name::SPDR S&P 500 ETF Trust,bucket::US Large Cap,sector::Equity / Corporate,geo::US,asset_class::ETF
    ;
    const allocator = std.testing.allocator;
    var cm = try parseClassificationFile(allocator, data);
    defer cm.deinit();

    try std.testing.expectEqual(@as(usize, 1), cm.entries.len);
    try std.testing.expectEqualStrings("SPY", cm.entries[0].symbol);
    try std.testing.expectEqualStrings("US Large Cap", cm.entries[0].bucket.?);
    try std.testing.expectEqualStrings("Equity / Corporate", cm.entries[0].sector.?);
}

test "deriveBucket: returns user-curated bucket when set" {
    const e: ClassificationEntry = .{
        .symbol = "SPY",
        .bucket = "US Large Cap",
        .sector = "Equity / Corporate", // would otherwise force fallback
        .geo = "US",
        .asset_class = "ETF",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("US Large Cap", out);
}

test "deriveBucket: returns sector when GICS-like (no '/')" {
    const e: ClassificationEntry = .{
        .symbol = "AMZN",
        .sector = "Technology",
        .geo = "US",
        .asset_class = "US Large Cap",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("Technology", out);
}

test "deriveBucket: composite fallback when sector is NPORT-P mush" {
    const e: ClassificationEntry = .{
        .symbol = "HFXI",
        .sector = "Equity / Corporate",
        .geo = "International Developed",
        .asset_class = "Fund",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("International Developed Fund", out);
}

test "deriveBucket: returns Unclassified when nothing usable is set" {
    const e: ClassificationEntry = .{
        .symbol = "UNK",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("Unclassified", out);
}

test "deriveBucket: NPORT-P sector with no geo/asset_class falls through to Unclassified" {
    // Defensive: sector is NPORT-P-style (skipped by the GICS
    // filter) AND we don't have both geo and asset_class to
    // build a composite. Falls through to Unclassified.
    const e: ClassificationEntry = .{
        .symbol = "X",
        .sector = "Debt / Corporate",
        .geo = "US",
        // asset_class missing
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("Unclassified", out);
}

test "deriveBucket: composite avoids duplicate geo when asset_class already starts with it" {
    // Hand-written entries often have geographically-prefixed
    // asset_class values like "US Large Cap" alongside
    // geo="US". The naive composite "{geo} {asset_class}" then
    // produces "US US Large Cap" which is ugly and clusters
    // incorrectly in the breakdown. Detect the duplicate prefix
    // and use the asset_class alone.
    const e: ClassificationEntry = .{
        .symbol = "VOO",
        .geo = "US",
        .asset_class = "US Large Cap",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("US Large Cap", out);
}

test "deriveBucket: geo prefix only counts at a word boundary" {
    // "USD Bonds" starts with the bytes "US" but not with the word
    // "US", so the geo is still prepended.
    const e: ClassificationEntry = .{
        .symbol = "X",
        .geo = "US",
        .asset_class = "USD Bonds",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("US USD Bonds", out);
}

test "deriveBucket: composite uses asset_class alone for International/Emerging implicit-geo classes" {
    // "International Developed" and "Emerging Markets" are
    // already geographic; the composite shouldn't re-prepend
    // the geo.
    const e1: ClassificationEntry = .{
        .symbol = "VEA",
        .geo = "International Developed",
        .asset_class = "International Developed",
    };
    const out1 = try deriveBucket(e1, std.testing.allocator);
    defer std.testing.allocator.free(out1);
    try std.testing.expectEqualStrings("International Developed", out1);

    const e2: ClassificationEntry = .{
        .symbol = "VWO",
        .geo = "Emerging Markets",
        .asset_class = "Emerging Markets",
    };
    const out2 = try deriveBucket(e2, std.testing.allocator);
    defer std.testing.allocator.free(out2);
    try std.testing.expectEqualStrings("Emerging Markets", out2);
}

test "deriveBucket: composite still prepends geo when asset_class is generic (Fund/ETF/Bonds)" {
    // The whole point of the composite is to disambiguate
    // generic asset_class labels by their geo. Make sure we
    // don't accidentally regress on this case while fixing
    // the duplicate-prefix one.
    const e: ClassificationEntry = .{
        .symbol = "BND",
        .geo = "US",
        .asset_class = "Fund",
    };
    const out = try deriveBucket(e, std.testing.allocator);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("US Fund", out);
}

// ── ClassificationRecord ─────────────────────────────────────
//
// Distinct from `ClassificationEntry` above: that one represents
// a row in the user's `metadata.srf` (already-curated portfolio
// data). `ClassificationRecord` is the upstream-fetched
// per-symbol shape that flows OUT of `DataService.getClassification`.
// `enrich` reads it to write the metadata.srf row that becomes
// a `ClassificationEntry` later.
//
// Lives here (not in `providers/Wikidata.zig`) because the shape
// is provider-agnostic: any future classification source (FMP,
// Alpha Vantage, hand-written) populates the same record. The
// fact that today the only producer is Wikidata is incidental.

/// A single fetched classification result for one symbol.
///
/// All optional fields default to `null`; populators only set
/// the fields they have data for. The `source` field always
/// emits per the project's source-pure invariant.
///
/// Every string field is owned by the record and released by
/// `deinit`, so each one must have been allocated with the
/// allocator later passed to `deinit`.
pub const ClassificationRecord = struct {
    symbol: []const u8, // owned
    name: ?[]const u8 = null, // owned
    sector: ?[]const u8 = null, // owned
    industry: ?[]const u8 = null, // owned
    /// ISO-3166 alpha-2 country code (e.g. "US", "GB", "DE").
    country: ?[]const u8 = null, // owned
    /// Resolved geo bucket (e.g. "US", "International Developed",
    /// "Emerging Markets"). Populated either from `geoFor(country)`
    /// or from title-keyword inference (`inferGeoFromTitle`) for
    /// symbols where Wikidata didn't supply a country. Producers
    /// pick one of the `geo.*` constants declared below; consumers
    /// compare against the same constants. Because `deinit` frees
    /// this field, it must hold an allocated copy of the constant's
    /// text, not the constant itself.
    geo: ?[]const u8 = null, // owned
    asset_class: ?[]const u8 = null, // owned
    is_etf: bool = false,
    /// YYYY-MM-DD; trimmed from upstream's ISO-8601 date.
    inception_date: ?[]const u8 = null, // owned
    /// Wikidata's P5531 — the SEC CIK as a digit string. Already
    /// zero-padded to 10 digits, matching the project-wide CIK
    /// normalization convention.
    cik: ?[]const u8 = null, // owned
    /// YYYY-MM-DD when this provider ran, NOT when upstream last
    /// updated the underlying entity.
    as_of: []const u8, // owned
    source: []const u8, // no default — provenance always emitted

    /// Free every string the record owns. Optional fields are freed
    /// only when set.
    pub fn deinit(self: ClassificationRecord, allocator: std.mem.Allocator) void {
        const optional_strings = [_]?[]const u8{
            self.name,
            self.sector,
            self.industry,
            self.country,
            self.geo,
            self.asset_class,
            self.inception_date,
            self.cik,
        };

        allocator.free(self.symbol);
        for (optional_strings) |maybe| {
            if (maybe) |bytes| allocator.free(bytes);
        }
        allocator.free(self.as_of);
        allocator.free(self.source);
    }

    /// Free a slice of records, calling deinit on each element first.
    pub fn freeSlice(allocator: std.mem.Allocator, recs: []const ClassificationRecord) void {
        for (recs) |rec| rec.deinit(allocator);
        allocator.free(recs);
    }
};

// ── Geographic taxonomy ──────────────────────────────────────

/// Geo-bucket constants used by the country → geo lookup. Kept
/// as named constants (rather than inline string literals in the
/// map) so callers can reference them without typo risk and the
/// taxonomy is tweakable in one place.
pub const geo = struct {
    pub const us = "US";
    pub const developed = "International Developed";
    pub const emerging = "Emerging Markets";
    pub const unknown = "Unknown";
};

// ── Sector taxonomy ──────────────────────────────────────────

/// Canonical sector taxonomy (GICS-aligned 11-sector model).
/// Producers (Wikidata's `canonicalizeSector`, enrich's
/// `inferSectorFromTitle`) emit one of these strings; consumers
/// (analysis bucketing, display) compare against them.
///
/// Lives here (not in any provider) so multiple producers can
/// share one taxonomy. Adding a 12th sector or renaming an
/// existing one is a one-place edit.
pub const sector = struct {
    pub const technology = "Technology";
    pub const communication_services = "Communication Services";
    pub const consumer_cyclical = "Consumer Cyclical";
    pub const consumer_defensive = "Consumer Defensive";
    pub const healthcare = "Healthcare";
    pub const financial_services = "Financial Services";
    pub const energy = "Energy";
    pub const industrials = "Industrials";
    pub const basic_materials = "Basic Materials";
    pub const real_estate = "Real Estate";
    pub const utilities = "Utilities";
};

/// Country-code-to-geo-bucket lookup. Producers (Wikidata today,
/// others tomorrow) hand us ISO-3166 alpha-2 codes via the
/// `ClassificationRecord.country` field; we map them to the geo
/// taxonomy (`geo.us` / `geo.developed` / `geo.emerging` /
/// `geo.unknown`).
///
/// MSCI conventions used as the developed/emerging split. Taiwan
/// and South Korea are MSCI-emerging despite FTSE classifying
/// them developed. Israel is MSCI-developed (upgraded 2010).
/// Canada is folded into International Developed (some users
/// prefer separate Canada bucket; override in `metadata.srf`).
///
/// Keys are matched exactly (case-sensitive, upper case).
///
/// NOTE(review): GR, IS, VN, RU and AR appear to differ from the
/// current MSCI classification (Greece is listed by MSCI as
/// emerging, Iceland and Vietnam as frontier, Russia and Argentina
/// as standalone). Left as-is so existing output does not shift —
/// confirm whether these placements are intentional.
const country_to_geo = std.StaticStringMap([]const u8).initComptime(.{
    // United States
    .{ "US", geo.us },
    // Alpha-3 fallback for entries that use the longer form.
    .{ "USA", geo.us },

    // International Developed — Europe ex-CIS
    .{ "GB", geo.developed },
    .{ "DE", geo.developed },
    .{ "FR", geo.developed },
    .{ "NL", geo.developed },
    .{ "CH", geo.developed },
    .{ "SE", geo.developed },
    .{ "DK", geo.developed },
    .{ "NO", geo.developed },
    .{ "FI", geo.developed },
    .{ "IT", geo.developed },
    .{ "ES", geo.developed },
    .{ "BE", geo.developed },
    .{ "AT", geo.developed },
    .{ "IE", geo.developed },
    .{ "LU", geo.developed },
    .{ "PT", geo.developed },
    .{ "GR", geo.developed },
    .{ "IS", geo.developed },

    // International Developed — Asia-Pacific + Israel + Canada
    .{ "JP", geo.developed },
    .{ "AU", geo.developed },
    .{ "NZ", geo.developed },
    .{ "SG", geo.developed },
    .{ "HK", geo.developed },
    .{ "IL", geo.developed },
    .{ "CA", geo.developed },

    // Emerging Markets (MSCI)
    .{ "CN", geo.emerging },
    .{ "TW", geo.emerging },
    .{ "KR", geo.emerging },
    .{ "IN", geo.emerging },
    .{ "BR", geo.emerging },
    .{ "MX", geo.emerging },
    .{ "RU", geo.emerging },
    .{ "TR", geo.emerging },
    .{ "ZA", geo.emerging },
    .{ "TH", geo.emerging },
    .{ "MY", geo.emerging },
    .{ "ID", geo.emerging },
    .{ "PH", geo.emerging },
    .{ "VN", geo.emerging },
    .{ "AR", geo.emerging },
    .{ "CL", geo.emerging },
    .{ "CO", geo.emerging },
    .{ "PE", geo.emerging },
    .{ "EG", geo.emerging },

    // Emerging Markets (MSCI) — EMEA constituents that previously
    // fell through to `geo.unknown`.
    .{ "PL", geo.emerging },
    .{ "CZ", geo.emerging },
    .{ "HU", geo.emerging },
    .{ "SA", geo.emerging },
    .{ "AE", geo.emerging },
    .{ "QA", geo.emerging },
    .{ "KW", geo.emerging },
});

/// Map an ISO-3166 alpha-2 country code to one of the geo
/// buckets. Null/empty input or an unknown code returns
/// `geo.unknown` so the user can override in `metadata.srf`.
///
/// The returned slice is always one of the `geo.*` constants
/// (static storage); the caller must not free it.
pub fn geoFor(iso2: ?[]const u8) []const u8 {
    const code = iso2 orelse return geo.unknown;
    if (code.len == 0) return geo.unknown;
    return country_to_geo.get(code) orelse geo.unknown;
}

test "geoFor maps known ISO-3166 codes to bucket" {
    try std.testing.expectEqualStrings(geo.us, geoFor("US"));
    try std.testing.expectEqualStrings(geo.us, geoFor("USA"));
    try std.testing.expectEqualStrings(geo.developed, geoFor("GB"));
    try std.testing.expectEqualStrings(geo.developed, geoFor("DE"));
    try std.testing.expectEqualStrings(geo.developed, geoFor("CA"));
    try std.testing.expectEqualStrings(geo.developed, geoFor("IL"));
    try std.testing.expectEqualStrings(geo.emerging, geoFor("CN"));
    try std.testing.expectEqualStrings(geo.emerging, geoFor("TW"));
    try std.testing.expectEqualStrings(geo.emerging, geoFor("KR"));
}

test "geoFor maps the EMEA emerging-market constituents" {
    const codes = [_][]const u8{ "PL", "CZ", "HU", "SA", "AE", "QA", "KW" };
    for (codes) |code| {
        try std.testing.expectEqualStrings(geo.emerging, geoFor(code));
    }
}

test "geoFor returns Unknown for null/empty/unmapped" {
    try std.testing.expectEqualStrings(geo.unknown, geoFor(null));
    try std.testing.expectEqualStrings(geo.unknown, geoFor(""));
    try std.testing.expectEqualStrings(geo.unknown, geoFor("ZZ")); // unassigned ISO-2
    try std.testing.expectEqualStrings(geo.unknown, geoFor("XX"));
}

test "geo bucket labels are stable strings (not byte copies)" {
    // Callers stash these in HashMap keys without duping.
    // Verify the literal-pointer property holds across calls.
    try std.testing.expectEqual(@intFromPtr(geo.us.ptr), @intFromPtr(geoFor("US").ptr));
    try std.testing.expectEqual(@intFromPtr(geo.developed.ptr), @intFromPtr(geoFor("GB").ptr));
    try std.testing.expectEqual(@intFromPtr(geo.emerging.ptr), @intFromPtr(geoFor("CN").ptr));
    try std.testing.expectEqual(@intFromPtr(geo.unknown.ptr), @intFromPtr(geoFor(null).ptr));
}

// ── Title-keyword inference ──────────────────────────────────
//
// Pure functions over a fund/security title string. Used by
// `service.getClassification` to populate the sector / geo of a
// `ClassificationRecord` when Wikidata didn't carry one and the
// EDGAR ticker-map fallback fired.
// Lives here (not in any provider) because the inference is
// provider-agnostic and shares the canonical sector/geo taxonomy
// declared above.

/// True when `needle` occurs in `haystack` starting at a word
/// boundary: at the very beginning, or right after a byte that is
/// not an ASCII letter or digit. A needle that itself begins with a
/// non-alphanumeric byte (e.g. " intl") carries its own boundary
/// and matches anywhere.
///
/// Only the START is anchored, so "financial" still matches
/// "financials" and "biotech" still matches "biotechnology", while
/// "media" no longer matches inside "intermediate" and "ex us" no
/// longer matches inside "index us".
fn containsAtWordStart(haystack: []const u8, needle: []const u8) bool {
    var from: usize = 0;
    while (std.mem.indexOfPos(u8, haystack, from, needle)) |at| {
        if (at == 0) return true;
        if (!std.ascii.isAlphanumeric(needle[0])) return true;
        if (!std.ascii.isAlphanumeric(haystack[at - 1])) return true;
        // Embedded inside a longer word; look for a later occurrence.
        from = at + 1;
    }
    return false;
}

/// True when any of `needles` occurs in `haystack` at a word start
/// (see `containsAtWordStart`). Both sides are expected to be
/// lowercase already; the comparison is byte-exact.
fn titleContainsAny(haystack: []const u8, needles: []const []const u8) bool {
    for (needles) |needle| {
        if (containsAtWordStart(haystack, needle)) return true;
    }
    return false;
}

/// Lowercase `title` (ASCII letters only) into `buf` for
/// case-insensitive keyword matching and return the written part
/// of `buf`. Returns null when `title` is longer than `buf` —
/// nothing is truncated. The callers in this file pass a 256-byte
/// stack buffer.
fn lowercaseTitle(buf: []u8, title: []const u8) ?[]const u8 {
    if (title.len > buf.len) return null;
    return std.ascii.lowerString(buf[0..title.len], title);
}

/// One row of the title → sector table: if any keyword matches,
/// the title is assigned `label`.
const SectorRule = struct {
    keywords: []const []const u8,
    label: []const u8,
};

/// Keyword table for `inferSectorFromTitle`, checked top to bottom;
/// the first row with a matching keyword wins. Keywords are
/// lowercase and match at word starts only.
const sector_keyword_rules = [_]SectorRule{
    // Healthcare. "Health care" with space (XLV title), "healthcare"
    // (one word), "biotech", "pharmaceutical". "biopharma" is listed
    // explicitly because "pharmaceutical" is not at a word start
    // inside "biopharmaceutical".
    .{
        .keywords = &.{ "health care", "healthcare", "biotech", "biopharma", "pharmaceutical", "medical" },
        .label = sector.healthcare,
    },
    // Technology. Specific terms only — "tech" alone is too
    // broad ("biotech", "fintech", "edtech" are all
    // sector-mixing).
    .{
        .keywords = &.{ "semiconductor", "software", "cloud computing", "internet" },
        .label = sector.technology,
    },
    // Financial Services. "Financial" is fairly specific in
    // fund-name conventions ("Financial Select Sector SPDR",
    // "Vanguard Financials ETF").
    .{
        .keywords = &.{ "financial", "bank" },
        .label = sector.financial_services,
    },
    // Energy. "Energy" alone is mostly unambiguous in fund
    // conventions; pair with "oil" / "gas" for redundancy.
    .{
        .keywords = &.{ "energy", "oil & gas", "oil and gas", "petroleum" },
        .label = sector.energy,
    },
    // Real Estate / REITs.
    .{
        .keywords = &.{ "real estate", "reit" },
        .label = sector.real_estate,
    },
    // Utilities. "Utilities" alone is unambiguous.
    .{
        .keywords = &.{"utilities"},
        .label = sector.utilities,
    },
    // Consumer Discretionary / Cyclical. Match the explicit
    // labels — "consumer" alone is ambiguous (could be
    // discretionary or staples).
    .{
        .keywords = &.{ "consumer discretionary", "consumer cyclical" },
        .label = sector.consumer_cyclical,
    },
    // Consumer Staples / Defensive.
    .{
        .keywords = &.{ "consumer staples", "consumer defensive" },
        .label = sector.consumer_defensive,
    },
    // Industrials. "Industrial" is more reliable than
    // "industrials" because some fund names use the singular
    // ("Industrial Select Sector SPDR").
    .{
        .keywords = &.{ "industrial", "aerospace", "defense" },
        .label = sector.industrials,
    },
    // Basic Materials.
    .{
        .keywords = &.{ "materials", "mining", "miners", "metals" },
        .label = sector.basic_materials,
    },
    // Communication Services. "Communication" / "Telecom"
    // unambiguous.
    .{
        .keywords = &.{ "communication", "telecom", "media" },
        .label = sector.communication_services,
    },
};

/// Infer a GICS sector from a fund's title. Returns null when
/// no unambiguous keyword match — caller falls back to whatever
/// sector data the upstream source provided (typically null).
/// Also returns null for null/empty titles and for titles longer
/// than the 256-byte scratch buffer.
///
/// Conservative keyword set: matches only words that map
/// unambiguously to a single GICS sector. "Income" / "Dividend"
/// / "Value" / "Growth" / "Momentum" / "Total" / "Equal Weight"
/// / "International" / "Emerging" don't appear here — they
/// describe the screening methodology or geo, not the sector.
///
/// Matching is case-insensitive and anchored at word starts, so a
/// keyword buried inside a longer word ("media" in "Intermediate")
/// does not count.
///
/// Reuses the `sector` constants above so the inference taxonomy
/// stays in lock-step with the canonicalizer. The returned slice
/// is one of those constants; the caller must not free it.
pub fn inferSectorFromTitle(title: ?[]const u8) ?[]const u8 {
    const t = title orelse return null;
    if (t.len == 0) return null;

    var buf: [256]u8 = undefined;
    const lc = lowercaseTitle(&buf, t) orelse return null;

    for (sector_keyword_rules) |rule| {
        if (titleContainsAny(lc, rule.keywords)) return rule.label;
    }
    return null;
}

/// Infer a geo bucket from a fund's title. Returns null when
/// the title doesn't carry an unambiguous international/emerging
/// keyword — caller keeps whatever default they have (typically
/// US for SEC-filed funds). Also returns null for null/empty
/// titles and for titles longer than the 256-byte scratch buffer.
///
/// More important than sector inference: a default `geo::US` is
/// *factually wrong* for international funds (FRDM holds
/// emerging-market equities, not US), so this fix tightens
/// portfolio-level geographic-exposure reporting.
///
/// Matching is case-insensitive and anchored at word starts. The
/// returned slice is one of the `geo` constants; the caller must
/// not free it.
pub fn inferGeoFromTitle(title: ?[]const u8) ?[]const u8 {
    const t = title orelse return null;
    if (t.len == 0) return null;

    var buf: [256]u8 = undefined;
    const lc = lowercaseTitle(&buf, t) orelse return null;

    // Emerging markets first — most specific. "Frontier" is
    // conventionally only used for frontier markets in fund titles.
    // Plural forms ("emerging markets", "frontier markets") are
    // covered by the shorter keywords.
    if (titleContainsAny(lc, &.{ "emerging market", "frontier" })) {
        return geo.emerging;
    }

    // International Developed. "International" / "Intl" /
    // "ex-US" / "World ex US" / "Developed Markets".
    //
    // False-positive risk: a hypothetical "Vanguard Total
    // International + US Equity Fund" would mis-tag here. None
    // of the user's current portfolio holds such a hybrid
    // fund. If one ever shows up, it'll get flagged in the
    // diff-against-old-metadata.srf review and can be
    // hand-corrected.
    if (titleContainsAny(lc, &.{ "international", " intl", "ex-us", "ex us", "world ex", "developed market" })) {
        return geo.developed;
    }

    return null;
}

test "inferSectorFromTitle: null/empty -> null" {
    try std.testing.expectEqual(@as(?[]const u8, null), inferSectorFromTitle(null));
    try std.testing.expectEqual(@as(?[]const u8, null), inferSectorFromTitle(""));
}

test "inferSectorFromTitle: technology keywords" {
    try std.testing.expectEqual(@as(?[]const u8, sector.technology), inferSectorFromTitle("iShares Semiconductor ETF"));
    try std.testing.expectEqual(@as(?[]const u8, sector.technology), inferSectorFromTitle("Vanguard Software ETF"));
}

test "inferSectorFromTitle: healthcare keywords" {
    try std.testing.expectEqual(@as(?[]const u8, sector.healthcare), inferSectorFromTitle("Health Care Select Sector SPDR"));
    try std.testing.expectEqual(@as(?[]const u8, sector.healthcare), inferSectorFromTitle("iShares Biotech ETF"));
}

test "inferSectorFromTitle: ambiguous title -> null" {
    try std.testing.expectEqual(@as(?[]const u8, null), inferSectorFromTitle("Vanguard Total Stock Market"));
    try std.testing.expectEqual(@as(?[]const u8, null), inferSectorFromTitle("SPDR S&P 500"));
}

test "inferSectorFromTitle: keyword prefixes of longer words still match" {
    try std.testing.expectEqual(@as(?[]const u8, sector.financial_services), inferSectorFromTitle("Vanguard Financials ETF"));
    try std.testing.expectEqual(@as(?[]const u8, sector.healthcare), inferSectorFromTitle("iShares Biotechnology ETF"));
}

test "inferSectorFromTitle: keyword buried inside a longer word -> null" {
    // "intermediate" contains "media"; a bond fund is not
    // Communication Services.
    try std.testing.expectEqual(@as(?[]const u8, null), inferSectorFromTitle("Vanguard Intermediate-Term Bond ETF"));
}

test "inferGeoFromTitle: null/empty -> null" {
    try std.testing.expectEqual(@as(?[]const u8, null), inferGeoFromTitle(null));
    try std.testing.expectEqual(@as(?[]const u8, null), inferGeoFromTitle(""));
}

test "inferGeoFromTitle: emerging markets" {
    try std.testing.expectEqual(@as(?[]const u8, geo.emerging), inferGeoFromTitle("Freedom 100 Emerging Markets ETF"));
    try std.testing.expectEqual(@as(?[]const u8, geo.emerging), inferGeoFromTitle("iShares Frontier Markets"));
}

test "inferGeoFromTitle: international developed" {
    try std.testing.expectEqual(@as(?[]const u8, geo.developed), inferGeoFromTitle("Vanguard FTSE Developed Markets"));
    try std.testing.expectEqual(@as(?[]const u8, geo.developed), inferGeoFromTitle("iShares MSCI EAFE International"));
    try std.testing.expectEqual(@as(?[]const u8, geo.developed), inferGeoFromTitle("Vanguard Total World ex-US"));
}

test "inferGeoFromTitle: no match -> null" {
    try std.testing.expectEqual(@as(?[]const u8, null), inferGeoFromTitle("SPDR S&P 500"));
    try std.testing.expectEqual(@as(?[]const u8, null), inferGeoFromTitle("iShares Semiconductor ETF"));
}

test "inferGeoFromTitle: 'ex us' inside 'index us' -> null" {
    try std.testing.expectEqual(@as(?[]const u8, null), inferGeoFromTitle("Schwab Index US Equity Fund"));
}