From d6a104d2d5d85793c0e1b120f8838066f2822a36 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Tue, 3 Mar 2026 13:41:34 -0800 Subject: [PATCH] classification.zig to use srf parser --- src/models/classification.zig | 53 +++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/models/classification.zig b/src/models/classification.zig index ff6aede..e0be348 100644 --- a/src/models/classification.zig +++ b/src/models/classification.zig @@ -10,6 +10,7 @@ /// symbol::02315N600,asset_class::International Developed,pct:num:20 /// symbol::02315N600,asset_class::Bonds,pct:num:15 const std = @import("std"); +const srf = @import("srf"); /// A single classification entry for a symbol. pub const ClassificationEntry = struct { @@ -41,7 +42,7 @@ pub const ClassificationMap = struct { }; /// Parse a metadata SRF file into a ClassificationMap. -/// Each line is: symbol::,sector::,geo::,asset_class::,pct:num:

+/// Each record has: symbol::<symbol>,sector::<sector>,geo::<geo>,asset_class::<asset_class>,pct:num:<pct>

/// All fields except symbol are optional. pct defaults to 100. pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap { var entries = std.ArrayList(ClassificationEntry).empty; @@ -55,36 +56,32 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) ! entries.deinit(allocator); } - var line_iter = std.mem.splitScalar(u8, data, '\n'); - while (line_iter.next()) |line| { - const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace); - if (trimmed.len == 0 or trimmed[0] == '#') continue; - if (std.mem.startsWith(u8, trimmed, "#!")) continue; + var reader = std.Io.Reader.fixed(data); + const parsed = srf.parse(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData; + defer parsed.deinit(); - // Parse comma-separated key::value pairs + for (parsed.records.items) |record| { var symbol: ?[]const u8 = null; var sector: ?[]const u8 = null; var geo: ?[]const u8 = null; var asset_class: ?[]const u8 = null; var pct: f64 = 100.0; - var field_iter = std.mem.splitScalar(u8, trimmed, ','); - while (field_iter.next()) |field| { - const f = std.mem.trim(u8, field, &std.ascii.whitespace); - if (std.mem.startsWith(u8, f, "symbol::")) { - symbol = f["symbol::".len..]; - } else if (std.mem.startsWith(u8, f, "sector::")) { - sector = f["sector::".len..]; - } else if (std.mem.startsWith(u8, f, "geo::")) { - geo = f["geo::".len..]; - } else if (std.mem.startsWith(u8, f, "asset_class::")) { - asset_class = f["asset_class::".len..]; - } else if (std.mem.startsWith(u8, f, "pct:num:")) { - pct = std.fmt.parseFloat(f64, f["pct:num:".len..]) catch 100.0; + for (record.fields) |field| { + if (std.mem.eql(u8, field.key, "symbol")) { + if (field.value) |v| symbol = strVal(v); + } else if (std.mem.eql(u8, field.key, "sector")) { + if (field.value) |v| sector = strVal(v); + } else if (std.mem.eql(u8, field.key, "geo")) { + if (field.value) |v| geo = strVal(v); + } else if (std.mem.eql(u8, field.key, 
"asset_class")) { + if (field.value) |v| asset_class = strVal(v); + } else if (std.mem.eql(u8, field.key, "pct")) { + if (field.value) |v| pct = numVal(v); } } - const sym = symbol orelse continue; // skip lines without symbol + const sym = symbol orelse continue; try entries.append(allocator, .{ .symbol = try allocator.dupe(u8, sym), .sector = if (sector) |s| try allocator.dupe(u8, s) else null, @@ -100,6 +97,20 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) ! }; } +fn strVal(v: srf.Value) ?[]const u8 { + return switch (v) { + .string => |s| s, + else => null, + }; +} + +fn numVal(v: srf.Value) f64 { + return switch (v) { + .number => |n| n, + else => 100.0, + }; +} + test "parse classification file" { const data = \\#!srfv1