classification.zig to use srf parser

2026-03-03 13:41:34 -08:00 · 2026-03-03 13:41:34 -08:00 · d6a104d2d5
commit d6a104d2d5
parent 9819c93cfe
1 changed files with 32 additions and 21 deletions
--- a/src/models/classification.zig
+++ b/src/models/classification.zig
@ -10,6 +10,7 @@
 ///   symbol::02315N600,asset_class::International Developed,pct:num:20
 ///   symbol::02315N600,asset_class::Bonds,pct:num:15
 const std = @import("std");
+const srf = @import("srf");

 /// A single classification entry for a symbol.
 pub const ClassificationEntry = struct {
@ -41,7 +42,7 @@ pub const ClassificationMap = struct {
 };

 /// Parse a metadata SRF file into a ClassificationMap.
-/// Each line is: symbol::<SYM>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
+/// Each record has: symbol::<SYM>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
 /// All fields except symbol are optional. pct defaults to 100.
 pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap {
    var entries = std.ArrayList(ClassificationEntry).empty;
@ -55,36 +56,32 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
        entries.deinit(allocator);
    }

-    var line_iter = std.mem.splitScalar(u8, data, '\n');
-    while (line_iter.next()) |line| {
-        const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
-        if (trimmed.len == 0 or trimmed[0] == '#') continue;
-        if (std.mem.startsWith(u8, trimmed, "#!")) continue;
+    var reader = std.Io.Reader.fixed(data);
+    const parsed = srf.parse(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
+    defer parsed.deinit();

-        // Parse comma-separated key::value pairs
+    for (parsed.records.items) |record| {
        var symbol: ?[]const u8 = null;
        var sector: ?[]const u8 = null;
        var geo: ?[]const u8 = null;
        var asset_class: ?[]const u8 = null;
        var pct: f64 = 100.0;

-        var field_iter = std.mem.splitScalar(u8, trimmed, ',');
-        while (field_iter.next()) |field| {
-            const f = std.mem.trim(u8, field, &std.ascii.whitespace);
-            if (std.mem.startsWith(u8, f, "symbol::")) {
-                symbol = f["symbol::".len..];
-            } else if (std.mem.startsWith(u8, f, "sector::")) {
-                sector = f["sector::".len..];
-            } else if (std.mem.startsWith(u8, f, "geo::")) {
-                geo = f["geo::".len..];
-            } else if (std.mem.startsWith(u8, f, "asset_class::")) {
-                asset_class = f["asset_class::".len..];
-            } else if (std.mem.startsWith(u8, f, "pct:num:")) {
-                pct = std.fmt.parseFloat(f64, f["pct:num:".len..]) catch 100.0;
+        for (record.fields) |field| {
+            if (std.mem.eql(u8, field.key, "symbol")) {
+                if (field.value) |v| symbol = strVal(v);
+            } else if (std.mem.eql(u8, field.key, "sector")) {
+                if (field.value) |v| sector = strVal(v);
+            } else if (std.mem.eql(u8, field.key, "geo")) {
+                if (field.value) |v| geo = strVal(v);
+            } else if (std.mem.eql(u8, field.key, "asset_class")) {
+                if (field.value) |v| asset_class = strVal(v);
+            } else if (std.mem.eql(u8, field.key, "pct")) {
+                if (field.value) |v| pct = numVal(v);
            }
        }

-        const sym = symbol orelse continue; // skip lines without symbol
+        const sym = symbol orelse continue;
        try entries.append(allocator, .{
            .symbol = try allocator.dupe(u8, sym),
            .sector = if (sector) |s| try allocator.dupe(u8, s) else null,
@ -100,6 +97,20 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
    };
 }

+/// Extract the string payload from an SRF value.
+/// Returns the slice when `v` is the `.string` variant and null for every
+/// other variant, so a non-string field is treated the same as an absent one.
+/// The returned slice is the payload itself (no copy is made here).
+fn strVal(v: srf.Value) ?[]const u8 {
+    return switch (v) {
+        .string => |s| s,
+        else => null,
+    };
+}
+
+/// Extract the numeric payload from an SRF value.
+/// Returns the number when `v` is the `.number` variant; any other variant
+/// yields 100.0, the same default the parser uses for `pct` when the field
+/// is missing.
+fn numVal(v: srf.Value) f64 {
+    return switch (v) {
+        .number => |n| n,
+        // Non-numeric value: fall back to the pct default rather than failing.
+        else => 100.0,
+    };
+}
+
 test "parse classification file" {
    const data =
        \\#!srfv1