classification.zig to use srf parser

This commit is contained in:
Emil Lerch 2026-03-03 13:41:34 -08:00
parent 9819c93cfe
commit d6a104d2d5
Signed by: lobo
GPG key ID: A7B62D657EF764F8

View file

@ -10,6 +10,7 @@
/// symbol::02315N600,asset_class::International Developed,pct:num:20
/// symbol::02315N600,asset_class::Bonds,pct:num:15
const std = @import("std");
const srf = @import("srf");
/// A single classification entry for a symbol.
pub const ClassificationEntry = struct {
@ -41,7 +42,7 @@ pub const ClassificationMap = struct {
};
/// Parse a metadata SRF file into a ClassificationMap.
/// Each line is: symbol::<SYM>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
/// Each record has: symbol::<SYM>,sector::<S>,geo::<G>,asset_class::<A>,pct:num:<P>
/// All fields except symbol are optional. pct defaults to 100.
pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !ClassificationMap {
var entries = std.ArrayList(ClassificationEntry).empty;
@ -55,36 +56,32 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
entries.deinit(allocator);
}
var line_iter = std.mem.splitScalar(u8, data, '\n');
while (line_iter.next()) |line| {
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
if (trimmed.len == 0 or trimmed[0] == '#') continue;
if (std.mem.startsWith(u8, trimmed, "#!")) continue;
var reader = std.Io.Reader.fixed(data);
const parsed = srf.parse(&reader, allocator, .{ .alloc_strings = false }) catch return error.InvalidData;
defer parsed.deinit();
// Parse comma-separated key::value pairs
for (parsed.records.items) |record| {
var symbol: ?[]const u8 = null;
var sector: ?[]const u8 = null;
var geo: ?[]const u8 = null;
var asset_class: ?[]const u8 = null;
var pct: f64 = 100.0;
var field_iter = std.mem.splitScalar(u8, trimmed, ',');
while (field_iter.next()) |field| {
const f = std.mem.trim(u8, field, &std.ascii.whitespace);
if (std.mem.startsWith(u8, f, "symbol::")) {
symbol = f["symbol::".len..];
} else if (std.mem.startsWith(u8, f, "sector::")) {
sector = f["sector::".len..];
} else if (std.mem.startsWith(u8, f, "geo::")) {
geo = f["geo::".len..];
} else if (std.mem.startsWith(u8, f, "asset_class::")) {
asset_class = f["asset_class::".len..];
} else if (std.mem.startsWith(u8, f, "pct:num:")) {
pct = std.fmt.parseFloat(f64, f["pct:num:".len..]) catch 100.0;
for (record.fields) |field| {
if (std.mem.eql(u8, field.key, "symbol")) {
if (field.value) |v| symbol = strVal(v);
} else if (std.mem.eql(u8, field.key, "sector")) {
if (field.value) |v| sector = strVal(v);
} else if (std.mem.eql(u8, field.key, "geo")) {
if (field.value) |v| geo = strVal(v);
} else if (std.mem.eql(u8, field.key, "asset_class")) {
if (field.value) |v| asset_class = strVal(v);
} else if (std.mem.eql(u8, field.key, "pct")) {
if (field.value) |v| pct = numVal(v);
}
}
const sym = symbol orelse continue; // skip lines without symbol
const sym = symbol orelse continue;
try entries.append(allocator, .{
.symbol = try allocator.dupe(u8, sym),
.sector = if (sector) |s| try allocator.dupe(u8, s) else null,
@ -100,6 +97,20 @@ pub fn parseClassificationFile(allocator: std.mem.Allocator, data: []const u8) !
};
}
/// Extract the string payload from an SRF value.
/// Returns the slice held by the `.string` variant unchanged (no copy is made
/// here); every other variant yields null.
fn strVal(v: srf.Value) ?[]const u8 {
    switch (v) {
        .string => |text| return text,
        else => return null,
    }
}
/// Extract the numeric payload from an SRF value.
/// Returns the number held by the `.number` variant; every other variant
/// falls back to 100.0.
fn numVal(v: srf.Value) f64 {
    switch (v) {
        .number => |amount| return amount,
        else => return 100.0,
    }
}
test "parse classification file" {
const data =
\\#!srfv1