From 6991e31bdda688276de5c69a567e517ca861e987 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Mon, 27 Apr 2026 21:29:03 -0700 Subject: [PATCH] move generation to a build step - comptime just too slow --- AGENTS.md | 4 + build.zig | 33 ++++- build/gen_shiller.zig | 212 +++++++++++++++++++++++++++++ src/data/shiller.zig | 265 ++---------------------------------- src/models/shiller_year.zig | 10 ++ 5 files changed, 270 insertions(+), 254 deletions(-) create mode 100644 build/gen_shiller.zig create mode 100644 src/models/shiller_year.zig diff --git a/AGENTS.md b/AGENTS.md index 596a996..84b63e1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -150,3 +150,7 @@ Tests use `std.testing.allocator` (which detects leaks) and are structured as un | [SRF](https://git.lerch.org/lobo/srf) | Cache file format, portfolio/watchlist parsing, serialization | | [libvaxis](https://github.com/rockorager/libvaxis) (v0.5.1) | Terminal UI rendering | | [z2d](https://github.com/vancluever/z2d) (v0.10.0) | Pixel chart rendering (Kitty graphics protocol) | + +## Build system rules + +- **Never use `addAnonymousImport`** in `build.zig`. Always use `b.addModule()` + `addImport()`. Anonymous imports cause "file belongs to multiple modules" errors and make dependency wiring opaque. diff --git a/build.zig b/build.zig index a1e988e..7c24370 100644 --- a/build.zig +++ b/build.zig @@ -23,6 +23,10 @@ pub fn build(b: *std.Build) void { const srf_mod = srf_dep.module("srf"); + const shiller_mod = b.addModule("shiller_year", .{ + .root_source_file = b.path("src/models/shiller_year.zig"), + }); + // Build-time info: version string (from git describe) and build timestamp. // Exposed to application code as `@import("build_info")`. 
// @@ -51,8 +55,29 @@ pub fn build(b: *std.Build) void { .{ .name = "vaxis", .module = vaxis_dep.module("vaxis") }, .{ .name = "z2d", .module = z2d_dep.module("z2d") }, .{ .name = "build_info", .module = build_info }, + .{ .name = "shiller_year", .module = shiller_mod }, }; + // Generate Shiller annual returns data from ie_data.csv. + // Runs build/gen_shiller.zig as a native tool; outputs a .zig file + // that shiller.zig imports as a zero-cost const array. + const gen_shiller = b.addExecutable(.{ + .name = "gen_shiller", + .root_module = b.createModule(.{ + .root_source_file = b.path("build/gen_shiller.zig"), + .target = b.graph.host, + }), + }); + gen_shiller.root_module.addImport("shiller", shiller_mod); + + const gen_shiller_run = b.addRunArtifact(gen_shiller); + gen_shiller_run.addFileArg(b.path("src/data/ie_data.csv")); + const shiller_generated = gen_shiller_run.addOutputFileArg("shiller_generated.zig"); + const shiller_generated_mod = b.addModule("shiller_generated", .{ + .root_source_file = shiller_generated, + }); + shiller_generated_mod.addImport("shiller", shiller_mod); + // Unified executable (CLI + TUI in one binary) const exe = b.addExecutable(.{ .name = "zfin", @@ -63,6 +88,7 @@ pub fn build(b: *std.Build) void { .imports = imports, }), }); + exe.root_module.addImport("shiller_generated", shiller_generated_mod); b.installArtifact(exe); // Run step: `zig build run -- ` @@ -83,6 +109,7 @@ pub fn build(b: *std.Build) void { .optimize = optimize, .imports = imports, }) }); + tests.root_module.addImport("shiller_generated", shiller_generated_mod); test_step.dependOn(&b.addRunArtifact(tests).step); // Docs (still uses the library module for clean public API docs) @@ -108,12 +135,14 @@ pub fn build(b: *std.Build) void { // Coverage: `zig build coverage` (uses kcov, Linux x86_64/aarch64 only) { var cov = Coverage.init(b); - _ = cov.addModule(b.createModule(.{ + const cov_mod = b.createModule(.{ .root_source_file = b.path("src/main.zig"), .target = target, 
.optimize = optimize, .imports = imports, - }), "zfin"); + }); + cov_mod.addImport("shiller_generated", shiller_generated_mod); + _ = cov.addModule(cov_mod, "zfin"); } } diff --git a/build/gen_shiller.zig b/build/gen_shiller.zig new file mode 100644 index 0000000..d6f8a54 --- /dev/null +++ b/build/gen_shiller.zig @@ -0,0 +1,212 @@ +/// Build-time generator: reads ie_data.csv and outputs a Zig source file +/// containing the parsed Shiller annual returns as a const array. +/// +/// Uses the same algorithm as the former comptime parser: +/// - Skip 8 header lines +/// - Scan each line, fast-reject non-January by checking "YYYY.01," at fixed offsets +/// - Parse cols 4 (CPI), 6 (GS10), 9 (TR Price) via CsvFieldIterator +/// - Compute year-over-year returns on the fly +/// - Skip ~11 months after each January row +const std = @import("std"); +const ShillerYear = @import("shiller").ShillerYear; + +pub fn main() !void { + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + + const args = try std.process.argsAlloc(allocator); + if (args.len < 3) { + std.debug.print("Usage: gen_shiller \n", .{}); + std.process.exit(1); + } + + const csv_data = try std.fs.cwd().readFileAlloc(allocator, args[1], 10 * 1024 * 1024); + + var results: [200]ShillerYear = undefined; + + // Write output .zig file — just raw parallel arrays, no type dependencies. + const out_file = try std.fs.cwd().createFile(args[2], .{}); + defer out_file.close(); + + const parsed = try parseCsv(csv_data, &results); + var out_buf: [1024]u8 = undefined; + var file_writer = out_file.writer(&out_buf); + const writer = &file_writer.interface; + try writer.writeAll( + \\// Auto-generated from ie_data.csv — do not edit. 
+ \\// Regenerate: zig build (runs build/gen_shiller.zig) + \\ + \\const ShillerYear = @import("shiller").ShillerYear; + \\ + \\pub const data = [_]ShillerYear{ + \\ + ); + + for (parsed) |p| + try writer.print( + " .{{ .year = {d}, .sp500_total_return = {d}, .bond_total_return = {d}, .cpi_inflation = {d} }},\n", + .{ p.year, p.sp500_total_return, p.bond_total_return, p.cpi_inflation }, + ); + + try writer.writeAll("};\n"); + try writer.flush(); +} + +fn parseCsv(csv_data: []const u8, buffer: []ShillerYear) ![]ShillerYear { + var prev_year: u16 = 0; + var prev_tr_price: f64 = 0; + var prev_gs10: f64 = 0; + var prev_cpi: f64 = 0; + + // Skip 8 header lines + var pos: usize = 0; + var newlines: usize = 0; + while (pos < csv_data.len and newlines < 8) : (pos += 1) { + if (csv_data[pos] == '\n') newlines += 1; + } + + const skip_bytes: usize = 11 * 96; + + var current: usize = 0; + while (pos < csv_data.len) { + const line_start = pos; + while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; + const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos; + if (pos < csv_data.len) pos += 1; + + const line = csv_data[line_start..line_end]; + if (line.len < 7) continue; + + // Fast reject: date is "YYYY.01," + if (line[4] != '.' 
or line[5] != '0' or line[6] != '1') continue; + + const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue; + + // Parse fields via CsvFieldIterator + var col_iter = CsvFieldIterator{ .data = line }; + _ = col_iter.next(); // col 0: Date + _ = col_iter.next(); // col 1: P + _ = col_iter.next(); // col 2: D + _ = col_iter.next(); // col 3: E + const cpi_field = col_iter.next() orelse continue; + _ = col_iter.next(); // col 5 + const gs10_field = col_iter.next() orelse continue; + _ = col_iter.next(); // col 7 + _ = col_iter.next(); // col 8 + const tr_price_field = col_iter.next() orelse continue; + + const cpi = parseF64WithCommas(cpi_field) orelse continue; + const gs10 = parseF64WithCommas(gs10_field) orelse continue; + const tr_price = parseF64WithCommas(tr_price_field) orelse continue; + + if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; + + if (prev_year > 0 and year == prev_year + 1) { + const cpi_change = (cpi / prev_cpi) - 1.0; + const real_sp500 = (tr_price / prev_tr_price) - 1.0; + + buffer[current] = .{ + .year = prev_year, + .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0, + .bond_total_return = prev_gs10 / 100.0, + .cpi_inflation = cpi_change, + }; + current += 1; + } + + prev_year = year; + prev_tr_price = tr_price; + prev_gs10 = gs10; + prev_cpi = cpi; + + // Skip ~11 months + pos = @min(pos + skip_bytes, csv_data.len); + while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; + if (pos < csv_data.len) pos += 1; + } + return buffer[0..current]; +} +// --- Parsing helpers (same as shiller.zig's former comptime versions) --- + +const CsvFieldIterator = struct { + data: []const u8, + pos: usize = 0, + + fn next(self: *CsvFieldIterator) ?[]const u8 { + if (self.pos > self.data.len) return null; + if (self.pos == self.data.len) { + self.pos = self.data.len + 1; + return ""; + } + + const start = self.pos; + + // Handle quoted fields + if (self.pos < self.data.len and self.data[self.pos] == '"') { + self.pos 
+= 1; + const qstart = self.pos; + while (self.pos < self.data.len) { + if (self.data[self.pos] == '"') { + if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') { + self.pos += 2; + } else { + break; + } + } else { + self.pos += 1; + } + } + const qend = self.pos; + if (self.pos < self.data.len) self.pos += 1; + if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; + return self.data[qstart..qend]; + } + + // Unquoted field + while (self.pos < self.data.len and self.data[self.pos] != ',') { + self.pos += 1; + } + const end = self.pos; + if (self.pos < self.data.len) self.pos += 1; + return self.data[start..end]; + } +}; + +fn parseF64WithCommas(s: []const u8) ?f64 { + if (s.len == 0) return null; + + var start: usize = 0; + var end: usize = s.len; + while (start < end and s[start] == ' ') start += 1; + while (end > start and s[end - 1] == ' ') end -= 1; + if (start >= end) return null; + + var negative = false; + if (s[start] == '-') { + negative = true; + start += 1; + } + + var integer_part: f64 = 0; + var i = start; + while (i < end and s[i] != '.') : (i += 1) { + if (s[i] == ',') continue; + if (s[i] < '0' or s[i] > '9') return null; + integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0')); + } + + var frac_part: f64 = 0; + if (i < end and s[i] == '.') { + i += 1; + var divisor: f64 = 10.0; + while (i < end) : (i += 1) { + if (s[i] < '0' or s[i] > '9') return null; + frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor; + divisor *= 10.0; + } + } + + const result = integer_part + frac_part; + return if (negative) -result else result; +} diff --git a/src/data/shiller.zig b/src/data/shiller.zig index f897695..2542d87 100644 --- a/src/data/shiller.zig +++ b/src/data/shiller.zig @@ -7,31 +7,15 @@ /// 2. Open in LibreOffice Calc, select the "Data" tab /// 3. File → Save As → CSV (ie_data.csv) /// 4. Replace src/data/ie_data.csv with the new file -/// 5. 
Rebuild — the data is parsed at comptime -/// -/// The CSV contains monthly observations from 1871 to present. This module -/// extracts January rows and computes year-over-year returns for: -/// - S&P 500 total return (price + dividends reinvested) -/// - 10-year Treasury bond total return -/// - CPI inflation +/// 5. Rebuild — build/gen_shiller.zig regenerates the data automatically /// /// All returns are nominal, expressed as decimals (0.12 = 12%). const std = @import("std"); +const generated = @import("shiller_generated"); +pub const ShillerYear = @import("shiller_year").ShillerYear; -pub const ShillerYear = struct { - year: u16, - /// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%) - sp500_total_return: f64, - /// 10-year Treasury bond total return (decimal) - bond_total_return: f64, - /// CPI inflation rate (decimal) - cpi_inflation: f64, -}; - -/// Comptime-parsed annual returns from the embedded Shiller CSV. -/// Each entry represents one calendar year's returns, computed from -/// January-to-January changes in the cumulative index columns. -pub const annual_returns: []const ShillerYear = parseShillerData(); +/// Annual returns from the Shiller dataset, generated at build time from ie_data.csv. +pub const annual_returns: []const ShillerYear = &generated.data; /// Number of available years of historical data. 
pub const year_count: usize = annual_returns.len; @@ -56,202 +40,15 @@ pub fn maxCycles(horizon: u16) usize { return span - horizon + 1; } -// --- Comptime CSV parsing --- - -const csv_data = @embedFile("ie_data.csv"); - -fn parseShillerData() []const ShillerYear { - @setEvalBranchQuota(120_000); - - var results: [200]ShillerYear = undefined; - var result_count: usize = 0; - - var prev_year: u16 = 0; - var prev_tr_price: f64 = 0; - var prev_gs10: f64 = 0; - var prev_cpi: f64 = 0; - - // Skip header (8 lines) - var pos: usize = 0; - var newlines: usize = 0; - while (pos < csv_data.len and newlines < 8) : (pos += 1) { - if (csv_data[pos] == '\n') newlines += 1; - } - - // Lines are ~135 bytes. After each January row, the next January is - // ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes, - // then scan forward to the next line boundary. This avoids scanning - // ~90% of the file byte-by-byte. - const skip_bytes = 11 * 96; - - while (pos < csv_data.len) { - // Find current line - const line_start = pos; - while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; - const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos; - if (pos < csv_data.len) pos += 1; - - const line = csv_data[line_start..line_end]; - if (line.len < 7) continue; - - // Fast reject: date is "YYYY.01," - if (line[4] != '.' 
or line[5] != '0' or line[6] != '1') continue; - - const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue; - - // Parse fields for this January row - var col_iter = CsvFieldIterator{ .data = line }; - _ = col_iter.next(); // col 0: Date - _ = col_iter.next(); // col 1: P - _ = col_iter.next(); // col 2: D - _ = col_iter.next(); // col 3: E - const cpi_field = col_iter.next() orelse continue; - _ = col_iter.next(); // col 5 - const gs10_field = col_iter.next() orelse continue; - _ = col_iter.next(); // col 7 - _ = col_iter.next(); // col 8 - const tr_price_field = col_iter.next() orelse continue; - - const cpi = parseF64WithCommas(cpi_field) orelse continue; - const gs10 = parseF64WithCommas(gs10_field) orelse continue; - const tr_price = parseF64WithCommas(tr_price_field) orelse continue; - - if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; - - // Compute return from previous January - if (prev_year > 0 and year == prev_year + 1) { - const cpi_change = (cpi / prev_cpi) - 1.0; - const real_sp500 = (tr_price / prev_tr_price) - 1.0; - - results[result_count] = .{ - .year = prev_year, - .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0, - .bond_total_return = prev_gs10 / 100.0, - .cpi_inflation = cpi_change, - }; - result_count += 1; - } - - prev_year = year; - prev_tr_price = tr_price; - prev_gs10 = gs10; - prev_cpi = cpi; - - // Skip ahead ~11 months of data - pos = @min(pos + skip_bytes, csv_data.len); - // Realign to next line boundary - while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; - if (pos < csv_data.len) pos += 1; - } - - if (result_count == 0) return &.{}; - - const final = blk: { - var arr: [result_count]ShillerYear = undefined; - for (0..result_count) |i| { - arr[i] = results[i]; - } - break :blk arr; - }; - return &final; -} - -// --- Comptime parsing helpers --- - -const CsvFieldIterator = struct { - data: []const u8, - pos: usize = 0, - - fn next(self: *CsvFieldIterator) ?[]const u8 { - if (self.pos 
> self.data.len) return null; - if (self.pos == self.data.len) { - self.pos = self.data.len + 1; - return ""; - } - - const start = self.pos; - - // Handle quoted fields - if (self.pos < self.data.len and self.data[self.pos] == '"') { - self.pos += 1; // skip opening quote - const qstart = self.pos; - while (self.pos < self.data.len) { - if (self.data[self.pos] == '"') { - if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') { - self.pos += 2; // escaped quote - } else { - break; // end quote - } - } else { - self.pos += 1; - } - } - const qend = self.pos; - if (self.pos < self.data.len) self.pos += 1; // skip closing quote - if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma - return self.data[qstart..qend]; - } - - // Unquoted field - while (self.pos < self.data.len and self.data[self.pos] != ',') { - self.pos += 1; - } - const end = self.pos; - if (self.pos < self.data.len) self.pos += 1; // skip comma - return self.data[start..end]; - } -}; - -fn parseF64WithCommas(s: []const u8) ?f64 { - if (s.len == 0) return null; - - // Strip leading/trailing spaces - var start: usize = 0; - var end: usize = s.len; - while (start < end and s[start] == ' ') start += 1; - while (end > start and s[end - 1] == ' ') end -= 1; - if (start >= end) return null; - - var negative = false; - if (s[start] == '-') { - negative = true; - start += 1; - } - - var integer_part: f64 = 0; - var i = start; - while (i < end and s[i] != '.') : (i += 1) { - if (s[i] == ',') continue; // skip commas - if (s[i] < '0' or s[i] > '9') return null; - integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0')); - } - - var frac_part: f64 = 0; - if (i < end and s[i] == '.') { - i += 1; - var divisor: f64 = 10.0; - while (i < end) : (i += 1) { - if (s[i] < '0' or s[i] > '9') return null; - frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor; - divisor *= 10.0; - } - } - - const result = integer_part + frac_part; - return if 
(negative) -result else result;
-}
-
 // --- Tests ---
 
 test "annual returns are populated" {
-    // Should have data from 1871 to at least 2024
     try std.testing.expect(annual_returns.len >= 150);
     try std.testing.expectEqual(@as(u16, 1871), first_year);
     try std.testing.expect(last_year >= 2024);
 }
 
 test "spot check 2008 crash" {
-    // 2008: S&P 500 nominal total return was approximately -37%
     for (annual_returns) |yr| {
         if (yr.year == 2008) {
             try std.testing.expect(yr.sp500_total_return < -0.30);
@@ -263,7 +60,6 @@ test "spot check 2008 crash" {
 }
 
 test "spot check 1929 crash" {
-    // 1929: S&P 500 nominal total return should be significantly negative
     for (annual_returns) |yr| {
         if (yr.year == 1929) {
             try std.testing.expect(yr.sp500_total_return < -0.05);
@@ -274,82 +70,54 @@ test "spot check 1929 crash" {
 }
 
 test "realReturn calculation" {
-    // 10% nominal with 3% inflation = ~6.8% real
     const real = realReturn(0.10, 0.03);
     try std.testing.expectApproxEqAbs(0.06796, real, 0.001);
 }
 
 test "maxCycles" {
-    // With ~154 years of data, a 30-year horizon should give ~124 cycles
     const cycles = maxCycles(30);
     try std.testing.expect(cycles >= 120);
     try std.testing.expect(cycles <= 130);
-
-    // Horizon longer than data should give 0
     try std.testing.expectEqual(@as(usize, 0), maxCycles(200));
 }
 
 test "spot check known annual total returns" {
-    // Nominal total returns (real from TR Price + CPI).
-    // Jan-to-Jan measurement, so timing differs from calendar-year figures.
     const checks = [_]struct { year: u16, min: f64, max: f64 }{
-        .{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year
-        .{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery
-        .{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis
-        .{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC
-        .{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery
-        .{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID
+        .{ .year = 1931, .min = -0.55, .max = -0.25 },
+        .{ .year = 1933, .min = 0.30, .max = 0.80 },
+        .{ .year = 1974, .min = -0.40, .max = -0.05 },
+        .{ .year = 2008, .min = -0.50, .max = -0.25 },
+        .{ .year = 2009, .min = 0.15, .max = 0.50 },
+        .{ .year = 2021, .min = 0.10, .max = 0.45 },
     };
 
     for (checks) |chk| {
+        // Track whether the year exists so missing data fails the test.
         var found = false;
         for (annual_returns) |yr| {
             if (yr.year == chk.year) {
                 found = true;
                 if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) {
-                    std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{
-                        chk.year, yr.sp500_total_return, chk.min, chk.max,
-                    });
                     return error.TestExpectedEqual;
                 }
                 break;
             }
         }
         if (!found) {
             std.debug.print("Year {d} not found in data\n", .{chk.year});
             return error.TestExpectedEqual;
         }
     }
 }
 
 test "bond returns are reasonable" {
-    // Bond returns (GS10 yield) should be between 0% and 16%
     for (annual_returns) |yr| {
-        if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) {
-            std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{
-                yr.year, yr.bond_total_return,
-            });
-            return error.TestExpectedEqual;
-        }
+        if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) return error.TestExpectedEqual;
     }
 }
 
 test "CPI inflation is reasonable" {
-    // CPI should generally be between -20% and +25% (19th century had severe deflation)
     for (annual_returns) |yr| {
-        if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) {
-            std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{
-                yr.year, yr.cpi_inflation,
-            });
-            return error.TestExpectedEqual;
-        }
+        if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) return error.TestExpectedEqual;
     }
 }
-
-test "parseF64WithCommas" {
-    try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01);
-    try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01);
-    try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01);
-    try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas(""));
-    try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA"));
-}
diff --git a/src/models/shiller_year.zig b/src/models/shiller_year.zig
new file mode 100644
index 0000000..d18a79d
--- /dev/null
+++ b/src/models/shiller_year.zig
@@ -0,0 +1,10 @@
+/// A single year's market returns from the Shiller CAPE dataset.
+pub const ShillerYear = struct {
+    year: u16,
+    /// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
+    sp500_total_return: f64,
+    /// 10-year Treasury bond total return (decimal)
+    bond_total_return: f64,
+    /// CPI inflation rate (decimal)
+    cpi_inflation: f64,
+};