move generation to a build step - comptime just too slow

This commit is contained in:
Emil Lerch 2026-04-27 21:29:03 -07:00
parent a129d5a372
commit 6991e31bdd
Signed by: lobo
GPG key ID: A7B62D657EF764F8
5 changed files with 270 additions and 254 deletions

View file

@ -150,3 +150,7 @@ Tests use `std.testing.allocator` (which detects leaks) and are structured as un
| [SRF](https://git.lerch.org/lobo/srf) | Cache file format, portfolio/watchlist parsing, serialization |
| [libvaxis](https://github.com/rockorager/libvaxis) (v0.5.1) | Terminal UI rendering |
| [z2d](https://github.com/vancluever/z2d) (v0.10.0) | Pixel chart rendering (Kitty graphics protocol) |
## Build system rules
- **Never use `addAnonymousImport`** in `build.zig`. Always use `b.addModule()` + `addImport()`. Anonymous imports cause "file belongs to multiple modules" errors and make dependency wiring opaque.

View file

@ -23,6 +23,10 @@ pub fn build(b: *std.Build) void {
const srf_mod = srf_dep.module("srf");
const shiller_mod = b.addModule("shiller_year", .{
.root_source_file = b.path("src/models/shiller_year.zig"),
});
// Build-time info: version string (from git describe) and build timestamp.
// Exposed to application code as `@import("build_info")`.
//
@ -51,8 +55,29 @@ pub fn build(b: *std.Build) void {
.{ .name = "vaxis", .module = vaxis_dep.module("vaxis") },
.{ .name = "z2d", .module = z2d_dep.module("z2d") },
.{ .name = "build_info", .module = build_info },
.{ .name = "shiller_year", .module = shiller_mod },
};
// Generate Shiller annual returns data from ie_data.csv.
// Runs build/gen_shiller.zig as a native tool; outputs a .zig file
// that shiller.zig imports as a zero-cost const array.
const gen_shiller = b.addExecutable(.{
.name = "gen_shiller",
.root_module = b.createModule(.{
.root_source_file = b.path("build/gen_shiller.zig"),
.target = b.graph.host,
}),
});
gen_shiller.root_module.addImport("shiller", shiller_mod);
const gen_shiller_run = b.addRunArtifact(gen_shiller);
gen_shiller_run.addFileArg(b.path("src/data/ie_data.csv"));
const shiller_generated = gen_shiller_run.addOutputFileArg("shiller_generated.zig");
const shiller_generated_mod = b.addModule("shiller_generated", .{
.root_source_file = shiller_generated,
});
shiller_generated_mod.addImport("shiller", shiller_mod);
// Unified executable (CLI + TUI in one binary)
const exe = b.addExecutable(.{
.name = "zfin",
@ -63,6 +88,7 @@ pub fn build(b: *std.Build) void {
.imports = imports,
}),
});
exe.root_module.addImport("shiller_generated", shiller_generated_mod);
b.installArtifact(exe);
// Run step: `zig build run -- <args>`
@ -83,6 +109,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.imports = imports,
}) });
tests.root_module.addImport("shiller_generated", shiller_generated_mod);
test_step.dependOn(&b.addRunArtifact(tests).step);
// Docs (still uses the library module for clean public API docs)
@ -108,12 +135,14 @@ pub fn build(b: *std.Build) void {
// Coverage: `zig build coverage` (uses kcov, Linux x86_64/aarch64 only)
{
var cov = Coverage.init(b);
_ = cov.addModule(b.createModule(.{
const cov_mod = b.createModule(.{
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
.imports = imports,
}), "zfin");
});
cov_mod.addImport("shiller_generated", shiller_generated_mod);
_ = cov.addModule(cov_mod, "zfin");
}
}

212
build/gen_shiller.zig Normal file
View file

@ -0,0 +1,212 @@
//! Build-time generator: reads ie_data.csv and outputs a Zig source file
//! containing the parsed Shiller annual returns as a const array.
//!
//! Uses the same algorithm as the former comptime parser:
//! - Skip 8 header lines
//! - Scan each line, fast-reject non-January by checking "YYYY.01," at fixed offsets
//! - Parse cols 4 (CPI), 6 (GS10), 9 (TR Price) via CsvFieldIterator
//! - Compute year-over-year returns on the fly
//! - Skip ~11 months after each January row
const std = @import("std");
const ShillerYear = @import("shiller").ShillerYear;
/// Entry point of the build-time generator.
///
/// Usage: `gen_shiller <ie_data.csv> <output.zig>`
///
/// Reads the CSV named by the first argument, parses it with `parseCsv`, and
/// writes a Zig source file to the path given by the second argument. The
/// generated file imports `ShillerYear` from the "shiller" module and exposes
/// the parsed rows as `pub const data`.
///
/// Prints a usage line and exits with status 1 when fewer than two arguments
/// are supplied. Any I/O, allocation or parse error is returned to the caller.
pub fn main() !void {
    // Everything allocated here lives for the whole (short) run; free it in one go.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const args = try std.process.argsAlloc(allocator);
    if (args.len < 3) {
        std.debug.print("Usage: gen_shiller <ie_data.csv> <output.zig>\n", .{});
        std.process.exit(1);
    }

    const csv_data = try std.fs.cwd().readFileAlloc(allocator, args[1], 10 * 1024 * 1024);

    // parseCsv emits at most one entry per input line, so the line count is a
    // safe upper bound for the result buffer (no fixed year limit).
    const max_rows = std.mem.count(u8, csv_data, "\n") + 1;
    const results = try allocator.alloc(ShillerYear, max_rows);
    const parsed = try parseCsv(csv_data, results);

    // Create the output only after parsing, so a parse failure never leaves a
    // truncated file behind.
    const out_file = try std.fs.cwd().createFile(args[2], .{});
    defer out_file.close();

    var out_buf: [1024]u8 = undefined;
    var file_writer = out_file.writer(&out_buf);
    const writer = &file_writer.interface;

    try writer.writeAll(
        \\// Auto-generated from ie_data.csv — do not edit.
        \\// Regenerate: zig build (runs build/gen_shiller.zig)
        \\
        \\const ShillerYear = @import("shiller").ShillerYear;
        \\
        \\pub const data = [_]ShillerYear{
        \\
    );
    // One struct literal per parsed year.
    for (parsed) |p| {
        try writer.print(
            "    .{{ .year = {d}, .sp500_total_return = {d}, .bond_total_return = {d}, .cpi_inflation = {d} }},\n",
            .{ p.year, p.sp500_total_return, p.bond_total_return, p.cpi_inflation },
        );
    }
    try writer.writeAll("};\n");
    // The writer is buffered; nothing is guaranteed to reach the file until flushed.
    try writer.flush();
}
/// Parses the Shiller `ie_data.csv` text into one record per year.
///
/// Only January rows (date field starting `YYYY.01`) are examined. For each
/// pair of January rows from consecutive years, one entry is written for the
/// earlier year:
///   - `cpi_inflation`: CPI(next Jan) / CPI(this Jan) - 1
///   - `sp500_total_return`: the TR-price ratio compounded with `cpi_inflation`
///   - `bond_total_return`: this January's GS10 value divided by 100
///
/// January rows whose CPI, GS10 or TR-price field is missing, unparseable or
/// zero are skipped.
///
/// Parameters:
///   - `csv_data`: full CSV contents; the first 8 lines are treated as header.
///   - `buffer`: caller-provided storage for the results.
///
/// Returns the filled prefix of `buffer`, or `error.BufferTooSmall` when the
/// data contains more years than `buffer` can hold.
fn parseCsv(csv_data: []const u8, buffer: []ShillerYear) ![]ShillerYear {
    var prev_year: u16 = 0;
    var prev_tr_price: f64 = 0;
    var prev_gs10: f64 = 0;
    var prev_cpi: f64 = 0;

    // Skip 8 header lines
    var pos: usize = 0;
    var newlines: usize = 0;
    while (pos < csv_data.len and newlines < 8) : (pos += 1) {
        if (csv_data[pos] == '\n') newlines += 1;
    }

    // After a January row, jump ahead 11 lines' worth of bytes using 96 as the
    // per-line length, then realign to a line start.
    const skip_bytes: usize = 11 * 96;
    var current: usize = 0;

    while (pos < csv_data.len) {
        // Isolate the current line (without its trailing "\n" or "\r\n").
        const line_start = pos;
        while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
        const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
        if (pos < csv_data.len) pos += 1;
        const line = csv_data[line_start..line_end];
        if (line.len < 7) continue;

        // Fast reject: date is "YYYY.01,"
        if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
        const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue;

        // Parse fields via CsvFieldIterator
        var col_iter = CsvFieldIterator{ .data = line };
        _ = col_iter.next(); // col 0: Date
        _ = col_iter.next(); // col 1: P
        _ = col_iter.next(); // col 2: D
        _ = col_iter.next(); // col 3: E
        const cpi_field = col_iter.next() orelse continue;
        _ = col_iter.next(); // col 5
        const gs10_field = col_iter.next() orelse continue;
        _ = col_iter.next(); // col 7
        _ = col_iter.next(); // col 8
        const tr_price_field = col_iter.next() orelse continue;

        const cpi = parseF64WithCommas(cpi_field) orelse continue;
        const gs10 = parseF64WithCommas(gs10_field) orelse continue;
        const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
        // Zero values would later be used as divisors (or signal missing data).
        if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;

        // Emit a record for the previous year only when the two January rows
        // are exactly one year apart.
        if (prev_year > 0 and year == prev_year + 1) {
            // Refuse to write past the caller's buffer instead of overrunning it.
            if (current >= buffer.len) return error.BufferTooSmall;

            const cpi_change = (cpi / prev_cpi) - 1.0;
            const real_sp500 = (tr_price / prev_tr_price) - 1.0;
            buffer[current] = .{
                .year = prev_year,
                .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
                .bond_total_return = prev_gs10 / 100.0,
                .cpi_inflation = cpi_change,
            };
            current += 1;
        }

        prev_year = year;
        prev_tr_price = tr_price;
        prev_gs10 = gs10;
        prev_cpi = cpi;

        // Skip ~11 months, then realign to the start of the next line.
        pos = @min(pos + skip_bytes, csv_data.len);
        while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
        if (pos < csv_data.len) pos += 1;
    }

    return buffer[0..current];
}
// --- Parsing helpers (same as shiller.zig's former comptime versions) ---
/// Zero-copy iterator over the comma-separated fields of one CSV line.
///
/// Returned slices point into `data`. A field that starts with `"` runs to the
/// matching closing quote and may contain commas; doubled quotes (`""`) inside
/// it are skipped over but NOT collapsed in the returned slice.
///
/// A trailing comma produces one final empty field ("a," -> "a", ""); a line
/// without a trailing comma does not ("a,b" -> "a", "b"). Empty input yields a
/// single empty field.
const CsvFieldIterator = struct {
    /// The line being split (without line terminator).
    data: []const u8,
    /// Offset of the next field's first byte; `data.len + 1` once exhausted.
    pos: usize = 0,

    /// Returns the next field, or `null` when all fields have been produced.
    fn next(self: *CsvFieldIterator) ?[]const u8 {
        const len = self.data.len;
        if (self.pos > len) return null;
        if (self.pos == len) {
            // Empty input, or the previous field ended with a comma: emit the
            // final empty field exactly once.
            self.pos = len + 1;
            return "";
        }

        // Handle quoted fields
        if (self.data[self.pos] == '"') {
            self.pos += 1; // skip opening quote
            const qstart = self.pos;
            while (self.pos < len) {
                if (self.data[self.pos] == '"') {
                    if (self.pos + 1 < len and self.data[self.pos + 1] == '"') {
                        self.pos += 2; // escaped quote
                    } else {
                        break; // closing quote
                    }
                } else {
                    self.pos += 1;
                }
            }
            const qend = self.pos;
            if (self.pos < len) self.pos += 1; // skip closing quote
            if (self.pos >= len) {
                // Line ended with this field: mark exhausted so no spurious
                // empty field follows.
                self.pos = len + 1;
            } else if (self.data[self.pos] == ',') {
                self.pos += 1; // skip separator
            }
            return self.data[qstart..qend];
        }

        // Unquoted field
        const start = self.pos;
        while (self.pos < len and self.data[self.pos] != ',') {
            self.pos += 1;
        }
        const end = self.pos;
        // Step over the separator, or mark exhausted when the line ended here.
        self.pos = if (end < len) end + 1 else len + 1;
        return self.data[start..end];
    }
};
/// Parses a decimal number that may be padded with spaces and may contain
/// thousands-separator commas in its integer part (e.g. " 1,036,099.07 ").
///
/// Accepts an optional leading `-`, digits (with commas) before an optional
/// `.`, and digits after it. Returns `null` for empty/blank input, for any
/// other character, and for input containing no digit at all (such as "-",
/// "." or ",").
fn parseF64WithCommas(s: []const u8) ?f64 {
    const trimmed = std.mem.trim(u8, s, " ");
    if (trimmed.len == 0) return null;

    const negative = trimmed[0] == '-';
    const body = if (negative) trimmed[1..] else trimmed;

    // Counts digits seen on either side of the decimal point so that
    // digit-less input is rejected instead of being read as zero.
    var digit_count: usize = 0;

    var integer_part: f64 = 0;
    var i: usize = 0;
    while (i < body.len and body[i] != '.') : (i += 1) {
        const c = body[i];
        if (c == ',') continue; // thousands separator
        if (!std.ascii.isDigit(c)) return null;
        integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(c - '0'));
        digit_count += 1;
    }

    var frac_part: f64 = 0;
    if (i < body.len) {
        // body[i] is the '.' that ended the loop above.
        i += 1;
        var divisor: f64 = 10.0;
        while (i < body.len) : (i += 1) {
            const c = body[i];
            if (!std.ascii.isDigit(c)) return null;
            frac_part += @as(f64, @floatFromInt(c - '0')) / divisor;
            divisor *= 10.0;
            digit_count += 1;
        }
    }

    if (digit_count == 0) return null;

    const result = integer_part + frac_part;
    return if (negative) -result else result;
}

View file

@ -7,31 +7,15 @@
/// 2. Open in LibreOffice Calc, select the "Data" tab
/// 3. File > Save As > CSV (ie_data.csv)
/// 4. Replace src/data/ie_data.csv with the new file
/// 5. Rebuild; the data is parsed at comptime
///
/// The CSV contains monthly observations from 1871 to present. This module
/// extracts January rows and computes year-over-year returns for:
/// - S&P 500 total return (price + dividends reinvested)
/// - 10-year Treasury bond total return
/// - CPI inflation
/// 5. Rebuild; build/gen_shiller.zig regenerates the data automatically
///
/// All returns are nominal, expressed as decimals (0.12 = 12%).
const std = @import("std");
const generated = @import("shiller_generated");
pub const ShillerYear = @import("shiller_year").ShillerYear;
/// One year's entry in the annual returns table. All return fields are
/// decimal fractions (0.12 = 12%).
pub const ShillerYear = struct {
    /// Year this entry is keyed by.
    year: u16,
    /// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
    sp500_total_return: f64,
    /// 10-year Treasury bond total return (decimal)
    bond_total_return: f64,
    /// CPI inflation rate (decimal)
    cpi_inflation: f64,
};
/// Comptime-parsed annual returns from the embedded Shiller CSV.
/// Each entry represents one calendar year's returns, computed from
/// January-to-January changes in the cumulative index columns.
pub const annual_returns: []const ShillerYear = parseShillerData();
/// Annual returns from the Shiller dataset, generated at build time from ie_data.csv.
pub const annual_returns: []const ShillerYear = &generated.data;
/// Number of available years of historical data.
pub const year_count: usize = annual_returns.len;
@ -56,202 +40,15 @@ pub fn maxCycles(horizon: u16) usize {
return span - horizon + 1;
}
// --- Comptime CSV parsing ---
const csv_data = @embedFile("ie_data.csv");
/// Parses the embedded `csv_data` into one `ShillerYear` per year.
///
/// Only January rows (date starting `YYYY.01`) are read. When two January rows
/// are exactly one year apart, an entry is produced for the earlier year from
/// the change in CPI and TR price between them and from the earlier row's GS10
/// value. Rows with a missing, unparseable or zero CPI/GS10/TR-price field are
/// skipped.
///
/// Returns a slice holding exactly the entries produced, or an empty slice if
/// none were. The evaluation branch quota is raised for this function; changes
/// that add loop iterations or calls may require raising it further.
fn parseShillerData() []const ShillerYear {
    @setEvalBranchQuota(120_000);
    // Scratch storage; only the first `result_count` entries are ever read.
    var results: [200]ShillerYear = undefined;
    var result_count: usize = 0;
    // Values from the most recently accepted January row (prev_year == 0 means none yet).
    var prev_year: u16 = 0;
    var prev_tr_price: f64 = 0;
    var prev_gs10: f64 = 0;
    var prev_cpi: f64 = 0;
    // Skip header (8 lines)
    var pos: usize = 0;
    var newlines: usize = 0;
    while (pos < csv_data.len and newlines < 8) : (pos += 1) {
        if (csv_data[pos] == '\n') newlines += 1;
    }
    // Lines are ~135 bytes. After each January row, the next January is
    // ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes,
    // then scan forward to the next line boundary. This avoids scanning
    // ~90% of the file byte-by-byte.
    const skip_bytes = 11 * 96;
    while (pos < csv_data.len) {
        // Find current line
        const line_start = pos;
        while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
        // Drop a trailing '\r' so CRLF files parse the same as LF files.
        const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
        if (pos < csv_data.len) pos += 1;
        const line = csv_data[line_start..line_end];
        // Too short to hold "YYYY.01"; also keeps the fixed-offset reads below in bounds.
        if (line.len < 7) continue;
        // Fast reject: date is "YYYY.01,"
        if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
        const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue;
        // Parse fields for this January row
        var col_iter = CsvFieldIterator{ .data = line };
        _ = col_iter.next(); // col 0: Date
        _ = col_iter.next(); // col 1: P
        _ = col_iter.next(); // col 2: D
        _ = col_iter.next(); // col 3: E
        const cpi_field = col_iter.next() orelse continue;
        _ = col_iter.next(); // col 5
        const gs10_field = col_iter.next() orelse continue;
        _ = col_iter.next(); // col 7
        _ = col_iter.next(); // col 8
        const tr_price_field = col_iter.next() orelse continue;
        const cpi = parseF64WithCommas(cpi_field) orelse continue;
        const gs10 = parseF64WithCommas(gs10_field) orelse continue;
        const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
        // Zero values are rejected; cpi and tr_price are later used as divisors.
        if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
        // Compute return from previous January
        if (prev_year > 0 and year == prev_year + 1) {
            const cpi_change = (cpi / prev_cpi) - 1.0;
            const real_sp500 = (tr_price / prev_tr_price) - 1.0;
            results[result_count] = .{
                .year = prev_year,
                .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
                .bond_total_return = prev_gs10 / 100.0,
                .cpi_inflation = cpi_change,
            };
            result_count += 1;
        }
        prev_year = year;
        prev_tr_price = tr_price;
        prev_gs10 = gs10;
        prev_cpi = cpi;
        // Skip ahead ~11 months of data
        pos = @min(pos + skip_bytes, csv_data.len);
        // Realign to next line boundary
        while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
        if (pos < csv_data.len) pos += 1;
    }
    if (result_count == 0) return &.{};
    // Copy into an array of exactly `result_count` elements so the returned
    // slice carries no unused (undefined) tail.
    const final = blk: {
        var arr: [result_count]ShillerYear = undefined;
        for (0..result_count) |i| {
            arr[i] = results[i];
        }
        break :blk arr;
    };
    return &final;
}
// --- Comptime parsing helpers ---
/// Zero-copy iterator over the comma-separated fields of one CSV line.
///
/// Returned slices point into `data`. A field that starts with `"` runs to the
/// matching closing quote and may contain commas; doubled quotes (`""`) inside
/// it are skipped over but NOT collapsed in the returned slice.
///
/// A trailing comma produces one final empty field ("a," -> "a", ""); a line
/// without a trailing comma does not ("a,b" -> "a", "b"). Empty input yields a
/// single empty field.
const CsvFieldIterator = struct {
    /// The line being split (without line terminator).
    data: []const u8,
    /// Offset of the next field's first byte; `data.len + 1` once exhausted.
    pos: usize = 0,

    /// Returns the next field, or `null` when all fields have been produced.
    fn next(self: *CsvFieldIterator) ?[]const u8 {
        const len = self.data.len;
        if (self.pos > len) return null;
        if (self.pos == len) {
            // Empty input, or the previous field ended with a comma: emit the
            // final empty field exactly once.
            self.pos = len + 1;
            return "";
        }

        // Handle quoted fields
        if (self.data[self.pos] == '"') {
            self.pos += 1; // skip opening quote
            const qstart = self.pos;
            while (self.pos < len) {
                if (self.data[self.pos] == '"') {
                    if (self.pos + 1 < len and self.data[self.pos + 1] == '"') {
                        self.pos += 2; // escaped quote
                    } else {
                        break; // end quote
                    }
                } else {
                    self.pos += 1;
                }
            }
            const qend = self.pos;
            if (self.pos < len) self.pos += 1; // skip closing quote
            if (self.pos >= len) {
                // Line ended with this field: mark exhausted so no spurious
                // empty field follows.
                self.pos = len + 1;
            } else if (self.data[self.pos] == ',') {
                self.pos += 1; // skip comma
            }
            return self.data[qstart..qend];
        }

        // Unquoted field
        const start = self.pos;
        while (self.pos < len and self.data[self.pos] != ',') {
            self.pos += 1;
        }
        const end = self.pos;
        // Step over the comma, or mark exhausted when the line ended here.
        self.pos = if (end < len) end + 1 else len + 1;
        return self.data[start..end];
    }
};
/// Parses a decimal number that may be padded with spaces and may contain
/// thousands-separator commas in its integer part (e.g. " 1,036,099.07 ").
///
/// Accepts an optional leading `-`, digits (with commas) before an optional
/// `.`, and digits after it. Returns `null` for empty/blank input, for any
/// other character, and for input containing no digit at all (such as "-",
/// "." or ",").
///
/// Deliberately written without helper calls: it is evaluated at comptime
/// under a fixed branch quota.
fn parseF64WithCommas(s: []const u8) ?f64 {
    if (s.len == 0) return null;
    // Strip leading/trailing spaces
    var start: usize = 0;
    var end: usize = s.len;
    while (start < end and s[start] == ' ') start += 1;
    while (end > start and s[end - 1] == ' ') end -= 1;
    if (start >= end) return null;

    var negative = false;
    if (s[start] == '-') {
        negative = true;
        start += 1;
    }

    // Set once any digit is consumed, so digit-less input is rejected instead
    // of being read as zero.
    var saw_digit = false;

    var integer_part: f64 = 0;
    var i = start;
    while (i < end and s[i] != '.') : (i += 1) {
        if (s[i] == ',') continue; // skip commas
        if (s[i] < '0' or s[i] > '9') return null;
        integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
        saw_digit = true;
    }

    var frac_part: f64 = 0;
    if (i < end and s[i] == '.') {
        i += 1;
        var divisor: f64 = 10.0;
        while (i < end) : (i += 1) {
            if (s[i] < '0' or s[i] > '9') return null;
            frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
            divisor *= 10.0;
            saw_digit = true;
        }
    }

    if (!saw_digit) return null;

    const result = integer_part + frac_part;
    return if (negative) -result else result;
}
// --- Tests ---
test "annual returns are populated" {
    // The table must hold at least 150 entries, start at 1871 and reach 2024 or later.
    const testing = std.testing;
    const entry_count = annual_returns.len;
    try testing.expect(entry_count >= 150);
    try testing.expectEqual(@as(u16, 1871), first_year);
    try testing.expect(last_year >= 2024);
}
test "spot check 2008 crash" {
// 2008: S&P 500 nominal total return was approximately -37%
for (annual_returns) |yr| {
if (yr.year == 2008) {
try std.testing.expect(yr.sp500_total_return < -0.30);
@ -263,7 +60,6 @@ test "spot check 2008 crash" {
}
test "spot check 1929 crash" {
// 1929: S&P 500 nominal total return should be significantly negative
for (annual_returns) |yr| {
if (yr.year == 1929) {
try std.testing.expect(yr.sp500_total_return < -0.05);
@ -274,82 +70,47 @@ test "spot check 1929 crash" {
}
test "realReturn calculation" {
    // A 10% nominal return under 3% inflation is expected to come out near 6.8% real.
    try std.testing.expectApproxEqAbs(0.06796, realReturn(0.10, 0.03), 0.001);
}
test "maxCycles" {
    const testing = std.testing;

    // A 30-year horizon over the available data should yield between 120 and 130 cycles.
    const thirty_year_cycles = maxCycles(30);
    try testing.expect(thirty_year_cycles >= 120);
    try testing.expect(thirty_year_cycles <= 130);

    // A horizon longer than the data set yields no cycles at all.
    const none: usize = 0;
    try testing.expectEqual(none, maxCycles(200));
}
test "spot check known annual total returns" {
    // Nominal total returns (real from TR Price + CPI).
    // Jan-to-Jan measurement, so timing differs from calendar-year figures.
    // Each year is listed once, with the event that motivates its bounds.
    const checks = [_]struct { year: u16, min: f64, max: f64 }{
        .{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year
        .{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery
        .{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis
        .{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC
        .{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery
        .{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID
    };
    for (checks) |chk| {
        // Locate the entry for this year; a missing year is a failure.
        const entry = for (annual_returns) |yr| {
            if (yr.year == chk.year) break yr;
        } else {
            std.debug.print("Year {d} not found in data\n", .{chk.year});
            return error.TestExpectedEqual;
        };
        if (entry.sp500_total_return < chk.min or entry.sp500_total_return > chk.max) {
            std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{
                chk.year, entry.sp500_total_return, chk.min, chk.max,
            });
            return error.TestExpectedEqual;
        }
    }
}
test "bond returns are reasonable" {
    // Bond returns (GS10 yield) should be between 0% and 16%
    for (annual_returns) |yr| {
        const in_range = yr.bond_total_return >= 0.0 and yr.bond_total_return <= 0.16;
        if (!in_range) {
            // Report the offending year so a failure can be diagnosed from the log.
            std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{
                yr.year, yr.bond_total_return,
            });
            return error.TestExpectedEqual;
        }
    }
}
test "CPI inflation is reasonable" {
    // CPI should generally be between -20% and +25% (19th century had severe deflation)
    for (annual_returns) |yr| {
        const in_range = yr.cpi_inflation >= -0.20 and yr.cpi_inflation <= 0.25;
        if (!in_range) {
            // Report the offending year so a failure can be diagnosed from the log.
            std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{
                yr.year, yr.cpi_inflation,
            });
            return error.TestExpectedEqual;
        }
    }
}
test "parseF64WithCommas" {
    const testing = std.testing;

    // Space-padded numbers, with and without thousands separators.
    const numeric_cases = [_]struct { text: []const u8, want: f64 }{
        .{ .text = " 9,944.73 ", .want = 9944.73 },
        .{ .text = " 1,036,099.07 ", .want = 1036099.07 },
        .{ .text = " 116.82 ", .want = 116.82 },
    };
    for (numeric_cases) |case| {
        try testing.expectApproxEqAbs(case.want, parseF64WithCommas(case.text).?, 0.01);
    }

    // Empty and non-numeric input must be rejected.
    const rejected = [_][]const u8{ "", "NA" };
    for (rejected) |text| {
        try testing.expectEqual(@as(?f64, null), parseF64WithCommas(text));
    }
}

View file

@ -0,0 +1,10 @@
/// A single year's market returns from the Shiller CAPE dataset.
pub const ShillerYear = struct {
    /// Year this entry is keyed by.
    year: u16,
    /// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
    sp500_total_return: f64,
    /// 10-year Treasury bond total return (decimal)
    bond_total_return: f64,
    /// CPI inflation rate (decimal)
    cpi_inflation: f64,
};