/// Build-time generator: reads ie_data.csv and outputs a Zig source file
/// containing the parsed Shiller annual returns as a const array.
///
/// Algorithm (carried over from the former comptime parser):
/// - Skip 8 header lines
/// - Scan each line, fast-reject non-January rows by checking "YYYY.01" at fixed offsets
/// - Parse cols 4 (CPI), 6 (GS10), 9 (TR Price) via CsvFieldIterator
/// - Compute year-over-year returns on the fly
const std = @import("std");
const ShillerYear = @import("shiller").ShillerYear;

/// Entry point. Expects two arguments: the input CSV path and the output
/// `.zig` path. Prints a usage line and exits with status 1 when fewer are given.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const args = try std.process.argsAlloc(allocator);
    if (args.len < 3) {
        std.debug.print("Usage: gen_shiller <ie_data.csv> <output.zig>\n", .{});
        std.process.exit(1);
    }

    const csv_data = try std.fs.cwd().readFileAlloc(allocator, args[1], 10 * 1024 * 1024);

    // Parse before touching the output path, so a parse failure does not
    // leave a freshly truncated output file behind.
    var results: [200]ShillerYear = undefined;
    const parsed = try parseCsv(csv_data, &results);

    // Write the output .zig file: a single `data` array of ShillerYear literals.
    const out_file = try std.fs.cwd().createFile(args[2], .{});
    defer out_file.close();

    var out_buf: [1024]u8 = undefined;
    var file_writer = out_file.writer(&out_buf);
    const writer = &file_writer.interface;

    try writer.writeAll(
        \\// Auto-generated from ie_data.csv — do not edit.
        \\// Regenerate: zig build (runs build/gen_shiller.zig)
        \\
        \\const ShillerYear = @import("shiller").ShillerYear;
        \\
        \\pub const data = [_]ShillerYear{
        \\
    );

    for (parsed) |p| try writer.print(
        "    .{{ .year = {d}, .sp500_total_return = {d}, .bond_total_return = {d}, .cpi_inflation = {d} }},\n",
        .{ p.year, p.sp500_total_return, p.bond_total_return, p.cpi_inflation },
    );

    try writer.writeAll("};\n");
    // The writer is buffered; nothing is guaranteed to reach the file until flush.
    try writer.flush();
}

/// Parses the January rows of `csv_data` and writes one record per pair of
/// consecutive years into `buffer`.
///
/// A record for year Y is produced only when valid January rows exist for
/// both Y and Y+1; rows with missing, unparsable or zero CPI / GS10 / TR Price
/// are ignored.
///
/// Returns the filled prefix of `buffer`, or `error.BufferTooSmall` when the
/// data holds more records than `buffer` can store.
fn parseCsv(csv_data: []const u8, buffer: []ShillerYear) ![]ShillerYear {
    var prev_year: u16 = 0;
    var prev_tr_price: f64 = 0;
    var prev_gs10: f64 = 0;
    var prev_cpi: f64 = 0;

    // Skip 8 header lines.
    var pos: usize = 0;
    var newlines: usize = 0;
    while (pos < csv_data.len and newlines < 8) : (pos += 1) {
        if (csv_data[pos] == '\n') newlines += 1;
    }

    var current: usize = 0;

    while (pos < csv_data.len) {
        // Slice out the next line, dropping a trailing '\r' (CRLF input).
        const line_start = pos;
        while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
        const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
        if (pos < csv_data.len) pos += 1;
        const line = csv_data[line_start..line_end];

        if (line.len < 7) continue;

        // Fast reject: only rows whose date starts with "YYYY.01" are kept.
        // Every row is checked; no byte-count skipping is done, because a
        // fixed skip could jump over the next January row on short lines.
        if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;

        const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue;

        // Walk the columns up to col 9, keeping cols 4, 6 and 9.
        var col_iter = CsvFieldIterator{ .data = line };
        _ = col_iter.next(); // col 0: Date
        _ = col_iter.next(); // col 1: P
        _ = col_iter.next(); // col 2: D
        _ = col_iter.next(); // col 3: E
        const cpi_field = col_iter.next() orelse continue; // col 4: CPI
        _ = col_iter.next(); // col 5
        const gs10_field = col_iter.next() orelse continue; // col 6: GS10
        _ = col_iter.next(); // col 7
        _ = col_iter.next(); // col 8
        const tr_price_field = col_iter.next() orelse continue; // col 9: TR Price

        const cpi = parseF64WithCommas(cpi_field) orelse continue;
        const gs10 = parseF64WithCommas(gs10_field) orelse continue;
        const tr_price = parseF64WithCommas(tr_price_field) orelse continue;

        // Zero values are treated as missing data (they would also divide by zero below).
        if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;

        if (prev_year > 0 and year == prev_year + 1) {
            if (current >= buffer.len) return error.BufferTooSmall;

            const cpi_change = (cpi / prev_cpi) - 1.0;
            const real_sp500 = (tr_price / prev_tr_price) - 1.0;
            buffer[current] = .{
                .year = prev_year,
                .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
                .bond_total_return = prev_gs10 / 100.0,
                .cpi_inflation = cpi_change,
            };
            current += 1;
        }

        prev_year = year;
        prev_tr_price = tr_price;
        prev_gs10 = gs10;
        prev_cpi = cpi;
    }

    return buffer[0..current];
}

// --- Parsing helpers (same as shiller.zig's former comptime versions) ---

/// Iterates over the comma-separated fields of a single CSV line.
///
/// Fields that start with `"` run to the matching closing quote and may
/// contain commas; a doubled quote (`""`) inside such a field is kept as-is
/// in the returned slice (it is not unescaped). Returned slices point into `data`.
const CsvFieldIterator = struct {
    data: []const u8,
    /// Offset of the next field; `data.len + 1` marks the iterator as exhausted.
    pos: usize = 0,

    /// Returns the next field, or null once all fields have been returned.
    /// An empty line yields one empty field; a trailing comma yields a final
    /// empty field.
    fn next(self: *CsvFieldIterator) ?[]const u8 {
        if (self.pos > self.data.len) return null;
        if (self.pos == self.data.len) {
            // Reached only for empty input or directly after a trailing comma.
            self.pos = self.data.len + 1;
            return "";
        }

        // Quoted field.
        if (self.data[self.pos] == '"') {
            self.pos += 1;
            const qstart = self.pos;
            while (self.pos < self.data.len) {
                if (self.data[self.pos] != '"') {
                    self.pos += 1;
                } else if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') {
                    // Escaped quote: step over both characters.
                    self.pos += 2;
                } else {
                    break;
                }
            }
            const qend = self.pos;
            if (self.pos < self.data.len) self.pos += 1; // closing quote
            self.consumeSeparator();
            return self.data[qstart..qend];
        }

        // Unquoted field.
        const start = self.pos;
        while (self.pos < self.data.len and self.data[self.pos] != ',') {
            self.pos += 1;
        }
        const end = self.pos;
        self.consumeSeparator();
        return self.data[start..end];
    }

    /// Steps over the comma that ends a field. When the field ended at the end
    /// of the input instead, marks the iterator as exhausted so no spurious
    /// empty field is reported afterwards.
    fn consumeSeparator(self: *CsvFieldIterator) void {
        if (self.pos >= self.data.len) {
            self.pos = self.data.len + 1;
        } else if (self.data[self.pos] == ',') {
            self.pos += 1;
        }
    }
};

/// Parses a decimal number that may contain thousands separators
/// (e.g. "1,234.56"), surrounding spaces and a leading '-'.
///
/// Returns null for empty input, for any character other than digits, ','
/// and a single '.', and for input that contains no digit at all.
fn parseF64WithCommas(s: []const u8) ?f64 {
    const text = std.mem.trim(u8, s, " ");
    if (text.len == 0) return null;

    const negative = text[0] == '-';
    var i: usize = if (negative) 1 else 0;
    var saw_digit = false;

    // Integer part: digits up to the first '.', ignoring commas.
    var integer_part: f64 = 0;
    while (i < text.len and text[i] != '.') : (i += 1) {
        const c = text[i];
        if (c == ',') continue;
        if (c < '0' or c > '9') return null;
        integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(c - '0'));
        saw_digit = true;
    }

    // Fractional part: the loop above stops only at '.' or at the end of the text.
    var frac_part: f64 = 0;
    if (i < text.len) {
        i += 1;
        var divisor: f64 = 10.0;
        while (i < text.len) : (i += 1) {
            const c = text[i];
            if (c < '0' or c > '9') return null;
            frac_part += @as(f64, @floatFromInt(c - '0')) / divisor;
            divisor *= 10.0;
            saw_digit = true;
        }
    }

    if (!saw_digit) return null;

    const result = integer_part + frac_part;
    return if (negative) -result else result;
}