First-pass optimization of Shiller data

This commit is contained in:
Emil Lerch 2026-04-27 19:45:15 -07:00
parent 4f75c2e006
commit fe0e10eaca
Signed by: lobo
GPG key ID: A7B62D657EF764F8

View file

@ -61,58 +61,56 @@ pub fn maxCycles(horizon: u16) usize {
const csv_data = @embedFile("ie_data.csv");
fn parseShillerData() []const ShillerYear {
@setEvalBranchQuota(1_000_000);
@setEvalBranchQuota(120_000);
// First pass: collect January rows with their cumulative indices.
// We need January Total Return Price (col 9), Bond Returns (col 17), CPI (col 4).
const JanRow = struct {
year: u16,
tr_price: f64, // cumulative S&P 500 total return index (nominal)
gs10: f64, // 10-year Treasury yield (percentage points)
cpi: f64,
};
var results: [200]ShillerYear = undefined;
var result_count: usize = 0;
var jan_rows: [200]JanRow = undefined;
var jan_count: usize = 0;
var prev_year: u16 = 0;
var prev_tr_price: f64 = 0;
var prev_gs10: f64 = 0;
var prev_cpi: f64 = 0;
var line_iter = LineIterator{ .data = csv_data };
// Skip header lines (first 8 lines are headers)
for (0..8) |_| {
_ = line_iter.next();
// Skip header (8 lines)
var pos: usize = 0;
var newlines: usize = 0;
while (pos < csv_data.len and newlines < 8) : (pos += 1) {
if (csv_data[pos] == '\n') newlines += 1;
}
while (line_iter.next()) |line| {
if (line.len == 0) continue;
// Lines are ~135 bytes. After each January row, the next January is
// ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes,
// then scan forward to the next line boundary. This avoids scanning
// ~90% of the file byte-by-byte.
const skip_bytes = 11 * 96;
// Parse the date column to check if this is a January row
while (pos < csv_data.len) {
// Find current line
const line_start = pos;
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
if (pos < csv_data.len) pos += 1;
const line = csv_data[line_start..line_end];
if (line.len < 7) continue;
// Fast reject: date is "YYYY.01,"
if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
const year = parseU16(line[0..4]) orelse continue;
// Parse fields for this January row
var col_iter = CsvFieldIterator{ .data = line };
const date_field = col_iter.next() orelse continue; // col 0: Date
if (date_field.len == 0) continue;
// Date format is "YYYY.MM" where January = ".01", October = ".1"
// (October drops the leading zero). Check for exactly ".01".
const dot_pos = indexOf(date_field, '.') orelse continue;
const year_str = date_field[0..dot_pos];
const month_str = date_field[dot_pos + 1 ..];
const year = parseU16(year_str) orelse continue;
// January is exactly "01" (2 chars). ".1" = October, ".11" = November.
if (month_str.len != 2) continue;
const month = parseU16(month_str) orelse continue;
if (month != 1) continue;
// Skip to the columns we need
_ = col_iter.next(); // col 0: Date
_ = col_iter.next(); // col 1: P
_ = col_iter.next(); // col 2: D
_ = col_iter.next(); // col 3: E
const cpi_field = col_iter.next() orelse continue; // col 4: CPI
_ = col_iter.next(); // col 5: Date Fraction
const gs10_field = col_iter.next() orelse continue; // col 6: Rate GS10
_ = col_iter.next(); // col 7: Real Price
_ = col_iter.next(); // col 8: Real Dividend
const tr_price_field = col_iter.next() orelse continue; // col 9: Total Return Price (nominal)
const cpi_field = col_iter.next() orelse continue;
_ = col_iter.next(); // col 5
const gs10_field = col_iter.next() orelse continue;
_ = col_iter.next(); // col 7
_ = col_iter.next(); // col 8
const tr_price_field = col_iter.next() orelse continue;
const cpi = parseF64(stripSpaces(cpi_field)) orelse continue;
const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue;
@ -120,44 +118,88 @@ fn parseShillerData() []const ShillerYear {
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
jan_rows[jan_count] = .{
.year = year,
.tr_price = tr_price,
.gs10 = gs10,
.cpi = cpi,
};
jan_count += 1;
// Compute return from previous January
if (prev_year > 0 and year == prev_year + 1) {
const cpi_change = (cpi / prev_cpi) - 1.0;
const real_sp500 = (tr_price / prev_tr_price) - 1.0;
results[result_count] = .{
.year = prev_year,
.sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
.bond_total_return = prev_gs10 / 100.0,
.cpi_inflation = cpi_change,
};
result_count += 1;
}
prev_year = year;
prev_tr_price = tr_price;
prev_gs10 = gs10;
prev_cpi = cpi;
// Skip ahead ~11 months of data
pos = @min(pos + skip_bytes, csv_data.len);
// Realign to next line boundary
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
if (pos < csv_data.len) pos += 1;
}
// Second pass: compute year-over-year returns from consecutive January values.
if (jan_count < 2) return &.{};
// Single pass: scan lines, parse January rows, compute returns on the fly.
while (pos < csv_data.len) {
const line_start = pos;
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
if (pos < csv_data.len) pos += 1;
var results: [200]ShillerYear = undefined;
var result_count: usize = 0;
const line = csv_data[line_start..line_end];
if (line.len < 7) continue;
for (1..jan_count) |i| {
const prev = jan_rows[i - 1];
const curr = jan_rows[i];
// Fast reject: date is "YYYY.01," check fixed offsets
if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
// Ensure consecutive years
if (curr.year != prev.year + 1) continue;
const year = parseU16(line[0..4]) orelse continue;
const cpi_change = (curr.cpi / prev.cpi) - 1.0;
// TR Price is a real (inflation-adjusted) total return index.
// Convert to nominal: (1 + real) * (1 + inflation) - 1
const real_sp500 = (curr.tr_price / prev.tr_price) - 1.0;
const nominal_sp500 = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0;
// Parse fields for this January row
var col_iter = CsvFieldIterator{ .data = line };
_ = col_iter.next(); // col 0: Date
_ = col_iter.next(); // col 1: P
_ = col_iter.next(); // col 2: D
_ = col_iter.next(); // col 3: E
const cpi_field = col_iter.next() orelse continue;
_ = col_iter.next(); // col 5
const gs10_field = col_iter.next() orelse continue;
_ = col_iter.next(); // col 7
_ = col_iter.next(); // col 8
const tr_price_field = col_iter.next() orelse continue;
results[result_count] = .{
.year = prev.year,
.sp500_total_return = nominal_sp500,
.bond_total_return = prev.gs10 / 100.0, // GS10 yield as nominal bond return (FIRECalc convention)
.cpi_inflation = cpi_change,
};
result_count += 1;
const cpi = parseF64(stripSpaces(cpi_field)) orelse continue;
const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue;
const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
// Compute return from previous January (if consecutive)
if (prev_year > 0 and year == prev_year + 1) {
const cpi_change = (cpi / prev_cpi) - 1.0;
const real_sp500 = (tr_price / prev_tr_price) - 1.0;
results[result_count] = .{
.year = prev_year,
.sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
.bond_total_return = prev_gs10 / 100.0,
.cpi_inflation = cpi_change,
};
result_count += 1;
}
prev_year = year;
prev_tr_price = tr_price;
prev_gs10 = gs10;
prev_cpi = cpi;
}
// Copy into a correctly-sized slice
if (result_count == 0) return &.{};
const final = blk: {
var arr: [result_count]ShillerYear = undefined;
for (0..result_count) |i| {
@ -170,22 +212,6 @@ fn parseShillerData() []const ShillerYear {
// --- Comptime parsing helpers ---
/// Cursor over a byte buffer that hands out one line per `next` call.
///
/// Lines are delimited by '\n'. A single '\r' directly before the delimiter
/// (or directly before the end of the buffer) is dropped from the returned
/// slice. Returned slices point into `data`; nothing is copied.
const LineIterator = struct {
    data: []const u8,
    pos: usize = 0,

    /// Returns the next line without its terminator, or `null` once `pos`
    /// has reached the end of `data`. Empty lines are returned as
    /// zero-length slices rather than being skipped.
    fn next(self: *LineIterator) ?[]const u8 {
        const total = self.data.len;
        const begin = self.pos;
        if (begin >= total) return null;

        // Walk forward to the delimiter, or to the end of the buffer when
        // the last line is unterminated.
        var cursor = begin;
        while (cursor < total) : (cursor += 1) {
            if (self.data[cursor] == '\n') break;
        }

        // Exclude one trailing carriage return so CRLF input yields the
        // same line contents as LF input.
        var stop = cursor;
        if (stop > begin and self.data[stop - 1] == '\r') stop -= 1;

        // Step past the '\n' only when one was actually found.
        self.pos = if (cursor < total) cursor + 1 else cursor;
        return self.data[begin..stop];
    }
};
const CsvFieldIterator = struct {
data: []const u8,
pos: usize = 0,