diff --git a/src/data/shiller.zig b/src/data/shiller.zig index 7c878a6..d35625c 100644 --- a/src/data/shiller.zig +++ b/src/data/shiller.zig @@ -61,58 +61,56 @@ pub fn maxCycles(horizon: u16) usize { const csv_data = @embedFile("ie_data.csv"); fn parseShillerData() []const ShillerYear { - @setEvalBranchQuota(1_000_000); + @setEvalBranchQuota(120_000); - // First pass: collect January rows with their cumulative indices. - // We need January Total Return Price (col 9), Bond Returns (col 17), CPI (col 4). - const JanRow = struct { - year: u16, - tr_price: f64, // cumulative S&P 500 total return index (nominal) - gs10: f64, // 10-year Treasury yield (percentage points) - cpi: f64, - }; + var results: [200]ShillerYear = undefined; + var result_count: usize = 0; - var jan_rows: [200]JanRow = undefined; - var jan_count: usize = 0; + var prev_year: u16 = 0; + var prev_tr_price: f64 = 0; + var prev_gs10: f64 = 0; + var prev_cpi: f64 = 0; - var line_iter = LineIterator{ .data = csv_data }; - - // Skip header lines (first 8 lines are headers) - for (0..8) |_| { - _ = line_iter.next(); + // Skip header (8 lines) + var pos: usize = 0; + var newlines: usize = 0; + while (pos < csv_data.len and newlines < 8) : (pos += 1) { + if (csv_data[pos] == '\n') newlines += 1; } - while (line_iter.next()) |line| { - if (line.len == 0) continue; + // Lines are ~135 bytes. After each January row, the next January is + // ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes, + // then scan forward to the next line boundary. This avoids scanning + // ~90% of the file byte-by-byte. + const skip_bytes = 11 * 96; - // Parse the date column to check if this is a January row + while (pos < csv_data.len) { + // Find current line + const line_start = pos; + while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; + const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos; + if (pos < csv_data.len) pos += 1; + + const line = csv_data[line_start..line_end]; + if (line.len < 7) continue; + + // Fast reject: date is "YYYY.01," + if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue; + + const year = parseU16(line[0..4]) orelse continue; + + // Parse fields for this January row var col_iter = CsvFieldIterator{ .data = line }; - const date_field = col_iter.next() orelse continue; // col 0: Date - if (date_field.len == 0) continue; - - // Date format is "YYYY.MM" where January = ".01", October = ".1" - // (October drops the leading zero). Check for exactly ".01". - const dot_pos = indexOf(date_field, '.') orelse continue; - const year_str = date_field[0..dot_pos]; - const month_str = date_field[dot_pos + 1 ..]; - - const year = parseU16(year_str) orelse continue; - - // January is exactly "01" (2 chars). ".1" = October, ".11" = November. - if (month_str.len != 2) continue; - const month = parseU16(month_str) orelse continue; - if (month != 1) continue; - - // Skip to the columns we need + _ = col_iter.next(); // col 0: Date _ = col_iter.next(); // col 1: P _ = col_iter.next(); // col 2: D _ = col_iter.next(); // col 3: E - const cpi_field = col_iter.next() orelse continue; // col 4: CPI - _ = col_iter.next(); // col 5: Date Fraction - const gs10_field = col_iter.next() orelse continue; // col 6: Rate GS10 - _ = col_iter.next(); // col 7: Real Price - _ = col_iter.next(); // col 8: Real Dividend - const tr_price_field = col_iter.next() orelse continue; // col 9: Total Return Price (nominal) + const cpi_field = col_iter.next() orelse continue; + _ = col_iter.next(); // col 5 + const gs10_field = col_iter.next() orelse continue; + _ = col_iter.next(); // col 7 + _ = col_iter.next(); // col 8 + const tr_price_field = col_iter.next() orelse continue; const cpi = parseF64(stripSpaces(cpi_field)) orelse continue; const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue; @@ -120,44 +118,88 @@ fn parseShillerData() []const ShillerYear { if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; - jan_rows[jan_count] = .{ - .year = year, - .tr_price = tr_price, - .gs10 = gs10, - .cpi = cpi, - }; - jan_count += 1; + // Compute return from previous January + if (prev_year > 0 and year == prev_year + 1) { + const cpi_change = (cpi / prev_cpi) - 1.0; + const real_sp500 = (tr_price / prev_tr_price) - 1.0; + + results[result_count] = .{ + .year = prev_year, + .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0, + .bond_total_return = prev_gs10 / 100.0, + .cpi_inflation = cpi_change, + }; + result_count += 1; + } + + prev_year = year; + prev_tr_price = tr_price; + prev_gs10 = gs10; + prev_cpi = cpi; + + // Skip ahead ~11 months of data + pos = @min(pos + skip_bytes, csv_data.len); + // Realign to next line boundary + while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; + if (pos < csv_data.len) pos += 1; } - // Second pass: compute year-over-year returns from consecutive January values. - if (jan_count < 2) return &.{}; + // Single pass: scan lines, parse January rows, compute returns on the fly. + while (pos < csv_data.len) { + const line_start = pos; + while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; + const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos; + if (pos < csv_data.len) pos += 1; - var results: [200]ShillerYear = undefined; - var result_count: usize = 0; + const line = csv_data[line_start..line_end]; + if (line.len < 7) continue; - for (1..jan_count) |i| { - const prev = jan_rows[i - 1]; - const curr = jan_rows[i]; + // Fast reject: date is "YYYY.01," — check fixed offsets + if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue; - // Ensure consecutive years - if (curr.year != prev.year + 1) continue; + const year = parseU16(line[0..4]) orelse continue; - const cpi_change = (curr.cpi / prev.cpi) - 1.0; - // TR Price is a real (inflation-adjusted) total return index. - // Convert to nominal: (1 + real) * (1 + inflation) - 1 - const real_sp500 = (curr.tr_price / prev.tr_price) - 1.0; - const nominal_sp500 = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0; + // Parse fields for this January row + var col_iter = CsvFieldIterator{ .data = line }; + _ = col_iter.next(); // col 0: Date + _ = col_iter.next(); // col 1: P + _ = col_iter.next(); // col 2: D + _ = col_iter.next(); // col 3: E + const cpi_field = col_iter.next() orelse continue; + _ = col_iter.next(); // col 5 + const gs10_field = col_iter.next() orelse continue; + _ = col_iter.next(); // col 7 + _ = col_iter.next(); // col 8 + const tr_price_field = col_iter.next() orelse continue; - results[result_count] = .{ - .year = prev.year, - .sp500_total_return = nominal_sp500, - .bond_total_return = prev.gs10 / 100.0, // GS10 yield as nominal bond return (FIRECalc convention) - .cpi_inflation = cpi_change, - }; - result_count += 1; + const cpi = parseF64(stripSpaces(cpi_field)) orelse continue; + const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue; + const tr_price = parseF64WithCommas(tr_price_field) orelse continue; + + if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; + + // Compute return from previous January (if consecutive) + if (prev_year > 0 and year == prev_year + 1) { + const cpi_change = (cpi / prev_cpi) - 1.0; + const real_sp500 = (tr_price / prev_tr_price) - 1.0; + + results[result_count] = .{ + .year = prev_year, + .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0, + .bond_total_return = prev_gs10 / 100.0, + .cpi_inflation = cpi_change, + }; + result_count += 1; + } + + prev_year = year; + prev_tr_price = tr_price; + prev_gs10 = gs10; + prev_cpi = cpi; } - // Copy into a correctly-sized slice + if (result_count == 0) return &.{}; + const final = blk: { var arr: [result_count]ShillerYear = undefined; for (0..result_count) |i| { @@ -170,22 +212,6 @@ fn parseShillerData() []const ShillerYear { // --- Comptime parsing helpers --- -const LineIterator = struct { - data: []const u8, - pos: usize = 0, - - fn next(self: *LineIterator) ?[]const u8 { - if (self.pos >= self.data.len) return null; - const start = self.pos; - while (self.pos < self.data.len and self.data[self.pos] != '\n') { - self.pos += 1; - } - const end = if (self.pos > start and self.data[self.pos - 1] == '\r') self.pos - 1 else self.pos; - if (self.pos < self.data.len) self.pos += 1; // skip \n - return self.data[start..end]; - } -}; - const CsvFieldIterator = struct { data: []const u8, pos: usize = 0,