First pass optimization of Shiller data
This commit is contained in:
parent
4f75c2e006
commit
fe0e10eaca
1 changed file with 113 additions and 87 deletions
|
|
@ -61,58 +61,56 @@ pub fn maxCycles(horizon: u16) usize {
|
|||
const csv_data = @embedFile("ie_data.csv");
|
||||
|
||||
fn parseShillerData() []const ShillerYear {
|
||||
@setEvalBranchQuota(1_000_000);
|
||||
@setEvalBranchQuota(120_000);
|
||||
|
||||
// First pass: collect January rows with their cumulative indices.
|
||||
// We need January CPI (col 4), Rate GS10 (col 6), and Total Return Price (col 9).
|
||||
const JanRow = struct {
|
||||
year: u16,
|
||||
tr_price: f64, // cumulative S&P 500 total return index (nominal)
|
||||
gs10: f64, // 10-year Treasury yield (percentage points)
|
||||
cpi: f64,
|
||||
};
|
||||
var results: [200]ShillerYear = undefined;
|
||||
var result_count: usize = 0;
|
||||
|
||||
var jan_rows: [200]JanRow = undefined;
|
||||
var jan_count: usize = 0;
|
||||
var prev_year: u16 = 0;
|
||||
var prev_tr_price: f64 = 0;
|
||||
var prev_gs10: f64 = 0;
|
||||
var prev_cpi: f64 = 0;
|
||||
|
||||
var line_iter = LineIterator{ .data = csv_data };
|
||||
|
||||
// Skip header lines (first 8 lines are headers)
|
||||
for (0..8) |_| {
|
||||
_ = line_iter.next();
|
||||
// Skip header (8 lines)
|
||||
var pos: usize = 0;
|
||||
var newlines: usize = 0;
|
||||
while (pos < csv_data.len and newlines < 8) : (pos += 1) {
|
||||
if (csv_data[pos] == '\n') newlines += 1;
|
||||
}
|
||||
|
||||
while (line_iter.next()) |line| {
|
||||
if (line.len == 0) continue;
|
||||
// Lines are ~135 bytes. After each January row, the next January is
|
||||
// ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes,
|
||||
// then scan forward to the next line boundary. This avoids scanning
|
||||
// ~90% of the file byte-by-byte.
|
||||
const skip_bytes = 11 * 96;
|
||||
|
||||
// Parse the date column to check if this is a January row
|
||||
while (pos < csv_data.len) {
|
||||
// Find current line
|
||||
const line_start = pos;
|
||||
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
|
||||
const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
|
||||
if (pos < csv_data.len) pos += 1;
|
||||
|
||||
const line = csv_data[line_start..line_end];
|
||||
if (line.len < 7) continue;
|
||||
|
||||
// Fast reject: date is "YYYY.01,"
|
||||
if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
|
||||
|
||||
const year = parseU16(line[0..4]) orelse continue;
|
||||
|
||||
// Parse fields for this January row
|
||||
var col_iter = CsvFieldIterator{ .data = line };
|
||||
const date_field = col_iter.next() orelse continue; // col 0: Date
|
||||
if (date_field.len == 0) continue;
|
||||
|
||||
// Date format is "YYYY.MM" where January = ".01", October = ".1"
|
||||
// (October drops the leading zero). Check for exactly ".01".
|
||||
const dot_pos = indexOf(date_field, '.') orelse continue;
|
||||
const year_str = date_field[0..dot_pos];
|
||||
const month_str = date_field[dot_pos + 1 ..];
|
||||
|
||||
const year = parseU16(year_str) orelse continue;
|
||||
|
||||
// January is exactly "01" (2 chars). ".1" = October, ".11" = November.
|
||||
if (month_str.len != 2) continue;
|
||||
const month = parseU16(month_str) orelse continue;
|
||||
if (month != 1) continue;
|
||||
|
||||
// Skip to the columns we need
|
||||
_ = col_iter.next(); // col 0: Date
|
||||
_ = col_iter.next(); // col 1: P
|
||||
_ = col_iter.next(); // col 2: D
|
||||
_ = col_iter.next(); // col 3: E
|
||||
const cpi_field = col_iter.next() orelse continue; // col 4: CPI
|
||||
_ = col_iter.next(); // col 5: Date Fraction
|
||||
const gs10_field = col_iter.next() orelse continue; // col 6: Rate GS10
|
||||
_ = col_iter.next(); // col 7: Real Price
|
||||
_ = col_iter.next(); // col 8: Real Dividend
|
||||
const tr_price_field = col_iter.next() orelse continue; // col 9: Total Return Price (nominal)
|
||||
const cpi_field = col_iter.next() orelse continue;
|
||||
_ = col_iter.next(); // col 5
|
||||
const gs10_field = col_iter.next() orelse continue;
|
||||
_ = col_iter.next(); // col 7
|
||||
_ = col_iter.next(); // col 8
|
||||
const tr_price_field = col_iter.next() orelse continue;
|
||||
|
||||
const cpi = parseF64(stripSpaces(cpi_field)) orelse continue;
|
||||
const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue;
|
||||
|
|
@ -120,44 +118,88 @@ fn parseShillerData() []const ShillerYear {
|
|||
|
||||
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
|
||||
|
||||
jan_rows[jan_count] = .{
|
||||
.year = year,
|
||||
.tr_price = tr_price,
|
||||
.gs10 = gs10,
|
||||
.cpi = cpi,
|
||||
};
|
||||
jan_count += 1;
|
||||
// Compute return from previous January
|
||||
if (prev_year > 0 and year == prev_year + 1) {
|
||||
const cpi_change = (cpi / prev_cpi) - 1.0;
|
||||
const real_sp500 = (tr_price / prev_tr_price) - 1.0;
|
||||
|
||||
results[result_count] = .{
|
||||
.year = prev_year,
|
||||
.sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
|
||||
.bond_total_return = prev_gs10 / 100.0,
|
||||
.cpi_inflation = cpi_change,
|
||||
};
|
||||
result_count += 1;
|
||||
}
|
||||
|
||||
prev_year = year;
|
||||
prev_tr_price = tr_price;
|
||||
prev_gs10 = gs10;
|
||||
prev_cpi = cpi;
|
||||
|
||||
// Skip ahead ~11 months of data
|
||||
pos = @min(pos + skip_bytes, csv_data.len);
|
||||
// Realign to next line boundary
|
||||
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
|
||||
if (pos < csv_data.len) pos += 1;
|
||||
}
|
||||
|
||||
// Second pass: compute year-over-year returns from consecutive January values.
|
||||
if (jan_count < 2) return &.{};
|
||||
// Single pass: scan lines, parse January rows, compute returns on the fly.
|
||||
while (pos < csv_data.len) {
|
||||
const line_start = pos;
|
||||
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
|
||||
const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
|
||||
if (pos < csv_data.len) pos += 1;
|
||||
|
||||
var results: [200]ShillerYear = undefined;
|
||||
var result_count: usize = 0;
|
||||
const line = csv_data[line_start..line_end];
|
||||
if (line.len < 7) continue;
|
||||
|
||||
for (1..jan_count) |i| {
|
||||
const prev = jan_rows[i - 1];
|
||||
const curr = jan_rows[i];
|
||||
// Fast reject: date is "YYYY.01," — check fixed offsets
|
||||
if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
|
||||
|
||||
// Ensure consecutive years
|
||||
if (curr.year != prev.year + 1) continue;
|
||||
const year = parseU16(line[0..4]) orelse continue;
|
||||
|
||||
const cpi_change = (curr.cpi / prev.cpi) - 1.0;
|
||||
// TR Price is a real (inflation-adjusted) total return index.
|
||||
// Convert to nominal: (1 + real) * (1 + inflation) - 1
|
||||
const real_sp500 = (curr.tr_price / prev.tr_price) - 1.0;
|
||||
const nominal_sp500 = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0;
|
||||
// Parse fields for this January row
|
||||
var col_iter = CsvFieldIterator{ .data = line };
|
||||
_ = col_iter.next(); // col 0: Date
|
||||
_ = col_iter.next(); // col 1: P
|
||||
_ = col_iter.next(); // col 2: D
|
||||
_ = col_iter.next(); // col 3: E
|
||||
const cpi_field = col_iter.next() orelse continue;
|
||||
_ = col_iter.next(); // col 5
|
||||
const gs10_field = col_iter.next() orelse continue;
|
||||
_ = col_iter.next(); // col 7
|
||||
_ = col_iter.next(); // col 8
|
||||
const tr_price_field = col_iter.next() orelse continue;
|
||||
|
||||
results[result_count] = .{
|
||||
.year = prev.year,
|
||||
.sp500_total_return = nominal_sp500,
|
||||
.bond_total_return = prev.gs10 / 100.0, // GS10 yield as nominal bond return (FIRECalc convention)
|
||||
.cpi_inflation = cpi_change,
|
||||
};
|
||||
result_count += 1;
|
||||
const cpi = parseF64(stripSpaces(cpi_field)) orelse continue;
|
||||
const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue;
|
||||
const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
|
||||
|
||||
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
|
||||
|
||||
// Compute return from previous January (if consecutive)
|
||||
if (prev_year > 0 and year == prev_year + 1) {
|
||||
const cpi_change = (cpi / prev_cpi) - 1.0;
|
||||
const real_sp500 = (tr_price / prev_tr_price) - 1.0;
|
||||
|
||||
results[result_count] = .{
|
||||
.year = prev_year,
|
||||
.sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
|
||||
.bond_total_return = prev_gs10 / 100.0,
|
||||
.cpi_inflation = cpi_change,
|
||||
};
|
||||
result_count += 1;
|
||||
}
|
||||
|
||||
prev_year = year;
|
||||
prev_tr_price = tr_price;
|
||||
prev_gs10 = gs10;
|
||||
prev_cpi = cpi;
|
||||
}
|
||||
|
||||
// Copy into a correctly-sized slice
|
||||
if (result_count == 0) return &.{};
|
||||
|
||||
const final = blk: {
|
||||
var arr: [result_count]ShillerYear = undefined;
|
||||
for (0..result_count) |i| {
|
||||
|
|
@ -170,22 +212,6 @@ fn parseShillerData() []const ShillerYear {
|
|||
|
||||
// --- Comptime parsing helpers ---
|
||||
|
||||
/// Cursor over a byte buffer that yields one line per call to `next`.
///
/// Lines are delimited by '\n'. A single '\r' immediately before the
/// delimiter (or before the end of the buffer) is excluded from the
/// returned slice. Returned slices alias `data`; nothing is copied.
const LineIterator = struct {
    data: []const u8,
    pos: usize = 0,

    /// Returns the next line without its terminator, or `null` once the
    /// cursor has reached the end of `data`.
    fn next(self: *LineIterator) ?[]const u8 {
        const bytes = self.data;
        const begin = self.pos;
        if (begin >= bytes.len) return null;

        // Advance to the '\n' ending this line, or to the end of the buffer.
        var cursor = begin;
        while (cursor < bytes.len) : (cursor += 1) {
            if (bytes[cursor] == '\n') break;
        }

        // Exclude a trailing '\r' so CRLF input yields the same text as LF input.
        var stop = cursor;
        if (stop > begin and bytes[stop - 1] == '\r') stop -= 1;

        // Step over the '\n' itself when one was found.
        self.pos = if (cursor < bytes.len) cursor + 1 else cursor;
        return bytes[begin..stop];
    }
};
|
||||
|
||||
const CsvFieldIterator = struct {
|
||||
data: []const u8,
|
||||
pos: usize = 0,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue