add shiller comptime parsed variable for use by rest of app

This commit is contained in:
Emil Lerch 2026-04-27 17:01:21 -07:00
parent 43bd0e61e7
commit dda49efe27
Signed by: lobo
GPG key ID: A7B62D657EF764F8

440
src/data/shiller.zig Normal file
View file

@ -0,0 +1,440 @@
/// Embedded historical market data derived from Robert Shiller's CAPE dataset.
///
/// Source: https://shillerdata.com/ (ie_data.xls, "Data" tab saved as CSV)
///
/// To update:
/// 1. Download ie_data.xls from https://shillerdata.com/
/// 2. Open in LibreOffice Calc, select the "Data" tab
/// 3. File Save As CSV (ie_data.csv)
/// 4. Replace src/data/ie_data.csv with the new file
/// 5. Rebuild the data is parsed at comptime
///
/// The CSV contains monthly observations from 1871 to present. This module
/// extracts January rows and computes year-over-year returns for:
/// - S&P 500 total return (price + dividends reinvested)
/// - 10-year Treasury bond total return
/// - CPI inflation
///
/// All returns are nominal, expressed as decimals (0.12 = 12%).
const std = @import("std");
pub const ShillerYear = struct {
year: u16,
/// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
sp500_total_return: f64,
/// 10-year Treasury bond total return (decimal)
bond_total_return: f64,
/// CPI inflation rate (decimal)
cpi_inflation: f64,
};
/// Comptime-parsed annual returns from the embedded Shiller CSV.
/// Each entry represents one calendar year's returns, computed from
/// January-to-January changes in the cumulative index columns.
pub const annual_returns: []const ShillerYear = parseShillerData();
/// Number of available years of historical data.
pub const year_count: usize = annual_returns.len;
/// First year with complete data.
pub const first_year: u16 = if (annual_returns.len > 0) annual_returns[0].year else 0;
/// Last year with complete data.
pub const last_year: u16 = if (annual_returns.len > 0) annual_returns[annual_returns.len - 1].year else 0;
/// Convert a nominal return to a real (inflation-adjusted) return.
pub fn realReturn(nominal: f64, inflation: f64) f64 {
return (1.0 + nominal) / (1.0 + inflation) - 1.0;
}
/// Return the maximum number of complete simulation cycles for a given horizon.
/// A cycle starting in year Y needs data through year Y + horizon - 1.
pub fn maxCycles(horizon: u16) usize {
if (annual_returns.len == 0) return 0;
const span = last_year - first_year + 1;
if (horizon > span) return 0;
return span - horizon + 1;
}
// --- Comptime CSV parsing ---
const csv_data = @embedFile("ie_data.csv");
fn parseShillerData() []const ShillerYear {
@setEvalBranchQuota(1_000_000);
// First pass: collect January rows with their cumulative indices.
// We need January Total Return Price (col 9), Bond Returns (col 17), CPI (col 4).
const JanRow = struct {
year: u16,
tr_price: f64, // cumulative S&P 500 total return index (nominal)
gs10: f64, // 10-year Treasury yield (percentage points)
cpi: f64,
};
var jan_rows: [200]JanRow = undefined;
var jan_count: usize = 0;
var line_iter = LineIterator{ .data = csv_data };
// Skip header lines (first 8 lines are headers)
for (0..8) |_| {
_ = line_iter.next();
}
while (line_iter.next()) |line| {
if (line.len == 0) continue;
// Parse the date column to check if this is a January row
var col_iter = CsvFieldIterator{ .data = line };
const date_field = col_iter.next() orelse continue; // col 0: Date
if (date_field.len == 0) continue;
// Date format is "YYYY.MM" where January = ".01", October = ".1"
// (October drops the leading zero). Check for exactly ".01".
const dot_pos = indexOf(date_field, '.') orelse continue;
const year_str = date_field[0..dot_pos];
const month_str = date_field[dot_pos + 1 ..];
const year = parseU16(year_str) orelse continue;
// January is exactly "01" (2 chars). ".1" = October, ".11" = November.
if (month_str.len != 2) continue;
const month = parseU16(month_str) orelse continue;
if (month != 1) continue;
// Skip to the columns we need
_ = col_iter.next(); // col 1: P
_ = col_iter.next(); // col 2: D
_ = col_iter.next(); // col 3: E
const cpi_field = col_iter.next() orelse continue; // col 4: CPI
_ = col_iter.next(); // col 5: Date Fraction
const gs10_field = col_iter.next() orelse continue; // col 6: Rate GS10
_ = col_iter.next(); // col 7: Real Price
_ = col_iter.next(); // col 8: Real Dividend
const tr_price_field = col_iter.next() orelse continue; // col 9: Total Return Price (nominal)
const cpi = parseF64(stripSpaces(cpi_field)) orelse continue;
const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue;
const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
jan_rows[jan_count] = .{
.year = year,
.tr_price = tr_price,
.gs10 = gs10,
.cpi = cpi,
};
jan_count += 1;
}
// Second pass: compute year-over-year returns from consecutive January values.
if (jan_count < 2) return &.{};
var results: [200]ShillerYear = undefined;
var result_count: usize = 0;
for (1..jan_count) |i| {
const prev = jan_rows[i - 1];
const curr = jan_rows[i];
// Ensure consecutive years
if (curr.year != prev.year + 1) continue;
const cpi_change = (curr.cpi / prev.cpi) - 1.0;
// TR Price is a real (inflation-adjusted) total return index.
// Convert to nominal: (1 + real) * (1 + inflation) - 1
const real_sp500 = (curr.tr_price / prev.tr_price) - 1.0;
const nominal_sp500 = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0;
results[result_count] = .{
.year = prev.year,
.sp500_total_return = nominal_sp500,
.bond_total_return = prev.gs10 / 100.0, // GS10 yield as nominal bond return (FIRECalc convention)
.cpi_inflation = cpi_change,
};
result_count += 1;
}
// Copy into a correctly-sized slice
const final = blk: {
var arr: [result_count]ShillerYear = undefined;
for (0..result_count) |i| {
arr[i] = results[i];
}
break :blk arr;
};
return &final;
}
// --- Comptime parsing helpers ---
const LineIterator = struct {
data: []const u8,
pos: usize = 0,
fn next(self: *LineIterator) ?[]const u8 {
if (self.pos >= self.data.len) return null;
const start = self.pos;
while (self.pos < self.data.len and self.data[self.pos] != '\n') {
self.pos += 1;
}
const end = if (self.pos > start and self.data[self.pos - 1] == '\r') self.pos - 1 else self.pos;
if (self.pos < self.data.len) self.pos += 1; // skip \n
return self.data[start..end];
}
};
const CsvFieldIterator = struct {
data: []const u8,
pos: usize = 0,
fn next(self: *CsvFieldIterator) ?[]const u8 {
if (self.pos > self.data.len) return null;
if (self.pos == self.data.len) {
self.pos = self.data.len + 1;
return "";
}
const start = self.pos;
// Handle quoted fields
if (self.pos < self.data.len and self.data[self.pos] == '"') {
self.pos += 1; // skip opening quote
const qstart = self.pos;
while (self.pos < self.data.len) {
if (self.data[self.pos] == '"') {
if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') {
self.pos += 2; // escaped quote
} else {
break; // end quote
}
} else {
self.pos += 1;
}
}
const qend = self.pos;
if (self.pos < self.data.len) self.pos += 1; // skip closing quote
if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma
return self.data[qstart..qend];
}
// Unquoted field
while (self.pos < self.data.len and self.data[self.pos] != ',') {
self.pos += 1;
}
const end = self.pos;
if (self.pos < self.data.len) self.pos += 1; // skip comma
return self.data[start..end];
}
};
fn indexOf(s: []const u8, ch: u8) ?usize {
for (s, 0..) |c, i| {
if (c == ch) return i;
}
return null;
}
fn parseU16(s: []const u8) ?u16 {
if (s.len == 0) return null;
var result: u16 = 0;
for (s) |c| {
if (c < '0' or c > '9') return null;
result = result * 10 + @as(u16, c - '0');
}
return result;
}
fn parseF64(s: []const u8) ?f64 {
if (s.len == 0) return null;
var negative = false;
var start: usize = 0;
if (s[0] == '-') {
negative = true;
start = 1;
}
var integer_part: f64 = 0;
var i = start;
while (i < s.len and s[i] != '.') : (i += 1) {
if (s[i] < '0' or s[i] > '9') return null;
integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
}
var frac_part: f64 = 0;
if (i < s.len and s[i] == '.') {
i += 1;
var divisor: f64 = 10.0;
while (i < s.len) : (i += 1) {
if (s[i] < '0' or s[i] > '9') return null;
frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
divisor *= 10.0;
}
}
const result = integer_part + frac_part;
return if (negative) -result else result;
}
fn stripSpaces(s: []const u8) []const u8 {
var start: usize = 0;
var end: usize = s.len;
while (start < end and s[start] == ' ') start += 1;
while (end > start and s[end - 1] == ' ') end -= 1;
return s[start..end];
}
fn parseF64WithCommas(s: []const u8) ?f64 {
if (s.len == 0) return null;
// Strip leading/trailing spaces
var start: usize = 0;
var end: usize = s.len;
while (start < end and s[start] == ' ') start += 1;
while (end > start and s[end - 1] == ' ') end -= 1;
if (start >= end) return null;
var negative = false;
if (s[start] == '-') {
negative = true;
start += 1;
}
var integer_part: f64 = 0;
var i = start;
while (i < end and s[i] != '.') : (i += 1) {
if (s[i] == ',') continue; // skip commas
if (s[i] < '0' or s[i] > '9') return null;
integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
}
var frac_part: f64 = 0;
if (i < end and s[i] == '.') {
i += 1;
var divisor: f64 = 10.0;
while (i < end) : (i += 1) {
if (s[i] < '0' or s[i] > '9') return null;
frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
divisor *= 10.0;
}
}
const result = integer_part + frac_part;
return if (negative) -result else result;
}
// --- Tests ---
test "annual returns are populated" {
// Should have data from 1871 to at least 2024
try std.testing.expect(annual_returns.len >= 150);
try std.testing.expectEqual(@as(u16, 1871), first_year);
try std.testing.expect(last_year >= 2024);
}
test "spot check 2008 crash" {
// 2008: S&P 500 nominal total return was approximately -37%
for (annual_returns) |yr| {
if (yr.year == 2008) {
try std.testing.expect(yr.sp500_total_return < -0.30);
try std.testing.expect(yr.sp500_total_return > -0.50);
return;
}
}
return error.YearNotFound;
}
test "spot check 1929 crash" {
// 1929: S&P 500 nominal total return should be significantly negative
for (annual_returns) |yr| {
if (yr.year == 1929) {
try std.testing.expect(yr.sp500_total_return < -0.05);
return;
}
}
return error.YearNotFound;
}
test "realReturn calculation" {
// 10% nominal with 3% inflation = ~6.8% real
const real = realReturn(0.10, 0.03);
try std.testing.expectApproxEqAbs(0.06796, real, 0.001);
}
test "maxCycles" {
// With ~154 years of data, a 30-year horizon should give ~124 cycles
const cycles = maxCycles(30);
try std.testing.expect(cycles >= 120);
try std.testing.expect(cycles <= 130);
// Horizon longer than data should give 0
try std.testing.expectEqual(@as(usize, 0), maxCycles(200));
}
test "spot check known annual total returns" {
// Nominal total returns (real from TR Price + CPI).
// Jan-to-Jan measurement, so timing differs from calendar-year figures.
const checks = [_]struct { year: u16, min: f64, max: f64 }{
.{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year
.{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery
.{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis
.{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC
.{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery
.{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID
};
for (checks) |chk| {
var found = false;
for (annual_returns) |yr| {
if (yr.year == chk.year) {
found = true;
if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) {
std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{
chk.year, yr.sp500_total_return, chk.min, chk.max,
});
return error.TestExpectedEqual;
}
break;
}
}
if (!found) {
std.debug.print("Year {d} not found in data\n", .{chk.year});
return error.TestExpectedEqual;
}
}
}
test "bond returns are reasonable" {
// Bond returns (GS10 yield) should be between 0% and 16%
for (annual_returns) |yr| {
if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) {
std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{
yr.year, yr.bond_total_return,
});
return error.TestExpectedEqual;
}
}
}
test "CPI inflation is reasonable" {
// CPI should generally be between -20% and +25% (19th century had severe deflation)
for (annual_returns) |yr| {
if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) {
std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{
yr.year, yr.cpi_inflation,
});
return error.TestExpectedEqual;
}
}
}
test "parseF64WithCommas" {
try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01);
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas(""));
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA"));
}