355 lines
12 KiB
Zig
355 lines
12 KiB
Zig
/// Embedded historical market data derived from Robert Shiller's CAPE dataset.
|
||
///
|
||
/// Source: https://shillerdata.com/ (ie_data.xls, "Data" tab saved as CSV)
|
||
///
|
||
/// To update:
|
||
/// 1. Download ie_data.xls from https://shillerdata.com/
|
||
/// 2. Open in LibreOffice Calc, select the "Data" tab
|
||
/// 3. File → Save As → CSV (ie_data.csv)
|
||
/// 4. Replace src/data/ie_data.csv with the new file
|
||
/// 5. Rebuild — the data is parsed at comptime
|
||
///
|
||
/// The CSV contains monthly observations from 1871 to present. This module
|
||
/// extracts January rows and computes year-over-year returns for:
|
||
/// - S&P 500 total return (price + dividends reinvested)
|
||
/// - 10-year Treasury bond total return
|
||
/// - CPI inflation
|
||
///
|
||
/// All returns are nominal, expressed as decimals (0.12 = 12%).
|
||
const std = @import("std");
|
||
|
||
pub const ShillerYear = struct {
|
||
year: u16,
|
||
/// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
|
||
sp500_total_return: f64,
|
||
/// 10-year Treasury bond total return (decimal)
|
||
bond_total_return: f64,
|
||
/// CPI inflation rate (decimal)
|
||
cpi_inflation: f64,
|
||
};
|
||
|
||
/// Comptime-parsed annual returns from the embedded Shiller CSV.
|
||
/// Each entry represents one calendar year's returns, computed from
|
||
/// January-to-January changes in the cumulative index columns.
|
||
pub const annual_returns: []const ShillerYear = parseShillerData();
|
||
|
||
/// Number of available years of historical data.
|
||
pub const year_count: usize = annual_returns.len;
|
||
|
||
/// First year with complete data.
|
||
pub const first_year: u16 = if (annual_returns.len > 0) annual_returns[0].year else 0;
|
||
|
||
/// Last year with complete data.
|
||
pub const last_year: u16 = if (annual_returns.len > 0) annual_returns[annual_returns.len - 1].year else 0;
|
||
|
||
/// Convert a nominal return to a real (inflation-adjusted) return.
|
||
pub fn realReturn(nominal: f64, inflation: f64) f64 {
|
||
return (1.0 + nominal) / (1.0 + inflation) - 1.0;
|
||
}
|
||
|
||
/// Return the maximum number of complete simulation cycles for a given horizon.
|
||
/// A cycle starting in year Y needs data through year Y + horizon - 1.
|
||
pub fn maxCycles(horizon: u16) usize {
|
||
if (annual_returns.len == 0) return 0;
|
||
const span = last_year - first_year + 1;
|
||
if (horizon > span) return 0;
|
||
return span - horizon + 1;
|
||
}
|
||
|
||
// --- Comptime CSV parsing ---
|
||
|
||
const csv_data = @embedFile("ie_data.csv");
|
||
|
||
fn parseShillerData() []const ShillerYear {
|
||
@setEvalBranchQuota(120_000);
|
||
|
||
var results: [200]ShillerYear = undefined;
|
||
var result_count: usize = 0;
|
||
|
||
var prev_year: u16 = 0;
|
||
var prev_tr_price: f64 = 0;
|
||
var prev_gs10: f64 = 0;
|
||
var prev_cpi: f64 = 0;
|
||
|
||
// Skip header (8 lines)
|
||
var pos: usize = 0;
|
||
var newlines: usize = 0;
|
||
while (pos < csv_data.len and newlines < 8) : (pos += 1) {
|
||
if (csv_data[pos] == '\n') newlines += 1;
|
||
}
|
||
|
||
// Lines are ~135 bytes. After each January row, the next January is
|
||
// ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes,
|
||
// then scan forward to the next line boundary. This avoids scanning
|
||
// ~90% of the file byte-by-byte.
|
||
const skip_bytes = 11 * 96;
|
||
|
||
while (pos < csv_data.len) {
|
||
// Find current line
|
||
const line_start = pos;
|
||
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
|
||
const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
|
||
if (pos < csv_data.len) pos += 1;
|
||
|
||
const line = csv_data[line_start..line_end];
|
||
if (line.len < 7) continue;
|
||
|
||
// Fast reject: date is "YYYY.01,"
|
||
if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
|
||
|
||
const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue;
|
||
|
||
// Parse fields for this January row
|
||
var col_iter = CsvFieldIterator{ .data = line };
|
||
_ = col_iter.next(); // col 0: Date
|
||
_ = col_iter.next(); // col 1: P
|
||
_ = col_iter.next(); // col 2: D
|
||
_ = col_iter.next(); // col 3: E
|
||
const cpi_field = col_iter.next() orelse continue;
|
||
_ = col_iter.next(); // col 5
|
||
const gs10_field = col_iter.next() orelse continue;
|
||
_ = col_iter.next(); // col 7
|
||
_ = col_iter.next(); // col 8
|
||
const tr_price_field = col_iter.next() orelse continue;
|
||
|
||
const cpi = parseF64WithCommas(cpi_field) orelse continue;
|
||
const gs10 = parseF64WithCommas(gs10_field) orelse continue;
|
||
const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
|
||
|
||
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
|
||
|
||
// Compute return from previous January
|
||
if (prev_year > 0 and year == prev_year + 1) {
|
||
const cpi_change = (cpi / prev_cpi) - 1.0;
|
||
const real_sp500 = (tr_price / prev_tr_price) - 1.0;
|
||
|
||
results[result_count] = .{
|
||
.year = prev_year,
|
||
.sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
|
||
.bond_total_return = prev_gs10 / 100.0,
|
||
.cpi_inflation = cpi_change,
|
||
};
|
||
result_count += 1;
|
||
}
|
||
|
||
prev_year = year;
|
||
prev_tr_price = tr_price;
|
||
prev_gs10 = gs10;
|
||
prev_cpi = cpi;
|
||
|
||
// Skip ahead ~11 months of data
|
||
pos = @min(pos + skip_bytes, csv_data.len);
|
||
// Realign to next line boundary
|
||
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
|
||
if (pos < csv_data.len) pos += 1;
|
||
}
|
||
|
||
if (result_count == 0) return &.{};
|
||
|
||
const final = blk: {
|
||
var arr: [result_count]ShillerYear = undefined;
|
||
for (0..result_count) |i| {
|
||
arr[i] = results[i];
|
||
}
|
||
break :blk arr;
|
||
};
|
||
return &final;
|
||
}
|
||
|
||
// --- Comptime parsing helpers ---
|
||
|
||
const CsvFieldIterator = struct {
|
||
data: []const u8,
|
||
pos: usize = 0,
|
||
|
||
fn next(self: *CsvFieldIterator) ?[]const u8 {
|
||
if (self.pos > self.data.len) return null;
|
||
if (self.pos == self.data.len) {
|
||
self.pos = self.data.len + 1;
|
||
return "";
|
||
}
|
||
|
||
const start = self.pos;
|
||
|
||
// Handle quoted fields
|
||
if (self.pos < self.data.len and self.data[self.pos] == '"') {
|
||
self.pos += 1; // skip opening quote
|
||
const qstart = self.pos;
|
||
while (self.pos < self.data.len) {
|
||
if (self.data[self.pos] == '"') {
|
||
if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') {
|
||
self.pos += 2; // escaped quote
|
||
} else {
|
||
break; // end quote
|
||
}
|
||
} else {
|
||
self.pos += 1;
|
||
}
|
||
}
|
||
const qend = self.pos;
|
||
if (self.pos < self.data.len) self.pos += 1; // skip closing quote
|
||
if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma
|
||
return self.data[qstart..qend];
|
||
}
|
||
|
||
// Unquoted field
|
||
while (self.pos < self.data.len and self.data[self.pos] != ',') {
|
||
self.pos += 1;
|
||
}
|
||
const end = self.pos;
|
||
if (self.pos < self.data.len) self.pos += 1; // skip comma
|
||
return self.data[start..end];
|
||
}
|
||
};
|
||
|
||
fn parseF64WithCommas(s: []const u8) ?f64 {
|
||
if (s.len == 0) return null;
|
||
|
||
// Strip leading/trailing spaces
|
||
var start: usize = 0;
|
||
var end: usize = s.len;
|
||
while (start < end and s[start] == ' ') start += 1;
|
||
while (end > start and s[end - 1] == ' ') end -= 1;
|
||
if (start >= end) return null;
|
||
|
||
var negative = false;
|
||
if (s[start] == '-') {
|
||
negative = true;
|
||
start += 1;
|
||
}
|
||
|
||
var integer_part: f64 = 0;
|
||
var i = start;
|
||
while (i < end and s[i] != '.') : (i += 1) {
|
||
if (s[i] == ',') continue; // skip commas
|
||
if (s[i] < '0' or s[i] > '9') return null;
|
||
integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
|
||
}
|
||
|
||
var frac_part: f64 = 0;
|
||
if (i < end and s[i] == '.') {
|
||
i += 1;
|
||
var divisor: f64 = 10.0;
|
||
while (i < end) : (i += 1) {
|
||
if (s[i] < '0' or s[i] > '9') return null;
|
||
frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
|
||
divisor *= 10.0;
|
||
}
|
||
}
|
||
|
||
const result = integer_part + frac_part;
|
||
return if (negative) -result else result;
|
||
}
|
||
|
||
// --- Tests ---
|
||
|
||
test "annual returns are populated" {
|
||
// Should have data from 1871 to at least 2024
|
||
try std.testing.expect(annual_returns.len >= 150);
|
||
try std.testing.expectEqual(@as(u16, 1871), first_year);
|
||
try std.testing.expect(last_year >= 2024);
|
||
}
|
||
|
||
test "spot check 2008 crash" {
|
||
// 2008: S&P 500 nominal total return was approximately -37%
|
||
for (annual_returns) |yr| {
|
||
if (yr.year == 2008) {
|
||
try std.testing.expect(yr.sp500_total_return < -0.30);
|
||
try std.testing.expect(yr.sp500_total_return > -0.50);
|
||
return;
|
||
}
|
||
}
|
||
return error.YearNotFound;
|
||
}
|
||
|
||
test "spot check 1929 crash" {
|
||
// 1929: S&P 500 nominal total return should be significantly negative
|
||
for (annual_returns) |yr| {
|
||
if (yr.year == 1929) {
|
||
try std.testing.expect(yr.sp500_total_return < -0.05);
|
||
return;
|
||
}
|
||
}
|
||
return error.YearNotFound;
|
||
}
|
||
|
||
test "realReturn calculation" {
|
||
// 10% nominal with 3% inflation = ~6.8% real
|
||
const real = realReturn(0.10, 0.03);
|
||
try std.testing.expectApproxEqAbs(0.06796, real, 0.001);
|
||
}
|
||
|
||
test "maxCycles" {
|
||
// With ~154 years of data, a 30-year horizon should give ~124 cycles
|
||
const cycles = maxCycles(30);
|
||
try std.testing.expect(cycles >= 120);
|
||
try std.testing.expect(cycles <= 130);
|
||
|
||
// Horizon longer than data should give 0
|
||
try std.testing.expectEqual(@as(usize, 0), maxCycles(200));
|
||
}
|
||
|
||
test "spot check known annual total returns" {
|
||
// Nominal total returns (real from TR Price + CPI).
|
||
// Jan-to-Jan measurement, so timing differs from calendar-year figures.
|
||
const checks = [_]struct { year: u16, min: f64, max: f64 }{
|
||
.{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year
|
||
.{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery
|
||
.{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis
|
||
.{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC
|
||
.{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery
|
||
.{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID
|
||
};
|
||
|
||
for (checks) |chk| {
|
||
var found = false;
|
||
for (annual_returns) |yr| {
|
||
if (yr.year == chk.year) {
|
||
found = true;
|
||
if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) {
|
||
std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{
|
||
chk.year, yr.sp500_total_return, chk.min, chk.max,
|
||
});
|
||
return error.TestExpectedEqual;
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
if (!found) {
|
||
std.debug.print("Year {d} not found in data\n", .{chk.year});
|
||
return error.TestExpectedEqual;
|
||
}
|
||
}
|
||
}
|
||
|
||
test "bond returns are reasonable" {
|
||
// Bond returns (GS10 yield) should be between 0% and 16%
|
||
for (annual_returns) |yr| {
|
||
if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) {
|
||
std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{
|
||
yr.year, yr.bond_total_return,
|
||
});
|
||
return error.TestExpectedEqual;
|
||
}
|
||
}
|
||
}
|
||
|
||
test "CPI inflation is reasonable" {
|
||
// CPI should generally be between -20% and +25% (19th century had severe deflation)
|
||
for (annual_returns) |yr| {
|
||
if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) {
|
||
std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{
|
||
yr.year, yr.cpi_inflation,
|
||
});
|
||
return error.TestExpectedEqual;
|
||
}
|
||
}
|
||
}
|
||
|
||
test "parseF64WithCommas" {
|
||
try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01);
|
||
try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01);
|
||
try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01);
|
||
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas(""));
|
||
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA"));
|
||
}
|