add shiller comptime parsed variable for use by rest of app
This commit is contained in:
parent
43bd0e61e7
commit
dda49efe27
1 changed files with 440 additions and 0 deletions
440
src/data/shiller.zig
Normal file
440
src/data/shiller.zig
Normal file
|
|
@ -0,0 +1,440 @@
|
|||
/// Embedded historical market data derived from Robert Shiller's CAPE dataset.
|
||||
///
|
||||
/// Source: https://shillerdata.com/ (ie_data.xls, "Data" tab saved as CSV)
|
||||
///
|
||||
/// To update:
|
||||
/// 1. Download ie_data.xls from https://shillerdata.com/
|
||||
/// 2. Open in LibreOffice Calc, select the "Data" tab
|
||||
/// 3. File → Save As → CSV (ie_data.csv)
|
||||
/// 4. Replace src/data/ie_data.csv with the new file
|
||||
/// 5. Rebuild — the data is parsed at comptime
|
||||
///
|
||||
/// The CSV contains monthly observations from 1871 to present. This module
|
||||
/// extracts January rows and computes year-over-year returns for:
|
||||
/// - S&P 500 total return (price + dividends reinvested)
|
||||
/// - 10-year Treasury bond total return
|
||||
/// - CPI inflation
|
||||
///
|
||||
/// All returns are nominal, expressed as decimals (0.12 = 12%).
|
||||
const std = @import("std");
|
||||
|
||||
pub const ShillerYear = struct {
|
||||
year: u16,
|
||||
/// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
|
||||
sp500_total_return: f64,
|
||||
/// 10-year Treasury bond total return (decimal)
|
||||
bond_total_return: f64,
|
||||
/// CPI inflation rate (decimal)
|
||||
cpi_inflation: f64,
|
||||
};
|
||||
|
||||
/// Comptime-parsed annual returns from the embedded Shiller CSV.
|
||||
/// Each entry represents one calendar year's returns, computed from
|
||||
/// January-to-January changes in the cumulative index columns.
|
||||
pub const annual_returns: []const ShillerYear = parseShillerData();
|
||||
|
||||
/// Number of available years of historical data.
|
||||
pub const year_count: usize = annual_returns.len;
|
||||
|
||||
/// First year with complete data.
|
||||
pub const first_year: u16 = if (annual_returns.len > 0) annual_returns[0].year else 0;
|
||||
|
||||
/// Last year with complete data.
|
||||
pub const last_year: u16 = if (annual_returns.len > 0) annual_returns[annual_returns.len - 1].year else 0;
|
||||
|
||||
/// Convert a nominal return to a real (inflation-adjusted) return.
|
||||
pub fn realReturn(nominal: f64, inflation: f64) f64 {
|
||||
return (1.0 + nominal) / (1.0 + inflation) - 1.0;
|
||||
}
|
||||
|
||||
/// Return the maximum number of complete simulation cycles for a given horizon.
|
||||
/// A cycle starting in year Y needs data through year Y + horizon - 1.
|
||||
pub fn maxCycles(horizon: u16) usize {
|
||||
if (annual_returns.len == 0) return 0;
|
||||
const span = last_year - first_year + 1;
|
||||
if (horizon > span) return 0;
|
||||
return span - horizon + 1;
|
||||
}
|
||||
|
||||
// --- Comptime CSV parsing ---
|
||||
|
||||
const csv_data = @embedFile("ie_data.csv");
|
||||
|
||||
fn parseShillerData() []const ShillerYear {
|
||||
@setEvalBranchQuota(1_000_000);
|
||||
|
||||
// First pass: collect January rows with their cumulative indices.
|
||||
// We need January Total Return Price (col 9), Bond Returns (col 17), CPI (col 4).
|
||||
const JanRow = struct {
|
||||
year: u16,
|
||||
tr_price: f64, // cumulative S&P 500 total return index (nominal)
|
||||
gs10: f64, // 10-year Treasury yield (percentage points)
|
||||
cpi: f64,
|
||||
};
|
||||
|
||||
var jan_rows: [200]JanRow = undefined;
|
||||
var jan_count: usize = 0;
|
||||
|
||||
var line_iter = LineIterator{ .data = csv_data };
|
||||
|
||||
// Skip header lines (first 8 lines are headers)
|
||||
for (0..8) |_| {
|
||||
_ = line_iter.next();
|
||||
}
|
||||
|
||||
while (line_iter.next()) |line| {
|
||||
if (line.len == 0) continue;
|
||||
|
||||
// Parse the date column to check if this is a January row
|
||||
var col_iter = CsvFieldIterator{ .data = line };
|
||||
const date_field = col_iter.next() orelse continue; // col 0: Date
|
||||
if (date_field.len == 0) continue;
|
||||
|
||||
// Date format is "YYYY.MM" where January = ".01", October = ".1"
|
||||
// (October drops the leading zero). Check for exactly ".01".
|
||||
const dot_pos = indexOf(date_field, '.') orelse continue;
|
||||
const year_str = date_field[0..dot_pos];
|
||||
const month_str = date_field[dot_pos + 1 ..];
|
||||
|
||||
const year = parseU16(year_str) orelse continue;
|
||||
|
||||
// January is exactly "01" (2 chars). ".1" = October, ".11" = November.
|
||||
if (month_str.len != 2) continue;
|
||||
const month = parseU16(month_str) orelse continue;
|
||||
if (month != 1) continue;
|
||||
|
||||
// Skip to the columns we need
|
||||
_ = col_iter.next(); // col 1: P
|
||||
_ = col_iter.next(); // col 2: D
|
||||
_ = col_iter.next(); // col 3: E
|
||||
const cpi_field = col_iter.next() orelse continue; // col 4: CPI
|
||||
_ = col_iter.next(); // col 5: Date Fraction
|
||||
const gs10_field = col_iter.next() orelse continue; // col 6: Rate GS10
|
||||
_ = col_iter.next(); // col 7: Real Price
|
||||
_ = col_iter.next(); // col 8: Real Dividend
|
||||
const tr_price_field = col_iter.next() orelse continue; // col 9: Total Return Price (nominal)
|
||||
|
||||
const cpi = parseF64(stripSpaces(cpi_field)) orelse continue;
|
||||
const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue;
|
||||
const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
|
||||
|
||||
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
|
||||
|
||||
jan_rows[jan_count] = .{
|
||||
.year = year,
|
||||
.tr_price = tr_price,
|
||||
.gs10 = gs10,
|
||||
.cpi = cpi,
|
||||
};
|
||||
jan_count += 1;
|
||||
}
|
||||
|
||||
// Second pass: compute year-over-year returns from consecutive January values.
|
||||
if (jan_count < 2) return &.{};
|
||||
|
||||
var results: [200]ShillerYear = undefined;
|
||||
var result_count: usize = 0;
|
||||
|
||||
for (1..jan_count) |i| {
|
||||
const prev = jan_rows[i - 1];
|
||||
const curr = jan_rows[i];
|
||||
|
||||
// Ensure consecutive years
|
||||
if (curr.year != prev.year + 1) continue;
|
||||
|
||||
const cpi_change = (curr.cpi / prev.cpi) - 1.0;
|
||||
// TR Price is a real (inflation-adjusted) total return index.
|
||||
// Convert to nominal: (1 + real) * (1 + inflation) - 1
|
||||
const real_sp500 = (curr.tr_price / prev.tr_price) - 1.0;
|
||||
const nominal_sp500 = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0;
|
||||
|
||||
results[result_count] = .{
|
||||
.year = prev.year,
|
||||
.sp500_total_return = nominal_sp500,
|
||||
.bond_total_return = prev.gs10 / 100.0, // GS10 yield as nominal bond return (FIRECalc convention)
|
||||
.cpi_inflation = cpi_change,
|
||||
};
|
||||
result_count += 1;
|
||||
}
|
||||
|
||||
// Copy into a correctly-sized slice
|
||||
const final = blk: {
|
||||
var arr: [result_count]ShillerYear = undefined;
|
||||
for (0..result_count) |i| {
|
||||
arr[i] = results[i];
|
||||
}
|
||||
break :blk arr;
|
||||
};
|
||||
return &final;
|
||||
}
|
||||
|
||||
// --- Comptime parsing helpers ---
|
||||
|
||||
const LineIterator = struct {
|
||||
data: []const u8,
|
||||
pos: usize = 0,
|
||||
|
||||
fn next(self: *LineIterator) ?[]const u8 {
|
||||
if (self.pos >= self.data.len) return null;
|
||||
const start = self.pos;
|
||||
while (self.pos < self.data.len and self.data[self.pos] != '\n') {
|
||||
self.pos += 1;
|
||||
}
|
||||
const end = if (self.pos > start and self.data[self.pos - 1] == '\r') self.pos - 1 else self.pos;
|
||||
if (self.pos < self.data.len) self.pos += 1; // skip \n
|
||||
return self.data[start..end];
|
||||
}
|
||||
};
|
||||
|
||||
const CsvFieldIterator = struct {
|
||||
data: []const u8,
|
||||
pos: usize = 0,
|
||||
|
||||
fn next(self: *CsvFieldIterator) ?[]const u8 {
|
||||
if (self.pos > self.data.len) return null;
|
||||
if (self.pos == self.data.len) {
|
||||
self.pos = self.data.len + 1;
|
||||
return "";
|
||||
}
|
||||
|
||||
const start = self.pos;
|
||||
|
||||
// Handle quoted fields
|
||||
if (self.pos < self.data.len and self.data[self.pos] == '"') {
|
||||
self.pos += 1; // skip opening quote
|
||||
const qstart = self.pos;
|
||||
while (self.pos < self.data.len) {
|
||||
if (self.data[self.pos] == '"') {
|
||||
if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') {
|
||||
self.pos += 2; // escaped quote
|
||||
} else {
|
||||
break; // end quote
|
||||
}
|
||||
} else {
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
const qend = self.pos;
|
||||
if (self.pos < self.data.len) self.pos += 1; // skip closing quote
|
||||
if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma
|
||||
return self.data[qstart..qend];
|
||||
}
|
||||
|
||||
// Unquoted field
|
||||
while (self.pos < self.data.len and self.data[self.pos] != ',') {
|
||||
self.pos += 1;
|
||||
}
|
||||
const end = self.pos;
|
||||
if (self.pos < self.data.len) self.pos += 1; // skip comma
|
||||
return self.data[start..end];
|
||||
}
|
||||
};
|
||||
|
||||
fn indexOf(s: []const u8, ch: u8) ?usize {
|
||||
for (s, 0..) |c, i| {
|
||||
if (c == ch) return i;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
fn parseU16(s: []const u8) ?u16 {
|
||||
if (s.len == 0) return null;
|
||||
var result: u16 = 0;
|
||||
for (s) |c| {
|
||||
if (c < '0' or c > '9') return null;
|
||||
result = result * 10 + @as(u16, c - '0');
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
fn parseF64(s: []const u8) ?f64 {
|
||||
if (s.len == 0) return null;
|
||||
|
||||
var negative = false;
|
||||
var start: usize = 0;
|
||||
if (s[0] == '-') {
|
||||
negative = true;
|
||||
start = 1;
|
||||
}
|
||||
|
||||
var integer_part: f64 = 0;
|
||||
var i = start;
|
||||
while (i < s.len and s[i] != '.') : (i += 1) {
|
||||
if (s[i] < '0' or s[i] > '9') return null;
|
||||
integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
|
||||
}
|
||||
|
||||
var frac_part: f64 = 0;
|
||||
if (i < s.len and s[i] == '.') {
|
||||
i += 1;
|
||||
var divisor: f64 = 10.0;
|
||||
while (i < s.len) : (i += 1) {
|
||||
if (s[i] < '0' or s[i] > '9') return null;
|
||||
frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
|
||||
divisor *= 10.0;
|
||||
}
|
||||
}
|
||||
|
||||
const result = integer_part + frac_part;
|
||||
return if (negative) -result else result;
|
||||
}
|
||||
|
||||
fn stripSpaces(s: []const u8) []const u8 {
|
||||
var start: usize = 0;
|
||||
var end: usize = s.len;
|
||||
while (start < end and s[start] == ' ') start += 1;
|
||||
while (end > start and s[end - 1] == ' ') end -= 1;
|
||||
return s[start..end];
|
||||
}
|
||||
|
||||
fn parseF64WithCommas(s: []const u8) ?f64 {
|
||||
if (s.len == 0) return null;
|
||||
|
||||
// Strip leading/trailing spaces
|
||||
var start: usize = 0;
|
||||
var end: usize = s.len;
|
||||
while (start < end and s[start] == ' ') start += 1;
|
||||
while (end > start and s[end - 1] == ' ') end -= 1;
|
||||
if (start >= end) return null;
|
||||
|
||||
var negative = false;
|
||||
if (s[start] == '-') {
|
||||
negative = true;
|
||||
start += 1;
|
||||
}
|
||||
|
||||
var integer_part: f64 = 0;
|
||||
var i = start;
|
||||
while (i < end and s[i] != '.') : (i += 1) {
|
||||
if (s[i] == ',') continue; // skip commas
|
||||
if (s[i] < '0' or s[i] > '9') return null;
|
||||
integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
|
||||
}
|
||||
|
||||
var frac_part: f64 = 0;
|
||||
if (i < end and s[i] == '.') {
|
||||
i += 1;
|
||||
var divisor: f64 = 10.0;
|
||||
while (i < end) : (i += 1) {
|
||||
if (s[i] < '0' or s[i] > '9') return null;
|
||||
frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
|
||||
divisor *= 10.0;
|
||||
}
|
||||
}
|
||||
|
||||
const result = integer_part + frac_part;
|
||||
return if (negative) -result else result;
|
||||
}
|
||||
|
||||
// --- Tests ---
|
||||
|
||||
test "annual returns are populated" {
|
||||
// Should have data from 1871 to at least 2024
|
||||
try std.testing.expect(annual_returns.len >= 150);
|
||||
try std.testing.expectEqual(@as(u16, 1871), first_year);
|
||||
try std.testing.expect(last_year >= 2024);
|
||||
}
|
||||
|
||||
test "spot check 2008 crash" {
|
||||
// 2008: S&P 500 nominal total return was approximately -37%
|
||||
for (annual_returns) |yr| {
|
||||
if (yr.year == 2008) {
|
||||
try std.testing.expect(yr.sp500_total_return < -0.30);
|
||||
try std.testing.expect(yr.sp500_total_return > -0.50);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return error.YearNotFound;
|
||||
}
|
||||
|
||||
test "spot check 1929 crash" {
|
||||
// 1929: S&P 500 nominal total return should be significantly negative
|
||||
for (annual_returns) |yr| {
|
||||
if (yr.year == 1929) {
|
||||
try std.testing.expect(yr.sp500_total_return < -0.05);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return error.YearNotFound;
|
||||
}
|
||||
|
||||
test "realReturn calculation" {
|
||||
// 10% nominal with 3% inflation = ~6.8% real
|
||||
const real = realReturn(0.10, 0.03);
|
||||
try std.testing.expectApproxEqAbs(0.06796, real, 0.001);
|
||||
}
|
||||
|
||||
test "maxCycles" {
|
||||
// With ~154 years of data, a 30-year horizon should give ~124 cycles
|
||||
const cycles = maxCycles(30);
|
||||
try std.testing.expect(cycles >= 120);
|
||||
try std.testing.expect(cycles <= 130);
|
||||
|
||||
// Horizon longer than data should give 0
|
||||
try std.testing.expectEqual(@as(usize, 0), maxCycles(200));
|
||||
}
|
||||
|
||||
test "spot check known annual total returns" {
|
||||
// Nominal total returns (real from TR Price + CPI).
|
||||
// Jan-to-Jan measurement, so timing differs from calendar-year figures.
|
||||
const checks = [_]struct { year: u16, min: f64, max: f64 }{
|
||||
.{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year
|
||||
.{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery
|
||||
.{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis
|
||||
.{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC
|
||||
.{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery
|
||||
.{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID
|
||||
};
|
||||
|
||||
for (checks) |chk| {
|
||||
var found = false;
|
||||
for (annual_returns) |yr| {
|
||||
if (yr.year == chk.year) {
|
||||
found = true;
|
||||
if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) {
|
||||
std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{
|
||||
chk.year, yr.sp500_total_return, chk.min, chk.max,
|
||||
});
|
||||
return error.TestExpectedEqual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
std.debug.print("Year {d} not found in data\n", .{chk.year});
|
||||
return error.TestExpectedEqual;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "bond returns are reasonable" {
|
||||
// Bond returns (GS10 yield) should be between 0% and 16%
|
||||
for (annual_returns) |yr| {
|
||||
if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) {
|
||||
std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{
|
||||
yr.year, yr.bond_total_return,
|
||||
});
|
||||
return error.TestExpectedEqual;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "CPI inflation is reasonable" {
|
||||
// CPI should generally be between -20% and +25% (19th century had severe deflation)
|
||||
for (annual_returns) |yr| {
|
||||
if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) {
|
||||
std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{
|
||||
yr.year, yr.cpi_inflation,
|
||||
});
|
||||
return error.TestExpectedEqual;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "parseF64WithCommas" {
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01);
|
||||
try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01);
|
||||
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas(""));
|
||||
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA"));
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue