zfin/src/data/shiller.zig

355 lines
12 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/// Embedded historical market data derived from Robert Shiller's CAPE dataset.
///
/// Source: https://shillerdata.com/ (ie_data.xls, "Data" tab saved as CSV)
///
/// To update:
/// 1. Download ie_data.xls from https://shillerdata.com/
/// 2. Open in LibreOffice Calc, select the "Data" tab
/// 3. File → Save As → CSV (ie_data.csv)
/// 4. Replace src/data/ie_data.csv with the new file
/// 5. Rebuild — the data is parsed at comptime
///
/// The CSV contains monthly observations from 1871 to present. This module
/// extracts January rows and computes year-over-year returns for:
/// - S&P 500 total return (price + dividends reinvested)
/// - 10-year Treasury bond total return
/// - CPI inflation
///
/// All returns are nominal, expressed as decimals (0.12 = 12%).
const std = @import("std");
pub const ShillerYear = struct {
year: u16,
/// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%)
sp500_total_return: f64,
/// 10-year Treasury bond total return (decimal)
bond_total_return: f64,
/// CPI inflation rate (decimal)
cpi_inflation: f64,
};
/// Comptime-parsed annual returns from the embedded Shiller CSV.
/// Each entry represents one calendar year's returns, computed from
/// January-to-January changes in the cumulative index columns.
pub const annual_returns: []const ShillerYear = parseShillerData();
/// Number of available years of historical data.
pub const year_count: usize = annual_returns.len;
/// First year with complete data.
pub const first_year: u16 = if (annual_returns.len > 0) annual_returns[0].year else 0;
/// Last year with complete data.
pub const last_year: u16 = if (annual_returns.len > 0) annual_returns[annual_returns.len - 1].year else 0;
/// Convert a nominal return to a real (inflation-adjusted) return.
pub fn realReturn(nominal: f64, inflation: f64) f64 {
return (1.0 + nominal) / (1.0 + inflation) - 1.0;
}
/// Return the maximum number of complete simulation cycles for a given horizon.
/// A cycle starting in year Y needs data through year Y + horizon - 1.
pub fn maxCycles(horizon: u16) usize {
if (annual_returns.len == 0) return 0;
const span = last_year - first_year + 1;
if (horizon > span) return 0;
return span - horizon + 1;
}
// --- Comptime CSV parsing ---
const csv_data = @embedFile("ie_data.csv");
fn parseShillerData() []const ShillerYear {
@setEvalBranchQuota(120_000);
var results: [200]ShillerYear = undefined;
var result_count: usize = 0;
var prev_year: u16 = 0;
var prev_tr_price: f64 = 0;
var prev_gs10: f64 = 0;
var prev_cpi: f64 = 0;
// Skip header (8 lines)
var pos: usize = 0;
var newlines: usize = 0;
while (pos < csv_data.len and newlines < 8) : (pos += 1) {
if (csv_data[pos] == '\n') newlines += 1;
}
// Lines are ~135 bytes. After each January row, the next January is
// ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes,
// then scan forward to the next line boundary. This avoids scanning
// ~90% of the file byte-by-byte.
const skip_bytes = 11 * 96;
while (pos < csv_data.len) {
// Find current line
const line_start = pos;
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos;
if (pos < csv_data.len) pos += 1;
const line = csv_data[line_start..line_end];
if (line.len < 7) continue;
// Fast reject: date is "YYYY.01,"
if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue;
const year = std.fmt.parseInt(u16, line[0..4], 10) catch continue;
// Parse fields for this January row
var col_iter = CsvFieldIterator{ .data = line };
_ = col_iter.next(); // col 0: Date
_ = col_iter.next(); // col 1: P
_ = col_iter.next(); // col 2: D
_ = col_iter.next(); // col 3: E
const cpi_field = col_iter.next() orelse continue;
_ = col_iter.next(); // col 5
const gs10_field = col_iter.next() orelse continue;
_ = col_iter.next(); // col 7
_ = col_iter.next(); // col 8
const tr_price_field = col_iter.next() orelse continue;
const cpi = parseF64WithCommas(cpi_field) orelse continue;
const gs10 = parseF64WithCommas(gs10_field) orelse continue;
const tr_price = parseF64WithCommas(tr_price_field) orelse continue;
if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue;
// Compute return from previous January
if (prev_year > 0 and year == prev_year + 1) {
const cpi_change = (cpi / prev_cpi) - 1.0;
const real_sp500 = (tr_price / prev_tr_price) - 1.0;
results[result_count] = .{
.year = prev_year,
.sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0,
.bond_total_return = prev_gs10 / 100.0,
.cpi_inflation = cpi_change,
};
result_count += 1;
}
prev_year = year;
prev_tr_price = tr_price;
prev_gs10 = gs10;
prev_cpi = cpi;
// Skip ahead ~11 months of data
pos = @min(pos + skip_bytes, csv_data.len);
// Realign to next line boundary
while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1;
if (pos < csv_data.len) pos += 1;
}
if (result_count == 0) return &.{};
const final = blk: {
var arr: [result_count]ShillerYear = undefined;
for (0..result_count) |i| {
arr[i] = results[i];
}
break :blk arr;
};
return &final;
}
// --- Comptime parsing helpers ---
const CsvFieldIterator = struct {
data: []const u8,
pos: usize = 0,
fn next(self: *CsvFieldIterator) ?[]const u8 {
if (self.pos > self.data.len) return null;
if (self.pos == self.data.len) {
self.pos = self.data.len + 1;
return "";
}
const start = self.pos;
// Handle quoted fields
if (self.pos < self.data.len and self.data[self.pos] == '"') {
self.pos += 1; // skip opening quote
const qstart = self.pos;
while (self.pos < self.data.len) {
if (self.data[self.pos] == '"') {
if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') {
self.pos += 2; // escaped quote
} else {
break; // end quote
}
} else {
self.pos += 1;
}
}
const qend = self.pos;
if (self.pos < self.data.len) self.pos += 1; // skip closing quote
if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma
return self.data[qstart..qend];
}
// Unquoted field
while (self.pos < self.data.len and self.data[self.pos] != ',') {
self.pos += 1;
}
const end = self.pos;
if (self.pos < self.data.len) self.pos += 1; // skip comma
return self.data[start..end];
}
};
fn parseF64WithCommas(s: []const u8) ?f64 {
if (s.len == 0) return null;
// Strip leading/trailing spaces
var start: usize = 0;
var end: usize = s.len;
while (start < end and s[start] == ' ') start += 1;
while (end > start and s[end - 1] == ' ') end -= 1;
if (start >= end) return null;
var negative = false;
if (s[start] == '-') {
negative = true;
start += 1;
}
var integer_part: f64 = 0;
var i = start;
while (i < end and s[i] != '.') : (i += 1) {
if (s[i] == ',') continue; // skip commas
if (s[i] < '0' or s[i] > '9') return null;
integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0'));
}
var frac_part: f64 = 0;
if (i < end and s[i] == '.') {
i += 1;
var divisor: f64 = 10.0;
while (i < end) : (i += 1) {
if (s[i] < '0' or s[i] > '9') return null;
frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor;
divisor *= 10.0;
}
}
const result = integer_part + frac_part;
return if (negative) -result else result;
}
// --- Tests ---
test "annual returns are populated" {
// Should have data from 1871 to at least 2024
try std.testing.expect(annual_returns.len >= 150);
try std.testing.expectEqual(@as(u16, 1871), first_year);
try std.testing.expect(last_year >= 2024);
}
test "spot check 2008 crash" {
// 2008: S&P 500 nominal total return was approximately -37%
for (annual_returns) |yr| {
if (yr.year == 2008) {
try std.testing.expect(yr.sp500_total_return < -0.30);
try std.testing.expect(yr.sp500_total_return > -0.50);
return;
}
}
return error.YearNotFound;
}
test "spot check 1929 crash" {
// 1929: S&P 500 nominal total return should be significantly negative
for (annual_returns) |yr| {
if (yr.year == 1929) {
try std.testing.expect(yr.sp500_total_return < -0.05);
return;
}
}
return error.YearNotFound;
}
test "realReturn calculation" {
// 10% nominal with 3% inflation = ~6.8% real
const real = realReturn(0.10, 0.03);
try std.testing.expectApproxEqAbs(0.06796, real, 0.001);
}
test "maxCycles" {
// With ~154 years of data, a 30-year horizon should give ~124 cycles
const cycles = maxCycles(30);
try std.testing.expect(cycles >= 120);
try std.testing.expect(cycles <= 130);
// Horizon longer than data should give 0
try std.testing.expectEqual(@as(usize, 0), maxCycles(200));
}
test "spot check known annual total returns" {
// Nominal total returns (real from TR Price + CPI).
// Jan-to-Jan measurement, so timing differs from calendar-year figures.
const checks = [_]struct { year: u16, min: f64, max: f64 }{
.{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year
.{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery
.{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis
.{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC
.{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery
.{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID
};
for (checks) |chk| {
var found = false;
for (annual_returns) |yr| {
if (yr.year == chk.year) {
found = true;
if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) {
std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{
chk.year, yr.sp500_total_return, chk.min, chk.max,
});
return error.TestExpectedEqual;
}
break;
}
}
if (!found) {
std.debug.print("Year {d} not found in data\n", .{chk.year});
return error.TestExpectedEqual;
}
}
}
test "bond returns are reasonable" {
// Bond returns (GS10 yield) should be between 0% and 16%
for (annual_returns) |yr| {
if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) {
std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{
yr.year, yr.bond_total_return,
});
return error.TestExpectedEqual;
}
}
}
test "CPI inflation is reasonable" {
// CPI should generally be between -20% and +25% (19th century had severe deflation)
for (annual_returns) |yr| {
if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) {
std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{
yr.year, yr.cpi_inflation,
});
return error.TestExpectedEqual;
}
}
}
test "parseF64WithCommas" {
try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01);
try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01);
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas(""));
try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA"));
}