diff --git a/src/data/shiller.zig b/src/data/shiller.zig new file mode 100644 index 0000000..7c878a6 --- /dev/null +++ b/src/data/shiller.zig @@ -0,0 +1,440 @@ +/// Embedded historical market data derived from Robert Shiller's CAPE dataset. +/// +/// Source: https://shillerdata.com/ (ie_data.xls, "Data" tab saved as CSV) +/// +/// To update: +/// 1. Download ie_data.xls from https://shillerdata.com/ +/// 2. Open in LibreOffice Calc, select the "Data" tab +/// 3. File → Save As → CSV (ie_data.csv) +/// 4. Replace src/data/ie_data.csv with the new file +/// 5. Rebuild — the data is parsed at comptime +/// +/// The CSV contains monthly observations from 1871 to present. This module +/// extracts January rows and computes year-over-year returns for: +/// - S&P 500 total return (price + dividends reinvested) +/// - 10-year Treasury bond total return +/// - CPI inflation +/// +/// All returns are nominal, expressed as decimals (0.12 = 12%). +const std = @import("std"); + +pub const ShillerYear = struct { + year: u16, + /// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%) + sp500_total_return: f64, + /// 10-year Treasury bond total return (decimal) + bond_total_return: f64, + /// CPI inflation rate (decimal) + cpi_inflation: f64, +}; + +/// Comptime-parsed annual returns from the embedded Shiller CSV. +/// Each entry represents one calendar year's returns, computed from +/// January-to-January changes in the cumulative index columns. +pub const annual_returns: []const ShillerYear = parseShillerData(); + +/// Number of available years of historical data. +pub const year_count: usize = annual_returns.len; + +/// First year with complete data. +pub const first_year: u16 = if (annual_returns.len > 0) annual_returns[0].year else 0; + +/// Last year with complete data. +pub const last_year: u16 = if (annual_returns.len > 0) annual_returns[annual_returns.len - 1].year else 0; + +/// Convert a nominal return to a real (inflation-adjusted) return. +pub fn realReturn(nominal: f64, inflation: f64) f64 { + return (1.0 + nominal) / (1.0 + inflation) - 1.0; +} + +/// Return the maximum number of complete simulation cycles for a given horizon. +/// A cycle starting in year Y needs data through year Y + horizon - 1. +pub fn maxCycles(horizon: u16) usize { + if (annual_returns.len == 0) return 0; + const span = last_year - first_year + 1; + if (horizon > span) return 0; + return span - horizon + 1; +} + +// --- Comptime CSV parsing --- + +const csv_data = @embedFile("ie_data.csv"); + +fn parseShillerData() []const ShillerYear { + @setEvalBranchQuota(1_000_000); + + // First pass: collect January rows with their cumulative indices. + // We need January Total Return Price (col 9), Bond Returns (col 17), CPI (col 4). + const JanRow = struct { + year: u16, + tr_price: f64, // cumulative S&P 500 total return index (nominal) + gs10: f64, // 10-year Treasury yield (percentage points) + cpi: f64, + }; + + var jan_rows: [200]JanRow = undefined; + var jan_count: usize = 0; + + var line_iter = LineIterator{ .data = csv_data }; + + // Skip header lines (first 8 lines are headers) + for (0..8) |_| { + _ = line_iter.next(); + } + + while (line_iter.next()) |line| { + if (line.len == 0) continue; + + // Parse the date column to check if this is a January row + var col_iter = CsvFieldIterator{ .data = line }; + const date_field = col_iter.next() orelse continue; // col 0: Date + if (date_field.len == 0) continue; + + // Date format is "YYYY.MM" where January = ".01", October = ".1" + // (October drops the leading zero). Check for exactly ".01". + const dot_pos = indexOf(date_field, '.') orelse continue; + const year_str = date_field[0..dot_pos]; + const month_str = date_field[dot_pos + 1 ..]; + + const year = parseU16(year_str) orelse continue; + + // January is exactly "01" (2 chars). ".1" = October, ".11" = November. + if (month_str.len != 2) continue; + const month = parseU16(month_str) orelse continue; + if (month != 1) continue; + + // Skip to the columns we need + _ = col_iter.next(); // col 1: P + _ = col_iter.next(); // col 2: D + _ = col_iter.next(); // col 3: E + const cpi_field = col_iter.next() orelse continue; // col 4: CPI + _ = col_iter.next(); // col 5: Date Fraction + const gs10_field = col_iter.next() orelse continue; // col 6: Rate GS10 + _ = col_iter.next(); // col 7: Real Price + _ = col_iter.next(); // col 8: Real Dividend + const tr_price_field = col_iter.next() orelse continue; // col 9: Total Return Price (nominal) + + const cpi = parseF64(stripSpaces(cpi_field)) orelse continue; + const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue; + const tr_price = parseF64WithCommas(tr_price_field) orelse continue; + + if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; + + jan_rows[jan_count] = .{ + .year = year, + .tr_price = tr_price, + .gs10 = gs10, + .cpi = cpi, + }; + jan_count += 1; + } + + // Second pass: compute year-over-year returns from consecutive January values. + if (jan_count < 2) return &.{}; + + var results: [200]ShillerYear = undefined; + var result_count: usize = 0; + + for (1..jan_count) |i| { + const prev = jan_rows[i - 1]; + const curr = jan_rows[i]; + + // Ensure consecutive years + if (curr.year != prev.year + 1) continue; + + const cpi_change = (curr.cpi / prev.cpi) - 1.0; + // TR Price is a real (inflation-adjusted) total return index. + // Convert to nominal: (1 + real) * (1 + inflation) - 1 + const real_sp500 = (curr.tr_price / prev.tr_price) - 1.0; + const nominal_sp500 = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0; + + results[result_count] = .{ + .year = prev.year, + .sp500_total_return = nominal_sp500, + .bond_total_return = prev.gs10 / 100.0, // GS10 yield as nominal bond return (FIRECalc convention) + .cpi_inflation = cpi_change, + }; + result_count += 1; + } + + // Copy into a correctly-sized slice + const final = blk: { + var arr: [result_count]ShillerYear = undefined; + for (0..result_count) |i| { + arr[i] = results[i]; + } + break :blk arr; + }; + return &final; +} + +// --- Comptime parsing helpers --- + +const LineIterator = struct { + data: []const u8, + pos: usize = 0, + + fn next(self: *LineIterator) ?[]const u8 { + if (self.pos >= self.data.len) return null; + const start = self.pos; + while (self.pos < self.data.len and self.data[self.pos] != '\n') { + self.pos += 1; + } + const end = if (self.pos > start and self.data[self.pos - 1] == '\r') self.pos - 1 else self.pos; + if (self.pos < self.data.len) self.pos += 1; // skip \n + return self.data[start..end]; + } +}; + +const CsvFieldIterator = struct { + data: []const u8, + pos: usize = 0, + + fn next(self: *CsvFieldIterator) ?[]const u8 { + if (self.pos > self.data.len) return null; + if (self.pos == self.data.len) { + self.pos = self.data.len + 1; + return ""; + } + + const start = self.pos; + + // Handle quoted fields + if (self.pos < self.data.len and self.data[self.pos] == '"') { + self.pos += 1; // skip opening quote + const qstart = self.pos; + while (self.pos < self.data.len) { + if (self.data[self.pos] == '"') { + if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') { + self.pos += 2; // escaped quote + } else { + break; // end quote + } + } else { + self.pos += 1; + } + } + const qend = self.pos; + if (self.pos < self.data.len) self.pos += 1; // skip closing quote + if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma + return self.data[qstart..qend]; + } + + // Unquoted field + while (self.pos < self.data.len and self.data[self.pos] != ',') { + self.pos += 1; + } + const end = self.pos; + if (self.pos < self.data.len) self.pos += 1; // skip comma + return self.data[start..end]; + } +}; + +fn indexOf(s: []const u8, ch: u8) ?usize { + for (s, 0..) |c, i| { + if (c == ch) return i; + } + return null; +} + +fn parseU16(s: []const u8) ?u16 { + if (s.len == 0) return null; + var result: u16 = 0; + for (s) |c| { + if (c < '0' or c > '9') return null; + result = result * 10 + @as(u16, c - '0'); + } + return result; +} + +fn parseF64(s: []const u8) ?f64 { + if (s.len == 0) return null; + + var negative = false; + var start: usize = 0; + if (s[0] == '-') { + negative = true; + start = 1; + } + + var integer_part: f64 = 0; + var i = start; + while (i < s.len and s[i] != '.') : (i += 1) { + if (s[i] < '0' or s[i] > '9') return null; + integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0')); + } + + var frac_part: f64 = 0; + if (i < s.len and s[i] == '.') { + i += 1; + var divisor: f64 = 10.0; + while (i < s.len) : (i += 1) { + if (s[i] < '0' or s[i] > '9') return null; + frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor; + divisor *= 10.0; + } + } + + const result = integer_part + frac_part; + return if (negative) -result else result; +} + +fn stripSpaces(s: []const u8) []const u8 { + var start: usize = 0; + var end: usize = s.len; + while (start < end and s[start] == ' ') start += 1; + while (end > start and s[end - 1] == ' ') end -= 1; + return s[start..end]; +} + +fn parseF64WithCommas(s: []const u8) ?f64 { + if (s.len == 0) return null; + + // Strip leading/trailing spaces + var start: usize = 0; + var end: usize = s.len; + while (start < end and s[start] == ' ') start += 1; + while (end > start and s[end - 1] == ' ') end -= 1; + if (start >= end) return null; + + var negative = false; + if (s[start] == '-') { + negative = true; + start += 1; + } + + var integer_part: f64 = 0; + var i = start; + while (i < end and s[i] != '.') : (i += 1) { + if (s[i] == ',') continue; // skip commas + if (s[i] < '0' or s[i] > '9') return null; + integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0')); + } + + var frac_part: f64 = 0; + if (i < end and s[i] == '.') { + i += 1; + var divisor: f64 = 10.0; + while (i < end) : (i += 1) { + if (s[i] < '0' or s[i] > '9') return null; + frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor; + divisor *= 10.0; + } + } + + const result = integer_part + frac_part; + return if (negative) -result else result; +} + +// --- Tests --- + +test "annual returns are populated" { + // Should have data from 1871 to at least 2024 + try std.testing.expect(annual_returns.len >= 150); + try std.testing.expectEqual(@as(u16, 1871), first_year); + try std.testing.expect(last_year >= 2024); +} + +test "spot check 2008 crash" { + // 2008: S&P 500 nominal total return was approximately -37% + for (annual_returns) |yr| { + if (yr.year == 2008) { + try std.testing.expect(yr.sp500_total_return < -0.30); + try std.testing.expect(yr.sp500_total_return > -0.50); + return; + } + } + return error.YearNotFound; +} + +test "spot check 1929 crash" { + // 1929: S&P 500 nominal total return should be significantly negative + for (annual_returns) |yr| { + if (yr.year == 1929) { + try std.testing.expect(yr.sp500_total_return < -0.05); + return; + } + } + return error.YearNotFound; +} + +test "realReturn calculation" { + // 10% nominal with 3% inflation = ~6.8% real + const real = realReturn(0.10, 0.03); + try std.testing.expectApproxEqAbs(0.06796, real, 0.001); +} + +test "maxCycles" { + // With ~154 years of data, a 30-year horizon should give ~124 cycles + const cycles = maxCycles(30); + try std.testing.expect(cycles >= 120); + try std.testing.expect(cycles <= 130); + + // Horizon longer than data should give 0 + try std.testing.expectEqual(@as(usize, 0), maxCycles(200)); +} + +test "spot check known annual total returns" { + // Nominal total returns (real from TR Price + CPI). + // Jan-to-Jan measurement, so timing differs from calendar-year figures. + const checks = [_]struct { year: u16, min: f64, max: f64 }{ + .{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year + .{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery + .{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis + .{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC + .{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery + .{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID + }; + + for (checks) |chk| { + var found = false; + for (annual_returns) |yr| { + if (yr.year == chk.year) { + found = true; + if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) { + std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{ + chk.year, yr.sp500_total_return, chk.min, chk.max, + }); + return error.TestExpectedEqual; + } + break; + } + } + if (!found) { + std.debug.print("Year {d} not found in data\n", .{chk.year}); + return error.TestExpectedEqual; + } + } +} + +test "bond returns are reasonable" { + // Bond returns (GS10 yield) should be between 0% and 16% + for (annual_returns) |yr| { + if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) { + std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{ + yr.year, yr.bond_total_return, + }); + return error.TestExpectedEqual; + } + } +} + +test "CPI inflation is reasonable" { + // CPI should generally be between -20% and +25% (19th century had severe deflation) + for (annual_returns) |yr| { + if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) { + std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{ + yr.year, yr.cpi_inflation, + }); + return error.TestExpectedEqual; + } + } +} + +test "parseF64WithCommas" { + try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01); + try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01); + try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01); + try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("")); + try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA")); +}