/// Embedded historical market data derived from Robert Shiller's CAPE dataset. /// /// Source: https://shillerdata.com/ (ie_data.xls, "Data" tab saved as CSV) /// /// To update: /// 1. Download ie_data.xls from https://shillerdata.com/ /// 2. Open in LibreOffice Calc, select the "Data" tab /// 3. File → Save As → CSV (ie_data.csv) /// 4. Replace src/data/ie_data.csv with the new file /// 5. Rebuild — the data is parsed at comptime /// /// The CSV contains monthly observations from 1871 to present. This module /// extracts January rows and computes year-over-year returns for: /// - S&P 500 total return (price + dividends reinvested) /// - 10-year Treasury bond total return /// - CPI inflation /// /// All returns are nominal, expressed as decimals (0.12 = 12%). const std = @import("std"); pub const ShillerYear = struct { year: u16, /// S&P 500 total return including dividends (decimal, e.g. 0.12 = 12%) sp500_total_return: f64, /// 10-year Treasury bond total return (decimal) bond_total_return: f64, /// CPI inflation rate (decimal) cpi_inflation: f64, }; /// Comptime-parsed annual returns from the embedded Shiller CSV. /// Each entry represents one calendar year's returns, computed from /// January-to-January changes in the cumulative index columns. pub const annual_returns: []const ShillerYear = parseShillerData(); /// Number of available years of historical data. pub const year_count: usize = annual_returns.len; /// First year with complete data. pub const first_year: u16 = if (annual_returns.len > 0) annual_returns[0].year else 0; /// Last year with complete data. pub const last_year: u16 = if (annual_returns.len > 0) annual_returns[annual_returns.len - 1].year else 0; /// Convert a nominal return to a real (inflation-adjusted) return. pub fn realReturn(nominal: f64, inflation: f64) f64 { return (1.0 + nominal) / (1.0 + inflation) - 1.0; } /// Return the maximum number of complete simulation cycles for a given horizon. /// A cycle starting in year Y needs data through year Y + horizon - 1. pub fn maxCycles(horizon: u16) usize { if (annual_returns.len == 0) return 0; const span = last_year - first_year + 1; if (horizon > span) return 0; return span - horizon + 1; } // --- Comptime CSV parsing --- const csv_data = @embedFile("ie_data.csv"); fn parseShillerData() []const ShillerYear { @setEvalBranchQuota(120_000); var results: [200]ShillerYear = undefined; var result_count: usize = 0; var prev_year: u16 = 0; var prev_tr_price: f64 = 0; var prev_gs10: f64 = 0; var prev_cpi: f64 = 0; // Skip header (8 lines) var pos: usize = 0; var newlines: usize = 0; while (pos < csv_data.len and newlines < 8) : (pos += 1) { if (csv_data[pos] == '\n') newlines += 1; } // Lines are ~135 bytes. After each January row, the next January is // ~12 lines away. Skip 11 × min_line_length (~96) = 1056 bytes, // then scan forward to the next line boundary. This avoids scanning // ~90% of the file byte-by-byte. const skip_bytes = 11 * 96; while (pos < csv_data.len) { // Find current line const line_start = pos; while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos; if (pos < csv_data.len) pos += 1; const line = csv_data[line_start..line_end]; if (line.len < 7) continue; // Fast reject: date is "YYYY.01," if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue; const year = parseU16(line[0..4]) orelse continue; // Parse fields for this January row var col_iter = CsvFieldIterator{ .data = line }; _ = col_iter.next(); // col 0: Date _ = col_iter.next(); // col 1: P _ = col_iter.next(); // col 2: D _ = col_iter.next(); // col 3: E const cpi_field = col_iter.next() orelse continue; _ = col_iter.next(); // col 5 const gs10_field = col_iter.next() orelse continue; _ = col_iter.next(); // col 7 _ = col_iter.next(); // col 8 const tr_price_field = col_iter.next() orelse continue; const cpi = parseF64(stripSpaces(cpi_field)) orelse continue; const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue; const tr_price = parseF64WithCommas(tr_price_field) orelse continue; if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; // Compute return from previous January if (prev_year > 0 and year == prev_year + 1) { const cpi_change = (cpi / prev_cpi) - 1.0; const real_sp500 = (tr_price / prev_tr_price) - 1.0; results[result_count] = .{ .year = prev_year, .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0, .bond_total_return = prev_gs10 / 100.0, .cpi_inflation = cpi_change, }; result_count += 1; } prev_year = year; prev_tr_price = tr_price; prev_gs10 = gs10; prev_cpi = cpi; // Skip ahead ~11 months of data pos = @min(pos + skip_bytes, csv_data.len); // Realign to next line boundary while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; if (pos < csv_data.len) pos += 1; } // Single pass: scan lines, parse January rows, compute returns on the fly. while (pos < csv_data.len) { const line_start = pos; while (pos < csv_data.len and csv_data[pos] != '\n') pos += 1; const line_end = if (pos > line_start and csv_data[pos - 1] == '\r') pos - 1 else pos; if (pos < csv_data.len) pos += 1; const line = csv_data[line_start..line_end]; if (line.len < 7) continue; // Fast reject: date is "YYYY.01," — check fixed offsets if (line[4] != '.' or line[5] != '0' or line[6] != '1') continue; const year = parseU16(line[0..4]) orelse continue; // Parse fields for this January row var col_iter = CsvFieldIterator{ .data = line }; _ = col_iter.next(); // col 0: Date _ = col_iter.next(); // col 1: P _ = col_iter.next(); // col 2: D _ = col_iter.next(); // col 3: E const cpi_field = col_iter.next() orelse continue; _ = col_iter.next(); // col 5 const gs10_field = col_iter.next() orelse continue; _ = col_iter.next(); // col 7 _ = col_iter.next(); // col 8 const tr_price_field = col_iter.next() orelse continue; const cpi = parseF64(stripSpaces(cpi_field)) orelse continue; const gs10 = parseF64(stripSpaces(gs10_field)) orelse continue; const tr_price = parseF64WithCommas(tr_price_field) orelse continue; if (cpi == 0.0 or tr_price == 0.0 or gs10 == 0.0) continue; // Compute return from previous January (if consecutive) if (prev_year > 0 and year == prev_year + 1) { const cpi_change = (cpi / prev_cpi) - 1.0; const real_sp500 = (tr_price / prev_tr_price) - 1.0; results[result_count] = .{ .year = prev_year, .sp500_total_return = (1.0 + real_sp500) * (1.0 + cpi_change) - 1.0, .bond_total_return = prev_gs10 / 100.0, .cpi_inflation = cpi_change, }; result_count += 1; } prev_year = year; prev_tr_price = tr_price; prev_gs10 = gs10; prev_cpi = cpi; } if (result_count == 0) return &.{}; const final = blk: { var arr: [result_count]ShillerYear = undefined; for (0..result_count) |i| { arr[i] = results[i]; } break :blk arr; }; return &final; } // --- Comptime parsing helpers --- const CsvFieldIterator = struct { data: []const u8, pos: usize = 0, fn next(self: *CsvFieldIterator) ?[]const u8 { if (self.pos > self.data.len) return null; if (self.pos == self.data.len) { self.pos = self.data.len + 1; return ""; } const start = self.pos; // Handle quoted fields if (self.pos < self.data.len and self.data[self.pos] == '"') { self.pos += 1; // skip opening quote const qstart = self.pos; while (self.pos < self.data.len) { if (self.data[self.pos] == '"') { if (self.pos + 1 < self.data.len and self.data[self.pos + 1] == '"') { self.pos += 2; // escaped quote } else { break; // end quote } } else { self.pos += 1; } } const qend = self.pos; if (self.pos < self.data.len) self.pos += 1; // skip closing quote if (self.pos < self.data.len and self.data[self.pos] == ',') self.pos += 1; // skip comma return self.data[qstart..qend]; } // Unquoted field while (self.pos < self.data.len and self.data[self.pos] != ',') { self.pos += 1; } const end = self.pos; if (self.pos < self.data.len) self.pos += 1; // skip comma return self.data[start..end]; } }; fn indexOf(s: []const u8, ch: u8) ?usize { for (s, 0..) |c, i| { if (c == ch) return i; } return null; } fn parseU16(s: []const u8) ?u16 { if (s.len == 0) return null; var result: u16 = 0; for (s) |c| { if (c < '0' or c > '9') return null; result = result * 10 + @as(u16, c - '0'); } return result; } fn parseF64(s: []const u8) ?f64 { if (s.len == 0) return null; var negative = false; var start: usize = 0; if (s[0] == '-') { negative = true; start = 1; } var integer_part: f64 = 0; var i = start; while (i < s.len and s[i] != '.') : (i += 1) { if (s[i] < '0' or s[i] > '9') return null; integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0')); } var frac_part: f64 = 0; if (i < s.len and s[i] == '.') { i += 1; var divisor: f64 = 10.0; while (i < s.len) : (i += 1) { if (s[i] < '0' or s[i] > '9') return null; frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor; divisor *= 10.0; } } const result = integer_part + frac_part; return if (negative) -result else result; } fn stripSpaces(s: []const u8) []const u8 { var start: usize = 0; var end: usize = s.len; while (start < end and s[start] == ' ') start += 1; while (end > start and s[end - 1] == ' ') end -= 1; return s[start..end]; } fn parseF64WithCommas(s: []const u8) ?f64 { if (s.len == 0) return null; // Strip leading/trailing spaces var start: usize = 0; var end: usize = s.len; while (start < end and s[start] == ' ') start += 1; while (end > start and s[end - 1] == ' ') end -= 1; if (start >= end) return null; var negative = false; if (s[start] == '-') { negative = true; start += 1; } var integer_part: f64 = 0; var i = start; while (i < end and s[i] != '.') : (i += 1) { if (s[i] == ',') continue; // skip commas if (s[i] < '0' or s[i] > '9') return null; integer_part = integer_part * 10.0 + @as(f64, @floatFromInt(s[i] - '0')); } var frac_part: f64 = 0; if (i < end and s[i] == '.') { i += 1; var divisor: f64 = 10.0; while (i < end) : (i += 1) { if (s[i] < '0' or s[i] > '9') return null; frac_part += @as(f64, @floatFromInt(s[i] - '0')) / divisor; divisor *= 10.0; } } const result = integer_part + frac_part; return if (negative) -result else result; } // --- Tests --- test "annual returns are populated" { // Should have data from 1871 to at least 2024 try std.testing.expect(annual_returns.len >= 150); try std.testing.expectEqual(@as(u16, 1871), first_year); try std.testing.expect(last_year >= 2024); } test "spot check 2008 crash" { // 2008: S&P 500 nominal total return was approximately -37% for (annual_returns) |yr| { if (yr.year == 2008) { try std.testing.expect(yr.sp500_total_return < -0.30); try std.testing.expect(yr.sp500_total_return > -0.50); return; } } return error.YearNotFound; } test "spot check 1929 crash" { // 1929: S&P 500 nominal total return should be significantly negative for (annual_returns) |yr| { if (yr.year == 1929) { try std.testing.expect(yr.sp500_total_return < -0.05); return; } } return error.YearNotFound; } test "realReturn calculation" { // 10% nominal with 3% inflation = ~6.8% real const real = realReturn(0.10, 0.03); try std.testing.expectApproxEqAbs(0.06796, real, 0.001); } test "maxCycles" { // With ~154 years of data, a 30-year horizon should give ~124 cycles const cycles = maxCycles(30); try std.testing.expect(cycles >= 120); try std.testing.expect(cycles <= 130); // Horizon longer than data should give 0 try std.testing.expectEqual(@as(usize, 0), maxCycles(200)); } test "spot check known annual total returns" { // Nominal total returns (real from TR Price + CPI). // Jan-to-Jan measurement, so timing differs from calendar-year figures. const checks = [_]struct { year: u16, min: f64, max: f64 }{ .{ .year = 1931, .min = -0.55, .max = -0.25 }, // Great Depression year .{ .year = 1933, .min = 0.30, .max = 0.80 }, // Recovery .{ .year = 1974, .min = -0.40, .max = -0.05 }, // Oil crisis .{ .year = 2008, .min = -0.50, .max = -0.25 }, // GFC .{ .year = 2009, .min = 0.15, .max = 0.50 }, // Recovery .{ .year = 2021, .min = 0.10, .max = 0.45 }, // Post-COVID }; for (checks) |chk| { var found = false; for (annual_returns) |yr| { if (yr.year == chk.year) { found = true; if (yr.sp500_total_return < chk.min or yr.sp500_total_return > chk.max) { std.debug.print("Year {d}: SP500 TR = {d:.4}, expected {d:.2} to {d:.2}\n", .{ chk.year, yr.sp500_total_return, chk.min, chk.max, }); return error.TestExpectedEqual; } break; } } if (!found) { std.debug.print("Year {d} not found in data\n", .{chk.year}); return error.TestExpectedEqual; } } } test "bond returns are reasonable" { // Bond returns (GS10 yield) should be between 0% and 16% for (annual_returns) |yr| { if (yr.bond_total_return < 0.0 or yr.bond_total_return > 0.16) { std.debug.print("Year {d}: Bond TR = {d:.4}, out of range\n", .{ yr.year, yr.bond_total_return, }); return error.TestExpectedEqual; } } } test "CPI inflation is reasonable" { // CPI should generally be between -20% and +25% (19th century had severe deflation) for (annual_returns) |yr| { if (yr.cpi_inflation < -0.20 or yr.cpi_inflation > 0.25) { std.debug.print("Year {d}: CPI = {d:.4}, out of range\n", .{ yr.year, yr.cpi_inflation, }); return error.TestExpectedEqual; } } } test "parseF64WithCommas" { try std.testing.expectApproxEqAbs(@as(f64, 9944.73), parseF64WithCommas(" 9,944.73 ").?, 0.01); try std.testing.expectApproxEqAbs(@as(f64, 1036099.07), parseF64WithCommas(" 1,036,099.07 ").?, 0.01); try std.testing.expectApproxEqAbs(@as(f64, 116.82), parseF64WithCommas(" 116.82 ").?, 0.01); try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("")); try std.testing.expectEqual(@as(?f64, null), parseF64WithCommas("NA")); }