update cache implementation with proper L1/L2 persistence

This commit is contained in:
Emil Lerch 2025-12-19 14:35:23 -08:00
parent 5fd9743e10
commit 0a175c55d3
Signed by: lobo
GPG key ID: A7B62D657EF764F8
3 changed files with 274 additions and 4 deletions

238
src/cache/Cache.zig vendored
View file

@ -3,14 +3,14 @@ const Lru = @import("Lru.zig");
const Cache = @This();
const log = std.log.scoped(.cache);
allocator: std.mem.Allocator,
lru: Lru,
cache_dir: []const u8,
file_threshold: usize,
/// Configuration for the two-level (memory L1 + disk L2) cache.
pub const Config = struct {
    /// Maximum number of entries kept in the in-memory (L1) LRU.
    max_entries: usize = 10_000,
    /// presumably a size threshold governing file handling — TODO confirm:
    /// the field is stored on Cache but not read anywhere in the code
    /// visible here.
    file_threshold: usize = 1024,
    /// Directory holding the on-disk (L2) cache files; created by init()
    /// if it does not already exist.
    cache_dir: []const u8,
};
@ -19,21 +19,50 @@ pub fn init(allocator: std.mem.Allocator, config: Config) !Cache {
if (err != error.PathAlreadyExists) return err;
};
return Cache{
var cache = Cache{
.allocator = allocator,
.lru = try Lru.init(allocator, config.max_entries),
.cache_dir = try allocator.dupe(u8, config.cache_dir),
.file_threshold = config.file_threshold,
};
cache.lru.setEvictionCallback(&cache, evictCallback);
// Clean up expired files and populate the L1 cache from L2 (disk).
// NOTE(review): `setEvictionCallback(&cache, ...)` above stores the address
// of this stack-local `cache`, which dangles once `cache` is returned by
// value — confirm the callback context is re-pinned (or the Cache is
// heap-allocated) before any eviction can fire.
cache.loadFromDir() catch |err| {
std.log.warn("Failed to load cache from {s}: {}", .{ config.cache_dir, err });
};
return cache;
}
/// Eviction callback registered with the LRU: when the LRU drops an entry,
/// delete the matching on-disk (L2) file so the two levels stay in sync.
/// `ctx` must be the `*Cache` that was passed to `Lru.setEvictionCallback`.
fn evictCallback(ctx: *anyopaque, key: []const u8) void {
    const self: *Cache = @ptrCast(@alignCast(ctx));
    self.deleteFile(key);
}
/// Looks up `key`, checking the in-memory L1 LRU first and falling back to
/// the on-disk L2 store. A disk hit is promoted into L1 (which presumably
/// copies the bytes — the temporaries are freed here; confirm against
/// Lru.put). Returns null on a miss, an expired entry, or any I/O or
/// allocation failure.
pub fn get(self: *Cache, key: []const u8) ?[]const u8 {
    // Fast path: L1 (memory).
    if (self.lru.get(key)) |hit| return hit;

    // Slow path: L2 (disk). Any failure — missing file, expired entry,
    // parse error — is treated as a cache miss.
    const entry = self.loadFromFile(key) catch return null;
    defer {
        self.allocator.free(entry.key);
        self.allocator.free(entry.value);
    }

    // Promote into L1, then serve the LRU-owned copy.
    self.lru.put(key, entry.value, entry.expires) catch return null;
    return self.lru.get(key);
}
/// Stores `key` -> `value` with a TTL, writing through to the on-disk L2
/// store first and only then to the in-memory L1 LRU, so a restart can
/// repopulate L1 from disk.
/// Returns error.Overflow if `ttl_seconds` is too large to express as a
/// millisecond deadline (the original `ttl_seconds * 1000` plus `@intCast`
/// would instead trip the safe-mode overflow panic on adversarial TTLs).
pub fn put(self: *Cache, key: []const u8, value: []const u8, ttl_seconds: u64) !void {
    const now = std.time.milliTimestamp();
    // Checked arithmetic: both the seconds->ms scaling and the u64->i64
    // conversion can overflow; surface that as an error, not a panic.
    const ttl_ms = try std.math.mul(u64, ttl_seconds, 1000);
    const ttl_ms_i64 = std.math.cast(i64, ttl_ms) orelse return error.Overflow;
    const expires = try std.math.add(i64, now, ttl_ms_i64);
    // Write to L2 (disk) first.
    try self.saveToFile(key, value, expires);
    // Then add to L1 (memory).
    try self.lru.put(key, value, expires);
}
@ -41,3 +70,204 @@ pub fn deinit(self: *Cache) void {
self.lru.deinit();
self.allocator.free(self.cache_dir);
}
/// Maps an arbitrary key to a filesystem-safe "<hex>.json" filename by
/// hashing the key bytes with Wyhash(seed 0). Caller owns the returned
/// slice.
fn getCacheFilename(self: *Cache, key: []const u8) ![]const u8 {
    const digest = std.hash.Wyhash.hash(0, key);
    return std.fmt.allocPrint(self.allocator, "{x}.json", .{digest});
}
/// One cache record as persisted on disk (a single JSON object). When
/// produced by `deserialize`, `key` and `value` are heap-allocated copies
/// that the caller must free.
const CacheEntry = struct {
    key: []const u8,
    value: []const u8,
    /// Absolute expiry time in milliseconds since the epoch (compared
    /// against std.time.milliTimestamp()).
    expires: i64,
};
/// Resolves `key` to its cache-file path and loads the entry from disk.
/// Fails if the file cannot be accessed OR if the entry has expired; an
/// expired entry's file is deleted as a side effect. Caller frees the
/// returned entry's `key` and `value`.
fn loadFromFile(self: *Cache, key: []const u8) !CacheEntry {
    const name = try self.getCacheFilename(key);
    defer self.allocator.free(name);

    const path = try std.fs.path.join(self.allocator, &.{ self.cache_dir, name });
    defer self.allocator.free(path);

    return self.loadFromFilePath(path);
}
/// Loads a cached entry from an explicit file path. Fails if the file
/// cannot be read/parsed OR if the entry has expired; an expired entry's
/// file is deleted before error.Expired is returned. On success the caller
/// owns (and must free) the returned `key` and `value`.
fn loadFromFilePath(self: *Cache, file_path: []const u8) !CacheEntry {
    const file = try std.fs.cwd().openFile(file_path, .{});
    defer file.close();

    // Heap-allocate the read buffer: the previous 10 MiB stack buffer
    // risked blowing the stack. The JSON reader streams, so the buffer
    // only needs to be a chunk size, not the whole file.
    const buffer = try self.allocator.alloc(u8, 64 * 1024);
    defer self.allocator.free(buffer);
    var file_reader = file.reader(buffer);
    const reader = &file_reader.interface;

    const cached = try deserialize(self.allocator, reader);
    // Free BOTH duped slices on the error path; the original errdefer only
    // freed `value`, leaking `key` every time an expired entry was hit.
    errdefer {
        self.allocator.free(cached.key);
        self.allocator.free(cached.value);
    }

    // Reject (and clean up) expired entries.
    const now = std.time.milliTimestamp();
    if (cached.expires <= now) {
        self.deleteFile(cached.key);
        return error.Expired;
    }
    return cached;
}
/// Writes one entry to `writer` as a single JSON object whose field order
/// follows the CacheEntry declaration (key, value, expires).
fn serialize(writer: *std.Io.Writer, key: []const u8, value: []const u8, expires: i64) !void {
    try writer.print("{f}", .{std.json.fmt(
        CacheEntry{ .key = key, .value = value, .expires = expires },
        .{},
    )});
}
/// Parses one JSON CacheEntry from `reader`. The returned `key` and
/// `value` are heap copies owned by the caller; `expires` is copied by
/// value. Fails on malformed JSON or allocation failure.
fn deserialize(allocator: std.mem.Allocator, reader: *std.Io.Reader) !CacheEntry {
    var json_reader = std.json.Reader.init(allocator, reader);
    defer json_reader.deinit();
    const parsed = try std.json.parseFromTokenSource(
        CacheEntry,
        allocator,
        &json_reader,
        .{},
    );
    defer parsed.deinit();
    // Dupe out of the parse arena before parsed.deinit() frees it. The key
    // dupe needs an errdefer: if the value dupe fails, the original code
    // leaked the already-duped key.
    const key = try allocator.dupe(u8, parsed.value.key);
    errdefer allocator.free(key);
    const value = try allocator.dupe(u8, parsed.value.value);
    return .{ .key = key, .value = value, .expires = parsed.value.expires };
}
/// Persists one entry to the on-disk (L2) store as "<hash>.json" inside
/// `cache_dir`, overwriting any previous file for the same key.
fn saveToFile(self: *Cache, key: []const u8, value: []const u8, expires: i64) !void {
    const name = try self.getCacheFilename(key);
    defer self.allocator.free(name);

    const path = try std.fs.path.join(self.allocator, &.{ self.cache_dir, name });
    defer self.allocator.free(path);

    const file = try std.fs.cwd().createFile(path, .{});
    defer file.close();

    var write_buf: [4096]u8 = undefined;
    var file_writer = file.writer(&write_buf);
    const out = &file_writer.interface;
    try serialize(out, key, value, expires);
    try out.flush();
}
/// Walks `cache_dir` and promotes every still-valid entry into the L1 LRU.
/// Expired files are deleted by loadFromFilePath as a side effect; files
/// that fail to load or insert are skipped silently.
fn loadFromDir(self: *Cache) !void {
    var dir = try std.fs.cwd().openDir(self.cache_dir, .{ .iterate = true });
    defer dir.close();

    var walker = dir.iterate();
    while (try walker.next()) |dirent| {
        if (dirent.kind != .file) continue;

        const path = try std.fs.path.join(self.allocator, &.{ self.cache_dir, dirent.name });
        defer self.allocator.free(path);

        // Skip unreadable/expired files rather than aborting the scan.
        const entry = self.loadFromFilePath(path) catch continue;
        // The LRU presumably copies key/value (see Cache tests), so the
        // temporaries are freed each iteration regardless of put's outcome.
        defer {
            self.allocator.free(entry.key);
            self.allocator.free(entry.value);
        }
        self.lru.put(entry.key, entry.value, entry.expires) catch continue;
    }
}
/// Best-effort removal of the on-disk file for `key`. This runs from the
/// LRU eviction callback and from expiry cleanup, so it must never take
/// the process down: allocation failures are logged and swallowed instead
/// of panicking (the original `catch @panic("OOM")` could kill the process
/// during routine eviction).
fn deleteFile(self: *Cache, key: []const u8) void {
    const filename = self.getCacheFilename(key) catch |e| {
        log.warn("Unable to build cache filename for deletion: {}", .{e});
        return;
    };
    defer self.allocator.free(filename);
    const file_path = std.fs.path.join(self.allocator, &.{ self.cache_dir, filename }) catch |e| {
        log.warn("Unable to build cache file path for deletion: {}", .{e});
        return;
    };
    defer self.allocator.free(file_path);
    std.fs.cwd().deleteFile(file_path) catch |e| {
        log.warn("Error deleting expired cache file {s}: {}", .{ file_path, e });
    };
}
// Round-trips one entry through an in-memory buffer (no filesystem) and
// pins the exact JSON wire format, so accidental on-disk format changes
// are caught.
test "serialize and deserialize" {
    const allocator = std.testing.allocator;
    const key = "test_key";
    const value = "test_value";
    const expires: i64 = 1234567890;
    var buffer: [1024]u8 = undefined;
    var fixed_writer = std.Io.Writer.fixed(&buffer);
    try serialize(&fixed_writer, key, value, expires);
    try fixed_writer.flush();
    const serialized = buffer[0..fixed_writer.end];
    try std.testing.expectEqualStrings("{\"key\":\"test_key\",\"value\":\"test_value\",\"expires\":1234567890}", serialized);
    var fixed_reader = std.Io.Reader.fixed(serialized);
    const cached = try deserialize(allocator, &fixed_reader);
    // deserialize returns heap copies owned by the caller.
    defer allocator.free(cached.key);
    defer allocator.free(cached.value);
    try std.testing.expectEqualStrings(key, cached.key);
    try std.testing.expectEqualStrings(value, cached.value);
    try std.testing.expectEqual(expires, cached.expires);
}
// Regression check: `expires` values wider than 32 bits (millisecond
// epoch timestamps) must parse into the i64 field without truncation.
test "deserialize handles integer expires" {
    const allocator = std.testing.allocator;
    const json = "{\"key\":\"k\",\"value\":\"v\",\"expires\":9999999999999}";
    var fixed_reader = std.Io.Reader.fixed(json);
    const cached = try deserialize(allocator, &fixed_reader);
    defer allocator.free(cached.key);
    defer allocator.free(cached.value);
    try std.testing.expectEqualStrings("k", cached.key);
    try std.testing.expectEqualStrings("v", cached.value);
    try std.testing.expectEqual(9999999999999, cached.expires);
}
// End-to-end check of the two-level flow: put() writes through to disk,
// get() serves from memory, and after the L1 entry is dropped, the next
// get() re-reads the L2 file and promotes it back into the LRU.
test "L1/L2 cache flow" {
    const allocator = std.testing.allocator;
    var tmp_dir = std.testing.tmpDir(.{});
    defer tmp_dir.cleanup();
    var path_buf: [std.fs.max_path_bytes]u8 = undefined;
    const cache_dir = try tmp_dir.dir.realpath(".", &path_buf);
    var cache = try Cache.init(allocator, .{ .max_entries = 10, .cache_dir = cache_dir });
    defer cache.deinit();
    // Put item in cache
    try cache.put("key1", "value1", 900);
    // Should be in L1
    try std.testing.expectEqualStrings("value1", cache.get("key1").?);
    // Manually remove from L1 to simulate eviction. This frees the
    // LRU-owned copies directly, bypassing Lru.remove(), so no eviction
    // callback fires and the on-disk (L2) file survives.
    if (cache.lru.map.fetchRemove("key1")) |kv| {
        allocator.free(kv.value.value);
        allocator.free(kv.key);
    }
    // key1 should not be in L1
    try std.testing.expect(cache.lru.get("key1") == null);
    // Get should promote from L2 to L1
    try std.testing.expectEqualStrings("value1", cache.get("key1").?);
    // Now it should be in L1
    try std.testing.expectEqualStrings("value1", cache.lru.get("key1").?);
}

33
src/cache/Lru.zig vendored
View file

@ -5,6 +5,8 @@ const Lru = @This();
allocator: std.mem.Allocator,
map: std.StringHashMap(Entry),
max_entries: usize,
evict_fn: ?*const fn (ctx: *anyopaque, key: []const u8) void = null,
evict_ctx: ?*anyopaque = null,
const Entry = struct {
value: []const u8,
@ -20,6 +22,11 @@ pub fn init(allocator: std.mem.Allocator, max_entries: usize) !Lru {
};
}
/// Registers a callback invoked (with `ctx`) for each key removed from the
/// map via remove(). Both fields are set together: remove() unwraps
/// `evict_ctx` with `.?` whenever `evict_fn` is non-null, so never set one
/// without the other.
pub fn setEvictionCallback(self: *Lru, ctx: *anyopaque, callback: *const fn (ctx: *anyopaque, key: []const u8) void) void {
    self.evict_ctx = ctx;
    self.evict_fn = callback;
}
pub fn get(self: *Lru, key: []const u8) ?[]const u8 {
var entry = self.map.getPtr(key) orelse return null;
@ -72,6 +79,9 @@ fn evictOldest(self: *Lru) void {
/// Removes `key` from the map. The eviction callback (if registered) is
/// notified with the stored key BEFORE the owned key/value slices are
/// freed, so the callback may still read the key bytes.
fn remove(self: *Lru, key: []const u8) void {
    const kv = self.map.fetchRemove(key) orelse return;
    if (self.evict_fn) |notify| {
        notify(self.evict_ctx.?, kv.key);
    }
    self.allocator.free(kv.value.value);
    self.allocator.free(kv.key);
}
@ -86,6 +96,29 @@ pub fn deinit(self: *Lru) void {
self.map.deinit();
}
/// Read-only iterator over all entries. Iteration order is the hash map's
/// internal order (unspecified). Returned slices borrow the map's storage
/// and are only valid while the map is not mutated.
pub const Iterator = struct {
    inner: std.StringHashMap(Entry).Iterator,
    pub const Item = struct {
        key: []const u8,
        value: []const u8,
        // presumably absolute expiry in ms since the epoch, mirroring
        // Cache.zig — TODO confirm against Entry (not fully visible here).
        expires: i64,
    };
    pub fn next(self: *Iterator) ?Item {
        const entry = self.inner.next() orelse return null;
        return Item{
            .key = entry.key_ptr.*,
            .value = entry.value_ptr.value,
            .expires = entry.value_ptr.expires,
        };
    }
};
/// Returns an iterator over all (key, value, expires) entries; see
/// Iterator for validity and ordering caveats.
pub fn iterator(self: *Lru) Iterator {
    return .{ .inner = self.map.iterator() };
}
test "LRU basic operations" {
var lru = try Lru.init(std.testing.allocator, 3);
defer lru.deinit();

View file

@ -175,6 +175,13 @@ fn handleWeatherInternal(
}
} else try ansi.render(allocator, weather, .{ .use_imperial = use_imperial });
// Use a randomized TTL between 1000 and 2000 seconds (~16-33 minutes):
// a fixed base of 1000 seconds plus 0-1000 seconds of uniform jitter.
// The jitter avoids a thundering-herd problem where many cached entries
// expire at the same instant and cause a simultaneous spike of requests
// to the weather provider.
const ttl = 1000 + std.crypto.random.intRangeAtMost(u64, 0, 1000);
try opts.cache.put(cache_key, output, ttl);