From b2c6b3b65177be02a2fbf745a32ab14feb581c73 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Mon, 5 Jan 2026 00:39:24 -0800 Subject: [PATCH] ip2location --- CACHE_CONFIGURATION.md | 140 +++++++++++++++++++++++++ README.md | 54 ++++++++-- src/config.zig | 33 +++++- src/location/GeoIp.zig | 55 ++++++++-- src/location/Ip2location.zig | 194 +++++++++++++++++++++++++++++++++++ src/main.zig | 9 +- 6 files changed, 467 insertions(+), 18 deletions(-) create mode 100644 CACHE_CONFIGURATION.md create mode 100644 src/location/Ip2location.zig diff --git a/CACHE_CONFIGURATION.md b/CACHE_CONFIGURATION.md new file mode 100644 index 0000000..f6c7351 --- /dev/null +++ b/CACHE_CONFIGURATION.md @@ -0,0 +1,140 @@ +# Cache Configuration + +wttr.in uses three separate caches following Linux Filesystem Hierarchy Standard (FHS) and XDG Base Directory specifications. + +## External Services + +### Required Services +- **Met.no Weather API** - Primary weather data provider + - No API key required + - Free, open API from Norwegian Meteorological Institute + - Rate limit: Be respectful, use caching + +### Optional Services +- **IP2Location.io** - Fallback IP geolocation service + - API key required: Sign up at https://www.ip2location.io/ + - Used when MaxMind GeoIP database lookup fails + - Free tier: 30,000 requests/month + - Set via `IP2LOCATION_API_KEY` environment variable + +### Database Files +- **MaxMind GeoLite2 City** - IP geolocation database + - Free database, auto-downloaded if missing + - No API key required for database usage + - Updates available monthly from MaxMind + +## Cache Locations + +All caches default to `$XDG_CACHE_HOME/wttr` (typically `~/.cache/wttr`). + +### 1. 
Weather Response Cache +**Purpose:** Caches weather API responses to reduce upstream requests + +**Default Location:** `$XDG_CACHE_HOME/wttr/` (individual files) +**Environment Variable:** `WTTR_CACHE_DIR` +**Size:** 10,000 entries (configurable via `WTTR_CACHE_SIZE`) +**Expiration:** 1000-2000 seconds (16-33 minutes, randomized to avoid thundering herd) +**Eviction:** LRU (Least Recently Used) + +This is the main cache that stores weather forecast responses from Met.no. Each entry has a randomized TTL to prevent cache stampedes. + +### 2. Geocoding Cache (Optional) +**Purpose:** Caches location name → coordinates mappings + +**Default:** Disabled (in-memory only) +**Environment Variable:** `WTTR_GEOCACHE_FILE` +**Format:** JSON +**Expiration:** None (persists indefinitely) +**Eviction:** None (grows unbounded) + +When enabled, persists geocoding lookups to disk. Saves every 15 minutes if dirty. Useful for reducing external geocoding API calls. + +### 3. IP2Location Cache +**Purpose:** Caches IP → coordinates from IP2Location API + +**Default Location:** `$XDG_CACHE_HOME/wttr/ip2location.cache` +**Environment Variable:** `IP2LOCATION_CACHE_FILE` +**Format:** Binary (32-byte records) +**Expiration:** None (persists indefinitely) +**Eviction:** None (append-only, grows unbounded) + +Only used when `IP2LOCATION_API_KEY` is configured. Provides fallback when MaxMind GeoIP database lookup fails. + +## GeoIP Database Location + +**Default:** `$XDG_CACHE_HOME/wttr/GeoLite2-City.mmdb` +**Environment Variable:** `WTTR_GEOLITE_PATH` + +This is the MaxMind GeoLite2 database. It will be automatically downloaded if missing. 
+ +## Environment Variables Summary + +| Variable | Default | Description | +|----------|---------|-------------| +| `WTTR_CACHE_DIR` | `$XDG_CACHE_HOME/wttr` | Weather response cache directory | +| `WTTR_CACHE_SIZE` | `10000` | Maximum number of cached weather responses | +| `WTTR_GEOCACHE_FILE` | (none) | Optional persistent geocoding cache file | +| `WTTR_GEOLITE_PATH` | `$XDG_CACHE_HOME/wttr/GeoLite2-City.mmdb` | MaxMind GeoLite2 database path | +| `IP2LOCATION_API_KEY` | (none) | API key for IP2Location fallback service | +| `IP2LOCATION_CACHE_FILE` | `$XDG_CACHE_HOME/wttr/ip2location.cache` | IP2Location cache file | +| `XDG_CACHE_HOME` | `~/.cache` | XDG base cache directory | + +## Examples + +### Minimal Configuration (defaults) +```bash +./wttr +# Uses ~/.cache/wttr/ for all caches and GeoIP database +``` + +### Custom Cache Location +```bash +WTTR_CACHE_DIR=/var/cache/wttr ./wttr +# All caches and GeoIP in /var/cache/wttr/ +``` + +### Enable Persistent Geocoding Cache +```bash +WTTR_GEOCACHE_FILE=~/.cache/wttr/geocache.json ./wttr +``` + +### With IP2Location Fallback +```bash +IP2LOCATION_API_KEY=your_key_here ./wttr +# Cache at ~/.cache/wttr/ip2location.cache +``` + +### Production Setup +```bash +WTTR_CACHE_DIR=/var/cache/wttr \ +WTTR_CACHE_SIZE=50000 \ +WTTR_GEOCACHE_FILE=/var/cache/wttr/geocache.json \ +IP2LOCATION_API_KEY=your_key_here \ +./wttr +# GeoIP and IP2Location cache also in /var/cache/wttr/ +``` + +## Cache Maintenance + +### Weather Cache +- **Automatic expiration:** Entries expire after 16-33 minutes (randomized) +- **LRU eviction:** When cache reaches max size (10,000 entries), least recently used entries are removed +- **Disk cleanup:** Expired files are cleaned up on access +- Safe to delete entire cache directory; will be recreated as needed + +### Geocoding Cache +- **No expiration:** Entries persist indefinitely +- **No eviction:** Cache grows unbounded +- **Auto-save:** Writes to disk every 15 minutes when modified +- Consider 
periodic cleanup if cache grows too large + +### IP2Location Cache +- **No expiration:** Entries persist indefinitely +- **Append-only:** File grows unbounded (32 bytes per unique IP) +- **No cleanup:** Consider periodic truncation for long-running deployments +- Safe to delete; will be recreated on next API lookup + +### GeoIP Database +- **Manual updates:** Download new database periodically for accuracy +- **Auto-download:** Database is automatically downloaded if missing on startup +- Typical update frequency: monthly (MaxMind releases) diff --git a/README.md b/README.md index 0eb6a2f..bb1a08c 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,42 @@ This directory contains comprehensive documentation for rewriting wttr.in in Zig. +## Quick Start + +```bash +# Minimal setup (uses defaults) +./wttr + +# With IP2Location fallback (optional) +IP2LOCATION_API_KEY=your_key_here ./wttr + +# Custom cache location +WTTR_CACHE_DIR=/var/cache/wttr ./wttr +``` + +See [CACHE_CONFIGURATION.md](CACHE_CONFIGURATION.md) for detailed configuration options. 
+ +## External Services & API Keys + +### Required Services (No API Key) +- **Met.no Weather API** - Primary weather data provider + - Free, open API from Norwegian Meteorological Institute + - No registration required + - Rate limit: Be respectful, use caching (built-in) + +### Optional Services +- **IP2Location.io** - Fallback IP geolocation + - **API Key Required:** Sign up at https://www.ip2location.io/ + - Free tier: 30,000 requests/month + - Only used when MaxMind GeoIP database lookup fails + - Set via `IP2LOCATION_API_KEY` environment variable + +### Database Files (Auto-Downloaded) +- **MaxMind GeoLite2 City** - IP geolocation database + - Free database, automatically downloaded if missing + - No API key required + - Stored in `~/.cache/wttr/GeoLite2-City.mmdb` by default + ## Current Implementation Status ### Implemented Features @@ -17,6 +53,7 @@ This directory contains comprehensive documentation for rewriting wttr.in in Zig - Error handling (404/500 status codes) - Configuration from environment variables - **Imperial units auto-detection**: Automatically uses imperial units (°F, mph) for US IP addresses and `lang=us`, with explicit `?u` and `?m` overrides +- **IP2Location fallback**: Optional fallback geolocation service with persistent cache ### Missing Features (To Be Implemented Later) @@ -48,15 +85,18 @@ This directory contains comprehensive documentation for rewriting wttr.in in Zig - Based on location coordinates and timezone - Display in custom format output -7. 
**Online GeoIP Fallback** - - When local GeoIP database lookup fails, fallback to online service - - Requires API key configuration - - Persistent cache for online lookup results - - Legacy system uses ip2location or similar service - ## Documentation Files -### [TARGET_ARCHITECTURE.md](TARGET_ARCHITECTURE.md) ⭐ NEW +### [CACHE_CONFIGURATION.md](CACHE_CONFIGURATION.md) ⭐ NEW +Complete cache and external services documentation: +- External services (Met.no, IP2Location, MaxMind GeoLite2) +- API key requirements +- Cache locations and policies +- Expiration and eviction strategies +- Environment variables +- Configuration examples + +### [TARGET_ARCHITECTURE.md](TARGET_ARCHITECTURE.md) Target architecture for Zig rewrite: - Single binary design - Simplified caching (one layer) diff --git a/src/config.zig b/src/config.zig index 1acaf58..7b2a4ab 100644 --- a/src/config.zig +++ b/src/config.zig @@ -7,10 +7,22 @@ pub const Config = struct { cache_dir: []const u8, geolite_path: []const u8, geocache_file: ?[]const u8, + ip2location_api_key: ?[]const u8, + ip2location_cache_file: []const u8, pub fn load(allocator: std.mem.Allocator) !Config { var env = try std.process.getEnvMap(allocator); defer env.deinit(); + + // Get XDG_CACHE_HOME or default to ~/.cache + const home = env.get("HOME") orelse "/tmp"; + const xdg_cache = env.get("XDG_CACHE_HOME") orelse + try std.fmt.allocPrint(allocator, "{s}/.cache", .{home}); + defer if (env.get("XDG_CACHE_HOME") == null) allocator.free(xdg_cache); + + const default_cache_dir = try std.fmt.allocPrint(allocator, "{s}/wttr", .{xdg_cache}); + defer allocator.free(default_cache_dir); + return Config{ .listen_host = env.get("WTTR_LISTEN_HOST") orelse try allocator.dupe(u8, "0.0.0.0"), .listen_port = if (env.get("WTTR_LISTEN_PORT")) |p| @@ -21,9 +33,23 @@ pub const Config = struct { try std.fmt.parseInt(usize, s, 10) else 10_000, - .cache_dir = try allocator.dupe(u8, env.get("WTTR_CACHE_DIR") orelse "/tmp/wttr-cache"), - .geolite_path = try 
allocator.dupe(u8, env.get("WTTR_GEOLITE_PATH") orelse "./GeoLite2-City.mmdb"), + .cache_dir = try allocator.dupe(u8, env.get("WTTR_CACHE_DIR") orelse default_cache_dir), + .geolite_path = blk: { + if (env.get("WTTR_GEOLITE_PATH")) |v| { + break :blk try allocator.dupe(u8, v); + } + break :blk try std.fmt.allocPrint(allocator, "{s}/GeoLite2-City.mmdb", .{ + env.get("WTTR_CACHE_DIR") orelse default_cache_dir, + }); + }, .geocache_file = if (env.get("WTTR_GEOCACHE_FILE")) |v| try allocator.dupe(u8, v) else null, + .ip2location_api_key = if (env.get("IP2LOCATION_API_KEY")) |v| try allocator.dupe(u8, v) else null, + .ip2location_cache_file = blk: { + if (env.get("IP2LOCATION_CACHE_FILE")) |v| { + break :blk try allocator.dupe(u8, v); + } + break :blk try std.fmt.allocPrint(allocator, "{s}/ip2location.cache", .{env.get("WTTR_CACHE_DIR") orelse default_cache_dir}); + }, }; } @@ -32,6 +58,8 @@ pub const Config = struct { allocator.free(self.cache_dir); allocator.free(self.geolite_path); if (self.geocache_file) |f| allocator.free(f); + if (self.ip2location_api_key) |k| allocator.free(k); + allocator.free(self.ip2location_cache_file); } }; @@ -43,6 +71,5 @@ test "config loads defaults" { try std.testing.expectEqualStrings("0.0.0.0", cfg.listen_host); try std.testing.expectEqual(@as(u16, 8002), cfg.listen_port); try std.testing.expectEqual(@as(usize, 10_000), cfg.cache_size); - try std.testing.expectEqualStrings("./GeoLite2-City.mmdb", cfg.geolite_path); try std.testing.expect(cfg.geocache_file == null); } diff --git a/src/location/GeoIp.zig b/src/location/GeoIp.zig index 2d853f7..9e78aa6 100644 --- a/src/location/GeoIp.zig +++ b/src/location/GeoIp.zig @@ -1,5 +1,6 @@ const std = @import("std"); const Coordinates = @import("../Coordinates.zig"); +const Ip2location = @import("Ip2location.zig"); const c = @cImport({ @cInclude("maxminddb.h"); @@ -8,8 +9,11 @@ const c = @cImport({ const GeoIP = @This(); mmdb: c.MMDB_s, +ip2location_client: ?*Ip2location, +ip2location_cache: 
?*Ip2location.Cache,
+allocator: std.mem.Allocator,
 
-pub fn init(db_path: []const u8) !GeoIP {
+pub fn init(allocator: std.mem.Allocator, db_path: []const u8, api_key: ?[]const u8, cache_path: ?[]const u8) !GeoIP {
     const path_z = try std.heap.c_allocator.dupeZ(u8, db_path);
     defer std.heap.c_allocator.free(path_z);
 
@@ -19,19 +23,58 @@ pub fn init(db_path: []const u8) !GeoIP {
 
     if (status != c.MMDB_SUCCESS) return error.CannotOpenDatabase;
 
-    return GeoIP{ .mmdb = mmdb };
+    var client: ?*Ip2location = null;
+    var cache: ?*Ip2location.Cache = null;
+
+    if (api_key) |key| {
+        client = try allocator.create(Ip2location);
+        client.?.* = try Ip2location.init(allocator, key);
+
+        if (cache_path) |path| {
+            cache = try allocator.create(Ip2location.Cache);
+            cache.?.* = try Ip2location.Cache.init(allocator, path);
+            std.log.info("IP2Location fallback: enabled (cache: {s})", .{path});
+        } else {
+            std.log.info("IP2Location fallback: enabled (no cache)", .{});
+        }
+    } else {
+        std.log.info("IP2Location fallback: disabled (no API key configured)", .{});
+    }
+
+    return GeoIP{
+        .mmdb = mmdb,
+        .ip2location_client = client,
+        .ip2location_cache = cache,
+        .allocator = allocator,
+    };
 }
 
 pub fn deinit(self: *GeoIP) void {
     c.MMDB_close(&self.mmdb);
+    if (self.ip2location_client) |client| {
+        client.deinit();
+        self.allocator.destroy(client);
+    }
+    if (self.ip2location_cache) |cache| {
+        cache.deinit();
+        self.allocator.destroy(cache);
+    }
 }
 
+/// Resolves `ip` to coordinates: MaxMind first, then the optional IP2Location fallback.
+/// Returns null when neither source knows the address.
 pub fn lookup(self: *GeoIP, ip: []const u8) !?Coordinates {
-    const result = lookupInternal(&self.mmdb, ip) catch return null;
+    // A MaxMind lookup error is treated as a miss so the fallback still gets a chance.
+    if (lookupInternal(&self.mmdb, ip)) |result| {
+        if (result.found_entry) return try self.extractCoordinates(result);
+    } else |err| {
+        std.log.debug("MaxMind lookup failed for {s}: {s}", .{ ip, @errorName(err) });
+    }
 
-    if (!result.found_entry) return null;
+    // Fallback to IP2Location if configured
+    const client = self.ip2location_client orelse return null;
 
-    return try self.extractCoordinates(result);
+    return client.lookupWithCache(ip, self.ip2location_cache);
 }
 
 fn lookupInternal(mmdb: *c.MMDB_s, ip: []const u8) !c.MMDB_lookup_result_s 
{
@@ -90,7 +133,7 @@ test "MMDB functions are callable" {
 }
 
 test "GeoIP init with invalid path fails" {
-    const result = GeoIP.init("/nonexistent/path.mmdb");
+    const result = GeoIP.init(std.testing.allocator, "/nonexistent/path.mmdb", null, null);
     try std.testing.expectError(error.CannotOpenDatabase, result);
 }
 
@@ -103,7 +146,7 @@ test "isUSIP detects US IPs" {
         try GeoLite2.ensureDatabase(std.testing.allocator, db_path);
     }
 
-    var geoip = GeoIP.init(db_path) catch {
+    var geoip = GeoIP.init(std.testing.allocator, db_path, null, null) catch {
         return error.SkipZigTest;
     };
     defer geoip.deinit();
diff --git a/src/location/Ip2location.zig b/src/location/Ip2location.zig
new file mode 100644
index 0000000..54bd002
--- /dev/null
+++ b/src/location/Ip2location.zig
@@ -0,0 +1,253 @@
+//! IP2Location.io client used as an online fallback for IP geolocation,
+//! together with an append-only on-disk cache of earlier answers.
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Coordinates = @import("../Coordinates.zig");
+
+const Self = @This();
+
+const log = std.log.scoped(.ip2location);
+
+allocator: Allocator,
+api_key: []const u8,
+http_client: std.http.Client,
+
+/// Creates a client that keeps its own copy of `api_key`; release it with `deinit`.
+pub fn init(allocator: Allocator, api_key: []const u8) !Self {
+    return .{
+        .allocator = allocator,
+        .api_key = try allocator.dupe(u8, api_key),
+        .http_client = std.http.Client{ .allocator = allocator },
+    };
+}
+
+/// Shuts down the HTTP client and frees the copied API key.
+pub fn deinit(self: *Self) void {
+    self.http_client.deinit();
+    self.allocator.free(self.api_key);
+}
+
+/// Resolves `ip_str`, consulting `cache` (when given) before calling the API.
+/// Returns null if `ip_str` is not an IPv4/IPv6 literal or if the API lookup
+/// fails; a failure to store a fresh result in the cache is only logged.
+/// NOTE(review): nothing here synchronizes the HTTP client or the cache;
+/// confirm that callers do not run lookups concurrently.
+pub fn lookupWithCache(self: *Self, ip_str: []const u8, cache: ?*Cache) !?Coordinates {
+    // Parse IP to u128 for cache lookup
+    const addr = std.net.Address.parseIp(ip_str, 0) catch return null;
+    const ip_u128: u128 = switch (addr.any.family) {
+        std.posix.AF.INET => @as(u128, @intCast(std.mem.readInt(u32, @ptrCast(&addr.in.sa.addr), .big))),
+        std.posix.AF.INET6 => std.mem.readInt(u128, @ptrCast(&addr.in6.sa.addr), .big),
+        else => return null,
+    };
+    const family: u8 = if (addr.any.family == std.posix.AF.INET) 4 else 6;
+
+    // Check cache first
+    if (cache) |c| {
+        if (c.get(ip_u128)) |coords| {
+            return coords;
+        }
+    }
+
+    // Fetch from API
+    const coords = self.lookup(ip_str) catch |err| {
+        log.err("API lookup failed: {}", .{err});
+        return null;
+    };
+
+    // Store in cache
+    if (cache) |c| {
+        c.put(ip_u128, family, coords) catch |err| {
+            log.warn("Failed to cache result: {}", .{err});
+        };
+    }
+
+    return coords;
+}
+
+/// Queries the IP2Location.io HTTP API for `ip_str` and returns its coordinates.
+/// Fails with `error.InvalidIpAddress` when `ip_str` is not an IP literal,
+/// `error.ApiError` on a non-200 status, `error.InvalidResponse` when the body
+/// is not a JSON object, and `error.Missing*` / `error.Invalid*` when a
+/// coordinate is absent or not numeric. Other errors are propagated as-is.
+pub fn lookup(self: *Self, ip_str: []const u8) !Coordinates {
+    // `ip_str` is spliced into the request URL, so accept IP literals only.
+    _ = std.net.Address.parseIp(ip_str, 0) catch return error.InvalidIpAddress;
+
+    log.info("Fetching geolocation for IP {s}", .{ip_str});
+
+    // Build URL: https://api.ip2location.io/?key=XXX&ip=1.2.3.4
+    const url = try std.fmt.allocPrint(
+        self.allocator,
+        "https://api.ip2location.io/?key={s}&ip={s}",
+        .{ self.api_key, ip_str },
+    );
+    defer self.allocator.free(url);
+
+    const uri = try std.Uri.parse(url);
+
+    var response_buf: [4096]u8 = undefined;
+    var writer = std.io.Writer.fixed(&response_buf);
+    const result = try self.http_client.fetch(.{
+        .location = .{ .uri = uri },
+        .method = .GET,
+        .response_writer = &writer,
+    });
+
+    if (result.status != .ok) {
+        log.err("API returned status {}", .{result.status});
+        return error.ApiError;
+    }
+
+    const response_body = response_buf[0..writer.end];
+
+    // Parse JSON response
+    const parsed = try std.json.parseFromSlice(
+        std.json.Value,
+        self.allocator,
+        response_body,
+        .{},
+    );
+    defer parsed.deinit();
+
+    // Check the union tags instead of assuming them: reading an inactive
+    // field (e.g. `.float` of a whole-number latitude) is illegal behavior.
+    const obj = switch (parsed.value) {
+        .object => |o| o,
+        else => return error.InvalidResponse,
+    };
+    const lat = obj.get("latitude") orelse return error.MissingLatitude;
+    const lon = obj.get("longitude") orelse return error.MissingLongitude;
+    const lat_value = jsonNumber(lat) orelse return error.InvalidLatitude;
+    const lon_value = jsonNumber(lon) orelse return error.InvalidLongitude;
+
+    return Coordinates{
+        .latitude = @floatCast(lat_value),
+        .longitude = @floatCast(lon_value),
+    };
+}
+
+/// Returns the value of a JSON number as f64, or null for any other JSON type.
+fn jsonNumber(value: std.json.Value) ?f64 {
+    return switch (value) {
+        .float => |f| f,
+        .integer => |i| @as(f64, @floatFromInt(i)),
+        .number_string => |s| std.fmt.parseFloat(f64, s) catch null,
+        else => null,
+    };
+}
+
+/// One record of the cache file, written as the struct's raw bytes.
+/// NOTE(review): presumably not portable between hosts of different
+/// endianness — confirm before sharing cache files across machines.
+const CacheEntry = packed struct {
+    family: u8, // 4 or 6
+    _pad0: u8 = 0,
+    _pad1: u8 = 0,
+    _pad2: u8 = 0,
+    _pad3: u8 = 0,
+    _pad4: u8 = 0,
+    _pad5: u8 = 0,
+    _pad6: u8 = 0,
+    ip: u128, // 16 bytes (IPv4 in lower 32 bits)
+    lat: f32, // 4 bytes
+    lon: f32, // 4 bytes
+    // Total: 32 bytes per record
+};
+
+/// In-memory map of IP -> coordinates backed by an append-only file.
+pub const Cache = struct {
+    allocator: Allocator,
+    path: []const u8,
+    entries: std.AutoHashMap(u128, Coordinates),
+    file: ?std.fs.File,
+
+    /// Opens the cache file at `path` (absolute, or relative to the current
+    /// directory) and loads its records. A missing file is created together
+    /// with its parent directories. Release with `deinit`.
+    pub fn init(allocator: Allocator, path: []const u8) !Cache {
+        var cache = Cache{
+            .allocator = allocator,
+            .path = try allocator.dupe(u8, path),
+            .entries = std.AutoHashMap(u128, Coordinates).init(allocator),
+            .file = null,
+        };
+        // Release the path copy, the map and the file if anything below fails.
+        errdefer cache.deinit();
+
+        // `Dir.openFile`/`createFile` accept absolute and relative paths alike.
+        const cwd = std.fs.cwd();
+        if (cwd.openFile(path, .{ .mode = .read_write })) |file| {
+            cache.file = file;
+            try cache.load();
+        } else |err| switch (err) {
+            error.FileNotFound => {
+                // Create new cache file
+                if (std.fs.path.dirname(path)) |dir| try cwd.makePath(dir);
+                cache.file = try cwd.createFile(path, .{ .read = true, .truncate = false });
+            },
+            else => return err,
+        }
+
+        return cache;
+    }
+
+    /// Closes the file and frees the in-memory map and the path copy.
+    pub fn deinit(self: *Cache) void {
+        if (self.file) |f| f.close();
+        self.entries.deinit();
+        self.allocator.free(self.path);
+    }
+
+    /// Reads every complete record of the file into `entries`. Bytes after the
+    /// last complete record (e.g. from an interrupted write) are cut off so
+    /// that later appends stay aligned to record boundaries.
+    fn load(self: *Cache) !void {
+        const file = self.file orelse return;
+        const file_size = try file.getEndPos();
+        if (file_size == 0) return;
+
+        const max_bytes = std.math.cast(usize, file_size) orelse return error.FileTooBig;
+        const bytes = try file.readToEndAlloc(self.allocator, max_bytes);
+        defer self.allocator.free(bytes);
+
+        // `bytesAsSlice` requires a whole number of records.
+        const record_size = @sizeOf(CacheEntry);
+        const valid_len = bytes.len - bytes.len % record_size;
+        if (valid_len != bytes.len) {
+            log.warn("Dropping {d} trailing bytes from {s}", .{ bytes.len - valid_len, self.path });
+            try file.setEndPos(valid_len);
+        }
+
+        const entries = std.mem.bytesAsSlice(CacheEntry, bytes[0..valid_len]);
+        for (entries) |entry| {
+            try self.entries.put(entry.ip, .{
+                .latitude = entry.lat,
+                .longitude = entry.lon,
+            });
+        }
+    }
+
+    /// Returns the cached coordinates for `ip`, if any.
+    pub fn get(self: *Cache, ip: u128) ?Coordinates {
+        return self.entries.get(ip);
+    }
+
+    /// Remembers `coords` for `ip` in memory and appends a record to the file.
+    pub fn put(self: *Cache, ip: u128, family: u8, coords: Coordinates) !void {
+        // Add to in-memory map
+        try self.entries.put(ip, coords);
+
+        // Append to file
+        if (self.file) |file| {
+            const entry = CacheEntry{
+                .family = family,
+                .ip = ip,
+                .lat = @floatCast(coords.latitude),
+                .lon = @floatCast(coords.longitude),
+            };
+            try file.seekFromEnd(0);
+            try file.writeAll(std.mem.asBytes(&entry));
+        }
+    }
+};
diff --git a/src/main.zig b/src/main.zig
index 74e40cb..d9d088b 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -31,8 +31,13 @@ pub fn main() !void {
     // Ensure GeoLite2 database exists
     try 
GeoLite2.ensureDatabase(allocator, cfg.geolite_path); - // Initialize GeoIP database - var geoip = GeoIp.init(cfg.geolite_path) catch |err| { + // Initialize GeoIP database with optional IP2Location fallback + var geoip = GeoIp.init( + allocator, + cfg.geolite_path, + cfg.ip2location_api_key, + if (cfg.ip2location_api_key != null) cfg.ip2location_cache_file else null, + ) catch |err| { std.log.err("Failed to load GeoIP database from {s}: {}", .{ cfg.geolite_path, err }); return err; };