From 4b26bc884f2c02409f1e4203c167cc9f6c93c879 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Wed, 16 Feb 2022 13:45:41 -0800 Subject: [PATCH] basic ISO8601 parsing --- src/date.zig | 115 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 25 deletions(-) diff --git a/src/date.zig b/src/date.zig index f964513..479a809 100644 --- a/src/date.zig +++ b/src/date.zig @@ -4,6 +4,8 @@ const std = @import("std"); +const log = std.log.scoped(.date); + pub const DateTime = struct { day: u8, month: u8, year: u16, hour: u8, minute: u8, second: u8 }; const SECONDS_PER_DAY = 86400; //* 24* 60 * 60 */ @@ -55,33 +57,95 @@ pub fn timestampToDateTime(timestamp: i64) DateTime { return DateTime{ .day = day, .month = month, .year = year, .hour = hours, .minute = minutes, .second = seconds }; } +const IsoParsingState = enum { Start, Year, Month, Day, Hour, Minute, Second, Millisecond, End }; /// Converts a string to a timestamp value. May not handle dates before the /// epoch -pub fn parseIso8601Timestamp(data: []const u8) !i64 { - _ = data; - return error.NotImplemented; +pub fn parseIso8601ToDateTime(data: []const u8) !DateTime { + // Basic format YYYYMMDDThhmmss + if (data.len == "YYYYMMDDThhmmss".len and data[8] == 'T') + return try parseIso8601BasicFormatToDateTime(data); - // TODO: Use a parsing for loop with a state machine implementation - // to tell us where we are in the string - // if (data.len < 4) return error.NotEnoughData; - // var year = try std.fmt.parseInt(u8, data[0..4], 10); - // - // var month:u4 = 0; - // if (data.len > 5) { - // if (data[5] != '-') return error.InvalidCharacter; - // var next_dash = std.mem.indexOf(u8, data[6..], "-"); - // if (next_dash == null) - // next_dash = data.len - 6; - // month = std.fmt.parseInt(u8, data[6..next_dash + 6], 10); - // } - // var day:u5 = 0; - // var hours: u5 = 0; - // var minutes: u6 = 0; - // var seconds: u6 = 0; - // var milliseconds: u9 = 0; - // ISO 8601 is complicated. 
We're going + var start: usize = 0; + var state = IsoParsingState.Start; + // Anything not explicitly set by our string would be 0 + var rc = DateTime{ .year = 0, .month = 0, .day = 0, .hour = 0, .minute = 0, .second = 0 }; + var zulu_time = false; + for (data) |ch, i| { + _ = i; + switch (ch) { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => { + if (state == .Start) state = .Year; + }, + '?', '~', '%' => { + // These characters all specify the type of time (approximate, etc) + // and we will ignore + }, + '.', '-', ':', 'T' => { + // State transition + + // We're going to coerce and this might not go well. The + // narrowing happens in endIsoState via std.fmt.parseUnsigned, + // which returns an error for out-of-range input in every build + // mode; no @setRuntimeSafety call is made here. + const next_state = try endIsoState(state, &rc, data[start..i]); + state = next_state; + start = i + 1; + }, + 'Z' => zulu_time = true, + else => { + std.log.err("Invalid character: {c}", .{ch}); + return error.InvalidCharacter; + }, + } + } + if (!zulu_time) return error.LocalTimeNotSupported; + // We know we have a Z at the end of this, so let's grab the last bit + // of the string, minus the 'Z', and fly, eagles, fly! + _ = try endIsoState(state, &rc, data[start .. data.len - 1]); + return rc; } +fn parseIso8601BasicFormatToDateTime(data: []const u8) !DateTime { + return DateTime{ + .year = try std.fmt.parseUnsigned(u16, data[0..4], 10), + .month = try std.fmt.parseUnsigned(u8, data[4..6], 10), + .day = try std.fmt.parseUnsigned(u8, data[6..8], 10), + .hour = try std.fmt.parseUnsigned(u8, data[9..11], 10), + .minute = try std.fmt.parseUnsigned(u8, data[11..13], 10), + .second = try std.fmt.parseUnsigned(u8, data[13..15], 10), + }; +} + +fn endIsoState(current_state: IsoParsingState, date: *DateTime, prev_data: []const u8) !IsoParsingState { + var next_state: IsoParsingState = undefined; + log.debug("endIsoState. 
Current state '{s}', data: {s}", .{ current_state, prev_data }); + + // Using two switches is slightly less efficient, but more readable + switch (current_state) { + .Start, .End => return error.IllegalStateTransition, + .Year => next_state = .Month, + .Month => next_state = .Day, + .Day => next_state = .Hour, + .Hour => next_state = .Minute, + .Minute => next_state = .Second, + .Second => next_state = .Millisecond, + .Millisecond => next_state = .End, + } + + // TODO: This won't handle signed, which ISO 8601 supports. For now, let's fail + // explicitly + switch (current_state) { + .Year => date.year = try std.fmt.parseUnsigned(u16, prev_data, 10), + .Month => date.month = try std.fmt.parseUnsigned(u8, prev_data, 10), + .Day => date.day = try std.fmt.parseUnsigned(u8, prev_data, 10), + .Hour => date.hour = try std.fmt.parseUnsigned(u8, prev_data, 10), + .Minute => date.minute = try std.fmt.parseUnsigned(u8, prev_data, 10), + .Second => date.second = try std.fmt.parseUnsigned(u8, prev_data, 10), + .Millisecond => {}, // We'll throw that away - our granularity is 1 second + .Start, .End => return error.InvalidState, + } + return next_state; +} fn dateTimeToTimestamp(datetime: DateTime) !i64 { const epoch = DateTime{ .year = 1970, @@ -251,9 +315,10 @@ test "Convert datetime to timestamp" { } test "Convert ISO8601 string to timestamp" { - try std.testing.expectEqual(@as(i64, 1598607147), try dateTimeToTimestamp(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 })); - try std.testing.expectEqual(@as(i64, 1604207167), try dateTimeToTimestamp(DateTime{ .year = 2020, .month = 11, .day = 1, .hour = 5, .minute = 6, .second = 7 })); - try std.testing.expectEqual(@as(i64, 1440938160), try dateTimeToTimestamp(DateTime{ .year = 2015, .month = 08, .day = 30, .hour = 12, .minute = 36, .second = 00 })); + try std.testing.expectEqual(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 }, try 
parseIso8601ToDateTime("20200828T093227")); + try std.testing.expectEqual(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 }, try parseIso8601ToDateTime("2020-08-28T9:32:27Z")); + try std.testing.expectEqual(DateTime{ .year = 2020, .month = 11, .day = 1, .hour = 5, .minute = 6, .second = 7 }, try parseIso8601ToDateTime("2020-11-01T5:06:7Z")); + try std.testing.expectEqual(DateTime{ .year = 2015, .month = 08, .day = 30, .hour = 12, .minute = 36, .second = 00 }, try parseIso8601ToDateTime("2015-08-30T12:36:00.000Z")); } test "Convert datetime to timestamp before 1970" { try std.testing.expectEqual(@as(i64, -449392815), try dateTimeToTimestamp(DateTime{ .year = 1955, .month = 10, .day = 05, .hour = 16, .minute = 39, .second = 45 }));