Compare commits

..

7 Commits

Author SHA1 Message Date
8727a4e038
EC2 support
All checks were successful
continuous-integration/drone/push Build is passing
2022-02-16 14:14:54 -08:00
714e7278fd
ensure logging statements are using scoped log 2022-02-16 14:04:06 -08:00
b2c915f400
dealloc on error (using a blunt instrument) and iso8601 support 2022-02-16 14:01:29 -08:00
b44ea5c3e8
add iso8601 string to timestamp 2022-02-16 13:47:19 -08:00
4b26bc884f
basic ISO8601 parsing 2022-02-16 13:45:41 -08:00
fc8a73a9c4
handle times before 1970 2022-02-15 17:28:27 -08:00
49f3f48aa8
conversion from DateTime structure to timestamp (partial)
This only handles dates after 1970. The code
is getting ugly and I think it is the wrong approach.
Will clean up in the next commit
2022-02-15 17:03:42 -08:00
6 changed files with 500 additions and 109 deletions

View File

@ -2,15 +2,13 @@
[![Build Status](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/status.svg?ref=refs/heads/master)](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/)
This SDK currently supports all AWS services except EC2 and S3. These two
services only support XML, and more work is needed to parse and integrate
type hydration from the base parsing. S3 also requires some plumbing tweaks
in the signature calculation. Examples of usage are in src/main.zig.
This SDK currently supports all AWS services except services using the restXml
protocol (4 services including S3). See TODO list below.
Current executable size for the demo is 953k (90k of which is the AWS PEM file)
after compiling with -Drelease-safe and
Current executable size for the demo is 1.6M (90k of which is the AWS PEM file,
and approximately 600K for XML services) after compiling with -Drelease-safe and
[stripping the executable after compilation](https://github.com/ziglang/zig/issues/351).
This is for x86_linux. Tested targets:
This is for x86_linux, and will vary based on services used. Tested targets:
* x86_64-linux
* riscv64-linux
@ -41,8 +39,7 @@ require passing in a client option to specify an different TLS root certificate
(pass null to disable certificate verification).
The [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) exists
for posterity, and supports x86_64 linux. This branch is recommended moving
forward.
for posterity, and supports x86_64 linux. The old branch is deprecated.
## Limitations
@ -52,13 +49,8 @@ implemented.
TODO List:
* Complete integration of Xml responses with remaining code base
* Implement [AWS restXml protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-restxml-protocol.html).
Includes S3. Total service count 4. This may be blocked due to the same issue as EC2.
* Implement [AWS EC2 query protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-ec2-query-protocol.html).
Includes EC2. Total service count 1. This may be blocked on a compiler bug,
though has not been tested with zig 0.9.0. More details and llvm ir log can be found in the
[XML branch](https://git.lerch.org/lobo/aws-sdk-for-zig/src/branch/xml).
Includes S3. Total service count 4.
* Implement sigv4a signing
* Implement jitter/exponential backoff
* Implement timeouts and other TODO's in the code

View File

@ -5,7 +5,7 @@ const json = @import("json.zig");
const url = @import("url.zig");
const case = @import("case.zig");
const servicemodel = @import("servicemodel.zig");
// const xml_shaper = @import("xml_shaper.zig");
const xml_shaper = @import("xml_shaper.zig");
const log = std.log.scoped(.aws);
@ -175,8 +175,6 @@ pub fn Request(comptime action: anytype) type {
// handle lists and maps properly anyway yet, so we'll go for it and see
// where it breaks. PRs and/or failing test cases appreciated.
fn callQuery(request: ActionRequest, options: Options) !FullResponseType {
if (Self.service_meta.aws_protocol == .ec2_query)
@compileError("XML responses from EC2 blocked due to zig compiler bug scheduled to be fixed no earlier than 0.10.0");
var buffer = std.ArrayList(u8).init(options.client.allocator);
defer buffer.deinit();
const writer = buffer.writer();
@ -250,10 +248,9 @@ pub fn Request(comptime action: anytype) type {
}
}
// TODO: Handle XML
if (!isJson) return error.XmlUnimplemented;
if (!isJson) return try xmlReturn(options, response);
const SResponse = if (Self.service_meta.aws_protocol != .query and Self.service_meta.aws_protocol != .ec2_query)
const SResponse = if (Self.service_meta.aws_protocol != .query)
action.Response
else
ServerResponse(action);
@ -272,7 +269,7 @@ pub fn Request(comptime action: anytype) type {
.response_metadata = .{
.request_id = try requestIdFromHeaders(aws_request, response, options),
},
.parser_options = parser_options,
.parser_options = .{ .json = parser_options },
.raw_parsed = .{ .raw = .{} },
};
@ -294,13 +291,25 @@ pub fn Request(comptime action: anytype) type {
return e;
};
if (Self.service_meta.aws_protocol != .query and Self.service_meta.aws_protocol != .ec2_query) {
// TODO: Figure out this hack
// the code setting the response about 10 lines down will trigger
// an error because the first field may not be a struct when
// XML processing is happening above, which we only know at runtime.
//
// We could simply force .ec2_query and .rest_xml above rather than
// isJson, but it would be nice to automatically support json if
// these services start returning that like we'd like them to.
//
// Otherwise, the compiler gets down here thinking this will be
// processed. If it is, then we have a problem when the field name
// may not be a struct.
if (Self.service_meta.aws_protocol != .query or Self.service_meta.aws_protocol == .ec2_query) {
return FullResponseType{
.response = parsed_response,
.response_metadata = .{
.request_id = try requestIdFromHeaders(aws_request, response, options),
},
.parser_options = parser_options,
.parser_options = .{ .json = parser_options },
.raw_parsed = .{ .raw = parsed_response },
};
}
@ -320,10 +329,53 @@ pub fn Request(comptime action: anytype) type {
.response_metadata = .{
.request_id = try options.client.allocator.dupe(u8, real_response.ResponseMetadata.RequestId),
},
.parser_options = parser_options,
.parser_options = .{ .json = parser_options },
.raw_parsed = .{ .server = parsed_response },
};
}
/// Builds a FullResponseType from an XML response body.
///
/// The body is parsed into `action.Response` with xml_shaper, and the request
/// id is read from the `requestId` child of the document root and copied with
/// the client allocator.
///
/// Errors: anything `xml_shaper.parse` returns, `error.RequestIdNotFound`
/// when the root has no usable `requestId` element, and allocation failure
/// while copying the request id. On every error path the parsed document is
/// released before returning.
fn xmlReturn(options: Options, result: awshttp.HttpResult) !FullResponseType {
    // Server shape be all like:
    //
    // <?xml version="1.0" encoding="UTF-8"?>
    // <DescribeRegionsResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
    //     <requestId>0efe31c6-cad5-4882-b275-dfea478cf039</requestId>
    //     <regionInfo>
    //         <item>
    //             <regionName>eu-north-1</regionName>
    //             <regionEndpoint>ec2.eu-north-1.amazonaws.com</regionEndpoint>
    //             <optInStatus>opt-in-not-required</optInStatus>
    //         </item>
    //     </regionInfo>
    // </DescribeRegionsResponse>
    //
    // While our stuff be like:
    //
    // struct {
    //     regions: []struct {
    //         region_name: []const u8,
    //     }
    // }
    //
    // Big thing is that requestid, which we'll need to fetch "manually"
    const xml_options = xml_shaper.ParseOptions{ .allocator = options.client.allocator };
    const parsed = try xml_shaper.parse(action.Response, result.body, xml_options);
    // On success `parsed` is handed to the caller inside `raw_parsed.xml`.
    // Until then we still own it, so release it if anything below fails.
    errdefer parsed.deinit();

    const request_id = parsed.document.root.getCharData("requestId") orelse
        return error.RequestIdNotFound;

    return FullResponseType{
        .response = parsed.parsed_value,
        .response_metadata = .{
            // Copied so the id is owned by the client allocator rather than
            // pointing into the parsed document.
            .request_id = try options.client.allocator.dupe(u8, request_id),
        },
        .parser_options = .{ .xml = xml_options },
        .raw_parsed = .{ .xml = parsed },
    };
}
};
}
@ -397,21 +449,32 @@ fn FullResponse(comptime action: anytype) type {
response_metadata: struct {
request_id: []u8,
},
parser_options: json.ParseOptions,
parser_options: union(enum) {
json: json.ParseOptions,
xml: xml_shaper.ParseOptions,
},
raw_parsed: union(enum) {
server: ServerResponse(action),
raw: action.Response,
xml: xml_shaper.Parsed(action.Response),
},
// raw_parsed: ServerResponse(request),
const Self = @This();
pub fn deinit(self: Self) void {
switch (self.raw_parsed) {
.server => json.parseFree(ServerResponse(action), self.raw_parsed.server, self.parser_options),
.raw => json.parseFree(action.Response, self.raw_parsed.raw, self.parser_options),
// Server is json only (so far)
.server => json.parseFree(ServerResponse(action), self.raw_parsed.server, self.parser_options.json),
// Raw is json only (so far)
.raw => json.parseFree(action.Response, self.raw_parsed.raw, self.parser_options.json),
.xml => |xml| xml.deinit(),
}
self.parser_options.allocator.?.free(self.response_metadata.request_id);
var allocator: std.mem.Allocator = undefined;
switch (self.parser_options) {
.json => |j| allocator = j.allocator.?,
.xml => |x| allocator = x.allocator.?,
}
allocator.free(self.response_metadata.request_id);
}
};
}

View File

@ -4,14 +4,17 @@
const std = @import("std");
const log = std.log.scoped(.date);
pub const DateTime = struct { day: u8, month: u8, year: u16, hour: u8, minute: u8, second: u8 };
const SECONDS_PER_DAY = 86400; //* 24* 60 * 60 */
const DAYS_PER_YEAR = 365; //* Normal year (no leap year) */
pub fn timestampToDateTime(timestamp: i64) DateTime {
// aus https://de.wikipedia.org/wiki/Unixzeit
const unixtime = @intCast(u64, timestamp);
const SECONDS_PER_DAY = 86400; //* 24* 60 * 60 */
const DAYS_PER_YEAR = 365; //* Normal year (no leap year) */
const DAYS_IN_4_YEARS = 1461; //* 4*365 + 1 */
const DAYS_IN_100_YEARS = 36524; //* 100*365 + 25 - 1 */
const DAYS_IN_400_YEARS = 146097; //* 400*365 + 100 - 4 + 1 */
@ -54,8 +57,239 @@ pub fn timestampToDateTime(timestamp: i64) DateTime {
return DateTime{ .day = day, .month = month, .year = year, .hour = hours, .minute = minutes, .second = seconds };
}
/// Parses an ISO 8601 string and converts the resulting DateTime into a
/// timestamp via `dateTimeToTimestamp`. Errors from either step are
/// returned to the caller unchanged.
pub fn parseIso8601ToTimestamp(data: []const u8) !i64 {
    const parsed_datetime = try parseIso8601ToDateTime(data);
    return dateTimeToTimestamp(parsed_datetime);
}
/// Parser position while scanning an extended-format ISO 8601 string. Each
/// value between Start and End names the component whose text is currently
/// being collected; `endIsoState` advances through them in declaration order.
const IsoParsingState = enum { Start, Year, Month, Day, Hour, Minute, Second, Millisecond, End };
/// Parses an ISO 8601 date/time string into a `DateTime`.
///
/// Two layouts are accepted:
///   * basic format `YYYYMMDDThhmmss` (exactly 15 characters with `T` at
///     index 8), delegated to `parseIso8601BasicFormatToDateTime`
///   * separator-delimited format using `-`, `:`, `.` and `T`, which must
///     end with `Z`, e.g. `2015-08-30T12:36:00.000Z`
///
/// Components the string does not supply stay 0, and the fractional-second
/// component is discarded. Signed components are not supported.
///
/// Errors:
///   * `error.LocalTimeNotSupported` - the string has no `Z` designator
///   * `error.InvalidCharacter` - an unexpected character, or a `Z` that is
///     not the final character
///   * `error.IllegalStateTransition` - a separator appears before any digit
///     or after the fractional-second component
///   * any error `std.fmt.parseUnsigned` returns for a component
pub fn parseIso8601ToDateTime(data: []const u8) !DateTime {
    // Basic format YYYYMMDDThhmmss
    if (data.len == "YYYYMMDDThhmmss".len and data[8] == 'T')
        return try parseIso8601BasicFormatToDateTime(data);

    // Index of the first character of the component being collected
    var start: usize = 0;
    var state = IsoParsingState.Start;
    // Anything not explicitly set by our string would be 0
    var rc = DateTime{ .year = 0, .month = 0, .day = 0, .hour = 0, .minute = 0, .second = 0 };
    var zulu_time = false;
    for (data) |ch, i| {
        switch (ch) {
            '0'...'9' => {
                if (state == .Start) state = .Year;
            },
            '?', '~', '%' => {
                // These characters all specify the type of time (approximate,
                // etc). They are not rejected here.
                // NOTE(review): they stay inside the component slice, so the
                // numeric parse of that component will still reject them.
            },
            '.', '-', ':', 'T' => {
                // A separator closes the current component: store it and
                // move on to the next one.
                state = try endIsoState(state, &rc, data[start..i]);
                start = i + 1;
            },
            'Z' => {
                // The final slice below assumes 'Z' is the last character.
                // A 'Z' anywhere else would let `start` run past
                // `data.len - 1` and trip the slice bounds check.
                if (i + 1 != data.len) {
                    log.err("Invalid character: {c}", .{ch});
                    return error.InvalidCharacter;
                }
                zulu_time = true;
            },
            else => {
                log.err("Invalid character: {c}", .{ch});
                return error.InvalidCharacter;
            },
        }
    }
    if (!zulu_time) return error.LocalTimeNotSupported;
    // We know we have a Z at the end of this, so let's grab the last bit
    // of the string, minus the 'Z', and fly, eagles, fly!
    _ = try endIsoState(state, &rc, data[start .. data.len - 1]);
    return rc;
}
/// Reads the fixed-width basic layout `YYYYMMDDThhmmss`. The caller has
/// already checked the length and the `T` at index 8; each field is parsed
/// as an unsigned decimal from its fixed column range.
fn parseIso8601BasicFormatToDateTime(data: []const u8) !DateTime {
    const year = try std.fmt.parseUnsigned(u16, data[0..4], 10);
    const month = try std.fmt.parseUnsigned(u8, data[4..6], 10);
    const day = try std.fmt.parseUnsigned(u8, data[6..8], 10);
    // data[8] is the 'T' separator
    const hour = try std.fmt.parseUnsigned(u8, data[9..11], 10);
    const minute = try std.fmt.parseUnsigned(u8, data[11..13], 10);
    const second = try std.fmt.parseUnsigned(u8, data[13..15], 10);
    return DateTime{
        .year = year,
        .month = month,
        .day = day,
        .hour = hour,
        .minute = minute,
        .second = second,
    };
}
/// Finishes the component identified by `current_state`: stores the number
/// in `prev_data` into the matching field of `date` and returns the state
/// for the component that follows.
///
/// Returns `error.IllegalStateTransition` when called in `.Start` or `.End`,
/// and passes through any `std.fmt.parseUnsigned` error for the component.
fn endIsoState(current_state: IsoParsingState, date: *DateTime, prev_data: []const u8) !IsoParsingState {
    log.debug("endIsoState. Current state '{s}', data: {s}", .{ current_state, prev_data });

    // Work out where we go next. Start/End have no successor.
    const following: IsoParsingState = switch (current_state) {
        .Start, .End => return error.IllegalStateTransition,
        .Year => .Month,
        .Month => .Day,
        .Day => .Hour,
        .Hour => .Minute,
        .Minute => .Second,
        .Second => .Millisecond,
        .Millisecond => .End,
    };

    // TODO: This won't handle signed, which Iso supports. For now, let's fail
    // explicitly
    switch (current_state) {
        .Year => date.year = try std.fmt.parseUnsigned(u16, prev_data, 10),
        .Month => date.month = try std.fmt.parseUnsigned(u8, prev_data, 10),
        .Day => date.day = try std.fmt.parseUnsigned(u8, prev_data, 10),
        .Hour => date.hour = try std.fmt.parseUnsigned(u8, prev_data, 10),
        .Minute => date.minute = try std.fmt.parseUnsigned(u8, prev_data, 10),
        .Second => date.second = try std.fmt.parseUnsigned(u8, prev_data, 10),
        // Sub-second precision is dropped - our granularity is 1 second
        .Millisecond => {},
        // Already rejected above; kept so the switch stays exhaustive
        .Start, .End => return error.InvalidState,
    }
    return following;
}
/// Converts `datetime` to seconds relative to 1970-01-01T00:00:00 by
/// measuring the distance from that instant with `secondsBetween`.
fn dateTimeToTimestamp(datetime: DateTime) !i64 {
    const unix_epoch = DateTime{ .year = 1970, .month = 1, .day = 1, .hour = 0, .minute = 0, .second = 0 };
    return secondsBetween(unix_epoch, datetime);
}
/// Errors produced while converting a DateTime into a timestamp.
const DateTimeToTimestampError = error{
    DateTimeOutOfRange,
};

/// Returns the number of seconds from `start` to `end`.
///
/// The result is negative when `end` lies before `start`. Both values are
/// checked with `validateDatetime` first, so out-of-range fields yield
/// `error.DateTimeOutOfRange`.
///
/// All day/second arithmetic is carried out in i64. Narrower intermediates
/// overflow for spans of 180+ years (u16 day count) and for any end year
/// past 2038 (i32 second count).
fn secondsBetween(start: DateTime, end: DateTime) DateTimeToTimestampError!i64 {
    try validateDatetime(start);
    try validateDatetime(end);
    // Always measure forwards; flip the sign for an earlier end year.
    if (end.year < start.year) return -1 * try secondsBetween(end, start);
    if (start.month != 1 or
        start.day != 1 or
        start.hour != 0 or
        start.minute != 0 or
        start.second != 0)
    {
        // Normalise the start to midnight on Jan 1 of its year, then take
        // off the part of that year that had already elapsed.
        const seconds_into_start_year = secondsFromBeginningOfYear(
            start.year,
            start.month,
            start.day,
            start.hour,
            start.minute,
            start.second,
        );
        const new_start = DateTime{
            .year = start.year,
            .month = 1,
            .day = 1,
            .hour = 0,
            .minute = 0,
            .second = 0,
        };
        return (try secondsBetween(new_start, end)) - seconds_into_start_year;
    }
    const leap_years_between = leapYearsBetween(start.year, end.year);
    const years_diff = end.year - start.year;
    log.debug("Years from epoch: {d}, Leap years: {d}", .{ years_diff, leap_years_between });
    // Widen before multiplying so the product cannot overflow.
    const days_diff: i64 = @as(i64, years_diff) * DAYS_PER_YEAR + leap_years_between;
    log.debug("Days with leap year, without month: {d}", .{days_diff});
    const seconds_into_year = secondsFromBeginningOfYear(
        end.year,
        end.month,
        end.day,
        end.hour,
        end.minute,
        end.second,
    );
    return (days_diff * SECONDS_PER_DAY) + @as(i64, seconds_into_year);
}
/// Range-checks the fields of `dt` and returns `error.DateTimeOutOfRange`
/// when any of them is outside its calendar range.
///
/// Month and day are 1-based, so 0 is rejected as well as values above
/// 12 / 31. A zero month or day would otherwise reach
/// `secondsFromBeginningOfYear`, which indexes a 12-entry month table with
/// `month` and computes `day - 1` on an unsigned value.
///
/// NOTE(review): the day is not checked against the length of the specific
/// month (e.g. Feb 30 is accepted).
fn validateDatetime(dt: DateTime) !void {
    if (dt.month < 1 or dt.month > 12 or
        dt.day < 1 or dt.day > 31 or
        dt.hour >= 24 or
        dt.minute >= 60 or
        dt.second >= 60) return error.DateTimeOutOfRange;
}
/// Returns how many seconds have elapsed between midnight on January 1 of
/// `year` and the given month/day/hour/minute/second in that same year.
/// `month` and `day` are 1-based; `year` is only used to pick the leap-year
/// or normal month-length table.
fn secondsFromBeginningOfYear(year: u16, month: u8, day: u8, hour: u8, minute: u8, second: u8) u32 {
    const leap_lengths = [12]u5{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
    const common_lengths = [12]u5{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
    const month_lengths = if (isLeapYear(year)) leap_lengths else common_lengths;

    // Add up every whole month that precedes `month`. `month_index` is the
    // zero-based table index, i.e. one less than the 1-based month number.
    var elapsed_days: u32 = 0;
    var month_index: usize = 0;
    while (month_index + 1 != month) : (month_index += 1) {
        elapsed_days += month_lengths[month_index];
    }
    log.debug("Days with month, without day: {d}. Day of month {d}, will add {d} days", .{
        elapsed_days,
        day,
        day - 1,
    });

    // The current day is not complete yet (hour/minute/second cover it), so
    // only the days before it count: days are 1-based, hours are 0-based.
    elapsed_days += day - 1;
    log.debug("Total days diff: {d}", .{elapsed_days});

    // Everything from here on is expressed in seconds
    const whole_day_seconds: u32 = elapsed_days * SECONDS_PER_DAY;
    const hour_seconds = @as(u32, hour) * 60 * 60;
    const minute_seconds = @as(u32, minute) * 60;
    return whole_day_seconds + hour_seconds + minute_seconds + @as(u32, second);
}
/// Reports whether `year` is a leap year: divisible by 4, except century
/// years, which must also be divisible by 400.
fn isLeapYear(year: u16) bool {
    const divisible_by_4 = year % 4 == 0;
    const is_century = year % 100 == 0;
    const divisible_by_400 = year % 400 == 0;
    return divisible_by_4 and (!is_century or divisible_by_400);
}
/// Counts the leap years in the half-open range between the two years. The
/// arguments may be given in either order: the smaller one is treated as the
/// inclusive start and the larger one as the exclusive end.
fn leapYearsBetween(start_year_inclusive: u16, end_year_exclusive: u16) u16 {
    const start = std.math.min(start_year_inclusive, end_year_exclusive);
    const end = std.math.max(start_year_inclusive, end_year_exclusive);
    log.debug("Leap years starting from {d}, ending at {d}", .{ start, end });

    // Walk forward to the first multiple of 4 (or to the end of the range)
    var candidate = start;
    while (candidate < end and candidate % 4 != 0) : (candidate += 1) {}
    if (candidate == end) return 0; // No leap years here. E.g. 1971-1973

    // Every candidate from here is a multiple of 4, so step by 4. Centuries
    // only count when they are also divisible by 400.
    var leap_count: u16 = 0;
    while (candidate < end) : (candidate += 4) {
        const is_century = candidate % 100 == 0;
        if (!is_century or candidate % 400 == 0) {
            log.debug("Year {d} is leap year", .{candidate});
            leap_count += 1;
        }
    }
    return leap_count;
}
fn printDateTime(dt: DateTime) void {
std.log.debug("{:0>4}-{:0>2}-{:0>2}T{:0>2}:{:0>2}:{:0<2}Z", .{
log.debug("{:0>4}-{:0>2}-{:0>2}T{:0>2}:{:0>2}:{:0<2}Z", .{
dt.year,
dt.month,
dt.day,
@ -69,9 +303,7 @@ pub fn printNowUtc() void {
printDateTime(timestampToDateTime(std.time.timestamp()));
}
test "GMT and localtime" {
std.testing.log_level = .debug;
std.log.debug("\n", .{});
test "Convert timestamp to datetime" {
printDateTime(timestampToDateTime(std.time.timestamp()));
try std.testing.expectEqual(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 }, timestampToDateTime(1598607147));
@ -79,3 +311,19 @@ test "GMT and localtime" {
// Get time for date: https://wtools.io/convert-date-time-to-unix-time
try std.testing.expectEqual(DateTime{ .year = 2015, .month = 08, .day = 30, .hour = 12, .minute = 36, .second = 00 }, timestampToDateTime(1440938160));
}
// Checks dateTimeToTimestamp against known timestamps for three dates in
// 2015 and 2020, all on or after 1970.
test "Convert datetime to timestamp" {
try std.testing.expectEqual(@as(i64, 1598607147), try dateTimeToTimestamp(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 }));
try std.testing.expectEqual(@as(i64, 1604207167), try dateTimeToTimestamp(DateTime{ .year = 2020, .month = 11, .day = 1, .hour = 5, .minute = 6, .second = 7 }));
try std.testing.expectEqual(@as(i64, 1440938160), try dateTimeToTimestamp(DateTime{ .year = 2015, .month = 08, .day = 30, .hour = 12, .minute = 36, .second = 00 }));
}
// Exercises parseIso8601ToDateTime (the result compared is a DateTime, not
// a timestamp). Inputs, in order: the basic format without separators, the
// separated format with a single-digit hour, single-digit hour and second,
// and a value with fractional seconds.
test "Convert ISO8601 string to timestamp" {
try std.testing.expectEqual(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 }, try parseIso8601ToDateTime("20200828T093227"));
try std.testing.expectEqual(DateTime{ .year = 2020, .month = 8, .day = 28, .hour = 9, .minute = 32, .second = 27 }, try parseIso8601ToDateTime("2020-08-28T9:32:27Z"));
try std.testing.expectEqual(DateTime{ .year = 2020, .month = 11, .day = 1, .hour = 5, .minute = 6, .second = 7 }, try parseIso8601ToDateTime("2020-11-01T5:06:7Z"));
try std.testing.expectEqual(DateTime{ .year = 2015, .month = 08, .day = 30, .hour = 12, .minute = 36, .second = 00 }, try parseIso8601ToDateTime("2015-08-30T12:36:00.000Z"));
}
// A date earlier than 1970 must convert to a negative timestamp.
test "Convert datetime to timestamp before 1970" {
try std.testing.expectEqual(@as(i64, -449392815), try dateTimeToTimestamp(DateTime{ .year = 1955, .month = 10, .day = 05, .hour = 16, .minute = 39, .second = 45 }));
}

View File

@ -14,6 +14,15 @@ pub fn log(
// Ignore aws_signing messages
if (verbose < 2 and scope == .aws_signing and @enumToInt(level) >= @enumToInt(std.log.Level.debug))
return;
// Ignore aws_credentials messages
if (verbose < 2 and scope == .aws_credentials and @enumToInt(level) >= @enumToInt(std.log.Level.debug))
return;
// Ignore xml_shaper messages
if (verbose < 2 and scope == .xml_shaper and @enumToInt(level) >= @enumToInt(std.log.Level.debug))
return;
// Ignore date messages
if (verbose < 2 and scope == .date and @enumToInt(level) >= @enumToInt(std.log.Level.debug))
return;
// Ignore awshttp messages
if (verbose < 1 and scope == .awshttp and @enumToInt(level) >= @enumToInt(std.log.Level.debug))
return;
@ -169,18 +178,17 @@ pub fn main() anyerror!void {
std.log.err("no functions to work with", .{});
}
},
// TODO: This test fails with broken LLVM module
.ec2_query_no_input => {
std.log.err("EC2 Test disabled due to compiler bug", .{});
// Describe regions is a simpler request and easier to debug
// const instances = try client.call(services.ec2.describe_regions.Request{}, options);
// defer instances.deinit();
// std.log.info("region count: {d}", .{instances.response.regions.?.len});
const result = try client.call(services.ec2.describe_regions.Request{}, options);
defer result.deinit();
std.log.info("request id: {s}", .{result.response_metadata.request_id});
std.log.info("region count: {d}", .{result.response.regions.?.len});
// Describe instances is more interesting
// const instances = try client.call(services.ec2.describe_instances.Request{}, options);
// defer instances.deinit();
// std.log.info("reservation count: {d}", .{instances.response.reservations.len});
const instances = try client.call(services.ec2.describe_instances.Request{}, options);
defer instances.deinit();
std.log.info("reservation count: {d}", .{instances.response.reservations.?.len});
},
}
std.log.info("===== End Test: {s} =====\n", .{@tagName(t)});

View File

@ -45,7 +45,7 @@ pub const Element = struct {
}
pub fn getCharData(self: *Element, child_tag: []const u8) ?[]const u8 {
const child = self.findChildByTag(child_tag) orelse return null;
const child = (self.findChildByTag(child_tag) catch return null) orelse return null;
if (child.children.items.len != 1) {
return null;
}

View File

@ -1,26 +1,37 @@
const std = @import("std");
const xml = @import("xml.zig");
const date = @import("date.zig");
const log = std.log.scoped(.xml_shaper);
fn Parsed(comptime T: type) type {
pub fn Parsed(comptime T: type) type {
return struct {
allocator: std.mem.Allocator,
// Forcing an arena allocator isn't my favorite choice here, but
// it is the simplest way to handle deallocation in the event of
// an error
allocator: std.heap.ArenaAllocator,
parsed_value: T,
document: xml.Document,
const Self = @This();
pub fn init(allocator: std.mem.Allocator, parsedObj: T) Self {
pub fn init(allocator: std.heap.ArenaAllocator, parsedObj: T, document: xml.Document) Self {
return .{
.allocator = allocator,
.parsed_value = parsedObj,
.document = document,
};
}
pub fn deinit(self: Self) void {
deinitObject(self.allocator, self.parsed_value);
self.allocator.deinit();
// deinitObject(self.allocator, self.parsed_value);
// self.document.deinit();
}
};
}
// This is dead code and can be removed with the move to ArenaAllocator
fn deinitObject(allocator: std.mem.Allocator, obj: anytype) void {
switch (@typeInfo(@TypeOf(obj))) {
.Optional => if (obj) |o| deinitObject(allocator, o),
@ -54,28 +65,7 @@ fn Parsed(comptime T: type) type {
else => {},
}
}
};
}
pub fn Parser(comptime T: type) type {
return struct {
ParseType: type = T,
ReturnType: type = Parsed(T),
const Self = @This();
pub fn parse(source: []const u8, options: ParseOptions) !Parsed(T) {
if (options.allocator == null)
return error.AllocatorRequired; // we are only leaving it be null for compatibility with json
const allocator = options.allocator.?;
const parse_allocator = std.heap.ArenaAllocator.init(allocator);
const parsed = try xml.parse(allocator, source);
defer parsed.deinit();
defer parse_allocator.deinit();
return Parsed(T).init(allocator, try parseInternal(T, parsed.root, options));
}
};
}
// should we just use json parse options?
pub const ParseOptions = struct {
allocator: ?std.mem.Allocator = null,
@ -86,11 +76,17 @@ pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parse
if (options.allocator == null)
return error.AllocatorRequired; // we are only leaving it be null for compatibility with json
const allocator = options.allocator.?;
var parse_allocator = std.heap.ArenaAllocator.init(allocator);
const parsed = try xml.parse(parse_allocator.allocator(), source);
// defer parsed.deinit(); // Let the arena allocator whack it all
defer parse_allocator.deinit();
return Parsed(T).init(allocator, try parseInternal(T, parsed.root, options));
var arena_allocator = std.heap.ArenaAllocator.init(allocator);
const aa = arena_allocator.allocator();
errdefer arena_allocator.deinit();
const parsed = try xml.parse(aa, source);
errdefer parsed.deinit();
const opts = ParseOptions{
.allocator = aa,
.match_predicate = options.match_predicate,
};
return Parsed(T).init(arena_allocator, try parseInternal(T, parsed.root, opts), parsed);
}
fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
@ -103,10 +99,47 @@ fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions)
return error.UnexpectedToken;
},
.Float, .ComptimeFloat => {
return try std.fmt.parseFloat(T, element.children.items[0].CharData);
return std.fmt.parseFloat(T, element.children.items[0].CharData) catch |e| {
if (log_parse_traces) {
std.log.err(
"Could not parse '{s}' as float in element '{s}': {s}",
.{
element.children.items[0].CharData,
element.tag,
e,
},
);
if (@errorReturnTrace()) |trace| {
std.debug.dumpStackTrace(trace.*);
}
}
return e;
};
},
.Int, .ComptimeInt => {
return try std.fmt.parseInt(T, element.children.items[0].CharData, 10);
// 2021-10-05T16:39:45.000Z
return std.fmt.parseInt(T, element.children.items[0].CharData, 10) catch |e| {
if (element.children.items[0].CharData[element.children.items[0].CharData.len - 1] == 'Z') {
// We have an iso8601 in an integer field (we think)
// Try to coerce this into our type
const timestamp = try date.parseIso8601ToTimestamp(element.children.items[0].CharData);
return try std.math.cast(T, timestamp);
}
if (log_parse_traces) {
std.log.err(
"Could not parse '{s}' as integer in element '{s}': {s}",
.{
element.children.items[0].CharData,
element.tag,
e,
},
);
if (@errorReturnTrace()) |trace| {
std.debug.dumpStackTrace(trace.*);
}
}
return e;
};
},
.Optional => |optional_info| {
if (element.children.items.len == 0) {
@ -200,13 +233,23 @@ fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions)
// }
// } else {
log.debug("Found child element {s}", .{child.tag});
// TODO: how do we errdefer this?
@field(r, field.name) = try parseInternal(field.field_type, child, options);
fields_seen[i] = true;
fields_set = fields_set + 1;
found_value = true;
}
if (@typeInfo(field.field_type) == .Optional and !found_value) {
// @compileLog("Optional: Field name ", field.name, ", type ", field.field_type);
@field(r, field.name) = null;
fields_set = fields_set + 1;
found_value = true;
}
// Using this else clause breaks zig, so we'll use a boolean instead
if (!found_value) return error.NoValueForField;
if (!found_value) {
log.err("Could not find a value for field {s}. Looking for {s} in element {s}", .{ field.name, name, element.tag });
return error.NoValueForField;
}
// } else {
// return error.NoValueForField;
// }
@ -416,6 +459,21 @@ test "can parse an optional boolean type" {
try testing.expectEqual(@as(?bool, true), parsed_data.parsed_value.foo_bar);
}
// An element holding an ISO 8601 string that maps onto an integer field
// (?i64 here) is expected to come back as the matching timestamp.
test "can coerce 8601 date to integer" {
const allocator = std.testing.allocator;
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>2021-10-05T16:39:45.000Z</fooBar>
\\</Example>
;
const ExampleDoesNotMatter = struct {
foo_bar: ?i64 = null,
};
const parsed_data = try parse(ExampleDoesNotMatter, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqual(@as(i64, 1633451985), parsed_data.parsed_value.foo_bar.?);
}
// This is the simplest test so far that breaks zig
test "can parse a boolean type (two fields)" {
const allocator = std.testing.allocator;
@ -434,6 +492,28 @@ test "can parse a boolean type (two fields)" {
defer parsed_data.deinit();
try testing.expectEqual(@as(bool, true), parsed_data.parsed_value.foo_bar);
}
var log_parse_traces = true;
// Parsing must fail with error.InvalidCharacter because fooBaz ("12.345")
// is not a valid u64. The test uses std.testing.allocator, so anything the
// failed parse leaves allocated is reported as a leak.
test "can error without leaking memory" {
const allocator = std.testing.allocator;
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>true</fooBar>
\\ <fooBaz>12.345</fooBaz>
\\</Example>
;
const ExampleDoesNotMatter = struct {
foo_bar: bool,
foo_baz: u64,
};
// Silence the parse-failure logging for this expected error, and restore
// the flag afterwards.
log_parse_traces = false;
defer log_parse_traces = true;
try std.testing.expectError(
error.InvalidCharacter,
parse(ExampleDoesNotMatter, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual }),
);
}
test "can parse a nested type" {
const allocator = std.testing.allocator;
const data =