const std = @import("std"); const xml = @import("xml.zig"); const date = @import("date.zig"); const log = std.log.scoped(.xml_shaper); pub fn Parsed(comptime T: type) type { return struct { // Forcing an arean allocator isn't my favorite choice here, but // is the simplest way to handle deallocation in the event of // an error allocator: std.heap.ArenaAllocator, parsed_value: T, document: xml.Document, const Self = @This(); pub fn init(allocator: std.heap.ArenaAllocator, parsedObj: T, document: xml.Document) Self { return .{ .allocator = allocator, .parsed_value = parsedObj, .document = document, }; } pub fn deinit(self: Self) void { self.allocator.deinit(); // deinitObject(self.allocator, self.parsed_value); // self.document.deinit(); } }; } // This is dead code and can be removed with the move to ArenaAllocator fn deinitObject(allocator: std.mem.Allocator, obj: anytype) void { switch (@typeInfo(@TypeOf(obj))) { .Optional => if (obj) |o| deinitObject(allocator, o), .Union => |union_info| { inline for (union_info.fields) |field| { std.debug.print("{s}", field); // need to find active field and deinit it } }, .Struct => |struct_info| { inline for (struct_info.fields) |field| { deinitObject(allocator, @field(obj, field.name)); } }, .Array => {}, // Not implemented below .Pointer => |ptr_info| { switch (ptr_info.size) { .One => { deinitObject(allocator, obj.*); allocator.free(obj); }, .Many => {}, .C => {}, .Slice => { for (obj) |child| deinitObject(allocator, child); allocator.free(obj); }, } }, //.Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum, .Opaque => {}, // no allocations here else => {}, } } // should we just use json parse options? pub const ParseOptions = struct { allocator: ?std.mem.Allocator = null, match_predicate_ptr: ?*const fn (a: []const u8, b: []const u8, options: xml.PredicateOptions) anyerror!bool = null, }; pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parsed(T) { if (options.allocator == null) return error.AllocatorRequired; // we are only leaving it be null for compatibility with json const allocator = options.allocator.?; var arena_allocator = std.heap.ArenaAllocator.init(allocator); const aa = arena_allocator.allocator(); errdefer arena_allocator.deinit(); const parsed = try xml.parse(aa, source); errdefer parsed.deinit(); const opts = ParseOptions{ .allocator = aa, .match_predicate_ptr = options.match_predicate_ptr, }; return Parsed(T).init(arena_allocator, try parseInternal(T, parsed.root, opts), parsed); } fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T { switch (@typeInfo(T)) { .Bool => { if (std.ascii.eqlIgnoreCase("true", element.children.items[0].CharData)) return true; if (std.ascii.eqlIgnoreCase("false", element.children.items[0].CharData)) return false; return error.UnexpectedToken; }, .Float, .ComptimeFloat => { return std.fmt.parseFloat(T, element.children.items[0].CharData) catch |e| { if (log_parse_traces) { std.log.err( "Could not parse '{s}' as float in element '{s}': {any}", .{ element.children.items[0].CharData, element.tag, e, }, ); if (@errorReturnTrace()) |trace| { std.debug.dumpStackTrace(trace.*); } } return e; }; }, .Int, .ComptimeInt => { // 2021-10-05T16:39:45.000Z return std.fmt.parseInt(T, element.children.items[0].CharData, 10) catch |e| { if (element.children.items[0].CharData[element.children.items[0].CharData.len - 1] == 'Z') { // We have an iso8601 in an integer field (we think) // Try to coerce this into our type const timestamp = try date.parseIso8601ToTimestamp(element.children.items[0].CharData); return std.math.cast(T, timestamp).?; } if (log_parse_traces) { std.log.err( "Could not parse '{s}' as integer in element '{s}': {any}", .{ element.children.items[0].CharData, element.tag, e, }, ); if (@errorReturnTrace()) |trace| { std.debug.dumpStackTrace(trace.*); } } return e; }; }, .Optional => |optional_info| { if (element.children.items.len == 0) { // This is almost certainly incomplete. Empty strings? xsi:nil? return null; } if (element.children.items.len > 0) { // return try parseInternal(optional_info.child, element.elements().next().?, options); return try parseInternal(optional_info.child, element, options); } }, .Enum => |enum_info| { _ = enum_info; // const numeric: ?enum_info.tag_type = std.fmt.parseInt(enum_info.tag_type, element.children.items[0].CharData, 10) catch null; // if (numeric) |num| { // return std.meta.intToEnum(T, num); // } else { // // json parser handles escaping - could this happen here or does chardata handle? // return std.meta.stringToEnum(T, element.CharData); // } }, .Union => |union_info| { if (union_info.tag_type) |_| { // try each of the union fields until we find one that matches // inline for (union_info.fields) |u_field| { // // take a copy of tokens so we can withhold mutations until success // var tokens_copy = tokens.*; // if (parseInternal(u_field.type, token, &tokens_copy, options)) |value| { // tokens.* = tokens_copy; // return @unionInit(T, u_field.name, value); // } else |err| { // // Bubble up error.OutOfMemory // // Parsing some types won't have OutOfMemory in their // // error-sets, for the condition to be valid, merge it in. // if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err; // // Bubble up AllocatorRequired, as it indicates missing option // if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err; // // otherwise continue through the `inline for` // } // } return error.NoUnionMembersMatched; } @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'"); }, .Struct => |struct_info| { var r: T = undefined; var fields_seen = [_]bool{false} ** struct_info.fields.len; var fields_set: u64 = 0; // errdefer { // // TODO: why so high here? This was needed for ec2 describe instances // @setEvalBranchQuota(100000); // inline for (struct_info.fields) |field, i| { // if (fields_seen[i] and !field.is_comptime) { // parseFree(field.type, @field(r, field.name), options); // } // } // } // XML parser provides CharData for whitespace around elements. // We shall ignore extra data for the moment as a performance thing // if (element.children.items.len > struct_info.fields.len) { // std.debug.print("element children: {d}, struct fields: {d}\n", .{ element.children.items.len, struct_info.fields.len }); // for (element.children.items) |child, i| { // switch (child) { // .CharData => std.debug.print("{d}: {s}\n", .{ i, child }), // .Comment => {}, // .Element => {}, // } // } // return error.MoreElementsThanFields; // } log.debug("Processing fields in struct: {s}", .{@typeName(T)}); inline for (struct_info.fields, 0..) |field, i| { var name = field.name; var found_value = false; if (comptime std.meta.trait.hasFn("fieldNameFor")(T)) name = r.fieldNameFor(field.name); log.debug("Field name: {s}, Element: {s}, Adjusted field name: {s}", .{ field.name, element.tag, name }); var iterator = element.findChildrenByTag(name); if (options.match_predicate_ptr) |predicate_ptr| { iterator.predicate = predicate_ptr; iterator.predicate_options = .{ .allocator = options.allocator.? }; } if (try iterator.next()) |child| { // I don't know that we would use comptime here. I'm also // not sure the nuance of setting this... // if (field.is_comptime) { // if (!try parsesTo(field.type, field.default_value.?, tokens, options)) { // return error.UnexpectedValue; // } // } else { log.debug("Found child element {s}", .{child.tag}); // TODO: how do we errdefer this? @field(r, field.name) = try parseInternal(field.type, child, options); fields_seen[i] = true; fields_set = fields_set + 1; found_value = true; } if (@typeInfo(field.type) == .Optional) { // Test "compiler assertion failure 2" // Zig compiler bug circa 0.9.0. Using "and !found_value" // in the if statement above will trigger assertion failure if (!found_value) { // @compileLog("Optional: Field name ", field.name, ", type ", field.type); @field(r, field.name) = null; fields_set = fields_set + 1; found_value = true; } } // Using this else clause breaks zig, so we'll use a boolean instead if (!found_value) { log.err("Could not find a value for field {s}. Looking for {s} in element {s}", .{ field.name, name, element.tag }); return error.NoValueForField; } // } else { // return error.NoValueForField; // } } if (fields_set != struct_info.fields.len) return error.FieldElementMismatch; // see fields_seen for details return r; }, .Array => //|array_info| { return error.ArrayNotImplemented, // switch (token) { // .ArrayBegin => { // var r: T = undefined; // var i: usize = 0; // errdefer { // while (true) : (i -= 1) { // parseFree(arrayInfo.child, r[i], options); // if (i == 0) break; // } // } // while (i < r.len) : (i += 1) { // r[i] = try parse(arrayInfo.child, tokens, options); // } // const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; // switch (tok) { // .ArrayEnd => {}, // else => return error.UnexpectedToken, // } // return r; // }, // .String => |stringToken| { // if (arrayInfo.child != u8) return error.UnexpectedToken; // var r: T = undefined; // const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); // switch (stringToken.escapes) { // .None => mem.copy(u8, &r, source_slice), // .Some => try unescapeValidString(&r, source_slice), // } // return r; // }, // else => return error.UnexpectedToken, // } // }, .Pointer => |ptr_info| { const allocator = options.allocator orelse return error.AllocatorRequired; switch (ptr_info.size) { .One => { const r: T = try allocator.create(ptr_info.child); errdefer allocator.free(r); r.* = try parseInternal(ptr_info.child, element, options); return r; }, .Slice => { // TODO: Detect and deal with arrays. This will require two // passes through the element children - one to // determine if it is an array, one to parse the elements // // foo // bar // if (ptr_info.child != u8) { log.debug("type = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @typeName(ptr_info.child), element.tag }); var iterator = element.elements(); var children = std.ArrayList(ptr_info.child).init(allocator); defer children.deinit(); while (iterator.next()) |child_element| { try children.append(try parseInternal(ptr_info.child, child_element, options)); } return children.toOwnedSlice(); // var inx: usize = 0; // while (inx < children.len) { // switch (element.children.items[inx]) { // .Element => children[inx] = try parseInternal(ptr_info.child, element.children.items[inx].Element, options), // .CharData => children[inx] = try allocator.dupe(u8, element.children.items[inx].CharData), // .Comment => children[inx] = try allocator.dupe(u8, element.children.items[inx].Comment), // This might be an error... // } // inx += 1; // } } return try allocator.dupe(u8, element.children.items[0].CharData); }, .Many => { return error.ManyPointerSizeNotImplemented; }, .C => { return error.CPointerSizeNotImplemented; }, } }, else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), // } // }, // else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), } unreachable; } pub fn fuzzyEqual(a: []const u8, b: []const u8, options: xml.PredicateOptions) !bool { const allocator = options.allocator orelse return error.AllocatorRequired; // std.debug.print("raw: a = '{s}', b = '{s}'\n", .{ a, b }); const lower_a = try std.ascii.allocLowerString(allocator, a); defer allocator.free(lower_a); const lower_b = try std.ascii.allocLowerString(allocator, b); defer allocator.free(lower_b); // std.debug.print("lower: a = '{s}', b = '{s}'\n", .{ lower_a, lower_b }); const normal_a = normalize(lower_a); const normal_b = normalize(lower_b); // std.debug.print("normal: a = '{s}', b = '{s}'\n", .{ normal_a, normal_b }); return std.mem.eql(u8, normal_a, normal_b); } fn normalize(val: []u8) []u8 { var underscores: u64 = 0; for (val, 0..) |ch, i| { if (ch == '_') { underscores = underscores + 1; } else { val[i - underscores] = ch; } } return val[0 .. val.len - underscores]; } const testing = std.testing; test "can parse a simple type" { const allocator = std.testing.allocator; // defer allocator.free(snake_case); const data = \\ \\ \\ bar \\ ; const Example = struct { foo_bar: []const u8, }; // std.debug.print("{s}", .{data}); const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqualStrings("bar", parsed_data.parsed_value.foo_bar); } test "can parse a boolean type" { const allocator = std.testing.allocator; // defer allocator.free(snake_case); const data = \\ \\ \\ true \\ ; const Example = struct { foo_bar: bool, }; // std.debug.print("{s}", .{data}); const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqual(true, parsed_data.parsed_value.foo_bar); } test "can parse an integer type" { const allocator = std.testing.allocator; // defer allocator.free(snake_case); const data = \\ \\ \\ 42 \\ ; const Example = struct { foo_bar: u8, }; // std.debug.print("{s}", .{data}); const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqual(@as(u8, 42), parsed_data.parsed_value.foo_bar); } test "can parse an optional boolean type" { const allocator = std.testing.allocator; const data = \\ \\ \\ true \\ ; const ExampleDoesNotMatter = struct { foo_bar: ?bool = null, }; const parsed_data = try parse(ExampleDoesNotMatter, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqual(@as(?bool, true), parsed_data.parsed_value.foo_bar); } test "can coerce 8601 date to integer" { const allocator = std.testing.allocator; const data = \\ \\ \\ 2021-10-05T16:39:45.000Z \\ ; const ExampleDoesNotMatter = struct { foo_bar: ?i64 = null, }; const parsed_data = try parse(ExampleDoesNotMatter, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqual(@as(i64, 1633451985), parsed_data.parsed_value.foo_bar.?); } // This is the simplest test so far that breaks zig (circa 0.9.0) // See "Using this else clause breaks zig, so we'll use a boolean instead" test "can parse a boolean type (two fields)" { const allocator = std.testing.allocator; const data = \\ \\ \\ true \\ true \\ ; const ExampleDoesNotMatter = struct { foo_bar: bool, foo_baz: bool, }; const parsed_data = try parse(ExampleDoesNotMatter, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqual(@as(bool, true), parsed_data.parsed_value.foo_bar); } var log_parse_traces = true; test "can error without leaking memory" { const allocator = std.testing.allocator; const data = \\ \\ \\ true \\ 12.345 \\ ; const ExampleDoesNotMatter = struct { foo_bar: bool, foo_baz: u64, }; log_parse_traces = false; defer log_parse_traces = true; try std.testing.expectError( error.InvalidCharacter, parse(ExampleDoesNotMatter, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }), ); } test "can parse a nested type" { const allocator = std.testing.allocator; const data = \\ \\ \\ \\ baz \\ \\ ; const Example = struct { foo: struct { bar: []const u8, }, }; const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqualStrings("baz", parsed_data.parsed_value.foo.bar); } test "can parse a nested type - two fields" { const allocator = std.testing.allocator; const data = \\ \\ \\ \\ baz \\ baz \\ \\ ; const Example = struct { foo: struct { bar: []const u8, qux: []const u8, }, }; const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate_ptr = fuzzyEqual }); defer parsed_data.deinit(); try testing.expectEqualStrings("baz", parsed_data.parsed_value.foo.bar); try testing.expectEqualStrings("baz", parsed_data.parsed_value.foo.qux); } const service_metadata: struct { version: []const u8 = "2016-11-15", sdk_id: []const u8 = "EC2", arn_namespace: []const u8 = "ec2", endpoint_prefix: []const u8 = "ec2", sigv4_name: []const u8 = "ec2", name: []const u8 = "AmazonEC2", } = .{}; const describe_regions: struct { action_name: []const u8 = "DescribeRegions", Request: type = struct { // filters: ?[]Filter = null, region_names: ?[][]const u8 = null, dry_run: ?bool = null, all_regions: ?bool = null, pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { const mappings = .{ .filters = "Filter", .region_names = "RegionName", .dry_run = "dryRun", .all_regions = "AllRegions", }; return @field(mappings, field_name); } pub fn metaInfo() struct { service_metadata: @TypeOf(service_metadata), action: @TypeOf(describe_regions) } { return .{ .service_metadata = service_metadata, .action = describe_regions }; } }, Response: type = struct { regions: ?[]struct { // Having two of these causes the zig compiler bug // Only one of them works fine. This leads me to believe that // it has something to do with the inline for endpoint: ?[]const u8 = null, region_name: ?[]const u8 = null, pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { const mappings = .{ .endpoint = "regionEndpoint", .region_name = "regionName", .opt_in_status = "optInStatus", }; return @field(mappings, field_name); } } = null, pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { const mappings = .{ .regions = "regionInfo", }; return @field(mappings, field_name); } }, } = .{}; test "can parse something serious" { // std.testing.log_level = .debug; log.debug("", .{}); const allocator = std.testing.allocator; const data = \\ \\ \\ 8d6bfc99-978b-4146-ba23-2e5fe5b65406 \\ \\ \\ eu-north-1 \\ ec2.eu-north-1.amazonaws.com \\ \\ \\ ap-south-1 \\ ec2.ap-south-1.amazonaws.com \\ \\ \\ ; // const ServerResponse = struct { DescribeRegionsResponse: describe_regions.Response, }; const parsed_data = try parse(describe_regions.Response, data, .{ .allocator = allocator }); defer parsed_data.deinit(); try testing.expect(parsed_data.parsed_value.regions != null); try testing.expectEqualStrings("eu-north-1", parsed_data.parsed_value.regions.?[0].region_name.?); try testing.expectEqualStrings("ec2.eu-north-1.amazonaws.com", parsed_data.parsed_value.regions.?[0].endpoint.?); } test "compiler assertion failure 2" { // std.testing.log_level = .debug; // log.debug("", .{}); // Actually, we only care here that the code compiles const allocator = std.testing.allocator; const Response: type = struct { key_group_list: ?struct { quantity: i64, // Making this optional will make the code compile items: ?[]struct { key_group: []const u8, } = null, pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { const mappings = .{ .quantity = "Quantity", .items = "Items", }; return @field(mappings, field_name); } } = null, pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { const mappings = .{ .key_group_list = "KeyGroupList", }; return @field(mappings, field_name); } }; const data = \\ \\ \\ \\ 42 \\ \\ ; const parsed_data = try parse(Response, data, .{ .allocator = allocator }); defer parsed_data.deinit(); try testing.expect(parsed_data.parsed_value.key_group_list.?.quantity == 42); }