//! xml_shaper: parse an XML document and "shape" it into a caller-supplied Zig type.
//!
//! The document is first parsed with the Vulkan XML parser (`xml.zig`); the
//! resulting element tree is then massaged into the requested type. The shaping
//! code is patterned off the standard library JSON parser.
//!
//! Provenance: introduced as `src/xml_shaper.zig` by commit
//! bb93f41b85b3ae9bd1087fdcd1b27a18f9e84109 (Emil Lerch, Fri, 11 Feb 2022
//! 09:22:32 -0800), subject "add xml_shaper".
//!
//! Known issue recorded by that commit: the final, commented-out test exposes a
//! compiler bug that needs to be reduced and sent to the Zig team. Initial
//! reports from IRC indicated that the team was not focusing on compiler bugs
//! until stage 2 is done (hopefully May 2022).
//!
//! NOTE(review): this file uses pre-0.10 language/std constructs
//! (`|field, i|` captures, `field.field_type`, `std.meta.trait`); confirm the
//! targeted compiler version before modernizing.
const std = @import("std");
const xml = @import("xml.zig");

/// Wraps a parsed value of type `T` together with the allocator that owns
/// every heap allocation reachable from it. Call `deinit` exactly once to
/// release those allocations.
fn Parsed(comptime T: type) type {
    return struct {
        allocator: std.mem.Allocator,
        parsed_value: T,

        const Self = @This();

        /// Pairs `parsedObj` with the `allocator` that was used to build it.
        pub fn init(allocator: std.mem.Allocator, parsedObj: T) Self {
            return .{
                .allocator = allocator,
                .parsed_value = parsedObj,
            };
        }

        /// Frees every allocation reachable from `parsed_value`.
        pub fn deinit(self: Self) void {
            deinitObject(self.allocator, self.parsed_value);
        }
    };
}

/// Recursively releases the allocations reachable from `obj`.
///
/// Mirrors the allocation strategy of `parseInternal`: single-item pointers
/// come from `allocator.create` (released with `destroy`), slices come from
/// `allocator.alloc`/`dupe` (released with `free`). Many-item and C pointers
/// are never produced by the parser and are left untouched.
fn deinitObject(allocator: std.mem.Allocator, obj: anytype) void {
    switch (@typeInfo(@TypeOf(obj))) {
        .Optional => if (obj) |o| deinitObject(allocator, o),
        .Union => |union_info| {
            // Only a tagged union tells us which field is active; an untagged
            // union cannot be produced by parseInternal in the first place.
            if (union_info.tag_type) |Tag| {
                inline for (union_info.fields) |field| {
                    if (obj == @field(Tag, field.name)) {
                        deinitObject(allocator, @field(obj, field.name));
                        break;
                    }
                }
            }
        },
        .Struct => |struct_info| {
            inline for (struct_info.fields) |field| {
                deinitObject(allocator, @field(obj, field.name));
            }
        },
        .Array => {}, // Arrays are not implemented by parseInternal
        .Pointer => |ptr_info| {
            switch (ptr_info.size) {
                .One => {
                    deinitObject(allocator, obj.*);
                    // Created with allocator.create, so destroy (not free).
                    allocator.destroy(obj);
                },
                .Many => {},
                .C => {},
                .Slice => {
                    // Strings have no nested allocations; skip the element walk.
                    if (ptr_info.child != u8) {
                        for (obj) |item| deinitObject(allocator, item);
                    }
                    allocator.free(obj);
                },
            }
        },
        // .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum, .Opaque: no allocations here
        else => {},
    }
}

/// Namespace-style wrapper exposing `parse` for a fixed target type `T`.
pub fn Parser(comptime T: type) type {
    return struct {
        ParseType: type = T,
        ReturnType: type = Parsed(T),

        const Self = @This();

        /// Parses `source` into a `Parsed(T)`. Behaves like the file-level
        /// `parse` function: `options.allocator` is required, and the caller
        /// must call `deinit` on the result.
        pub fn parse(source: []const u8, options: ParseOptions) !Parsed(T) {
            return parseDocument(T, source, options);
        }
    };
}

// should we just use json parse options?
/// Options controlling how a document is shaped into a Zig type.
pub const ParseOptions = struct {
    /// Allocator for the returned value. It is nullable only for compatibility
    /// with the JSON parser's options; parsing fails with
    /// `error.AllocatorRequired` when it is null.
    allocator: ?std.mem.Allocator = null,
    /// Optional predicate used to decide whether a struct field name matches an
    /// element tag (see `fuzzyEqual`). When null, the child iterator's own
    /// default matching is used.
    match_predicate: ?fn (a: []const u8, b: []const u8, options: xml.PredicateOptions) anyerror!bool = null,
};

/// Parses the XML document in `source` and shapes its root element into `T`.
///
/// Returns `error.AllocatorRequired` when `options.allocator` is null. The
/// caller owns the result and must call `deinit` on it.
pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parsed(T) {
    return parseDocument(T, source, options);
}

/// Shared implementation behind `parse` and `Parser(T).parse`.
fn parseDocument(comptime T: type, source: []const u8, options: ParseOptions) !Parsed(T) {
    // The allocator is only nullable for compatibility with json.
    const allocator = options.allocator orelse return error.AllocatorRequired;

    // The intermediate element tree lives in an arena so it can be dropped in
    // one go. The defer is registered before parsing so that a failed XML
    // parse does not leak whatever the parser allocated along the way.
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();

    const document = try xml.parse(arena.allocator(), source);
    // Everything kept in the result is copied out of the arena with `allocator`.
    return Parsed(T).init(allocator, try parseInternal(T, document.root, options));
}

/// Returns the text of `element` when its first child is character data.
/// Fails with `error.UnexpectedToken` when the element is empty or starts with
/// something other than character data.
fn firstCharData(element: *xml.Element) ![]const u8 {
    if (element.children.items.len == 0) return error.UnexpectedToken;
    return switch (element.children.items[0]) {
        .CharData => |text| text,
        else => error.UnexpectedToken,
    };
}

/// Shapes `element` into a value of type `T`, dispatching on the kind of `T`.
///
/// Scalars are read from the element's leading character data, structs are
/// filled field by field from child elements, and pointers/slices are
/// allocated with `options.allocator`.
fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
    switch (@typeInfo(T)) {
        .Bool => {
            const text = try firstCharData(element);
            if (std.ascii.eqlIgnoreCase("true", text))
                return true;
            if (std.ascii.eqlIgnoreCase("false", text))
                return false;
            return error.UnexpectedToken;
        },
        .Float, .ComptimeFloat => {
            return try std.fmt.parseFloat(T, try firstCharData(element));
        },
        .Int, .ComptimeInt => {
            return try std.fmt.parseInt(T, try firstCharData(element), 10);
        },
        .Optional => |optional_info| {
            if (element.children.items.len == 0) {
                // This is almost certainly incomplete. Empty strings? xsi:nil?
                return null;
            } else {
                return try parseInternal(optional_info.child, element, options);
            }
        },
        .Enum => |enum_info| {
            // Accept either the numeric tag value or the tag name, as the JSON
            // parser does.
            const text = try firstCharData(element);
            if (std.fmt.parseInt(enum_info.tag_type, text, 10)) |numeric| {
                return try std.meta.intToEnum(T, numeric);
            } else |_| {
                return std.meta.stringToEnum(T, text) orelse return error.InvalidEnumTag;
            }
        },
        .Union => |union_info| {
            if (union_info.tag_type) |_| {
                // TODO: try each of the union fields until one parses, as the
                // JSON parser does. While doing so, bubble up
                // error.OutOfMemory and error.AllocatorRequired instead of
                // moving on to the next field.
                return error.NoUnionMembersMatched;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |struct_info| {
            var r: T = undefined;
            var fields_seen = [_]bool{false} ** struct_info.fields.len;
            var fields_set: u64 = 0;
            // On failure, release whatever was already parsed into `r`.
            errdefer {
                if (options.allocator) |allocator| {
                    inline for (struct_info.fields) |field, i| {
                        if (fields_seen[i] and !field.is_comptime) {
                            deinitObject(allocator, @field(r, field.name));
                        }
                    }
                }
            }

            // XML parser provides CharData for whitespace around elements.
            // We shall ignore extra data for the moment as a performance thing,
            // so there is no "more elements than fields" check here.

            inline for (struct_info.fields) |field, i| {
                // A type may map its Zig field names to element names.
                var name = field.name;
                if (comptime std.meta.trait.hasFn("fieldNameFor")(T))
                    name = r.fieldNameFor(field.name);
                std.log.debug("Field name: {s}, Element: {s}, Adjusted field name: {s}\n", .{ field.name, element.tag, name });
                var iterator = element.findChildrenByTag(name);
                if (options.match_predicate) |predicate| {
                    iterator.predicate = predicate;
                    iterator.predicate_options = .{ .allocator = options.allocator.? };
                }
                if (try iterator.next()) |child| {
                    // TODO: comptime fields are not treated specially (the JSON
                    // parser checks that they parse to their default value).
                    @field(r, field.name) = try parseInternal(field.field_type, child, options);
                    fields_seen[i] = true;
                    fields_set = fields_set + 1;
                } else if (comptime (@typeInfo(field.field_type) == .Optional)) {
                    // An absent element is the natural encoding of "no value".
                    @field(r, field.name) = null;
                    fields_seen[i] = true;
                    fields_set = fields_set + 1;
                } else {
                    return error.NoValueForField;
                }
            }
            if (fields_set != struct_info.fields.len)
                return error.FieldElementMismatch; // see fields_seen for details
            return r;
        },
        // TODO: implement fixed-size arrays (the JSON parser handles both an
        // element list and, for u8 arrays, a string).
        .Array => return error.ArrayNotImplemented,
        .Pointer => |ptr_info| {
            const allocator = options.allocator orelse return error.AllocatorRequired;
            switch (ptr_info.size) {
                .One => {
                    const r = try allocator.create(ptr_info.child);
                    errdefer allocator.destroy(r);
                    r.* = try parseInternal(ptr_info.child, element, options);
                    return r;
                },
                .Slice => {
                    // TODO: Detect and deal with arrays whose items are not
                    // wrapped in a dedicated list element. This will require
                    // inspecting the children to determine whether the element
                    // is a list before parsing the items.
                    if (ptr_info.child != u8) {
                        std.log.debug("ptr_info.child == {s}", .{@typeName(ptr_info.child)});

                        // First pass: only Element children are list items;
                        // whitespace character data and comments are skipped.
                        var count: usize = 0;
                        for (element.children.items) |child| {
                            if (child == .Element) count += 1;
                        }

                        const items = try allocator.alloc(ptr_info.child, count);
                        var filled: usize = 0;
                        errdefer {
                            for (items[0..filled]) |item| deinitObject(allocator, item);
                            allocator.free(items);
                        }

                        // Second pass: parse each item in document order.
                        for (element.children.items) |child| {
                            switch (child) {
                                .Element => |child_element| {
                                    items[filled] = try parseInternal(ptr_info.child, child_element, options);
                                    filled += 1;
                                },
                                else => {},
                            }
                        }
                        return items;
                    }
                    // Strings: an empty element is the empty string.
                    if (element.children.items.len == 0)
                        return try allocator.dupe(u8, "");
                    return try allocator.dupe(u8, try firstCharData(element));
                },
                .Many => {
                    return error.ManyPointerSizeNotImplemented;
                },
                .C => {
                    return error.CPointerSizeNotImplemented;
                },
            }
        },
        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
    }
}

/// Match predicate that treats two names as equal when they are identical
/// after ASCII lower-casing and removal of underscores, so `foo_bar` matches
/// `fooBar` and `FooBar`.
///
/// Requires `options.allocator` (returns `error.AllocatorRequired` otherwise);
/// the temporary copies are freed before returning.
pub fn fuzzyEqual(a: []const u8, b: []const u8, options: xml.PredicateOptions) !bool {
    const allocator = options.allocator orelse return error.AllocatorRequired;
    const lower_a = try std.ascii.allocLowerString(allocator, a);
    defer allocator.free(lower_a);
    const lower_b = try std.ascii.allocLowerString(allocator, b);
    defer allocator.free(lower_b);
    const normal_a = normalize(lower_a);
    const normal_b = normalize(lower_b);

    return std.mem.eql(u8, normal_a, normal_b);
}

/// Removes every '_' from `val` in place and returns the shortened prefix of
/// `val` that holds the remaining bytes. The bytes past the returned length
/// are left in an unspecified state.
fn normalize(val: []u8) []u8 {
    var kept: usize = 0;
    for (val) |ch| {
        if (ch == '_') continue;
        val[kept] = ch;
        kept += 1;
    }
    return val[0..kept];
}

const testing = std.testing;

// NOTE(review): the element names in the fixtures below are assumed to be the
// camelCase form of the struct field names (matched through `fuzzyEqual`);
// confirm against real payloads.

test "fuzzyEqual ignores case and underscores" {
    const allocator = std.testing.allocator;
    try testing.expect(try fuzzyEqual("foo_bar", "FooBar", .{ .allocator = allocator }));
    try testing.expect(try fuzzyEqual("fooBar", "foo_bar", .{ .allocator = allocator }));
    try testing.expect(!(try fuzzyEqual("foo_bar", "fooBaz", .{ .allocator = allocator })));
}

test "can parse a simple type" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <fooBar>bar</fooBar>
        \\</Example>
    ;
    const Example = struct {
        foo_bar: []const u8,
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqualStrings("bar", parsed_data.parsed_value.foo_bar);
}

test "can parse a boolean type" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <fooBar>true</fooBar>
        \\</Example>
    ;
    const Example = struct {
        foo_bar: bool,
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqual(true, parsed_data.parsed_value.foo_bar);
}

test "can parse an integer type" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <fooBar>42</fooBar>
        \\</Example>
    ;
    const Example = struct {
        foo_bar: u8,
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqual(@as(u8, 42), parsed_data.parsed_value.foo_bar);
}

test "can parse a boolean false value" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <fooBar>false</fooBar>
        \\</Example>
    ;
    const Example = struct {
        foo_bar: bool,
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqual(false, parsed_data.parsed_value.foo_bar);
}

test "can parse an optional boolean type" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <fooBar>true</fooBar>
        \\</Example>
    ;
    const Example = struct {
        foo_bar: ?bool = null,
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqual(@as(?bool, true), parsed_data.parsed_value.foo_bar);
}

test "missing optional element parses as null" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <somethingElse>true</somethingElse>
        \\</Example>
    ;
    const Example = struct {
        foo_bar: ?bool = null,
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqual(@as(?bool, null), parsed_data.parsed_value.foo_bar);
}

test "can parse a nested type" {
    const allocator = std.testing.allocator;
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
        \\    <foo>
        \\        <bar>baz</bar>
        \\    </foo>
        \\</Example>
    ;
    const Example = struct {
        foo: struct {
            bar: []const u8,
        },
    };
    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
    defer parsed_data.deinit();
    try testing.expectEqualStrings("baz", parsed_data.parsed_value.foo.bar);
}

// Fixture: service description used by the (currently disabled) EC2 test below.
const service_metadata: struct {
    version: []const u8 = "2016-11-15",
    sdk_id: []const u8 = "EC2",
    arn_namespace: []const u8 = "ec2",
    endpoint_prefix: []const u8 = "ec2",
    sigv4_name: []const u8 = "ec2",
    name: []const u8 = "AmazonEC2",
} = .{};

// Fixture: request/response shapes for the DescribeRegions action. Each shape
// maps its snake_case field names to element names through `fieldNameFor`.
const describe_regions: struct {
    action_name: []const u8 = "DescribeRegions",
    Request: type = struct {
        // filters: ?[]Filter = null,
        region_names: ?[][]const u8 = null,
        dry_run: ?bool = null,
        all_regions: ?bool = null,

        pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
            const mappings = .{
                .filters = "Filter",
                .region_names = "RegionName",
                .dry_run = "dryRun",
                .all_regions = "AllRegions",
            };
            return @field(mappings, field_name);
        }

        pub fn metaInfo() struct { service_metadata: @TypeOf(service_metadata), action: @TypeOf(describe_regions) } {
            return .{ .service_metadata = service_metadata, .action = describe_regions };
        }
    },
    Response: type = struct {
        regions: ?[]struct {
            endpoint: ?[]const u8 = null,
            region_name: ?[]const u8 = null,
            opt_in_status: ?[]const u8 = null,

            pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
                const mappings = .{
                    .endpoint = "regionEndpoint",
                    .region_name = "regionName",
                    .opt_in_status = "optInStatus",
                };
                return @field(mappings, field_name);
            }
        } = null,

        pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
            const mappings = .{
                .regions = "regionInfo",
            };
            return @field(mappings, field_name);
        }
    },
} = .{};

// This test results in "broken LLVM module found: Operand is null"
//    br i1 %120, label %ErrRetReturn12, , !dbg !10637
//
// This is a bug in the Zig compiler.
// NOTE(review): the element names below follow the fieldNameFor mappings of
// describe_regions.Response; confirm against a real DescribeRegions response.
// test "can parse something serious" {
//     std.testing.log_level = .debug;
//     std.log.debug("", .{});
//
//     const allocator = std.testing.allocator;
//     const data =
//         \\<?xml version="1.0" encoding="UTF-8"?>
//         \\<DescribeRegionsResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
//         \\    <requestId>8d6bfc99-978b-4146-ba23-2e5fe5b65406</requestId>
//         \\    <regionInfo>
//         \\        <item>
//         \\            <regionName>eu-north-1</regionName>
//         \\            <regionEndpoint>ec2.eu-north-1.amazonaws.com</regionEndpoint>
//         \\            <optInStatus>opt-in-not-required</optInStatus>
//         \\        </item>
//         \\        <item>
//         \\            <regionName>ap-south-1</regionName>
//         \\            <regionEndpoint>ec2.ap-south-1.amazonaws.com</regionEndpoint>
//         \\            <optInStatus>opt-in-not-required</optInStatus>
//         \\        </item>
//         \\    </regionInfo>
//         \\</DescribeRegionsResponse>
//     ;
//     const parsed_data = try parse(describe_regions.Response, data, .{ .allocator = allocator });
//     defer parsed_data.deinit();
//     try testing.expect(parsed_data.parsed_value.regions != null);
//     // try testing.expectEqualStrings("eu-north-1", parsed_data.parsed_value.regions.?[0].region_name.?);
// }