add xml_shaper (see below)

This will use the Vulkan xml parser to parse data, then
massage that into a passed type. It uses code patterned
off the standard library Json parser to do the work.

The final, commented test is exposing a compiler bug that
needs to be reduced and sent to the zig team. Initial reports
from IRC indicate that the team is not focusing on compiler
bugs until stage 2 is done (hopefully May 2022)
This commit is contained in:
Emil Lerch 2022-02-11 09:22:32 -08:00
parent 240d2ebdd1
commit bb93f41b85
Signed by: lobo
GPG Key ID: A7B62D657EF764F8

507
src/xml_shaper.zig Normal file
View File

@ -0,0 +1,507 @@
const std = @import("std");
const xml = @import("xml.zig");
fn Parsed(comptime T: type) type {
return struct {
allocator: std.mem.Allocator,
parsed_value: T,
const Self = @This();
pub fn init(allocator: std.mem.Allocator, parsedObj: T) Self {
return .{
.allocator = allocator,
.parsed_value = parsedObj,
};
}
pub fn deinit(self: Self) void {
deinitObject(self.allocator, self.parsed_value);
}
fn deinitObject(allocator: std.mem.Allocator, obj: anytype) void {
switch (@typeInfo(@TypeOf(obj))) {
.Optional => if (obj) |o| deinitObject(allocator, o),
.Union => |union_info| {
inline for (union_info.fields) |field| {
std.debug.print("{s}", field); // need to find active field and deinit it
}
},
.Struct => |struct_info| {
inline for (struct_info.fields) |field| {
deinitObject(allocator, @field(obj, field.name));
}
},
.Array => {}, // Not implemented below
.Pointer => |ptr_info| {
switch (ptr_info.size) {
.One => {
deinitObject(allocator, obj.*);
allocator.free(obj);
},
.Many => {},
.C => {},
.Slice => {
allocator.free(obj);
},
}
},
//.Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum, .Opaque => {}, // no allocations here
else => {},
}
}
};
}
pub fn Parser(comptime T: type) type {
return struct {
ParseType: type = T,
ReturnType: type = Parsed(T),
const Self = @This();
pub fn parse(source: []const u8, options: ParseOptions) !Parsed(T) {
if (options.allocator == null)
return error.AllocatorRequired; // we are only leaving it be null for compatibility with json
const allocator = options.allocator.?;
const parse_allocator = std.heap.ArenaAllocator.init(allocator);
const parsed = try xml.parse(allocator, source);
defer parsed.deinit();
defer parse_allocator.deinit();
return Parsed(T).init(allocator, try parseInternal(T, parsed.root, options));
}
};
}
// should we just use json parse options?
pub const ParseOptions = struct {
allocator: ?std.mem.Allocator = null,
match_predicate: ?fn (a: []const u8, b: []const u8, options: xml.PredicateOptions) anyerror!bool = null,
};
pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parsed(T) {
if (options.allocator == null)
return error.AllocatorRequired; // we are only leaving it be null for compatibility with json
const allocator = options.allocator.?;
var parse_allocator = std.heap.ArenaAllocator.init(allocator);
const parsed = try xml.parse(parse_allocator.allocator(), source);
// defer parsed.deinit(); // Let the arena allocator whack it all
defer parse_allocator.deinit();
return Parsed(T).init(allocator, try parseInternal(T, parsed.root, options));
}
fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
switch (@typeInfo(T)) {
.Bool => {
if (std.ascii.eqlIgnoreCase("true", element.children.items[0].CharData))
return true;
if (std.ascii.eqlIgnoreCase("false", element.children.items[0].CharData))
return false;
return error.UnexpectedToken;
},
.Float, .ComptimeFloat => {
return try std.fmt.parseFloat(T, element.children.items[0].CharData);
},
.Int, .ComptimeInt => {
return try std.fmt.parseInt(T, element.children.items[0].CharData, 10);
},
.Optional => |optional_info| {
if (element.children.items.len == 0) {
// This is almost certainly incomplete. Empty strings? xsi:nil?
return null;
} else {
// return try parseInternal(optional_info.child, element.elements().next().?, options);
return try parseInternal(optional_info.child, element, options);
}
},
.Enum => |enum_info| {
_ = enum_info;
// const numeric: ?enum_info.tag_type = std.fmt.parseInt(enum_info.tag_type, element.children.items[0].CharData, 10) catch null;
// if (numeric) |num| {
// return std.meta.intToEnum(T, num);
// } else {
// // json parser handles escaping - could this happen here or does chardata handle?
// return std.meta.stringToEnum(T, element.CharData);
// }
},
.Union => |union_info| {
if (union_info.tag_type) |_| {
// try each of the union fields until we find one that matches
// inline for (union_info.fields) |u_field| {
// // take a copy of tokens so we can withhold mutations until success
// var tokens_copy = tokens.*;
// if (parseInternal(u_field.field_type, token, &tokens_copy, options)) |value| {
// tokens.* = tokens_copy;
// return @unionInit(T, u_field.name, value);
// } else |err| {
// // Bubble up error.OutOfMemory
// // Parsing some types won't have OutOfMemory in their
// // error-sets, for the condition to be valid, merge it in.
// if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
// // Bubble up AllocatorRequired, as it indicates missing option
// if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err;
// // otherwise continue through the `inline for`
// }
// }
return error.NoUnionMembersMatched;
} else {
@compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
}
},
.Struct => |struct_info| {
var r: T = undefined;
var fields_seen = [_]bool{false} ** struct_info.fields.len;
var fields_set: u64 = 0;
// errdefer {
// // TODO: why so high here? This was needed for ec2 describe instances
// @setEvalBranchQuota(100000);
// inline for (struct_info.fields) |field, i| {
// if (fields_seen[i] and !field.is_comptime) {
// parseFree(field.field_type, @field(r, field.name), options);
// }
// }
// }
// XML parser provides CharData for whitespace around elements.
// We shall ignore extra data for the moment as a performance thing
// if (element.children.items.len > struct_info.fields.len) {
// std.debug.print("element children: {d}, struct fields: {d}\n", .{ element.children.items.len, struct_info.fields.len });
// for (element.children.items) |child, i| {
// switch (child) {
// .CharData => std.debug.print("{d}: {s}\n", .{ i, child }),
// .Comment => {},
// .Element => {},
// }
// }
// return error.MoreElementsThanFields;
// }
inline for (struct_info.fields) |field, i| {
var name = field.name;
if (comptime std.meta.trait.hasFn("fieldNameFor")(T))
name = r.fieldNameFor(field.name);
std.log.debug("Field name: {s}, Element: {s}, Adjusted field name: {s}\n", .{ field.name, element.tag, name });
var iterator = element.findChildrenByTag(name);
if (options.match_predicate) |predicate| {
iterator.predicate = predicate;
iterator.predicate_options = .{ .allocator = options.allocator.? };
}
if (try iterator.next()) |child| {
// I don't know that we would use comptime here. I'm also
// not sure the nuance of setting this...
// if (field.is_comptime) {
// if (!try parsesTo(field.field_type, field.default_value.?, tokens, options)) {
// return error.UnexpectedValue;
// }
// } else {
@field(r, field.name) = try parseInternal(field.field_type, child, options);
fields_seen[i] = true;
fields_set = fields_set + 1;
// }
} else {
return error.NoValueForField;
}
}
if (fields_set != struct_info.fields.len)
return error.FieldElementMismatch; // see fields_seen for details
return r;
},
.Array => //|array_info| {
return error.ArrayNotImplemented,
// switch (token) {
// .ArrayBegin => {
// var r: T = undefined;
// var i: usize = 0;
// errdefer {
// while (true) : (i -= 1) {
// parseFree(arrayInfo.child, r[i], options);
// if (i == 0) break;
// }
// }
// while (i < r.len) : (i += 1) {
// r[i] = try parse(arrayInfo.child, tokens, options);
// }
// const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
// switch (tok) {
// .ArrayEnd => {},
// else => return error.UnexpectedToken,
// }
// return r;
// },
// .String => |stringToken| {
// if (arrayInfo.child != u8) return error.UnexpectedToken;
// var r: T = undefined;
// const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
// switch (stringToken.escapes) {
// .None => mem.copy(u8, &r, source_slice),
// .Some => try unescapeValidString(&r, source_slice),
// }
// return r;
// },
// else => return error.UnexpectedToken,
// }
// },
.Pointer => |ptr_info| {
const allocator = options.allocator orelse return error.AllocatorRequired;
switch (ptr_info.size) {
.One => {
const r: T = try allocator.create(ptr_info.child);
errdefer allocator.free(r);
r.* = try parseInternal(ptr_info.child, element, options);
return r;
},
.Slice => {
// TODO: Detect and deal with arrays. This will require two
// passes through the element children - one to
// determine if it is an array, one to parse the elements
// <Items>
// <Item>foo</Item>
// <Item>bar</Item>
// <Items>
if (ptr_info.child != u8) {
std.log.debug("ptr_info.child == {s}", .{@typeName(ptr_info.child)});
const children = try allocator.alloc(ptr_info.child, element.children.items.len);
var inx: usize = 0;
while (inx < children.len) {
children[inx] = try parseInternal(ptr_info.child, element.children.items[inx].Element, options);
inx += 1;
}
return children;
}
return try allocator.dupe(u8, element.children.items[0].CharData);
},
.Many => {
return error.ManyPointerSizeNotImplemented;
},
.C => {
return error.CPointerSizeNotImplemented;
},
}
},
else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
// }
// },
// else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
}
unreachable;
}
pub fn fuzzyEqual(a: []const u8, b: []const u8, options: xml.PredicateOptions) !bool {
const allocator = options.allocator orelse return error.AllocatorRequired;
// std.debug.print("raw: a = '{s}', b = '{s}'\n", .{ a, b });
const lower_a = try std.ascii.allocLowerString(allocator, a);
defer allocator.free(lower_a);
const lower_b = try std.ascii.allocLowerString(allocator, b);
defer allocator.free(lower_b);
// std.debug.print("lower: a = '{s}', b = '{s}'\n", .{ lower_a, lower_b });
const normal_a = normalize(lower_a);
const normal_b = normalize(lower_b);
// std.debug.print("normal: a = '{s}', b = '{s}'\n", .{ normal_a, normal_b });
return std.mem.eql(u8, normal_a, normal_b);
}
fn normalize(val: []u8) []u8 {
var underscores: u64 = 0;
for (val) |ch, i| {
if (ch == '_') {
underscores = underscores + 1;
} else {
val[i - underscores] = ch;
}
}
return val[0 .. val.len - underscores];
}
const testing = std.testing;
test "can parse a simple type" {
const allocator = std.testing.allocator;
// defer allocator.free(snake_case);
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>bar</fooBar>
\\</Example>
;
const Example = struct {
foo_bar: []const u8,
};
// std.debug.print("{s}", .{data});
const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqualStrings("bar", parsed_data.parsed_value.foo_bar);
}
test "can parse a boolean type" {
const allocator = std.testing.allocator;
// defer allocator.free(snake_case);
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>true</fooBar>
\\</Example>
;
const Example = struct {
foo_bar: bool,
};
// std.debug.print("{s}", .{data});
const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqual(true, parsed_data.parsed_value.foo_bar);
}
test "can parse an integer type" {
const allocator = std.testing.allocator;
// defer allocator.free(snake_case);
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>42</fooBar>
\\</Example>
;
const Example = struct {
foo_bar: u8,
};
// std.debug.print("{s}", .{data});
const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqual(@as(u8, 42), parsed_data.parsed_value.foo_bar);
}
test "can parse a boolean type" {
const allocator = std.testing.allocator;
// defer allocator.free(snake_case);
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>true</fooBar>
\\</Example>
;
const Example = struct {
foo_bar: bool,
};
// std.debug.print("{s}", .{data});
const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqual(true, parsed_data.parsed_value.foo_bar);
}
test "can parse an optional boolean type" {
const allocator = std.testing.allocator;
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <fooBar>true</fooBar>
\\</Example>
;
const Example = struct {
foo_bar: ?bool = null,
};
const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqual(@as(?bool, true), parsed_data.parsed_value.foo_bar);
}
test "can parse a nested type" {
const allocator = std.testing.allocator;
const data =
\\<?xml version="1.0" encoding="UTF-8"?>
\\<Example xmlns="http://example.example.com/doc/2016-11-15/">
\\ <foo>
\\ <bar>baz</bar>
\\ </foo>
\\</Example>
;
const Example = struct {
foo: struct {
bar: []const u8,
},
};
const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
defer parsed_data.deinit();
try testing.expectEqualStrings("baz", parsed_data.parsed_value.foo.bar);
}
const service_metadata: struct {
version: []const u8 = "2016-11-15",
sdk_id: []const u8 = "EC2",
arn_namespace: []const u8 = "ec2",
endpoint_prefix: []const u8 = "ec2",
sigv4_name: []const u8 = "ec2",
name: []const u8 = "AmazonEC2",
} = .{};
const describe_regions: struct {
action_name: []const u8 = "DescribeRegions",
Request: type = struct {
// filters: ?[]Filter = null,
region_names: ?[][]const u8 = null,
dry_run: ?bool = null,
all_regions: ?bool = null,
pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
const mappings = .{
.filters = "Filter",
.region_names = "RegionName",
.dry_run = "dryRun",
.all_regions = "AllRegions",
};
return @field(mappings, field_name);
}
pub fn metaInfo() struct { service_metadata: @TypeOf(service_metadata), action: @TypeOf(describe_regions) } {
return .{ .service_metadata = service_metadata, .action = describe_regions };
}
},
Response: type = struct {
regions: ?[]struct {
endpoint: ?[]const u8 = null,
region_name: ?[]const u8 = null,
opt_in_status: ?[]const u8 = null,
pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
const mappings = .{
.endpoint = "regionEndpoint",
.region_name = "regionName",
.opt_in_status = "optInStatus",
};
return @field(mappings, field_name);
}
} = null,
pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
const mappings = .{
.regions = "regionInfo",
};
return @field(mappings, field_name);
}
},
} = .{};
// This test results in "broken LLVM module found: Operand is null"
// br i1 %120, label %ErrRetReturn12, <null operand!>, !dbg !10637
//
// This is a bug in the Zig compiler.
// test "can parse something serious" {
// std.testing.log_level = .debug;
// std.log.debug("", .{});
//
// const allocator = std.testing.allocator;
// const data =
// \\<?xml version="1.0" encoding="UTF-8"?>
// \\<DescribeRegionsResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
// \\ <requestId>8d6bfc99-978b-4146-ba23-2e5fe5b65406</requestId>
// \\ <regionInfo>
// \\ <item>
// \\ <regionName>eu-north-1</regionName>
// \\ <regionEndpoint>ec2.eu-north-1.amazonaws.com</regionEndpoint>
// \\ <optInStatus>opt-in-not-required</optInStatus>
// \\ </item>
// \\ <item>
// \\ <regionName>ap-south-1</regionName>
// \\ <regionEndpoint>ec2.ap-south-1.amazonaws.com</regionEndpoint>
// \\ <optInStatus>opt-in-not-required</optInStatus>
// \\ </item>
// \\ </regionInfo>
// \\</DescribeRegionsResponse>
// ;
// const parsed_data = try parse(describe_regions.Response, data, .{ .allocator = allocator });
// defer parsed_data.deinit();
// try testing.expect(parsed_data.parsed_value.regions != null);
// // try testing.expectEqualStrings("eu-north-1", parsed_data.parsed_value.regions.?[0].region_name.?);
// }