From bb93f41b85b3ae9bd1087fdcd1b27a18f9e84109 Mon Sep 17 00:00:00 2001
From: Emil Lerch <emil@lerch.org>
Date: Fri, 11 Feb 2022 09:22:32 -0800
Subject: [PATCH] add xml_shaper (see below)

This will use the Vulkan xml parser to parse data, then
massage that into a passed type. It uses code patterned
off the standard library Json parser to do the work.

The final, commented-out test exposes a compiler bug that
needs to be reduced and reported to the Zig team. Initial reports
from IRC indicate that the team is not focusing on compiler
bugs until stage 2 is done (hopefully May 2022).
---
 src/xml_shaper.zig | 507 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 507 insertions(+)
 create mode 100644 src/xml_shaper.zig

diff --git a/src/xml_shaper.zig b/src/xml_shaper.zig
new file mode 100644
index 0000000..30d76d4
--- /dev/null
+++ b/src/xml_shaper.zig
@@ -0,0 +1,507 @@
+const std = @import("std");
+const xml = @import("xml.zig");
+
+/// Pairs a parsed `T` with the allocator that owns its heap data; `deinit` frees it all.
+fn Parsed(comptime T: type) type {
+    return struct {
+        allocator: std.mem.Allocator,
+        parsed_value: T,
+
+        const Self = @This();
+
+        pub fn init(allocator: std.mem.Allocator, parsedObj: T) Self {
+            return .{ .allocator = allocator, .parsed_value = parsedObj };
+        }
+
+        /// Frees every allocation reachable from `parsed_value`.
+        pub fn deinit(self: Self) void {
+            deinitObject(self.allocator, self.parsed_value);
+        }
+
+        /// Walks `obj` by type, freeing the pointers and slices it owns.
+        fn deinitObject(allocator: std.mem.Allocator, obj: anytype) void {
+            switch (@typeInfo(@TypeOf(obj))) {
+                .Optional => if (obj) |o| deinitObject(allocator, o),
+                .Union => |union_info| {
+                    if (union_info.tag_type) |UnionTag| { // only the active field holds live data
+                        inline for (union_info.fields) |field| {
+                            if (obj == @field(UnionTag, field.name))
+                                deinitObject(allocator, @field(obj, field.name));
+                        }
+                    }
+                },
+                .Struct => |struct_info| {
+                    inline for (struct_info.fields) |field| {
+                        deinitObject(allocator, @field(obj, field.name));
+                    }
+                },
+                .Pointer => |ptr_info| switch (ptr_info.size) {
+                    .One => {
+                        deinitObject(allocator, obj.*);
+                        allocator.destroy(obj); // pairs with allocator.create
+                    },
+                    .Slice => {
+                        // Items may own memory themselves (e.g. [][]const u8).
+                        for (obj) |item| deinitObject(allocator, item);
+                        allocator.free(obj);
+                    },
+                    .Many, .C => {}, // the parser never produces these
+                },
+                else => {}, // arrays are not parsed yet; scalars own no memory
+            }
+        }
+    };
+}
+
+/// Type-bound form of the file-level `parse`: `Parser(T).parse(source, options)`.
+pub fn Parser(comptime T: type) type {
+    return struct {
+        ParseType: type = T,
+        ReturnType: type = Parsed(T),
+
+        const Self = @This();
+
+        /// Parses `source` into `T`. The XML tree lives in a scratch arena freed
+        /// on return; the result is allocated with `options.allocator` (required).
+        pub fn parse(source: []const u8, options: ParseOptions) !Parsed(T) {
+            const allocator = options.allocator orelse return error.AllocatorRequired;
+            var parse_allocator = std.heap.ArenaAllocator.init(allocator);
+            defer parse_allocator.deinit(); // registered first so a failed xml.parse cannot leak
+            const parsed = try xml.parse(parse_allocator.allocator(), source);
+            return Parsed(T).init(allocator, try parseInternal(T, parsed.root, options));
+        }
+    };
+}
+/// Options accepted by `parse`. Open question: should we just use json parse options?
+pub const ParseOptions = struct {
+    allocator: ?std.mem.Allocator = null, // required in practice: `parse` returns error.AllocatorRequired when null
+    match_predicate: ?fn (a: []const u8, b: []const u8, options: xml.PredicateOptions) anyerror!bool = null, // when set, installed on the child-tag iterator used for struct fields
+};
+
+/// Parses XML `source` into a `T`; release the result with `Parsed(T).deinit`.
+pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parsed(T) {
+    // The allocator is optional only for compatibility with std.json options.
+    const allocator = options.allocator orelse return error.AllocatorRequired;
+    var parse_allocator = std.heap.ArenaAllocator.init(allocator);
+    // Registered before xml.parse so the arena is also freed when parsing fails.
+    defer parse_allocator.deinit();
+    const parsed = try xml.parse(parse_allocator.allocator(), source);
+    return Parsed(T).init(allocator, try parseInternal(T, parsed.root, options));
+}
+
+/// Returns the leading character data of `element`, or
+/// `error.UnexpectedToken` when the element has no children or its first
+/// child is not character data.
+fn elementText(element: *xml.Element) ![]const u8 {
+    if (element.children.items.len == 0) return error.UnexpectedToken;
+    return switch (element.children.items[0]) {
+        .CharData => |text| text,
+        else => error.UnexpectedToken,
+    };
+}
+
+/// Shapes one XML `element` into a value of type `T`, dispatching on `T`:
+///   * Bool, Int, Float, Enum: read from the element's leading text.
+///   * Optional: null when the element has no children.
+///   * Struct: each field is filled from the child element matching the
+///     field name, or the name returned by `T.fieldNameFor` when `T`
+///     declares it; `options.match_predicate` replaces the iterator's
+///     matching rule. Optional fields without a matching child are null;
+///     any other missing field is `error.NoValueForField`.
+///   * Pointer/slice: allocated with `options.allocator`; a slice of a
+///     non-u8 type takes one item per child element.
+/// Tagged unions, arrays, many-item and C pointers return an error; any
+/// other type is a compile error. Memory allocated before a later failure
+/// is not released yet (see the TODOs below).
+fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
+    switch (@typeInfo(T)) {
+        .Bool => {
+            const text = try elementText(element);
+            if (std.ascii.eqlIgnoreCase("true", text)) return true;
+            if (std.ascii.eqlIgnoreCase("false", text)) return false;
+            return error.UnexpectedToken;
+        },
+        .Float, .ComptimeFloat => {
+            return try std.fmt.parseFloat(T, try elementText(element));
+        },
+        .Int, .ComptimeInt => {
+            return try std.fmt.parseInt(T, try elementText(element), 10);
+        },
+        .Optional => |optional_info| {
+            if (element.children.items.len == 0) {
+                // This is almost certainly incomplete. Empty strings? xsi:nil?
+                return null;
+            } else {
+                // return try parseInternal(optional_info.child, element.elements().next().?, options);
+                return try parseInternal(optional_info.child, element, options);
+            }
+        },
+        .Enum => |enum_info| {
+            // Accept the numeric tag value first, then fall back to the tag name.
+            const text = try elementText(element);
+            if (std.fmt.parseInt(enum_info.tag_type, text, 10)) |numeric| {
+                return std.meta.intToEnum(T, numeric);
+            } else |_| {
+                // json parser handles escaping - could this happen here or does chardata handle?
+                return std.meta.stringToEnum(T, text) orelse error.InvalidEnumTag;
+            }
+        },
+        .Union => |union_info| {
+            if (union_info.tag_type) |_| {
+                // try each of the union fields until we find one that matches
+                // inline for (union_info.fields) |u_field| {
+                //     // take a copy of tokens so we can withhold mutations until success
+                //     var tokens_copy = tokens.*;
+                //     if (parseInternal(u_field.field_type, token, &tokens_copy, options)) |value| {
+                //         tokens.* = tokens_copy;
+                //         return @unionInit(T, u_field.name, value);
+                //     } else |err| {
+                //         // Bubble up error.OutOfMemory
+                //         // Parsing some types won't have OutOfMemory in their
+                //         // error-sets, for the condition to be valid, merge it in.
+                //         if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
+                //         // Bubble up AllocatorRequired, as it indicates missing option
+                //         if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err;
+                //         // otherwise continue through the `inline for`
+                //     }
+                // }
+                return error.NoUnionMembersMatched;
+            } else {
+                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
+            }
+        },
+        .Struct => |struct_info| {
+            var r: T = undefined;
+            var fields_seen = [_]bool{false} ** struct_info.fields.len;
+            var fields_set: u64 = 0;
+            // errdefer {
+            //     // TODO: why so high here? This was needed for ec2 describe instances
+            //     @setEvalBranchQuota(100000);
+            //     inline for (struct_info.fields) |field, i| {
+            //         if (fields_seen[i] and !field.is_comptime) {
+            //             parseFree(field.field_type, @field(r, field.name), options);
+            //         }
+            //     }
+            // }
+
+            // XML parser provides CharData for whitespace around elements.
+            // We shall ignore extra data for the moment as a performance thing
+            // if (element.children.items.len > struct_info.fields.len) {
+            //     std.debug.print("element children: {d}, struct fields: {d}\n", .{ element.children.items.len, struct_info.fields.len });
+            //     for (element.children.items) |child, i| {
+            //         switch (child) {
+            //             .CharData => std.debug.print("{d}: {s}\n", .{ i, child }),
+            //             .Comment => {},
+            //             .Element => {},
+            //         }
+            //     }
+            //     return error.MoreElementsThanFields;
+            // }
+
+            inline for (struct_info.fields) |field, i| {
+                // `r` is still undefined here; `fieldNameFor` is expected to ignore
+                // its receiver and map purely on the comptime field name.
+                var name = field.name;
+                if (comptime std.meta.trait.hasFn("fieldNameFor")(T))
+                    name = r.fieldNameFor(field.name);
+                std.log.debug("Field name: {s}, Element: {s}, Adjusted field name: {s}\n", .{ field.name, element.tag, name });
+                var iterator = element.findChildrenByTag(name);
+                if (options.match_predicate) |predicate| {
+                    iterator.predicate = predicate;
+                    iterator.predicate_options = .{ .allocator = options.allocator.? };
+                }
+                if (try iterator.next()) |child| {
+                    // I don't know that we would use comptime here. I'm also
+                    // not sure the nuance of setting this...
+                    @field(r, field.name) = try parseInternal(field.field_type, child, options);
+                    fields_seen[i] = true;
+                    fields_set = fields_set + 1;
+                } else if (@typeInfo(field.field_type) == .Optional) {
+                    // An absent element leaves an optional field null.
+                    @field(r, field.name) = null;
+                    fields_set = fields_set + 1;
+                } else {
+                    return error.NoValueForField;
+                }
+            }
+            if (fields_set != struct_info.fields.len)
+                return error.FieldElementMismatch; // see fields_seen for details
+            return r;
+        },
+        // TODO: fixed-size arrays. The std.json parser this file is patterned on
+        //       fills `[N]T` item by item and copies strings into `[N]u8`; the
+        //       XML equivalent needs the same child-element walk as the slice
+        //       case below.
+        .Array => return error.ArrayNotImplemented,
+        .Pointer => |ptr_info| {
+            const allocator = options.allocator orelse return error.AllocatorRequired;
+            switch (ptr_info.size) {
+                .One => {
+                    const r: T = try allocator.create(ptr_info.child);
+                    // destroy (not free) is the counterpart of create for one item.
+                    errdefer allocator.destroy(r);
+                    r.* = try parseInternal(ptr_info.child, element, options);
+                    return r;
+                },
+                .Slice => {
+                    if (ptr_info.child == u8) {
+                        // An element without children is the empty string.
+                        if (element.children.items.len == 0) return try allocator.dupe(u8, "");
+                        return try allocator.dupe(u8, try elementText(element));
+                    }
+                    // Item list, e.g. <Items><Item>foo</Item><Item>bar</Item></Items>.
+                    // Only child elements are items: the CharData (whitespace) and
+                    // comments the XML parser reports between them are skipped.
+                    std.log.debug("ptr_info.child == {s}", .{@typeName(ptr_info.child)});
+                    var count: usize = 0;
+                    for (element.children.items) |child| {
+                        if (child == .Element) count += 1;
+                    }
+                    const children = try allocator.alloc(ptr_info.child, count);
+                    // TODO: also release items parsed before a failure.
+                    errdefer allocator.free(children);
+                    var inx: usize = 0;
+                    for (element.children.items) |child| {
+                        switch (child) {
+                            .Element => |item| {
+                                children[inx] = try parseInternal(ptr_info.child, item, options);
+                                inx += 1;
+                            },
+                            else => {},
+                        }
+                    }
+                    return children;
+                },
+                .Many => {
+                    return error.ManyPointerSizeNotImplemented;
+                },
+                .C => {
+                    return error.CPointerSizeNotImplemented;
+                },
+            }
+        },
+        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
+    }
+    // Every prong above either returns or fails to compile.
+    unreachable;
+}
+/// Match predicate: true when `a` and `b` are equal ignoring ASCII case and
+/// underscores, so "foo_bar" matches "fooBar". Needs `options.allocator` for
+/// two temporary lowercase copies, else fails with error.AllocatorRequired.
+pub fn fuzzyEqual(a: []const u8, b: []const u8, options: xml.PredicateOptions) !bool {
+    const allocator = options.allocator orelse return error.AllocatorRequired;
+    const folded_a = try std.ascii.allocLowerString(allocator, a);
+    defer allocator.free(folded_a);
+    const folded_b = try std.ascii.allocLowerString(allocator, b);
+    defer allocator.free(folded_b);
+    // `normalize` compacts in place, so each key is a prefix of its copy.
+    const key_a = normalize(folded_a);
+    const key_b = normalize(folded_b);
+    return std.mem.eql(u8, key_a, key_b);
+}
+
+/// Removes every '_' from `val` in place, compacting the kept bytes to the
+/// front, and returns the shortened prefix of `val`.
+fn normalize(val: []u8) []u8 {
+    var kept: usize = 0;
+    for (val) |ch| {
+        if (ch == '_') continue;
+        val[kept] = ch;
+        kept += 1;
+    }
+    return val[0..kept];
+}
+
+const testing = std.testing;
+// A snake_case field is matched to the camelCase tag via the `fuzzyEqual` predicate.
+test "can parse a simple type" {
+    const Example = struct {
+        foo_bar: []const u8,
+    };
+    const source =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
+        \\    <fooBar>bar</fooBar>
+        \\</Example>
+    ;
+    const result = try parse(Example, source, .{ .allocator = std.testing.allocator, .match_predicate = fuzzyEqual });
+    // std.testing.allocator reports anything `deinit` fails to free.
+    defer result.deinit();
+    const value = result.parsed_value;
+    try testing.expectEqualStrings("bar", value.foo_bar);
+}
+
+// The element text "true" is shaped into a `bool` field.
+test "can parse a boolean type" {
+    const Example = struct {
+        foo_bar: bool,
+    };
+    const source =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
+        \\    <fooBar>true</fooBar>
+        \\</Example>
+    ;
+    // foo_bar is matched to <fooBar> through the `fuzzyEqual` predicate.
+    const result = try parse(Example, source, .{ .allocator = std.testing.allocator, .match_predicate = fuzzyEqual });
+    defer result.deinit();
+    const value = result.parsed_value;
+    try testing.expectEqual(true, value.foo_bar);
+}
+// The element text "42" is parsed as a base-10 `u8`.
+test "can parse an integer type" {
+    const Example = struct {
+        foo_bar: u8,
+    };
+    const source =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
+        \\    <fooBar>42</fooBar>
+        \\</Example>
+    ;
+    // foo_bar is matched to <fooBar> through the `fuzzyEqual` predicate.
+    const result = try parse(Example, source, .{ .allocator = std.testing.allocator, .match_predicate = fuzzyEqual });
+    defer result.deinit();
+    const value = result.parsed_value;
+    try testing.expectEqual(@as(u8, 42), value.foo_bar);
+}
+// Covers the `false` branch of boolean parsing.
+test "can parse a false boolean type" {
+    const allocator = std.testing.allocator;
+    const data =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
+        \\    <fooBar>false</fooBar>
+        \\</Example>
+    ;
+    const Example = struct {
+        foo_bar: bool,
+    };
+    // foo_bar is matched to <fooBar> through the `fuzzyEqual` predicate.
+    const parsed_data = try parse(Example, data, .{ .allocator = allocator, .match_predicate = fuzzyEqual });
+    defer parsed_data.deinit();
+    try testing.expectEqual(false, parsed_data.parsed_value.foo_bar);
+}
+// An optional field whose element is present takes the parsed value.
+test "can parse an optional boolean type" {
+    const Example = struct {
+        foo_bar: ?bool = null,
+    };
+    const source =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
+        \\    <fooBar>true</fooBar>
+        \\</Example>
+    ;
+    const result = try parse(Example, source, .{ .allocator = std.testing.allocator, .match_predicate = fuzzyEqual });
+    defer result.deinit();
+    try testing.expectEqual(@as(?bool, true), result.parsed_value.foo_bar);
+}
+// A struct-typed field is filled by recursing into the matching child element.
+test "can parse a nested type" {
+    const Example = struct {
+        foo: struct {
+            bar: []const u8,
+        },
+    };
+    const source =
+        \\<?xml version="1.0" encoding="UTF-8"?>
+        \\<Example xmlns="http://example.example.com/doc/2016-11-15/">
+        \\    <foo>
+        \\        <bar>baz</bar>
+        \\    </foo>
+        \\</Example>
+    ;
+    const result = try parse(Example, source, .{ .allocator = std.testing.allocator, .match_predicate = fuzzyEqual });
+    defer result.deinit();
+    try testing.expectEqualStrings("baz", result.parsed_value.foo.bar);
+}
+
+const service_metadata: struct { // EC2 service constants; returned by describe_regions.Request.metaInfo
+    version: []const u8 = "2016-11-15", // same date as in the xmlns of the sample documents in this file
+    sdk_id: []const u8 = "EC2",
+    arn_namespace: []const u8 = "ec2",
+    endpoint_prefix: []const u8 = "ec2",
+    sigv4_name: []const u8 = "ec2",
+    name: []const u8 = "AmazonEC2",
+} = .{}; // every field keeps its default value
+
+const describe_regions: struct { // model of the DescribeRegions action; only the commented-out test below uses it
+    action_name: []const u8 = "DescribeRegions",
+    Request: type = struct {
+        // filters: ?[]Filter = null, // disabled: no `Filter` type is declared in this file
+        region_names: ?[][]const u8 = null,
+        dry_run: ?bool = null,
+        all_regions: ?bool = null,
+
+        pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { // field name -> XML tag name; the receiver is ignored
+            const mappings = .{
+                .filters = "Filter", // kept although the `filters` field is commented out
+                .region_names = "RegionName",
+                .dry_run = "dryRun",
+                .all_regions = "AllRegions",
+            };
+            return @field(mappings, field_name); // comptime lookup; an unmapped name is a compile error
+        }
+
+        pub fn metaInfo() struct { service_metadata: @TypeOf(service_metadata), action: @TypeOf(describe_regions) } { // bundles the service constants with this action
+            return .{ .service_metadata = service_metadata, .action = describe_regions };
+        }
+    },
+    Response: type = struct {
+        regions: ?[]struct { // one entry per <item> inside <regionInfo> in the sample response below
+            endpoint: ?[]const u8 = null,
+            region_name: ?[]const u8 = null,
+            opt_in_status: ?[]const u8 = null,
+
+            pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { // field name -> XML tag name; the receiver is ignored
+                const mappings = .{
+                    .endpoint = "regionEndpoint",
+                    .region_name = "regionName",
+                    .opt_in_status = "optInStatus",
+                };
+                return @field(mappings, field_name);
+            }
+        } = null,
+
+        pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 { // field name -> XML tag name; the receiver is ignored
+            const mappings = .{
+                .regions = "regionInfo",
+            };
+            return @field(mappings, field_name);
+        }
+    },
+} = .{}; // every field keeps its default value
+
+// This test results in "broken LLVM module found: Operand is null"
+//   br i1 %120, label %ErrRetReturn12, <null operand!>, !dbg !10637
+//
+// This is a bug in the Zig compiler.
+// test "can parse something serious" {
+//     std.testing.log_level = .debug;
+//     std.log.debug("", .{});
+//
+//     const allocator = std.testing.allocator;
+//     const data =
+//         \\<?xml version="1.0" encoding="UTF-8"?>
+//         \\<DescribeRegionsResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
+//         \\    <requestId>8d6bfc99-978b-4146-ba23-2e5fe5b65406</requestId>
+//         \\    <regionInfo>
+//         \\        <item>
+//         \\            <regionName>eu-north-1</regionName>
+//         \\            <regionEndpoint>ec2.eu-north-1.amazonaws.com</regionEndpoint>
+//         \\            <optInStatus>opt-in-not-required</optInStatus>
+//         \\        </item>
+//         \\        <item>
+//         \\            <regionName>ap-south-1</regionName>
+//         \\            <regionEndpoint>ec2.ap-south-1.amazonaws.com</regionEndpoint>
+//         \\            <optInStatus>opt-in-not-required</optInStatus>
+//         \\        </item>
+//         \\    </regionInfo>
+//         \\</DescribeRegionsResponse>
+//     ;
+//     const parsed_data = try parse(describe_regions.Response, data, .{ .allocator = allocator });
+//     defer parsed_data.deinit();
+//     try testing.expect(parsed_data.parsed_value.regions != null);
+//     // try testing.expectEqualStrings("eu-north-1", parsed_data.parsed_value.regions.?[0].region_name.?);
+// }