From 7438642d9187b9355581b2c03d95b3688e300d46 Mon Sep 17 00:00:00 2001
From: Simon Hartcher
Date: Tue, 29 Apr 2025 16:56:40 +1000
Subject: [PATCH] fix: xml parser can now handle repeated root arrays

Record a `next_sibling` link on every parsed `xml.Element`.
`tryParseElement` now receives the parent element; once a child has been
parsed up to its closing tag, the most recently appended element child of
the parent is pointed at it.

`xml_shaper` uses that link when filling a non-u8 slice. The new
`detectArrayStyle` reports `.repeated_root` when the slice's child type is
a struct and at least one child element of the current element is named
like one of that struct's fields (after `fieldNameFor` mapping, when the
type declares it). In that case the element itself and every following
sibling carrying the same tag are parsed as array items. Everything else
keeps the previous behaviour (`.collection`): each child element of the
container becomes one item.
---
Review notes:
- The array-style debug message is emitted through the file's scoped
  `log` (as the line it replaces did) instead of `std.log`.
- The new test passes the expected value first to `expectEqualStrings` /
  `expectEqual`, matching the `(expected, actual)` parameter order of
  `std.testing`.
- NOTE(review): sibling linking only happens on the path that reaches the
  closing-tag handling shown in the `-480` hunk. Confirm that no earlier
  return in `tryParseElement` (not visible in this patch) skips it.
- NOTE(review): the XML text inside the test string literals of this copy
  appears to have been stripped (`ParseContext.init("")`, bare `\\`
  lines). Confirm against the original commit before applying.

 src/xml.zig        |  36 +++++++++++----
 src/xml_shaper.zig | 112 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 126 insertions(+), 22 deletions(-)

diff --git a/src/xml.zig b/src/xml.zig
index 471f8a3..bd00414 100644
--- a/src/xml.zig
+++ b/src/xml.zig
@@ -25,6 +25,7 @@ pub const Element = struct {
     tag: []const u8,
     attributes: AttributeList,
     children: ContentList,
+    next_sibling: ?*Element = null,
 
     fn init(tag: []const u8, alloc: Allocator) Element {
         return .{
@@ -347,7 +348,7 @@ fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
     _ = ctx.eatWs();
     try trySkipComments(ctx, allocator);
 
-    doc.root = (try tryParseElement(ctx, allocator)) orelse return error.InvalidDocument;
+    doc.root = (try tryParseElement(ctx, allocator, null)) orelse return error.InvalidDocument;
     _ = ctx.eatWs();
     try trySkipComments(ctx, allocator);
 
@@ -415,12 +416,12 @@ fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
     return try dupeAndUnescape(alloc, ctx.source[begin..end]);
 }
 
-fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content {
+fn parseContent(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) ParseError!Content {
     if (try tryParseCharData(ctx, alloc)) |cd| {
         return Content{ .CharData = cd };
     } else if (try tryParseComment(ctx, alloc)) |comment| {
         return Content{ .Comment = comment };
-    } else if (try tryParseElement(ctx, alloc)) |elem| {
+    } else if (try tryParseElement(ctx, alloc, parent)) |elem| {
         return Content{ .Element = elem };
     } else {
         return error.UnexpectedCharacter;
@@ -440,7 +441,7 @@ fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
     return attr;
 }
 
-fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
+fn tryParseElement(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) !?*Element {
     const start = ctx.offset;
     if (!ctx.eat('<')) return null;
     const tag = parseNameNoDupe(ctx) catch {
@@ -469,7 +470,7 @@ fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
             break;
         }
 
-        const content = try parseContent(ctx, alloc);
+        const content = try parseContent(ctx, alloc, element);
         try element.children.append(content);
     }
 
@@ -480,6 +481,23 @@ fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
 
     _ = ctx.eatWs();
     try ctx.expect('>');
+
+    if (parent) |p| {
+        var last_element: ?*Element = null;
+
+        for (0..p.children.items.len) |i| {
+            const child = p.children.items[p.children.items.len - i - 1];
+            if (child == .Element) {
+                last_element = child.Element;
+                break;
+            }
+        }
+
+        if (last_element) |lc| {
+            lc.next_sibling = element;
+        }
+    }
+
     return element;
 }
 
@@ -490,13 +508,13 @@ test "tryParseElement" {
 
     {
         var ctx = ParseContext.init("<= a='b'/>");
-        try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc));
+        try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc, null));
         try testing.expectEqual(@as(?u8, '<'), ctx.peek());
     }
 
     {
         var ctx = ParseContext.init("");
-        const elem = try tryParseElement(&ctx, alloc);
+        const elem = try tryParseElement(&ctx, alloc, null);
         try testing.expectEqualSlices(u8, elem.?.tag, "python");
 
         const size_attr = elem.?.attributes.items[0];
@@ -510,14 +528,14 @@ test "tryParseElement" {
 
     {
         var ctx = ParseContext.init("test");
-        const elem = try tryParseElement(&ctx, alloc);
+        const elem = try tryParseElement(&ctx, alloc, null);
         try testing.expectEqualSlices(u8, elem.?.tag, "python");
         try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
     }
 
     {
         var ctx = ParseContext.init("bdf");
-        const elem = try tryParseElement(&ctx, alloc);
+        const elem = try tryParseElement(&ctx, alloc, null);
         try testing.expectEqualSlices(u8, elem.?.tag, "a");
         try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
         try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
diff --git a/src/xml_shaper.zig b/src/xml_shaper.zig
index acded96..c23a56f 100644
--- a/src/xml_shaper.zig
+++ b/src/xml_shaper.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const xml = @import("xml.zig");
 const date = @import("date");
+const sm = @import("service_manifest");
 
 const log = std.log.scoped(.xml_shaper);
 
@@ -94,6 +95,53 @@ pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parse
     return Parsed(T).init(arena_allocator, try parseInternal(T, root, opts), parsed);
 }
 
+pub const XmlArrayStyle = enum {
+    collection, // Has a container element and list of child elements
+    repeated_root, // Repeats the same element without a container, e.g. S3 ListBucketResult
+};
+
+fn detectArrayStyle(comptime T: type, element: *xml.Element, options: ParseOptions) !XmlArrayStyle {
+    _ = options;
+
+    if (@typeInfo(T) != .@"struct") {
+        return .collection;
+    }
+
+    // does the element have child elements that match our expected struct?
+    const field_names = comptime blk: {
+        var result: [std.meta.fieldNames(T).len][]const u8 = undefined;
+
+        for (std.meta.fieldNames(T), 0..) |field_name, i| {
+            result[i] = if (@hasDecl(T, "fieldNameFor"))
+                T.fieldNameFor(undefined, field_name)
+            else
+                field_name;
+        }
+
+        break :blk result;
+    };
+
+    var matching_fields: usize = 0;
+    for (element.children.items) |content| {
+        switch (content) {
+            .Element => |el| {
+                for (field_names) |field_name| {
+                    if (std.mem.eql(u8, field_name, el.tag)) {
+                        matching_fields += 1;
+                    }
+                }
+            },
+            else => continue,
+        }
+    }
+
+    if (matching_fields > 0) {
+        return .repeated_root;
+    }
+
+    return .collection;
+}
+
 fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
     switch (@typeInfo(T)) {
         .bool => {
@@ -330,23 +378,31 @@ fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions)
                     // bar
                     //
                     if (ptr_info.child != u8) {
-                        log.debug("type = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @typeName(ptr_info.child), element.tag });
-                        var iterator = element.elements();
+                        const array_style = try detectArrayStyle(ptr_info.child, element, options);
+
+                        log.debug("type = {s}, style = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @tagName(array_style), @typeName(ptr_info.child), element.tag });
+
                         var children = std.ArrayList(ptr_info.child).init(allocator);
                         defer children.deinit();
-                        while (iterator.next()) |child_element| {
-                            try children.append(try parseInternal(ptr_info.child, child_element, options));
+
+                        switch (array_style) {
+                            .collection => {
+                                var iterator = element.elements();
+                                while (iterator.next()) |child_element| {
+                                    try children.append(try parseInternal(ptr_info.child, child_element, options));
+                                }
+                            },
+                            .repeated_root => {
+                                var current: ?*Element = element;
+                                while (current) |el| : (current = el.next_sibling) {
+                                    if (!std.mem.eql(u8, el.tag, element.tag)) continue;
+
+                                    try children.append(try parseInternal(ptr_info.child, el, options));
+                                }
+                            },
                         }
+
                         return children.toOwnedSlice();
-                        // var inx: usize = 0;
-                        // while (inx < children.len) {
-                        //     switch (element.children.items[inx]) {
-                        //         .Element => children[inx] = try parseInternal(ptr_info.child, element.children.items[inx].Element, options),
-                        //         .CharData => children[inx] = try allocator.dupe(u8, element.children.items[inx].CharData),
-                        //         .Comment => children[inx] = try allocator.dupe(u8, element.children.items[inx].Comment), // This might be an error...
-                        //     }
-                        //     inx += 1;
-                        // }
                     }
                     return try allocator.dupe(u8, element.children.items[0].CharData);
                 },
@@ -738,3 +794,33 @@ test "compiler assertion failure 2" {
     defer parsed_data.deinit();
     try testing.expect(parsed_data.parsed_value.key_group_list.?.quantity == 42);
 }
+
+test "can parse list objects" {
+    const data =
+        \\
+        \\
+        \\
+        \\ file1.txt
+        \\ 1024
+        \\
+        \\
+        \\ file2.jpg
+        \\ 2048
+        \\
+        \\
+    ;
+
+    const Response = sm.s3.list_objects_v2.Response;
+
+    const parsed_data = try parse(Response, data, .{ .allocator = testing.allocator });
+    defer parsed_data.deinit();
+
+    const response: Response = parsed_data.parsed_value;
+    const s3_objects: []sm.s3.Object = response.contents.?;
+
+    try testing.expectEqual(2, s3_objects.len);
+    try testing.expectEqualStrings("file1.txt", s3_objects[0].key.?);
+    try testing.expectEqualStrings("file2.jpg", s3_objects[1].key.?);
+    try testing.expectEqual(1024, s3_objects[0].size.?);
+    try testing.expectEqual(2048, s3_objects[1].size.?);
+}