fix: xml parser can now handle repeated root arrays
This commit is contained in:
parent
a420528a59
commit
7438642d91
2 changed files with 126 additions and 22 deletions
36
src/xml.zig
36
src/xml.zig
|
@ -25,6 +25,7 @@ pub const Element = struct {
|
|||
tag: []const u8,
|
||||
attributes: AttributeList,
|
||||
children: ContentList,
|
||||
next_sibling: ?*Element = null,
|
||||
|
||||
fn init(tag: []const u8, alloc: Allocator) Element {
|
||||
return .{
|
||||
|
@ -347,7 +348,7 @@ fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
|
|||
_ = ctx.eatWs();
|
||||
try trySkipComments(ctx, allocator);
|
||||
|
||||
doc.root = (try tryParseElement(ctx, allocator)) orelse return error.InvalidDocument;
|
||||
doc.root = (try tryParseElement(ctx, allocator, null)) orelse return error.InvalidDocument;
|
||||
_ = ctx.eatWs();
|
||||
try trySkipComments(ctx, allocator);
|
||||
|
||||
|
@ -415,12 +416,12 @@ fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
|
|||
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
|
||||
}
|
||||
|
||||
fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content {
|
||||
fn parseContent(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) ParseError!Content {
|
||||
if (try tryParseCharData(ctx, alloc)) |cd| {
|
||||
return Content{ .CharData = cd };
|
||||
} else if (try tryParseComment(ctx, alloc)) |comment| {
|
||||
return Content{ .Comment = comment };
|
||||
} else if (try tryParseElement(ctx, alloc)) |elem| {
|
||||
} else if (try tryParseElement(ctx, alloc, parent)) |elem| {
|
||||
return Content{ .Element = elem };
|
||||
} else {
|
||||
return error.UnexpectedCharacter;
|
||||
|
@ -440,7 +441,7 @@ fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
|
|||
return attr;
|
||||
}
|
||||
|
||||
fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
||||
fn tryParseElement(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) !?*Element {
|
||||
const start = ctx.offset;
|
||||
if (!ctx.eat('<')) return null;
|
||||
const tag = parseNameNoDupe(ctx) catch {
|
||||
|
@ -469,7 +470,7 @@ fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
|||
break;
|
||||
}
|
||||
|
||||
const content = try parseContent(ctx, alloc);
|
||||
const content = try parseContent(ctx, alloc, element);
|
||||
try element.children.append(content);
|
||||
}
|
||||
|
||||
|
@ -480,6 +481,23 @@ fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
|||
|
||||
_ = ctx.eatWs();
|
||||
try ctx.expect('>');
|
||||
|
||||
if (parent) |p| {
|
||||
var last_element: ?*Element = null;
|
||||
|
||||
for (0..p.children.items.len) |i| {
|
||||
const child = p.children.items[p.children.items.len - i - 1];
|
||||
if (child == .Element) {
|
||||
last_element = child.Element;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_element) |lc| {
|
||||
lc.next_sibling = element;
|
||||
}
|
||||
}
|
||||
|
||||
return element;
|
||||
}
|
||||
|
||||
|
@ -490,13 +508,13 @@ test "tryParseElement" {
|
|||
|
||||
{
|
||||
var ctx = ParseContext.init("<= a='b'/>");
|
||||
try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc));
|
||||
try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc, null));
|
||||
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
|
||||
const elem = try tryParseElement(&ctx, alloc);
|
||||
const elem = try tryParseElement(&ctx, alloc, null);
|
||||
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
||||
|
||||
const size_attr = elem.?.attributes.items[0];
|
||||
|
@ -510,14 +528,14 @@ test "tryParseElement" {
|
|||
|
||||
{
|
||||
var ctx = ParseContext.init("<python>test</python>");
|
||||
const elem = try tryParseElement(&ctx, alloc);
|
||||
const elem = try tryParseElement(&ctx, alloc, null);
|
||||
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
|
||||
const elem = try tryParseElement(&ctx, alloc);
|
||||
const elem = try tryParseElement(&ctx, alloc, null);
|
||||
try testing.expectEqualSlices(u8, elem.?.tag, "a");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
const std = @import("std");
|
||||
const xml = @import("xml.zig");
|
||||
const date = @import("date");
|
||||
const sm = @import("service_manifest");
|
||||
|
||||
const log = std.log.scoped(.xml_shaper);
|
||||
|
||||
|
@ -94,6 +95,53 @@ pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parse
|
|||
return Parsed(T).init(arena_allocator, try parseInternal(T, root, opts), parsed);
|
||||
}
|
||||
|
||||
/// The two ways a list of items can be laid out in an XML document.
pub const XmlArrayStyle = enum {
    /// Has a container element and list of child elements
    collection,
    /// Repeats the same element without a container, e.g. S3 ListBucketResult
    repeated_root,
};
|
||||
|
||||
/// Decides which `XmlArrayStyle` applies when parsing a list of `T` at `element`.
///
/// Non-struct item types always yield `.collection`. For struct item types the
/// tags of `element`'s child elements are compared against the XML names of
/// `T`'s fields (`T.fieldNameFor` when `T` declares it, otherwise the Zig field
/// names). If at least one child tag equals one of those names the result is
/// `.repeated_root`; otherwise it is `.collection`.
///
/// `options` is accepted but currently unused.
fn detectArrayStyle(comptime T: type, element: *xml.Element, options: ParseOptions) !XmlArrayStyle {
    _ = options;

    if (@typeInfo(T) != .@"struct") return .collection;

    // XML names expected for each field of T, resolved at compile time.
    const expected_tags = comptime blk: {
        const zig_names = std.meta.fieldNames(T);
        var tags: [zig_names.len][]const u8 = undefined;

        for (zig_names, 0..) |zig_name, idx| {
            tags[idx] = if (@hasDecl(T, "fieldNameFor"))
                T.fieldNameFor(undefined, zig_name)
            else
                zig_name;
        }

        break :blk tags;
    };

    // A single child element whose tag matches a field name is enough to
    // classify `element` as one item of a repeated-root list.
    for (element.children.items) |content| {
        if (content != .Element) continue;

        const child_tag = content.Element.tag;
        for (expected_tags) |expected| {
            if (std.mem.eql(u8, expected, child_tag)) return .repeated_root;
        }
    }

    return .collection;
}
|
||||
|
||||
fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
|
||||
switch (@typeInfo(T)) {
|
||||
.bool => {
|
||||
|
@ -330,23 +378,31 @@ fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions)
|
|||
// <Item>bar</Item>
|
||||
// <Items>
|
||||
if (ptr_info.child != u8) {
|
||||
log.debug("type = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @typeName(ptr_info.child), element.tag });
|
||||
var iterator = element.elements();
|
||||
const array_style = try detectArrayStyle(ptr_info.child, element, options);
|
||||
|
||||
std.log.debug("type = {s}, style = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @tagName(array_style), @typeName(ptr_info.child), element.tag });
|
||||
|
||||
var children = std.ArrayList(ptr_info.child).init(allocator);
|
||||
defer children.deinit();
|
||||
|
||||
switch (array_style) {
|
||||
.collection => {
|
||||
var iterator = element.elements();
|
||||
while (iterator.next()) |child_element| {
|
||||
try children.append(try parseInternal(ptr_info.child, child_element, options));
|
||||
}
|
||||
},
|
||||
.repeated_root => {
|
||||
var current: ?*Element = element;
|
||||
while (current) |el| : (current = el.next_sibling) {
|
||||
if (!std.mem.eql(u8, el.tag, element.tag)) continue;
|
||||
|
||||
try children.append(try parseInternal(ptr_info.child, el, options));
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
return children.toOwnedSlice();
|
||||
// var inx: usize = 0;
|
||||
// while (inx < children.len) {
|
||||
// switch (element.children.items[inx]) {
|
||||
// .Element => children[inx] = try parseInternal(ptr_info.child, element.children.items[inx].Element, options),
|
||||
// .CharData => children[inx] = try allocator.dupe(u8, element.children.items[inx].CharData),
|
||||
// .Comment => children[inx] = try allocator.dupe(u8, element.children.items[inx].Comment), // This might be an error...
|
||||
// }
|
||||
// inx += 1;
|
||||
// }
|
||||
}
|
||||
return try allocator.dupe(u8, element.children.items[0].CharData);
|
||||
},
|
||||
|
@ -738,3 +794,33 @@ test "compiler assertion failure 2" {
|
|||
defer parsed_data.deinit();
|
||||
try testing.expect(parsed_data.parsed_value.key_group_list.?.quantity == 42);
|
||||
}
|
||||
|
||||
// Checks that a list written as repeated sibling elements with no wrapping
// container (two <Contents> elements directly under <ListBucketResult>) is
// parsed into a slice holding one entry per element, in document order.
test "can parse list objects" {
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<ListBucketResult>
        \\ <Contents>
        \\ <Key>file1.txt</Key>
        \\ <Size>1024</Size>
        \\ </Contents>
        \\ <Contents>
        \\ <Key>file2.jpg</Key>
        \\ <Size>2048</Size>
        \\ </Contents>
        \\</ListBucketResult>
    ;

    const Response = sm.s3.list_objects_v2.Response;

    const parsed_data = try parse(Response, data, .{ .allocator = testing.allocator });
    defer parsed_data.deinit();

    const response: Response = parsed_data.parsed_value;
    const s3_objects: []sm.s3.Object = response.contents.?;

    // std.testing helpers take (expected, actual); keeping that order makes a
    // failure report "expected <literal>, found <parsed value>".
    try testing.expectEqual(2, s3_objects.len);
    try testing.expectEqualStrings("file1.txt", s3_objects[0].key.?);
    try testing.expectEqualStrings("file2.jpg", s3_objects[1].key.?);
    try testing.expectEqual(1024, s3_objects[0].size.?);
    try testing.expectEqual(2048, s3_objects[1].size.?);
}
|
||||
|
|
Loading…
Add table
Reference in a new issue