fix: xml parser can now handle repeated root arrays
This commit is contained in:
parent
a420528a59
commit
7438642d91
2 changed files with 126 additions and 22 deletions
36
src/xml.zig
36
src/xml.zig
|
@ -25,6 +25,7 @@ pub const Element = struct {
|
||||||
tag: []const u8,
|
tag: []const u8,
|
||||||
attributes: AttributeList,
|
attributes: AttributeList,
|
||||||
children: ContentList,
|
children: ContentList,
|
||||||
|
next_sibling: ?*Element = null,
|
||||||
|
|
||||||
fn init(tag: []const u8, alloc: Allocator) Element {
|
fn init(tag: []const u8, alloc: Allocator) Element {
|
||||||
return .{
|
return .{
|
||||||
|
@ -347,7 +348,7 @@ fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
|
||||||
_ = ctx.eatWs();
|
_ = ctx.eatWs();
|
||||||
try trySkipComments(ctx, allocator);
|
try trySkipComments(ctx, allocator);
|
||||||
|
|
||||||
doc.root = (try tryParseElement(ctx, allocator)) orelse return error.InvalidDocument;
|
doc.root = (try tryParseElement(ctx, allocator, null)) orelse return error.InvalidDocument;
|
||||||
_ = ctx.eatWs();
|
_ = ctx.eatWs();
|
||||||
try trySkipComments(ctx, allocator);
|
try trySkipComments(ctx, allocator);
|
||||||
|
|
||||||
|
@ -415,12 +416,12 @@ fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
|
||||||
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
|
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content {
|
fn parseContent(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) ParseError!Content {
|
||||||
if (try tryParseCharData(ctx, alloc)) |cd| {
|
if (try tryParseCharData(ctx, alloc)) |cd| {
|
||||||
return Content{ .CharData = cd };
|
return Content{ .CharData = cd };
|
||||||
} else if (try tryParseComment(ctx, alloc)) |comment| {
|
} else if (try tryParseComment(ctx, alloc)) |comment| {
|
||||||
return Content{ .Comment = comment };
|
return Content{ .Comment = comment };
|
||||||
} else if (try tryParseElement(ctx, alloc)) |elem| {
|
} else if (try tryParseElement(ctx, alloc, parent)) |elem| {
|
||||||
return Content{ .Element = elem };
|
return Content{ .Element = elem };
|
||||||
} else {
|
} else {
|
||||||
return error.UnexpectedCharacter;
|
return error.UnexpectedCharacter;
|
||||||
|
@ -440,7 +441,7 @@ fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
|
||||||
return attr;
|
return attr;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
fn tryParseElement(ctx: *ParseContext, alloc: Allocator, parent: ?*Element) !?*Element {
|
||||||
const start = ctx.offset;
|
const start = ctx.offset;
|
||||||
if (!ctx.eat('<')) return null;
|
if (!ctx.eat('<')) return null;
|
||||||
const tag = parseNameNoDupe(ctx) catch {
|
const tag = parseNameNoDupe(ctx) catch {
|
||||||
|
@ -469,7 +470,7 @@ fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const content = try parseContent(ctx, alloc);
|
const content = try parseContent(ctx, alloc, element);
|
||||||
try element.children.append(content);
|
try element.children.append(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -480,6 +481,23 @@ fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
||||||
|
|
||||||
_ = ctx.eatWs();
|
_ = ctx.eatWs();
|
||||||
try ctx.expect('>');
|
try ctx.expect('>');
|
||||||
|
|
||||||
|
if (parent) |p| {
|
||||||
|
var last_element: ?*Element = null;
|
||||||
|
|
||||||
|
for (0..p.children.items.len) |i| {
|
||||||
|
const child = p.children.items[p.children.items.len - i - 1];
|
||||||
|
if (child == .Element) {
|
||||||
|
last_element = child.Element;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last_element) |lc| {
|
||||||
|
lc.next_sibling = element;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return element;
|
return element;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -490,13 +508,13 @@ test "tryParseElement" {
|
||||||
|
|
||||||
{
|
{
|
||||||
var ctx = ParseContext.init("<= a='b'/>");
|
var ctx = ParseContext.init("<= a='b'/>");
|
||||||
try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc));
|
try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc, null));
|
||||||
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
|
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
|
var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
|
||||||
const elem = try tryParseElement(&ctx, alloc);
|
const elem = try tryParseElement(&ctx, alloc, null);
|
||||||
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
||||||
|
|
||||||
const size_attr = elem.?.attributes.items[0];
|
const size_attr = elem.?.attributes.items[0];
|
||||||
|
@ -510,14 +528,14 @@ test "tryParseElement" {
|
||||||
|
|
||||||
{
|
{
|
||||||
var ctx = ParseContext.init("<python>test</python>");
|
var ctx = ParseContext.init("<python>test</python>");
|
||||||
const elem = try tryParseElement(&ctx, alloc);
|
const elem = try tryParseElement(&ctx, alloc, null);
|
||||||
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
||||||
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
|
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
|
var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
|
||||||
const elem = try tryParseElement(&ctx, alloc);
|
const elem = try tryParseElement(&ctx, alloc, null);
|
||||||
try testing.expectEqualSlices(u8, elem.?.tag, "a");
|
try testing.expectEqualSlices(u8, elem.?.tag, "a");
|
||||||
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
|
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
|
||||||
try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
|
try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const xml = @import("xml.zig");
|
const xml = @import("xml.zig");
|
||||||
const date = @import("date");
|
const date = @import("date");
|
||||||
|
const sm = @import("service_manifest");
|
||||||
|
|
||||||
const log = std.log.scoped(.xml_shaper);
|
const log = std.log.scoped(.xml_shaper);
|
||||||
|
|
||||||
|
@ -94,6 +95,53 @@ pub fn parse(comptime T: type, source: []const u8, options: ParseOptions) !Parse
|
||||||
return Parsed(T).init(arena_allocator, try parseInternal(T, root, opts), parsed);
|
return Parsed(T).init(arena_allocator, try parseInternal(T, root, opts), parsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The XML layouts a list may arrive in.
pub const XmlArrayStyle = enum {
    /// Has a container element and list of child elements
    collection,
    /// Repeats the same element without a container, e.g. S3 ListBucketResult
    repeated_root,
};
|
||||||
|
|
||||||
|
/// Decides which list layout `element` belongs to for list items of type `T`.
///
/// Returns `.repeated_root` when at least one direct child element of
/// `element` has a tag equal to one of `T`'s field names (mapped through
/// `T.fieldNameFor` when `T` declares it). Returns `.collection` in every
/// other case, including whenever `T` is not a struct.
///
/// `options` is currently unused.
fn detectArrayStyle(comptime T: type, element: *xml.Element, options: ParseOptions) !XmlArrayStyle {
    _ = options;

    // Only struct item types have field names to compare child tags against.
    if (@typeInfo(T) != .@"struct") return .collection;

    // Tag names the item struct expects, resolved once at compile time.
    const expected_tags = comptime names: {
        const declared = std.meta.fieldNames(T);
        var tags: [declared.len][]const u8 = undefined;

        for (declared, 0..) |name, idx| {
            tags[idx] = if (@hasDecl(T, "fieldNameFor"))
                T.fieldNameFor(undefined, name)
            else
                name;
        }

        break :names tags;
    };

    // A single child element that looks like a field of the item struct is
    // enough: `element` is then treated as one of the repeated items itself.
    for (element.children.items) |content| {
        if (content != .Element) continue;

        const child_tag = content.Element.tag;
        for (expected_tags) |tag| {
            if (std.mem.eql(u8, tag, child_tag)) return .repeated_root;
        }
    }

    return .collection;
}
|
||||||
|
|
||||||
fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
|
fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions) !T {
|
||||||
switch (@typeInfo(T)) {
|
switch (@typeInfo(T)) {
|
||||||
.bool => {
|
.bool => {
|
||||||
|
@ -330,23 +378,31 @@ fn parseInternal(comptime T: type, element: *xml.Element, options: ParseOptions)
|
||||||
// <Item>bar</Item>
|
// <Item>bar</Item>
|
||||||
// <Items>
|
// <Items>
|
||||||
if (ptr_info.child != u8) {
|
if (ptr_info.child != u8) {
|
||||||
log.debug("type = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @typeName(ptr_info.child), element.tag });
|
const array_style = try detectArrayStyle(ptr_info.child, element, options);
|
||||||
var iterator = element.elements();
|
|
||||||
|
std.log.debug("type = {s}, style = {s}, ptr_info.child == {s}, element = {s}", .{ @typeName(T), @tagName(array_style), @typeName(ptr_info.child), element.tag });
|
||||||
|
|
||||||
var children = std.ArrayList(ptr_info.child).init(allocator);
|
var children = std.ArrayList(ptr_info.child).init(allocator);
|
||||||
defer children.deinit();
|
defer children.deinit();
|
||||||
while (iterator.next()) |child_element| {
|
|
||||||
try children.append(try parseInternal(ptr_info.child, child_element, options));
|
switch (array_style) {
|
||||||
|
.collection => {
|
||||||
|
var iterator = element.elements();
|
||||||
|
while (iterator.next()) |child_element| {
|
||||||
|
try children.append(try parseInternal(ptr_info.child, child_element, options));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.repeated_root => {
|
||||||
|
var current: ?*Element = element;
|
||||||
|
while (current) |el| : (current = el.next_sibling) {
|
||||||
|
if (!std.mem.eql(u8, el.tag, element.tag)) continue;
|
||||||
|
|
||||||
|
try children.append(try parseInternal(ptr_info.child, el, options));
|
||||||
|
}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return children.toOwnedSlice();
|
return children.toOwnedSlice();
|
||||||
// var inx: usize = 0;
|
|
||||||
// while (inx < children.len) {
|
|
||||||
// switch (element.children.items[inx]) {
|
|
||||||
// .Element => children[inx] = try parseInternal(ptr_info.child, element.children.items[inx].Element, options),
|
|
||||||
// .CharData => children[inx] = try allocator.dupe(u8, element.children.items[inx].CharData),
|
|
||||||
// .Comment => children[inx] = try allocator.dupe(u8, element.children.items[inx].Comment), // This might be an error...
|
|
||||||
// }
|
|
||||||
// inx += 1;
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
return try allocator.dupe(u8, element.children.items[0].CharData);
|
return try allocator.dupe(u8, element.children.items[0].CharData);
|
||||||
},
|
},
|
||||||
|
@ -738,3 +794,33 @@ test "compiler assertion failure 2" {
|
||||||
defer parsed_data.deinit();
|
defer parsed_data.deinit();
|
||||||
try testing.expect(parsed_data.parsed_value.key_group_list.?.quantity == 42);
|
try testing.expect(parsed_data.parsed_value.key_group_list.?.quantity == 42);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that sibling elements repeated without a wrapping container (the two
// <Contents> entries directly under <ListBucketResult>) are gathered into a
// single slice, in document order.
test "can parse list objects" {
    const data =
        \\<?xml version="1.0" encoding="UTF-8"?>
        \\<ListBucketResult>
        \\ <Contents>
        \\ <Key>file1.txt</Key>
        \\ <Size>1024</Size>
        \\ </Contents>
        \\ <Contents>
        \\ <Key>file2.jpg</Key>
        \\ <Size>2048</Size>
        \\ </Contents>
        \\</ListBucketResult>
    ;

    const Response = sm.s3.list_objects_v2.Response;

    const parsed_data = try parse(Response, data, .{ .allocator = testing.allocator });
    defer parsed_data.deinit();

    const response: Response = parsed_data.parsed_value;
    const s3_objects: []sm.s3.Object = response.contents.?;

    // std.testing helpers take (expected, actual); keeping that order makes
    // failure output label the two values correctly.
    try testing.expectEqual(2, s3_objects.len);
    try testing.expectEqualStrings("file1.txt", s3_objects[0].key.?);
    try testing.expectEqualStrings("file2.jpg", s3_objects[1].key.?);
    try testing.expectEqual(1024, s3_objects[0].size.?);
    try testing.expectEqual(2048, s3_objects[1].size.?);
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue