Compare commits

...

5 Commits

SHA1        Message                                      Date
ca801799fc  update README with new XML information       2022-02-10 15:29:51 -08:00
            (all checks successful: continuous-integration/drone/push build passing)
f374df3fa1  update generated models for xml mappings     2022-02-10 15:13:44 -08:00
744d834cfd  add predicate option to xml parser           2022-02-10 15:07:50 -08:00
c9369504fa  re-import xml.zig from Vulkan project        2022-02-10 10:20:00 -08:00
4606205b82  update readme for prioritization plan        2022-02-10 09:45:18 -08:00
6 changed files with 717 additions and 40 deletions

View File

@@ -2,13 +2,11 @@
[![Build Status](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/status.svg?ref=refs/heads/master)](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/)
### NOTE: All tests pass, but credentials currently must be passed through the environment
This SDK currently supports all AWS services except EC2 and S3. These two
services only support XML, and zig 0.8.0 and master both trigger compile
errors while incorporating the XML parser. S3 also requires some plumbing
tweaks in the signature calculation. Examples of usage are in src/main.zig.
services only support XML, and zig 0.9.0 and master both trigger compile
errors while incorporating the XML parser in conjunction with the process
that fills in the types. S3 also requires some plumbing tweaks in the signature
calculation. Examples of usage are in src/main.zig.
Current executable size for the demo is 953k (90k of which is the AWS PEM file)
after compiling with -Drelease-safe and
@@ -43,37 +41,33 @@ supersede all other configuration. Note that an alternative endpoint may
require passing in a client option to specify an different TLS root certificate
(pass null to disable certificate verification).
Given that credential handling is still very basic, you may want to look at
the [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) if
your needs include something more robust. Note that that branch supports
x86_64 linux only.
The [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) exists
for posterity, and supports x86_64 linux. The current branch is recommended
moving forward.
## Limitations
There are many nuances of AWS V4 signature calculation. S3 is not supported
because it uses many of these test cases. STS tokens using a session token
are not yet implemented, though should be trivial. I have also seen a few
service errors caused by discrepancies in signatures, though I don't know yet
if this was an issue in the service itself (has not repro'd) or if there
is a latent bug.
Only environment variable based credentials can be used at the moment.
because it uses many of these test cases. WebIdentityToken is not yet
implemented.
TODO List:
* Add option to cache signature keys
* Implement credentials provider
* Implement jitter/exponential backoff
* Implement timeouts and other TODO's in the code
* To work around compiler issues, the best option may be to convert from
XML to JSON, then parse from there. This will be pursued first. It may need
to wait for zig 0.10.0, when the self-hosted compiler is likely to be completed
(zig 0.10.0 eta May 2022). If we need to wait, S3, EC2 and other restXml
protocols will be blocked. A minimal sketch of the parsing half of this idea
appears after this list.
* Implement [AWS restXml protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-restxml-protocol.html).
Includes S3. Total service count 4. This may be blocked due to the same issue as EC2.
* Implement [AWS EC2 query protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-ec2-query-protocol.html).
Includes EC2. Total service count 1. This may be blocked on a compiler bug,
though it has not been tested with zig 0.9.0. It may need to wait for zig 0.10.0,
when the self-hosted compiler is likely to be completed (zig 0.10.0 eta May 2022).
More details and an LLVM IR log can be found in the
though it has not been tested with zig 0.9.0. More details and an LLVM IR log can be found in the
[XML branch](https://git.lerch.org/lobo/aws-sdk-for-zig/src/branch/xml).
* Implement sigv4a signing
* Implement jitter/exponential backoff
* Implement timeouts and other TODO's in the code
* Add option to cache signature keys
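
As a concrete taste of the conversion idea above, here is a minimal sketch of
the parsing half, assuming a file that sits next to the xml.zig added in this
change. Only a leaf element wrapping character data is handled; nested
elements, attributes, and JSON escaping are out of scope, and the leafToJson
helper is hypothetical, not part of this changeset.

const std = @import("std");
const xml = @import("xml.zig");

// Convert one leaf element (an element whose only child is character data)
// into a single-pair JSON object.
fn leafToJson(element: *xml.Element, writer: anytype) !void {
    if (element.children.items.len != 1) return error.NotALeaf;
    const char_data = switch (element.children.items[0]) {
        .CharData => |cd| cd,
        else => return error.NotALeaf,
    };
    try writer.print("{{\"{s}\":\"{s}\"}}", .{ element.tag, char_data });
}

test "leaf element to json" {
    const doc = try xml.parse(std.testing.allocator, "<Name>my-bucket</Name>");
    defer doc.deinit();
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();
    try leafToJson(doc.root, buf.writer());
    try std.testing.expectEqualStrings("{\"Name\":\"my-bucket\"}", buf.items);
}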
Compiler wishlist/watchlist:

View File

@@ -544,12 +544,12 @@ fn generateSimpleTypeFor(_: anytype, type_name: []const u8, writer: anytype) !vo
}
fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, type_type_name: []const u8, writer: anytype, state: GenerationState) anyerror!void {
_ = shape_id;
const Mapping = struct { snake: []const u8, json: []const u8 };
var json_field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len);
const Mapping = struct { snake: []const u8, original: []const u8 };
var field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len);
defer {
for (json_field_name_mappings.items) |mapping|
for (field_name_mappings.items) |mapping|
state.allocator.free(mapping.snake);
json_field_name_mappings.deinit();
field_name_mappings.deinit();
}
// There is an httpQueryParams trait as well, but nobody is using it. API GW
// pretends to, but it's an empty map
@@ -591,15 +591,19 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty
switch (trait) {
.json_name => {
found_name_trait = true;
json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.json_name });
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.json_name });
},
.http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_query }),
.http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_header }),
.xml_name => {
found_name_trait = true;
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.xml_name });
},
.http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_query }),
.http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_header }),
else => {},
}
}
if (!found_name_trait)
json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = member.name });
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = member.name });
defer state.allocator.free(snake_case_member);
try outputIndent(child_state, writer);
const member_name = avoidReserved(snake_case_member);
@@ -637,11 +641,11 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty
//
try writer.writeByte('\n');
try outputIndent(child_state, writer);
_ = try writer.write("pub fn jsonFieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n");
_ = try writer.write("pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n");
var grandchild_state = child_state;
grandchild_state.indent_level += 1;
// We need to force output here because we're referencing the field in the return statement below
try writeMappings(grandchild_state, "", "mappings", json_field_name_mappings, true, writer);
try writeMappings(grandchild_state, "", "mappings", field_name_mappings, true, writer);
try outputIndent(grandchild_state, writer);
_ = try writer.write("return @field(mappings, field_name);\n");
try outputIndent(child_state, writer);
@@ -667,7 +671,7 @@ fn writeStringify(state: GenerationState, fields: [][]const u8, writer: anytype)
try outputIndent(child_state, writer);
try writer.print("if (std.mem.eql(u8, \"{s}\", field_name))\n", .{field});
try outputIndent(return_state, writer);
try writer.print("return try serializeMap(self.{s}, self.jsonFieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field });
try writer.print("return try serializeMap(self.{s}, self.fieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field });
}
try outputIndent(child_state, writer);
_ = try writer.write("return false;\n");
@@ -690,7 +694,7 @@ fn writeMappings(state: GenerationState, @"pub": []const u8, mapping_name: []con
child_state.indent_level += 1;
for (mappings.items) |mapping| {
try outputIndent(child_state, writer);
try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.json });
try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.original });
}
try outputIndent(state, writer);
_ = try writer.write("};\n");

View File

@@ -96,6 +96,7 @@ pub const TraitType = enum {
http_label,
http_query,
json_name,
xml_name,
required,
documentation,
pattern,
@@ -118,6 +119,7 @@ pub const Trait = union(TraitType) {
aws_protocol: AwsProtocol,
ec2_query_name: []const u8,
json_name: []const u8,
xml_name: []const u8,
http: struct {
method: []const u8,
uri: []const u8,
@@ -565,6 +567,8 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra
}
if (std.mem.eql(u8, trait_type, "smithy.api#jsonName"))
return Trait{ .json_name = value.String };
if (std.mem.eql(u8, trait_type, "smithy.api#xmlName"))
return Trait{ .xml_name = value.String };
if (std.mem.eql(u8, trait_type, "smithy.api#httpQuery"))
return Trait{ .http_query = value.String };
if (std.mem.eql(u8, trait_type, "smithy.api#httpHeader"))
@@ -617,7 +621,6 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra
\\smithy.api#timestampFormat
\\smithy.api#xmlAttribute
\\smithy.api#xmlFlattened
\\smithy.api#xmlName
\\smithy.waiters#waitable
;
var iterator = std.mem.split(u8, list, "\n");
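
A quick sketch of the new variant in use; the smithy import path is an
assumption, and the trait value is made up:

const std = @import("std");
const smithy = @import("smithy");

test "xml_name trait variant" {
    const trait = smithy.Trait{ .xml_name = "BucketName" };
    switch (trait) {
        .xml_name => |original| try std.testing.expectEqualStrings("BucketName", original),
        else => return error.UnexpectedTrait,
    }
}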

View File

@@ -432,7 +432,7 @@ fn buildPath(allocator: std.mem.Allocator, raw_uri: []const u8, comptime ActionR
in_var = false;
const replacement_var = raw_uri[start..inx];
inline for (std.meta.fields(ActionRequest)) |field| {
if (std.mem.eql(u8, request.jsonFieldNameFor(field.name), replacement_var)) {
if (std.mem.eql(u8, request.fieldNameFor(field.name), replacement_var)) {
var replacement_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len);
defer replacement_buffer.deinit();
var encoded_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len);
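
The fieldNameFor match above can be exercised in isolation. GetFunctionRequest
below is a hypothetical stand-in for a generated request type:

const std = @import("std");

const GetFunctionRequest = struct {
    function_name: []const u8,
    pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
        const mappings = .{ .function_name = "FunctionName" };
        return @field(mappings, field_name);
    }
};

test "uri template variable matches a renamed field" {
    const req = GetFunctionRequest{ .function_name = "my-fn" };
    // buildPath would have pulled this out of a template such as
    // "/2015-03-31/functions/{FunctionName}"
    const replacement_var = "FunctionName";
    var matched = false;
    inline for (std.meta.fields(GetFunctionRequest)) |field| {
        if (std.mem.eql(u8, req.fieldNameFor(field.name), replacement_var)) matched = true;
    }
    try std.testing.expect(matched);
}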

View File

@@ -2871,8 +2871,8 @@ pub fn stringify(
field_written = try value.jsonStringifyField(Field.name, child_options, out_stream);
if (!field_written) {
if (comptime std.meta.trait.hasFn("jsonFieldNameFor")(T)) {
const name = value.jsonFieldNameFor(Field.name);
if (comptime std.meta.trait.hasFn("fieldNameFor")(T)) {
const name = value.fieldNameFor(Field.name);
try stringify(name, options, out_stream);
} else {
try stringify(Field.name, options, out_stream);
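
The duck-typing check is std.meta.trait.hasFn, which any struct exposing the
helper satisfies. A minimal sketch; the Example struct is made up:

const std = @import("std");

const Example = struct {
    name: []const u8,
    pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {
        const mappings = .{ .name = "Name" };
        return @field(mappings, field_name);
    }
};

test "stringify detects fieldNameFor via hasFn" {
    try std.testing.expect(comptime std.meta.trait.hasFn("fieldNameFor")(Example));
    try std.testing.expect(comptime !std.meta.trait.hasFn("jsonFieldNameFor")(Example));
}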

src/xml.zig (new file, 676 lines added)
View File

@@ -0,0 +1,676 @@
// File sourced from:
// https://github.com/Snektron/vulkan-zig/blob/797ae8af88e84753af9640266de61a985b76b580/generator/xml.zig
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const Allocator = mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const ArrayList = std.ArrayList;
pub const Attribute = struct {
name: []const u8,
value: []const u8,
};
pub const Content = union(enum) {
CharData: []const u8,
Comment: []const u8,
Element: *Element,
};
pub const Element = struct {
pub const AttributeList = ArrayList(*Attribute);
pub const ContentList = ArrayList(Content);
tag: []const u8,
attributes: AttributeList,
children: ContentList,
fn init(tag: []const u8, alloc: Allocator) Element {
return .{
.tag = tag,
.attributes = AttributeList.init(alloc),
.children = ContentList.init(alloc),
};
}
pub fn getAttribute(self: *Element, attrib_name: []const u8) ?[]const u8 {
for (self.attributes.items) |child| {
if (mem.eql(u8, child.name, attrib_name)) {
return child.value;
}
}
return null;
}
pub fn getCharData(self: *Element, child_tag: []const u8) ?[]const u8 {
// findChildByTag can fail (a custom predicate may error); treat failure as not found
const child = (self.findChildByTag(child_tag) catch return null) orelse return null;
if (child.children.items.len != 1) {
return null;
}
return switch (child.children.items[0]) {
.CharData => |char_data| char_data,
else => null,
};
}
pub fn iterator(self: *Element) ChildIterator {
return .{
.items = self.children.items,
.i = 0,
};
}
pub fn elements(self: *Element) ChildElementIterator {
return .{
.inner = self.iterator(),
};
}
pub fn findChildByTag(self: *Element, tag: []const u8) !?*Element {
// next() takes a pointer, so land the iterator in a local first
var it = self.findChildrenByTag(tag);
return try it.next();
}
pub fn findChildrenByTag(self: *Element, tag: []const u8) FindChildrenByTagIterator {
return .{
.inner = self.elements(),
.tag = tag,
};
}
pub const ChildIterator = struct {
items: []Content,
i: usize,
pub fn next(self: *ChildIterator) ?*Content {
if (self.i < self.items.len) {
self.i += 1;
return &self.items[self.i - 1];
}
return null;
}
};
pub const ChildElementIterator = struct {
inner: ChildIterator,
pub fn next(self: *ChildElementIterator) ?*Element {
while (self.inner.next()) |child| {
if (child.* != .Element) {
continue;
}
return child.*.Element;
}
return null;
}
};
fn strictEqual(a: []const u8, b: []const u8, _: PredicateOptions) !bool {
return mem.eql(u8, a, b);
}
pub const FindChildrenByTagIterator = struct {
inner: ChildElementIterator,
tag: []const u8,
predicate: fn (a: []const u8, b: []const u8, options: PredicateOptions) anyerror!bool = strictEqual,
predicate_options: PredicateOptions = .{},
pub fn next(self: *FindChildrenByTagIterator) !?*Element {
while (self.inner.next()) |child| {
if (!try self.predicate(child.tag, self.tag, self.predicate_options)) {
continue;
}
return child;
}
return null;
}
};
};
pub const PredicateOptions = struct {
allocator: ?std.mem.Allocator = null,
};
pub const XmlDecl = struct {
version: []const u8,
encoding: ?[]const u8,
standalone: ?bool,
};
pub const Document = struct {
arena: ArenaAllocator,
xml_decl: ?*XmlDecl,
root: *Element,
pub fn deinit(self: Document) void {
var arena = self.arena; // Copy to stack so self can be taken by value.
arena.deinit();
}
};
const ParseContext = struct {
source: []const u8,
offset: usize,
line: usize,
column: usize,
fn init(source: []const u8) ParseContext {
return .{
.source = source,
.offset = 0,
.line = 0,
.column = 0,
};
}
fn peek(self: *ParseContext) ?u8 {
return if (self.offset < self.source.len) self.source[self.offset] else null;
}
fn consume(self: *ParseContext) !u8 {
if (self.offset < self.source.len) {
return self.consumeNoEof();
}
return error.UnexpectedEof;
}
fn consumeNoEof(self: *ParseContext) u8 {
std.debug.assert(self.offset < self.source.len);
const c = self.source[self.offset];
self.offset += 1;
if (c == '\n') {
self.line += 1;
self.column = 0;
} else {
self.column += 1;
}
return c;
}
fn eat(self: *ParseContext, char: u8) bool {
self.expect(char) catch return false;
return true;
}
fn expect(self: *ParseContext, expected: u8) !void {
if (self.peek()) |actual| {
if (expected != actual) {
return error.UnexpectedCharacter;
}
_ = self.consumeNoEof();
return;
}
return error.UnexpectedEof;
}
fn eatStr(self: *ParseContext, text: []const u8) bool {
self.expectStr(text) catch return false;
return true;
}
fn expectStr(self: *ParseContext, text: []const u8) !void {
if (self.source.len < self.offset + text.len) {
return error.UnexpectedEof;
} else if (std.mem.startsWith(u8, self.source[self.offset..], text)) {
var i: usize = 0;
while (i < text.len) : (i += 1) {
_ = self.consumeNoEof();
}
return;
}
return error.UnexpectedCharacter;
}
fn eatWs(self: *ParseContext) bool {
var ws = false;
while (self.peek()) |ch| {
switch (ch) {
' ', '\t', '\n', '\r' => {
ws = true;
_ = self.consumeNoEof();
},
else => break,
}
}
return ws;
}
fn expectWs(self: *ParseContext) !void {
if (!self.eatWs()) return error.UnexpectedCharacter;
}
fn currentLine(self: ParseContext) []const u8 {
var begin: usize = 0;
if (mem.lastIndexOfScalar(u8, self.source[0..self.offset], '\n')) |prev_nl| {
begin = prev_nl + 1;
}
var end = mem.indexOfScalarPos(u8, self.source, self.offset, '\n') orelse self.source.len;
return self.source[begin..end];
}
};
test "ParseContext" {
{
var ctx = ParseContext.init("I like pythons");
try testing.expectEqual(@as(?u8, 'I'), ctx.peek());
try testing.expectEqual(@as(u8, 'I'), ctx.consumeNoEof());
try testing.expectEqual(@as(?u8, ' '), ctx.peek());
try testing.expectEqual(@as(u8, ' '), try ctx.consume());
try testing.expect(ctx.eat('l'));
try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
try testing.expectEqual(false, ctx.eat('a'));
try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
try ctx.expect('i');
try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
try testing.expectError(error.UnexpectedCharacter, ctx.expect('a'));
try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
try testing.expect(ctx.eatStr("ke"));
try testing.expectEqual(@as(?u8, ' '), ctx.peek());
try testing.expect(ctx.eatWs());
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
try testing.expectEqual(false, ctx.eatWs());
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
try testing.expectEqual(false, ctx.eatStr("aaaaaaaaa"));
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
try testing.expectError(error.UnexpectedEof, ctx.expectStr("aaaaaaaaa"));
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
try testing.expectError(error.UnexpectedCharacter, ctx.expectStr("pytn"));
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
try ctx.expectStr("python");
try testing.expectEqual(@as(?u8, 's'), ctx.peek());
}
{
var ctx = ParseContext.init("");
try testing.expectEqual(ctx.peek(), null);
try testing.expectError(error.UnexpectedEof, ctx.consume());
try testing.expectEqual(ctx.eat('p'), false);
try testing.expectError(error.UnexpectedEof, ctx.expect('p'));
}
}
pub const ParseError = error{
IllegalCharacter,
UnexpectedEof,
UnexpectedCharacter,
UnclosedValue,
UnclosedComment,
InvalidName,
InvalidEntity,
InvalidStandaloneValue,
NonMatchingClosingTag,
InvalidDocument,
OutOfMemory,
};
pub fn parse(backing_allocator: Allocator, source: []const u8) !Document {
var ctx = ParseContext.init(source);
return try parseDocument(&ctx, backing_allocator);
}
fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
var doc = Document{
.arena = ArenaAllocator.init(backing_allocator),
.xml_decl = null,
.root = undefined,
};
errdefer doc.deinit();
const allocator = doc.arena.allocator();
try trySkipComments(ctx, allocator);
doc.xml_decl = try tryParseProlog(ctx, allocator);
_ = ctx.eatWs();
try trySkipComments(ctx, allocator);
doc.root = (try tryParseElement(ctx, allocator)) orelse return error.InvalidDocument;
_ = ctx.eatWs();
try trySkipComments(ctx, allocator);
if (ctx.peek() != null) return error.InvalidDocument;
return doc;
}
fn parseAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
const quote = try ctx.consume();
if (quote != '"' and quote != '\'') return error.UnexpectedCharacter;
const begin = ctx.offset;
while (true) {
const c = ctx.consume() catch return error.UnclosedValue;
if (c == quote) break;
}
const end = ctx.offset - 1;
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
}
fn parseEqAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
_ = ctx.eatWs();
try ctx.expect('=');
_ = ctx.eatWs();
return try parseAttrValue(ctx, alloc);
}
fn parseNameNoDupe(ctx: *ParseContext) ![]const u8 {
// XML's spec on names is very long, so to make this easier
// we just take any character that is not special and not whitespace
const begin = ctx.offset;
while (ctx.peek()) |ch| {
switch (ch) {
' ', '\t', '\n', '\r' => break,
'&', '"', '\'', '<', '>', '?', '=', '/' => break,
else => _ = ctx.consumeNoEof(),
}
}
const end = ctx.offset;
if (begin == end) return error.InvalidName;
return ctx.source[begin..end];
}
fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
const begin = ctx.offset;
while (ctx.peek()) |ch| {
switch (ch) {
'<' => break,
else => _ = ctx.consumeNoEof(),
}
}
const end = ctx.offset;
if (begin == end) return null;
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
}
fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content {
if (try tryParseCharData(ctx, alloc)) |cd| {
return Content{ .CharData = cd };
} else if (try tryParseComment(ctx, alloc)) |comment| {
return Content{ .Comment = comment };
} else if (try tryParseElement(ctx, alloc)) |elem| {
return Content{ .Element = elem };
} else {
return error.UnexpectedCharacter;
}
}
fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
const name = parseNameNoDupe(ctx) catch return null;
_ = ctx.eatWs();
try ctx.expect('=');
_ = ctx.eatWs();
const value = try parseAttrValue(ctx, alloc);
const attr = try alloc.create(Attribute);
attr.name = try alloc.dupe(u8, name);
attr.value = value;
return attr;
}
fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
const start = ctx.offset;
if (!ctx.eat('<')) return null;
const tag = parseNameNoDupe(ctx) catch {
ctx.offset = start;
return null;
};
const element = try alloc.create(Element);
element.* = Element.init(try alloc.dupe(u8, tag), alloc);
while (ctx.eatWs()) {
const attr = (try tryParseAttr(ctx, alloc)) orelse break;
try element.attributes.append(attr);
}
if (ctx.eatStr("/>")) {
return element;
}
try ctx.expect('>');
while (true) {
if (ctx.peek() == null) {
return error.UnexpectedEof;
} else if (ctx.eatStr("</")) {
break;
}
const content = try parseContent(ctx, alloc);
try element.children.append(content);
}
const closing_tag = try parseNameNoDupe(ctx);
if (!std.mem.eql(u8, tag, closing_tag)) {
return error.NonMatchingClosingTag;
}
_ = ctx.eatWs();
try ctx.expect('>');
return element;
}
test "tryParseElement" {
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const alloc = arena.allocator();
{
var ctx = ParseContext.init("<= a='b'/>");
try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc));
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
}
{
var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
const elem = try tryParseElement(&ctx, alloc);
try testing.expectEqualSlices(u8, elem.?.tag, "python");
const size_attr = elem.?.attributes.items[0];
try testing.expectEqualSlices(u8, size_attr.name, "size");
try testing.expectEqualSlices(u8, size_attr.value, "15");
const color_attr = elem.?.attributes.items[1];
try testing.expectEqualSlices(u8, color_attr.name, "color");
try testing.expectEqualSlices(u8, color_attr.value, "green");
}
{
var ctx = ParseContext.init("<python>test</python>");
const elem = try tryParseElement(&ctx, alloc);
try testing.expectEqualSlices(u8, elem.?.tag, "python");
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
}
{
var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
const elem = try tryParseElement(&ctx, alloc);
try testing.expectEqualSlices(u8, elem.?.tag, "a");
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
try testing.expectEqualSlices(u8, elem.?.children.items[2].CharData, "d");
try testing.expectEqualSlices(u8, elem.?.children.items[3].Element.tag, "e");
try testing.expectEqualSlices(u8, elem.?.children.items[4].CharData, "f");
try testing.expectEqualSlices(u8, elem.?.children.items[5].Comment, "g");
}
}
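// The predicate field on FindChildrenByTagIterator, added in this change,
// lets callers replace strictEqual. A small sketch using a case-insensitive
// match; the eqlIgnoreCase wrapper is illustrative and not part of the
// upstream file.
fn eqlIgnoreCase(a: []const u8, b: []const u8, _: PredicateOptions) anyerror!bool {
    return std.ascii.eqlIgnoreCase(a, b);
}
test "findChildrenByTag with a custom predicate" {
    const doc = try parse(testing.allocator, "<a><B/><b/></a>");
    defer doc.deinit();
    var it = doc.root.findChildrenByTag("b");
    it.predicate = eqlIgnoreCase;
    try testing.expectEqualSlices(u8, "B", (try it.next()).?.tag);
    try testing.expectEqualSlices(u8, "b", (try it.next()).?.tag);
    try testing.expectEqual(@as(?*Element, null), try it.next());
}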
fn tryParseProlog(ctx: *ParseContext, alloc: Allocator) !?*XmlDecl {
const start = ctx.offset;
if (!ctx.eatStr("<?") or !mem.eql(u8, try parseNameNoDupe(ctx), "xml")) {
ctx.offset = start;
return null;
}
const decl = try alloc.create(XmlDecl);
decl.encoding = null;
decl.standalone = null;
// Version info is mandatory
try ctx.expectWs();
try ctx.expectStr("version");
decl.version = try parseEqAttrValue(ctx, alloc);
if (ctx.eatWs()) {
// Optional encoding and standalone info
var require_ws = false;
if (ctx.eatStr("encoding")) {
decl.encoding = try parseEqAttrValue(ctx, alloc);
require_ws = true;
}
if (require_ws == ctx.eatWs() and ctx.eatStr("standalone")) {
const standalone = try parseEqAttrValue(ctx, alloc);
if (std.mem.eql(u8, standalone, "yes")) {
decl.standalone = true;
} else if (std.mem.eql(u8, standalone, "no")) {
decl.standalone = false;
} else {
return error.InvalidStandaloneValue;
}
}
_ = ctx.eatWs();
}
try ctx.expectStr("?>");
return decl;
}
test "tryParseProlog" {
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const alloc = arena.allocator();
{
var ctx = ParseContext.init("<?xmla version='aa'?>");
try testing.expectEqual(@as(?*XmlDecl, null), try tryParseProlog(&ctx, alloc));
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
}
{
var ctx = ParseContext.init("<?xml version='aa'?>");
const decl = try tryParseProlog(&ctx, alloc);
try testing.expectEqualSlices(u8, "aa", decl.?.version);
try testing.expectEqual(@as(?[]const u8, null), decl.?.encoding);
try testing.expectEqual(@as(?bool, null), decl.?.standalone);
}
{
var ctx = ParseContext.init("<?xml version=\"aa\" encoding = 'bbb' standalone \t = 'yes'?>");
const decl = try tryParseProlog(&ctx, alloc);
try testing.expectEqualSlices(u8, "aa", decl.?.version);
try testing.expectEqualSlices(u8, "bbb", decl.?.encoding.?);
try testing.expectEqual(@as(?bool, true), decl.?.standalone.?);
}
}
fn trySkipComments(ctx: *ParseContext, alloc: Allocator) !void {
while (try tryParseComment(ctx, alloc)) |_| {
_ = ctx.eatWs();
}
}
fn tryParseComment(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
if (!ctx.eatStr("<!--")) return null;
const begin = ctx.offset;
while (!ctx.eatStr("-->")) {
_ = ctx.consume() catch return error.UnclosedComment;
}
const end = ctx.offset - "-->".len;
return try alloc.dupe(u8, ctx.source[begin..end]);
}
fn unescapeEntity(text: []const u8) !u8 {
const EntitySubstitution = struct { text: []const u8, replacement: u8 };
const entities = [_]EntitySubstitution{
.{ .text = "&lt;", .replacement = '<' },
.{ .text = "&gt;", .replacement = '>' },
.{ .text = "&amp;", .replacement = '&' },
.{ .text = "&apos;", .replacement = '\'' },
.{ .text = "&quot;", .replacement = '"' },
};
for (entities) |entity| {
if (std.mem.eql(u8, text, entity.text)) return entity.replacement;
}
return error.InvalidEntity;
}
fn dupeAndUnescape(alloc: Allocator, text: []const u8) ![]const u8 {
const str = try alloc.alloc(u8, text.len);
var j: usize = 0;
var i: usize = 0;
while (i < text.len) : (j += 1) {
if (text[i] == '&') {
const entity_end = 1 + (mem.indexOfScalarPos(u8, text, i, ';') orelse return error.InvalidEntity);
str[j] = try unescapeEntity(text[i..entity_end]);
i = entity_end;
} else {
str[j] = text[i];
i += 1;
}
}
return alloc.shrink(str, j);
}
test "dupeAndUnescape" {
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const alloc = arena.allocator();
try testing.expectEqualSlices(u8, "test", try dupeAndUnescape(alloc, "test"));
try testing.expectEqualSlices(u8, "a<b&c>d\"e'f<", try dupeAndUnescape(alloc, "a&lt;b&amp;c&gt;d&quot;e&apos;f&lt;"));
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&"));
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&&"));
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&test;"));
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&boa"));
}
test "Top level comments" {
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const alloc = arena.allocator();
const doc = try parse(alloc, "<?xml version='aa'?><!--comment--><python color='green'/><!--another comment-->");
try testing.expectEqualSlices(u8, "python", doc.root.tag);
}
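// A small end-to-end sketch of getCharData; this test is an addition for
// illustration, not part of the upstream file.
test "getCharData" {
    const doc = try parse(testing.allocator, "<python><size>15</size></python>");
    defer doc.deinit();
    try testing.expectEqualSlices(u8, "15", doc.root.getCharData("size").?);
}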