Compare commits

...

5 Commits

Author SHA1 Message Date
ca801799fc
update README with new XML information
All checks were successful
continuous-integration/drone/push Build is passing
2022-02-10 15:29:51 -08:00
f374df3fa1
update generated models for xml mappings 2022-02-10 15:13:44 -08:00
744d834cfd
add predicate option to xml parser 2022-02-10 15:07:50 -08:00
c9369504fa
re-import xml.zig from Vulkan project 2022-02-10 10:20:00 -08:00
4606205b82
update readme for prioritization plan 2022-02-10 09:45:18 -08:00
6 changed files with 717 additions and 40 deletions

View File

@ -2,13 +2,11 @@
[![Build Status](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/status.svg?ref=refs/heads/master)](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/) [![Build Status](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/status.svg?ref=refs/heads/master)](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/)
### NOTE: All tests pass, but credentials currently must be passed through environment
This SDK currently supports all AWS services except EC2 and S3. These two This SDK currently supports all AWS services except EC2 and S3. These two
services only support XML, and zig 0.8.0 and master both trigger compile services only support XML, and zig 0.9.0 and master both trigger compile
errors while incorporating the XML parser. S3 also requires some plumbing errors while incorporating the XML parser in conjunction with a process
tweaks in the signature calculation. Examples of usage are in src/main.zig. to fill the types. S3 also requires some plumbing tweaks in the signature
calculation. Examples of usage are in src/main.zig.
Current executable size for the demo is 953k (90k of which is the AWS PEM file) Current executable size for the demo is 953k (90k of which is the AWS PEM file)
after compiling with -Drelease-safe and after compiling with -Drelease-safe and
@ -43,37 +41,33 @@ supersede all other configuration. Note that an alternative endpoint may
require passing in a client option to specify a different TLS root certificate require passing in a client option to specify a different TLS root certificate
(pass null to disable certificate verification). (pass null to disable certificate verification).
Given that credential handling is still very basic, you may want to look at The [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) exists
the [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) if for posterity, and supports x86_64 linux. This branch is recommended moving
your needs include something more robust. Note that that branch supports forward.
x86_64 linux only.
## Limitations ## Limitations
There are many nuances of AWS V4 signature calculation. S3 is not supported There are many nuances of AWS V4 signature calculation. S3 is not supported
because it uses many of these test cases. STS tokens using a session token because it uses many of these test cases. WebIdentityToken is not yet
are not yet implemented, though should be trivial. I have also seen a few implemented.
service errors caused by discrepancies in signatures, though I don't know yet
if this was an issue in the service itself (has not repro'd) or if there
is a latent bug.
Only environment variable based credentials can be used at the moment.
TODO List: TODO List:
* Add option to cache signature keys * To work around compiler issues, the best option may be to convert from
* Implement credentials provider Xml to json, then parse from there. This will be pursued first. It may need
* Implement jitter/exponential backoff to wait for zig 0.10.0 when self-hosted compiler is likely to be completed
* Implement timeouts and other TODO's in the code (zig 0.10.0 eta May 2022) discovered. If we need to wait, S3, EC2 and other
restXml protocols will be blocked.
* Implement [AWS restXml protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-restxml-protocol.html). * Implement [AWS restXml protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-restxml-protocol.html).
Includes S3. Total service count 4. This may be blocked due to the same issue as EC2. Includes S3. Total service count 4. This may be blocked due to the same issue as EC2.
* Implement [AWS EC2 query protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-ec2-query-protocol.html). * Implement [AWS EC2 query protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-ec2-query-protocol.html).
Includes EC2. Total service count 1. This may be blocked on a compiler bug, Includes EC2. Total service count 1. This may be blocked on a compiler bug,
though has not been tested with zig 0.9.0. It may need to wait for zig 0.10.0 though has not been tested with zig 0.9.0. More details and llvm ir log can be found in the
when self-hosted compiler is likely to be completed (zig 0.10.0 eta May 2022)
discovered. More details and llvm ir log can be found in the
[XML branch](https://git.lerch.org/lobo/aws-sdk-for-zig/src/branch/xml). [XML branch](https://git.lerch.org/lobo/aws-sdk-for-zig/src/branch/xml).
* Implement sigv4a signing * Implement sigv4a signing
* Implement jitter/exponential backoff
* Implement timeouts and other TODO's in the code
* Add option to cache signature keys
Compiler wishlist/watchlist: Compiler wishlist/watchlist:

View File

@ -544,12 +544,12 @@ fn generateSimpleTypeFor(_: anytype, type_name: []const u8, writer: anytype) !vo
} }
fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, type_type_name: []const u8, writer: anytype, state: GenerationState) anyerror!void { fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, type_type_name: []const u8, writer: anytype, state: GenerationState) anyerror!void {
_ = shape_id; _ = shape_id;
const Mapping = struct { snake: []const u8, json: []const u8 }; const Mapping = struct { snake: []const u8, original: []const u8 };
var json_field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len); var field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len);
defer { defer {
for (json_field_name_mappings.items) |mapping| for (field_name_mappings.items) |mapping|
state.allocator.free(mapping.snake); state.allocator.free(mapping.snake);
json_field_name_mappings.deinit(); field_name_mappings.deinit();
} }
// There is an httpQueryParams trait as well, but nobody is using it. API GW // There is an httpQueryParams trait as well, but nobody is using it. API GW
// pretends to, but it's an empty map // pretends to, but it's an empty map
@ -591,15 +591,19 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty
switch (trait) { switch (trait) {
.json_name => { .json_name => {
found_name_trait = true; found_name_trait = true;
json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.json_name }); field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.json_name });
}, },
.http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_query }), .xml_name => {
.http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_header }), found_name_trait = true;
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.xml_name });
},
.http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_query }),
.http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_header }),
else => {}, else => {},
} }
} }
if (!found_name_trait) if (!found_name_trait)
json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = member.name }); field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = member.name });
defer state.allocator.free(snake_case_member); defer state.allocator.free(snake_case_member);
try outputIndent(child_state, writer); try outputIndent(child_state, writer);
const member_name = avoidReserved(snake_case_member); const member_name = avoidReserved(snake_case_member);
@ -637,11 +641,11 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty
// //
try writer.writeByte('\n'); try writer.writeByte('\n');
try outputIndent(child_state, writer); try outputIndent(child_state, writer);
_ = try writer.write("pub fn jsonFieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n"); _ = try writer.write("pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n");
var grandchild_state = child_state; var grandchild_state = child_state;
grandchild_state.indent_level += 1; grandchild_state.indent_level += 1;
// We need to force output here because we're referencing the field in the return statement below // We need to force output here because we're referencing the field in the return statement below
try writeMappings(grandchild_state, "", "mappings", json_field_name_mappings, true, writer); try writeMappings(grandchild_state, "", "mappings", field_name_mappings, true, writer);
try outputIndent(grandchild_state, writer); try outputIndent(grandchild_state, writer);
_ = try writer.write("return @field(mappings, field_name);\n"); _ = try writer.write("return @field(mappings, field_name);\n");
try outputIndent(child_state, writer); try outputIndent(child_state, writer);
@ -667,7 +671,7 @@ fn writeStringify(state: GenerationState, fields: [][]const u8, writer: anytype)
try outputIndent(child_state, writer); try outputIndent(child_state, writer);
try writer.print("if (std.mem.eql(u8, \"{s}\", field_name))\n", .{field}); try writer.print("if (std.mem.eql(u8, \"{s}\", field_name))\n", .{field});
try outputIndent(return_state, writer); try outputIndent(return_state, writer);
try writer.print("return try serializeMap(self.{s}, self.jsonFieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field }); try writer.print("return try serializeMap(self.{s}, self.fieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field });
} }
try outputIndent(child_state, writer); try outputIndent(child_state, writer);
_ = try writer.write("return false;\n"); _ = try writer.write("return false;\n");
@ -690,7 +694,7 @@ fn writeMappings(state: GenerationState, @"pub": []const u8, mapping_name: []con
child_state.indent_level += 1; child_state.indent_level += 1;
for (mappings.items) |mapping| { for (mappings.items) |mapping| {
try outputIndent(child_state, writer); try outputIndent(child_state, writer);
try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.json }); try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.original });
} }
try outputIndent(state, writer); try outputIndent(state, writer);
_ = try writer.write("};\n"); _ = try writer.write("};\n");

View File

@ -96,6 +96,7 @@ pub const TraitType = enum {
http_label, http_label,
http_query, http_query,
json_name, json_name,
xml_name,
required, required,
documentation, documentation,
pattern, pattern,
@ -118,6 +119,7 @@ pub const Trait = union(TraitType) {
aws_protocol: AwsProtocol, aws_protocol: AwsProtocol,
ec2_query_name: []const u8, ec2_query_name: []const u8,
json_name: []const u8, json_name: []const u8,
xml_name: []const u8,
http: struct { http: struct {
method: []const u8, method: []const u8,
uri: []const u8, uri: []const u8,
@ -565,6 +567,8 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra
} }
if (std.mem.eql(u8, trait_type, "smithy.api#jsonName")) if (std.mem.eql(u8, trait_type, "smithy.api#jsonName"))
return Trait{ .json_name = value.String }; return Trait{ .json_name = value.String };
if (std.mem.eql(u8, trait_type, "smithy.api#xmlName"))
return Trait{ .xml_name = value.String };
if (std.mem.eql(u8, trait_type, "smithy.api#httpQuery")) if (std.mem.eql(u8, trait_type, "smithy.api#httpQuery"))
return Trait{ .http_query = value.String }; return Trait{ .http_query = value.String };
if (std.mem.eql(u8, trait_type, "smithy.api#httpHeader")) if (std.mem.eql(u8, trait_type, "smithy.api#httpHeader"))
@ -617,7 +621,6 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra
\\smithy.api#timestampFormat \\smithy.api#timestampFormat
\\smithy.api#xmlAttribute \\smithy.api#xmlAttribute
\\smithy.api#xmlFlattened \\smithy.api#xmlFlattened
\\smithy.api#xmlName
\\smithy.waiters#waitable \\smithy.waiters#waitable
; ;
var iterator = std.mem.split(u8, list, "\n"); var iterator = std.mem.split(u8, list, "\n");

View File

@ -432,7 +432,7 @@ fn buildPath(allocator: std.mem.Allocator, raw_uri: []const u8, comptime ActionR
in_var = false; in_var = false;
const replacement_var = raw_uri[start..inx]; const replacement_var = raw_uri[start..inx];
inline for (std.meta.fields(ActionRequest)) |field| { inline for (std.meta.fields(ActionRequest)) |field| {
if (std.mem.eql(u8, request.jsonFieldNameFor(field.name), replacement_var)) { if (std.mem.eql(u8, request.fieldNameFor(field.name), replacement_var)) {
var replacement_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len); var replacement_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len);
defer replacement_buffer.deinit(); defer replacement_buffer.deinit();
var encoded_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len); var encoded_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len);

View File

@ -2871,8 +2871,8 @@ pub fn stringify(
field_written = try value.jsonStringifyField(Field.name, child_options, out_stream); field_written = try value.jsonStringifyField(Field.name, child_options, out_stream);
if (!field_written) { if (!field_written) {
if (comptime std.meta.trait.hasFn("jsonFieldNameFor")(T)) { if (comptime std.meta.trait.hasFn("fieldNameFor")(T)) {
const name = value.jsonFieldNameFor(Field.name); const name = value.fieldNameFor(Field.name);
try stringify(name, options, out_stream); try stringify(name, options, out_stream);
} else { } else {
try stringify(Field.name, options, out_stream); try stringify(Field.name, options, out_stream);

676
src/xml.zig Normal file
View File

@ -0,0 +1,676 @@
// File sourced from:
// https://github.com/Snektron/vulkan-zig/blob/797ae8af88e84753af9640266de61a985b76b580/generator/xml.zig
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const Allocator = mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const ArrayList = std.ArrayList;
/// A single attribute of an element, as produced by `tryParseAttr`.
pub const Attribute = struct {
/// Attribute name, copied from the source text.
name: []const u8,
/// Attribute value with the surrounding quotes removed and the predefined
/// entity references already substituted.
value: []const u8,
};
/// One item of an element's content, in document order.
pub const Content = union(enum) {
/// Text found between tags, with entity references already substituted.
CharData: []const u8,
/// The text between `<!--` and `-->`, copied verbatim.
Comment: []const u8,
/// A nested child element.
Element: *Element,
};
/// An XML element: a tag name together with its attributes and its ordered
/// child content (character data, comments and nested elements).
pub const Element = struct {
    pub const AttributeList = ArrayList(*Attribute);
    pub const ContentList = ArrayList(Content);

    tag: []const u8,
    attributes: AttributeList,
    children: ContentList,

    /// Creates an element named `tag` whose attribute and child lists are
    /// empty and backed by `alloc`. `tag` is stored as given, not copied.
    fn init(tag: []const u8, alloc: Allocator) Element {
        return .{
            .tag = tag,
            .attributes = AttributeList.init(alloc),
            .children = ContentList.init(alloc),
        };
    }

    /// Returns the value of the first attribute whose name equals
    /// `attrib_name` byte-for-byte, or null when there is none.
    pub fn getAttribute(self: *Element, attrib_name: []const u8) ?[]const u8 {
        for (self.attributes.items) |child| {
            if (mem.eql(u8, child.name, attrib_name)) {
                return child.value;
            }
        }
        return null;
    }

    /// Returns the character data of the first child element tagged
    /// `child_tag`. Yields null when no such child exists, when that child
    /// does not hold exactly one content item, or when that single item is
    /// not character data.
    pub fn getCharData(self: *Element, child_tag: []const u8) ?[]const u8 {
        // `findChildByTag` returns an error union wrapping an optional, so the
        // error layer has to be unwrapped before `orelse` can be applied. A
        // failing predicate is reported as "not found" to keep this function's
        // plain optional return type.
        const child = (self.findChildByTag(child_tag) catch return null) orelse return null;
        if (child.children.items.len != 1) {
            return null;
        }

        return switch (child.children.items[0]) {
            .CharData => |char_data| char_data,
            else => null,
        };
    }

    /// Returns an iterator over every content item of this element.
    pub fn iterator(self: *Element) ChildIterator {
        return .{
            .items = self.children.items,
            .i = 0,
        };
    }

    /// Returns an iterator over the child elements only, skipping character
    /// data and comments.
    pub fn elements(self: *Element) ChildElementIterator {
        return .{
            .inner = self.iterator(),
        };
    }

    /// Returns the first child element accepted by the default (exact match)
    /// predicate for `tag`, or null when no child matches.
    pub fn findChildByTag(self: *Element, tag: []const u8) !?*Element {
        // `next` mutates the iterator, so it needs a mutable local rather than
        // the temporary returned by `findChildrenByTag`.
        var matches = self.findChildrenByTag(tag);
        return try matches.next();
    }

    /// Returns an iterator over the child elements matching `tag`. The
    /// iterator starts with the exact-match predicate; callers may replace
    /// `predicate` and `predicate_options` on the returned value.
    pub fn findChildrenByTag(self: *Element, tag: []const u8) FindChildrenByTagIterator {
        return .{
            .inner = self.elements(),
            .tag = tag,
        };
    }

    /// Walks a slice of content items front to back.
    pub const ChildIterator = struct {
        items: []Content,
        i: usize,

        /// Returns a pointer to the next content item, or null once the
        /// slice is exhausted.
        pub fn next(self: *ChildIterator) ?*Content {
            if (self.i < self.items.len) {
                self.i += 1;
                return &self.items[self.i - 1];
            }

            return null;
        }
    };

    /// Walks content items and yields only those that are elements.
    pub const ChildElementIterator = struct {
        inner: ChildIterator,

        /// Returns the next child element, or null when none remain.
        pub fn next(self: *ChildElementIterator) ?*Element {
            while (self.inner.next()) |child| {
                if (child.* != .Element) {
                    continue;
                }

                return child.*.Element;
            }

            return null;
        }
    };

    /// Default tag predicate: byte-for-byte equality. The options argument
    /// is ignored.
    fn strictEqual(a: []const u8, b: []const u8, _: PredicateOptions) !bool {
        return mem.eql(u8, a, b);
    }

    /// Walks child elements and yields those for which `predicate` returns
    /// true when given the child's tag and the wanted tag.
    pub const FindChildrenByTagIterator = struct {
        inner: ChildElementIterator,
        tag: []const u8,
        predicate: fn (a: []const u8, b: []const u8, options: PredicateOptions) anyerror!bool = strictEqual,
        predicate_options: PredicateOptions = .{},

        /// Returns the next matching child element, or null when none
        /// remain. Any error returned by the predicate is propagated.
        pub fn next(self: *FindChildrenByTagIterator) !?*Element {
            while (self.inner.next()) |child| {
                if (!try self.predicate(child.tag, self.tag, self.predicate_options)) {
                    continue;
                }

                return child;
            }

            return null;
        }
    };
};
/// Extra state handed to a `FindChildrenByTagIterator` predicate on every
/// comparison.
pub const PredicateOptions = struct {
/// Optional allocator made available to the predicate; null by default.
/// The default predicate, `strictEqual`, ignores it.
allocator: ?std.mem.Allocator = null,
};
/// The contents of an `<?xml ... ?>` declaration, as filled in by
/// `tryParseProlog`.
pub const XmlDecl = struct {
/// Value of the mandatory `version` attribute.
version: []const u8,
/// Value of the `encoding` attribute, or null when it is absent.
encoding: ?[]const u8,
/// True for `standalone='yes'`, false for `'no'`, null when absent.
standalone: ?bool,
};
/// A parsed document. Everything reachable from `xml_decl` and `root` is
/// allocated from `arena`, so releasing the arena releases the whole tree.
pub const Document = struct {
    arena: ArenaAllocator,
    xml_decl: ?*XmlDecl,
    root: *Element,

    /// Frees all memory owned by the document.
    pub fn deinit(self: Document) void {
        // `self` is taken by value and is therefore immutable; deinit needs a
        // mutable arena, so work on a local copy.
        var owned_arena = self.arena;
        owned_arena.deinit();
    }
};
/// A cursor over an in-memory source text. Alongside the byte offset it
/// maintains a zero-based line and column for the current position.
const ParseContext = struct {
    source: []const u8,
    offset: usize,
    line: usize,
    column: usize,

    /// Creates a context positioned at the first byte of `source`.
    fn init(source: []const u8) ParseContext {
        return ParseContext{ .source = source, .offset = 0, .line = 0, .column = 0 };
    }

    /// Returns the next byte without consuming it, or null at end of input.
    fn peek(self: *ParseContext) ?u8 {
        if (self.offset >= self.source.len) return null;
        return self.source[self.offset];
    }

    /// Consumes and returns the next byte; fails with `error.UnexpectedEof`
    /// at end of input.
    fn consume(self: *ParseContext) !u8 {
        if (self.offset >= self.source.len) return error.UnexpectedEof;
        return self.consumeNoEof();
    }

    /// Consumes and returns the next byte. The caller must already know that
    /// input remains. A newline starts a new line at column zero; any other
    /// byte advances the column.
    fn consumeNoEof(self: *ParseContext) u8 {
        std.debug.assert(self.offset < self.source.len);
        const byte = self.source[self.offset];
        self.offset += 1;
        switch (byte) {
            '\n' => {
                self.line += 1;
                self.column = 0;
            },
            else => self.column += 1,
        }
        return byte;
    }

    /// Consumes the next byte if it equals `char`; reports whether it did.
    fn eat(self: *ParseContext, char: u8) bool {
        const next = self.peek() orelse return false;
        if (next != char) return false;
        _ = self.consumeNoEof();
        return true;
    }

    /// Consumes the next byte if it equals `expected`. Fails with
    /// `error.UnexpectedEof` at end of input and `error.UnexpectedCharacter`
    /// on a mismatch; nothing is consumed on failure.
    fn expect(self: *ParseContext, expected: u8) !void {
        const actual = self.peek() orelse return error.UnexpectedEof;
        if (actual != expected) return error.UnexpectedCharacter;
        _ = self.consumeNoEof();
    }

    /// Consumes `text` if the remaining input starts with it; reports
    /// whether it did.
    fn eatStr(self: *ParseContext, text: []const u8) bool {
        self.expectStr(text) catch return false;
        return true;
    }

    /// Consumes `text` if the remaining input starts with it. Fails with
    /// `error.UnexpectedEof` when fewer than `text.len` bytes remain and with
    /// `error.UnexpectedCharacter` on a mismatch; nothing is consumed on
    /// failure.
    fn expectStr(self: *ParseContext, text: []const u8) !void {
        const remaining = self.source[self.offset..];
        if (remaining.len < text.len) return error.UnexpectedEof;
        if (!mem.startsWith(u8, remaining, text)) return error.UnexpectedCharacter;

        // Go through consumeNoEof byte by byte so line/column stay in sync.
        var consumed: usize = 0;
        while (consumed < text.len) : (consumed += 1) {
            _ = self.consumeNoEof();
        }
    }

    /// Consumes a run of spaces, tabs, newlines and carriage returns;
    /// reports whether at least one byte was consumed.
    fn eatWs(self: *ParseContext) bool {
        const begin = self.offset;
        while (self.peek()) |ch| {
            if (ch != ' ' and ch != '\t' and ch != '\n' and ch != '\r') break;
            _ = self.consumeNoEof();
        }
        return self.offset != begin;
    }

    /// Like `eatWs`, but fails with `error.UnexpectedCharacter` when no
    /// whitespace was present.
    fn expectWs(self: *ParseContext) !void {
        if (self.eatWs()) return;
        return error.UnexpectedCharacter;
    }

    /// Returns the text of the line containing the current offset, without
    /// its terminating newline.
    fn currentLine(self: ParseContext) []const u8 {
        const consumed = self.source[0..self.offset];
        const begin = if (mem.lastIndexOfScalar(u8, consumed, '\n')) |prev_nl| prev_nl + 1 else 0;
        const end = mem.indexOfScalarPos(u8, self.source, self.offset, '\n') orelse self.source.len;
        return self.source[begin..end];
    }
};
// Exercises every ParseContext primitive, checking after each failed
// operation that the cursor has not moved.
test "ParseContext" {
    {
        var ctx = ParseContext.init("I like pythons");

        try testing.expectEqual(@as(?u8, 'I'), ctx.peek());
        try testing.expectEqual(@as(u8, 'I'), ctx.consumeNoEof());
        try testing.expectEqual(@as(?u8, ' '), ctx.peek());
        try testing.expectEqual(@as(u8, ' '), try ctx.consume());

        try testing.expect(ctx.eat('l'));
        try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
        try testing.expectEqual(false, ctx.eat('a'));
        try testing.expectEqual(@as(?u8, 'i'), ctx.peek());

        try ctx.expect('i');
        try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
        try testing.expectError(error.UnexpectedCharacter, ctx.expect('a'));
        try testing.expectEqual(@as(?u8, 'k'), ctx.peek());

        try testing.expect(ctx.eatStr("ke"));
        try testing.expectEqual(@as(?u8, ' '), ctx.peek());

        try testing.expect(ctx.eatWs());
        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
        try testing.expectEqual(false, ctx.eatWs());
        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());

        try testing.expectEqual(false, ctx.eatStr("aaaaaaaaa"));
        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());

        try testing.expectError(error.UnexpectedEof, ctx.expectStr("aaaaaaaaa"));
        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
        try testing.expectError(error.UnexpectedCharacter, ctx.expectStr("pytn"));
        try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
        try ctx.expectStr("python");
        try testing.expectEqual(@as(?u8, 's'), ctx.peek());
    }

    {
        // Empty input: every primitive must report end of input.
        var ctx = ParseContext.init("");
        // expectEqual takes the expected value first; the typed null keeps
        // the comparison at ?u8.
        try testing.expectEqual(@as(?u8, null), ctx.peek());
        try testing.expectError(error.UnexpectedEof, ctx.consume());
        try testing.expectEqual(false, ctx.eat('p'));
        try testing.expectError(error.UnexpectedEof, ctx.expect('p'));
    }
}
/// Errors named by the parser. `parseContent` declares this set explicitly
/// as its error type.
pub const ParseError = error{
IllegalCharacter,
UnexpectedEof,
UnexpectedCharacter,
UnclosedValue,
UnclosedComment,
InvalidName,
InvalidEntity,
InvalidStandaloneValue,
NonMatchingClosingTag,
InvalidDocument,
OutOfMemory,
};
/// Parses `source` as a complete XML document. The returned `Document` owns
/// an arena created on top of `backing_allocator`; release it with
/// `Document.deinit`.
pub fn parse(backing_allocator: Allocator, source: []const u8) !Document {
    var context = ParseContext.init(source);
    const document = try parseDocument(&context, backing_allocator);
    return document;
}
/// Parses a whole document from `ctx`: optional comments, an optional XML
/// declaration, optional comments, exactly one root element, then optional
/// trailing comments. Anything left over after that, or a missing root
/// element, is `error.InvalidDocument`. The document's arena is released
/// again if parsing fails.
fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
    var document = Document{
        .arena = ArenaAllocator.init(backing_allocator),
        .xml_decl = null,
        .root = undefined, // assigned below, before the document is returned
    };
    errdefer document.deinit();

    const arena_allocator = document.arena.allocator();

    // Leading comments and the optional declaration.
    try trySkipComments(ctx, arena_allocator);
    document.xml_decl = try tryParseProlog(ctx, arena_allocator);
    _ = ctx.eatWs();
    try trySkipComments(ctx, arena_allocator);

    // The root element is mandatory.
    const maybe_root = try tryParseElement(ctx, arena_allocator);
    document.root = maybe_root orelse return error.InvalidDocument;

    // Only whitespace and comments may follow the root element.
    _ = ctx.eatWs();
    try trySkipComments(ctx, arena_allocator);
    if (ctx.peek() != null) return error.InvalidDocument;

    return document;
}
/// Parses a quoted attribute value. The value may be delimited by either
/// double or single quotes and ends at the next occurrence of the opening
/// quote. Returns a copy allocated from `alloc` with entity references
/// substituted. Fails with `error.UnexpectedCharacter` when the first byte is
/// not a quote and `error.UnclosedValue` when the input ends first.
fn parseAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
    const quote = try ctx.consume();
    switch (quote) {
        '"', '\'' => {},
        else => return error.UnexpectedCharacter,
    }

    const begin = ctx.offset;
    var next = ctx.consume() catch return error.UnclosedValue;
    while (next != quote) {
        next = ctx.consume() catch return error.UnclosedValue;
    }

    // The closing quote has been consumed; leave it out of the value.
    const raw = ctx.source[begin .. ctx.offset - 1];
    return try dupeAndUnescape(alloc, raw);
}
/// Parses `= value`, allowing whitespace on either side of the equals sign,
/// and returns the unescaped value allocated from `alloc`.
fn parseEqAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
    _ = ctx.eatWs();
    try ctx.expect('=');
    _ = ctx.eatWs();

    const value = try parseAttrValue(ctx, alloc);
    return value;
}
/// Consumes a name and returns it as a slice of the source (no copy).
/// XML's spec on names is very long, so to keep this simple a name is any
/// run of bytes that are neither whitespace nor one of the special
/// characters listed below. An empty run is `error.InvalidName`.
fn parseNameNoDupe(ctx: *ParseContext) ![]const u8 {
    // Whitespace first, then the special characters that end a name.
    const terminators = " \t\n\r" ++ "&\"'<>?=/";

    const begin = ctx.offset;
    while (ctx.peek()) |ch| {
        if (mem.indexOfScalar(u8, terminators, ch) != null) break;
        _ = ctx.consumeNoEof();
    }

    if (ctx.offset == begin) return error.InvalidName;
    return ctx.source[begin..ctx.offset];
}
/// Consumes everything up to the next `<` (or the end of input) as character
/// data. Returns null when nothing was consumed, otherwise a copy allocated
/// from `alloc` with entity references substituted.
fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
    const begin = ctx.offset;
    while (ctx.peek()) |ch| {
        if (ch == '<') break;
        _ = ctx.consumeNoEof();
    }

    if (ctx.offset == begin) return null;

    const raw = ctx.source[begin..ctx.offset];
    return try dupeAndUnescape(alloc, raw);
}
/// Parses one content item at the cursor. Character data is tried first,
/// then a comment, then a nested element; when none of them applies the
/// result is `error.UnexpectedCharacter`.
fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content {
    if (try tryParseCharData(ctx, alloc)) |text| {
        return Content{ .CharData = text };
    }
    if (try tryParseComment(ctx, alloc)) |comment_text| {
        return Content{ .Comment = comment_text };
    }
    if (try tryParseElement(ctx, alloc)) |child| {
        return Content{ .Element = child };
    }
    return error.UnexpectedCharacter;
}
/// Parses one `name = value` attribute. Returns null, consuming nothing,
/// when no name starts at the cursor. Once a name has been read, a missing
/// `=` or a malformed value is an error. The attribute, its name and its
/// value are all allocated from `alloc`.
fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
    const name = parseNameNoDupe(ctx) catch return null;

    // Shared with the XML declaration parser: optional whitespace, '=',
    // optional whitespace, then the quoted value.
    const value = try parseEqAttrValue(ctx, alloc);

    const attr = try alloc.create(Attribute);
    // `name` still points into the source text; give the attribute its own copy.
    attr.name = try alloc.dupe(u8, name);
    attr.value = value;
    return attr;
}
/// Parses one element, including its attributes and all nested content.
/// Returns null, with the cursor left exactly where it was, when the input
/// does not start with `<` followed by a name. After that point every
/// problem is an error: `error.UnexpectedEof` when the input ends inside the
/// element and `error.NonMatchingClosingTag` when the closing tag differs
/// from the opening one. Everything is allocated from `alloc`.
fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
    // Snapshot the whole cursor. Restoring only the offset would leave the
    // line/column counters advanced past the '<' that gets un-read below.
    const saved = ctx.*;
    if (!ctx.eat('<')) return null;
    const tag = parseNameNoDupe(ctx) catch {
        ctx.* = saved;
        return null;
    };

    const element = try alloc.create(Element);
    element.* = Element.init(try alloc.dupe(u8, tag), alloc);

    // Each attribute must be preceded by whitespace.
    while (ctx.eatWs()) {
        const attr = (try tryParseAttr(ctx, alloc)) orelse break;
        try element.attributes.append(attr);
    }

    // Self-closing form: no content and no closing tag.
    if (ctx.eatStr("/>")) {
        return element;
    }

    try ctx.expect('>');

    // Collect content until this element's closing tag begins.
    while (true) {
        if (ctx.peek() == null) {
            return error.UnexpectedEof;
        } else if (ctx.eatStr("</")) {
            break;
        }

        const content = try parseContent(ctx, alloc);
        try element.children.append(content);
    }

    const closing_tag = try parseNameNoDupe(ctx);
    if (!std.mem.eql(u8, tag, closing_tag)) {
        return error.NonMatchingClosingTag;
    }

    _ = ctx.eatWs();
    try ctx.expect('>');
    return element;
}
// Covers the non-element case, attributes with both quote styles, plain
// text content, and mixed text / element / comment content.
test "tryParseElement" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    {
        // '<' without a name is not an element and must not move the cursor.
        var ctx = ParseContext.init("<= a='b'/>");
        const parsed = try tryParseElement(&ctx, alloc);
        try testing.expectEqual(@as(?*Element, null), parsed);
        try testing.expectEqual(@as(?u8, '<'), ctx.peek());
    }

    {
        var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
        const python = (try tryParseElement(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, python.tag, "python");

        const attrs = python.attributes.items;
        try testing.expectEqualSlices(u8, attrs[0].name, "size");
        try testing.expectEqualSlices(u8, attrs[0].value, "15");
        try testing.expectEqualSlices(u8, attrs[1].name, "color");
        try testing.expectEqualSlices(u8, attrs[1].value, "green");
    }

    {
        var ctx = ParseContext.init("<python>test</python>");
        const python = (try tryParseElement(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, python.tag, "python");
        try testing.expectEqualSlices(u8, python.children.items[0].CharData, "test");
    }

    {
        var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
        const outer = (try tryParseElement(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, outer.tag, "a");

        const kids = outer.children.items;
        try testing.expectEqualSlices(u8, kids[0].CharData, "b");
        try testing.expectEqualSlices(u8, kids[1].Element.tag, "c");
        try testing.expectEqualSlices(u8, kids[2].CharData, "d");
        try testing.expectEqualSlices(u8, kids[3].Element.tag, "e");
        try testing.expectEqualSlices(u8, kids[4].CharData, "f");
        try testing.expectEqualSlices(u8, kids[5].Comment, "g");
    }
}
/// Parses an `<?xml version=... [encoding=...] [standalone=...] ?>`
/// declaration. Returns null, with the cursor left exactly where it was,
/// when the input does not start with `<?` followed by the name `xml`.
/// `version` is mandatory; `encoding` and `standalone` stay null when absent.
/// A `standalone` value other than `yes` or `no` is
/// `error.InvalidStandaloneValue`. The result is allocated from `alloc`.
fn tryParseProlog(ctx: *ParseContext, alloc: Allocator) !?*XmlDecl {
    // Snapshot the whole cursor. Restoring only the offset would leave the
    // line/column counters advanced past the bytes that get un-read below.
    const saved = ctx.*;
    if (!ctx.eatStr("<?") or !mem.eql(u8, try parseNameNoDupe(ctx), "xml")) {
        ctx.* = saved;
        return null;
    }

    const decl = try alloc.create(XmlDecl);
    decl.encoding = null;
    decl.standalone = null;

    // Version info is mandatory
    try ctx.expectWs();
    try ctx.expectStr("version");
    decl.version = try parseEqAttrValue(ctx, alloc);

    if (ctx.eatWs()) {
        // Optional encoding and standalone info
        var require_ws = false;

        if (ctx.eatStr("encoding")) {
            decl.encoding = try parseEqAttrValue(ctx, alloc);
            require_ws = true;
        }

        // After `encoding`, whitespace must separate it from `standalone`.
        // Without `encoding`, the whitespace was already consumed above, so
        // none may remain here.
        if (require_ws == ctx.eatWs() and ctx.eatStr("standalone")) {
            const standalone = try parseEqAttrValue(ctx, alloc);
            if (std.mem.eql(u8, standalone, "yes")) {
                decl.standalone = true;
            } else if (std.mem.eql(u8, standalone, "no")) {
                decl.standalone = false;
            } else {
                return error.InvalidStandaloneValue;
            }
        }

        _ = ctx.eatWs();
    }

    try ctx.expectStr("?>");
    return decl;
}
// Covers a non-matching target name, a version-only declaration, and a
// declaration that carries all three attributes.
test "tryParseProlog" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    {
        // "xmla" is not "xml": no declaration, and the cursor must not move.
        var ctx = ParseContext.init("<?xmla version='aa'?>");
        const parsed = try tryParseProlog(&ctx, alloc);
        try testing.expectEqual(@as(?*XmlDecl, null), parsed);
        try testing.expectEqual(@as(?u8, '<'), ctx.peek());
    }

    {
        var ctx = ParseContext.init("<?xml version='aa'?>");
        const decl = (try tryParseProlog(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, "aa", decl.version);
        try testing.expectEqual(@as(?[]const u8, null), decl.encoding);
        try testing.expectEqual(@as(?bool, null), decl.standalone);
    }

    {
        var ctx = ParseContext.init("<?xml version=\"aa\" encoding = 'bbb' standalone \t = 'yes'?>");
        const decl = (try tryParseProlog(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, "aa", decl.version);
        try testing.expectEqualSlices(u8, "bbb", decl.encoding.?);
        try testing.expectEqual(@as(?bool, true), decl.standalone.?);
    }
}
/// Consumes any number of consecutive comments, together with the whitespace
/// that follows each of them. The comment text is parsed and discarded.
fn trySkipComments(ctx: *ParseContext, alloc: Allocator) !void {
    while (true) {
        const comment = try tryParseComment(ctx, alloc);
        if (comment == null) return;
        _ = ctx.eatWs();
    }
}
/// Parses a `<!-- ... -->` comment. Returns null when the input does not
/// start with `<!--`; otherwise returns a copy of the text between the
/// delimiters, allocated from `alloc`. Input that ends before `-->` is
/// `error.UnclosedComment`.
fn tryParseComment(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
    const close = "-->";

    if (!ctx.eatStr("<!--")) return null;

    const begin = ctx.offset;
    while (true) {
        if (ctx.eatStr(close)) break;
        _ = ctx.consume() catch return error.UnclosedComment;
    }

    // The terminator has been consumed; leave it out of the comment text.
    const body = ctx.source[begin .. ctx.offset - close.len];
    return try alloc.dupe(u8, body);
}
/// Maps one complete predefined entity reference (`&lt;`, `&gt;`, `&amp;`,
/// `&apos;` or `&quot;`, including the leading `&` and trailing `;`) to the
/// byte it stands for. Any other text is `error.InvalidEntity`.
fn unescapeEntity(text: []const u8) !u8 {
    const EntitySubstition = struct { text: []const u8, replacement: u8 };

    const entities = [_]EntitySubstition{
        .{ .text = "&lt;", .replacement = '<' },
        .{ .text = "&gt;", .replacement = '>' },
        .{ .text = "&amp;", .replacement = '&' },
        .{ .text = "&apos;", .replacement = '\'' },
        .{ .text = "&quot;", .replacement = '"' },
    };

    for (entities) |entity| {
        if (std.mem.eql(u8, text, entity.text)) return entity.replacement;
    }

    return error.InvalidEntity;
}

/// Returns a copy of `text`, allocated from `alloc`, in which every
/// predefined entity reference has been replaced by the byte it stands for.
/// The caller owns the returned slice. An `&` with no following `;`, or an
/// unknown entity, is `error.InvalidEntity`; nothing stays allocated in that
/// case.
fn dupeAndUnescape(alloc: Allocator, text: []const u8) ![]const u8 {
    // Every entity is longer than its replacement, so the output never
    // exceeds the input length.
    const str = try alloc.alloc(u8, text.len);
    // Without this the buffer would leak whenever an invalid entity aborts
    // the copy below.
    errdefer alloc.free(str);

    var j: usize = 0;
    var i: usize = 0;
    while (i < text.len) : (j += 1) {
        if (text[i] == '&') {
            const entity_end = 1 + (mem.indexOfScalarPos(u8, text, i, ';') orelse return error.InvalidEntity);
            str[j] = try unescapeEntity(text[i..entity_end]);
            i = entity_end;
        } else {
            str[j] = text[i];
            i += 1;
        }
    }

    // Trim the buffer down to the bytes actually written.
    return alloc.shrink(str, j);
}
// Checks pass-through text, all five predefined entities, and the malformed
// inputs that must be rejected.
test "dupeAndUnescape" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const unchanged = try dupeAndUnescape(alloc, "test");
    try testing.expectEqualSlices(u8, "test", unchanged);

    const substituted = try dupeAndUnescape(alloc, "a&lt;b&amp;c&gt;d&quot;e&apos;f&lt;");
    try testing.expectEqualSlices(u8, "a<b&c>d\"e'f<", substituted);

    const invalid_inputs = [_][]const u8{
        "python&",
        "python&&",
        "python&test;",
        "python&boa",
    };
    for (invalid_inputs) |input| {
        try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, input));
    }
}
// Comments placed before and after the root element must be skipped.
test "Top level comments" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const source = "<?xml version='aa'?><!--comment--><python color='green'/><!--another comment-->";
    const doc = try parse(alloc, source);
    try testing.expectEqualSlices(u8, "python", doc.root.tag);
}