Compare commits
5 Commits
5bb382bda3
...
ca801799fc
Author | SHA1 | Date | |
---|---|---|---|
ca801799fc | |||
f374df3fa1 | |||
744d834cfd | |||
c9369504fa | |||
4606205b82 |
42
README.md
42
README.md
|
@ -2,13 +2,11 @@
|
|||
|
||||
[![Build Status](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/status.svg?ref=refs/heads/master)](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/)
|
||||
|
||||
|
||||
### NOTE: All tests pass, but credentials currently must be passed through environment
|
||||
|
||||
This SDK currently supports all AWS services except EC2 and S3. These two
|
||||
services only support XML, and zig 0.8.0 and master both trigger compile
|
||||
errors while incorporating the XML parser. S3 also requires some plumbing
|
||||
tweaks in the signature calculation. Examples of usage are in src/main.zig.
|
||||
services only support XML, and zig 0.9.0 and master both trigger compile
|
||||
errors while incorporating the XML parser in conjunction with a process
|
||||
to fill the types. S3 also requires some plumbing tweaks in the signature
|
||||
calculation. Examples of usage are in src/main.zig.
|
||||
|
||||
Current executable size for the demo is 953k (90k of which is the AWS PEM file)
|
||||
after compiling with -Drelease-safe and
|
||||
|
@ -43,37 +41,33 @@ supersede all other configuration. Note that an alternative endpoint may
|
|||
require passing in a client option to specify an different TLS root certificate
|
||||
(pass null to disable certificate verification).
|
||||
|
||||
Given that credential handling is still very basic, you may want to look at
|
||||
the [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) if
|
||||
your needs include something more robust. Note that that branch supports
|
||||
x86_64 linux only.
|
||||
The [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) exists
|
||||
for posterity, and supports x86_64 linux. This branch is recommended moving
|
||||
forward.
|
||||
|
||||
## Limitations
|
||||
|
||||
There are many nuances of AWS V4 signature calculation. S3 is not supported
|
||||
because it uses many of these test cases. STS tokens using a session token
|
||||
are not yet implemented, though should be trivial. I have also seen a few
|
||||
service errors caused by discrepancies in signatures, though I don't know yet
|
||||
if this was an issue in the service itself (has not repro'd) or if there
|
||||
is a latent bug.
|
||||
|
||||
Only environment variable based credentials can be used at the moment.
|
||||
because it uses many of these test cases. WebIdentityToken is not yet
|
||||
implemented.
|
||||
|
||||
TODO List:
|
||||
|
||||
* Add option to cache signature keys
|
||||
* Implement credentials provider
|
||||
* Implement jitter/exponential backoff
|
||||
* Implement timeouts and other TODO's in the code
|
||||
* To work around compiler issues, the best option may be to convert from
|
||||
Xml to json, then parse from there. This will be pursued first. It may need
|
||||
to wait for zig 0.10.0 when self-hosted compiler is likely to be completed
|
||||
(zig 0.10.0 eta May 2022) discovered. If we need to wait, S3, EC2 and other
|
||||
restXml protocols will be blocked.
|
||||
* Implement [AWS restXml protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-restxml-protocol.html).
|
||||
Includes S3. Total service count 4. This may be blocked due to the same issue as EC2.
|
||||
* Implement [AWS EC2 query protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-ec2-query-protocol.html).
|
||||
Includes EC2. Total service count 1. This may be blocked on a compiler bug,
|
||||
though has not been tested with zig 0.9.0. It may need to wait for zig 0.10.0
|
||||
when self-hosted compiler is likely to be completed (zig 0.10.0 eta May 2022)
|
||||
discovered. More details and llvm ir log can be found in the
|
||||
though has not been tested with zig 0.9.0. More details and llvm ir log can be found in the
|
||||
[XML branch](https://git.lerch.org/lobo/aws-sdk-for-zig/src/branch/xml).
|
||||
* Implement sigv4a signing
|
||||
* Implement jitter/exponential backoff
|
||||
* Implement timeouts and other TODO's in the code
|
||||
* Add option to cache signature keys
|
||||
|
||||
Compiler wishlist/watchlist:
|
||||
|
||||
|
|
|
@ -544,12 +544,12 @@ fn generateSimpleTypeFor(_: anytype, type_name: []const u8, writer: anytype) !vo
|
|||
}
|
||||
fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, type_type_name: []const u8, writer: anytype, state: GenerationState) anyerror!void {
|
||||
_ = shape_id;
|
||||
const Mapping = struct { snake: []const u8, json: []const u8 };
|
||||
var json_field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len);
|
||||
const Mapping = struct { snake: []const u8, original: []const u8 };
|
||||
var field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len);
|
||||
defer {
|
||||
for (json_field_name_mappings.items) |mapping|
|
||||
for (field_name_mappings.items) |mapping|
|
||||
state.allocator.free(mapping.snake);
|
||||
json_field_name_mappings.deinit();
|
||||
field_name_mappings.deinit();
|
||||
}
|
||||
// There is an httpQueryParams trait as well, but nobody is using it. API GW
|
||||
// pretends to, but it's an empty map
|
||||
|
@ -591,15 +591,19 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty
|
|||
switch (trait) {
|
||||
.json_name => {
|
||||
found_name_trait = true;
|
||||
json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.json_name });
|
||||
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.json_name });
|
||||
},
|
||||
.http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_query }),
|
||||
.http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_header }),
|
||||
.xml_name => {
|
||||
found_name_trait = true;
|
||||
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.xml_name });
|
||||
},
|
||||
.http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_query }),
|
||||
.http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_header }),
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
if (!found_name_trait)
|
||||
json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = member.name });
|
||||
field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = member.name });
|
||||
defer state.allocator.free(snake_case_member);
|
||||
try outputIndent(child_state, writer);
|
||||
const member_name = avoidReserved(snake_case_member);
|
||||
|
@ -637,11 +641,11 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty
|
|||
//
|
||||
try writer.writeByte('\n');
|
||||
try outputIndent(child_state, writer);
|
||||
_ = try writer.write("pub fn jsonFieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n");
|
||||
_ = try writer.write("pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n");
|
||||
var grandchild_state = child_state;
|
||||
grandchild_state.indent_level += 1;
|
||||
// We need to force output here becaseu we're referencing the field in the return statement below
|
||||
try writeMappings(grandchild_state, "", "mappings", json_field_name_mappings, true, writer);
|
||||
try writeMappings(grandchild_state, "", "mappings", field_name_mappings, true, writer);
|
||||
try outputIndent(grandchild_state, writer);
|
||||
_ = try writer.write("return @field(mappings, field_name);\n");
|
||||
try outputIndent(child_state, writer);
|
||||
|
@ -667,7 +671,7 @@ fn writeStringify(state: GenerationState, fields: [][]const u8, writer: anytype)
|
|||
try outputIndent(child_state, writer);
|
||||
try writer.print("if (std.mem.eql(u8, \"{s}\", field_name))\n", .{field});
|
||||
try outputIndent(return_state, writer);
|
||||
try writer.print("return try serializeMap(self.{s}, self.jsonFieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field });
|
||||
try writer.print("return try serializeMap(self.{s}, self.fieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field });
|
||||
}
|
||||
try outputIndent(child_state, writer);
|
||||
_ = try writer.write("return false;\n");
|
||||
|
@ -690,7 +694,7 @@ fn writeMappings(state: GenerationState, @"pub": []const u8, mapping_name: []con
|
|||
child_state.indent_level += 1;
|
||||
for (mappings.items) |mapping| {
|
||||
try outputIndent(child_state, writer);
|
||||
try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.json });
|
||||
try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.original });
|
||||
}
|
||||
try outputIndent(state, writer);
|
||||
_ = try writer.write("};\n");
|
||||
|
|
|
@ -96,6 +96,7 @@ pub const TraitType = enum {
|
|||
http_label,
|
||||
http_query,
|
||||
json_name,
|
||||
xml_name,
|
||||
required,
|
||||
documentation,
|
||||
pattern,
|
||||
|
@ -118,6 +119,7 @@ pub const Trait = union(TraitType) {
|
|||
aws_protocol: AwsProtocol,
|
||||
ec2_query_name: []const u8,
|
||||
json_name: []const u8,
|
||||
xml_name: []const u8,
|
||||
http: struct {
|
||||
method: []const u8,
|
||||
uri: []const u8,
|
||||
|
@ -565,6 +567,8 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra
|
|||
}
|
||||
if (std.mem.eql(u8, trait_type, "smithy.api#jsonName"))
|
||||
return Trait{ .json_name = value.String };
|
||||
if (std.mem.eql(u8, trait_type, "smithy.api#xmlName"))
|
||||
return Trait{ .xml_name = value.String };
|
||||
if (std.mem.eql(u8, trait_type, "smithy.api#httpQuery"))
|
||||
return Trait{ .http_query = value.String };
|
||||
if (std.mem.eql(u8, trait_type, "smithy.api#httpHeader"))
|
||||
|
@ -617,7 +621,6 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra
|
|||
\\smithy.api#timestampFormat
|
||||
\\smithy.api#xmlAttribute
|
||||
\\smithy.api#xmlFlattened
|
||||
\\smithy.api#xmlName
|
||||
\\smithy.waiters#waitable
|
||||
;
|
||||
var iterator = std.mem.split(u8, list, "\n");
|
||||
|
|
|
@ -432,7 +432,7 @@ fn buildPath(allocator: std.mem.Allocator, raw_uri: []const u8, comptime ActionR
|
|||
in_var = false;
|
||||
const replacement_var = raw_uri[start..inx];
|
||||
inline for (std.meta.fields(ActionRequest)) |field| {
|
||||
if (std.mem.eql(u8, request.jsonFieldNameFor(field.name), replacement_var)) {
|
||||
if (std.mem.eql(u8, request.fieldNameFor(field.name), replacement_var)) {
|
||||
var replacement_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len);
|
||||
defer replacement_buffer.deinit();
|
||||
var encoded_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len);
|
||||
|
|
|
@ -2871,8 +2871,8 @@ pub fn stringify(
|
|||
field_written = try value.jsonStringifyField(Field.name, child_options, out_stream);
|
||||
|
||||
if (!field_written) {
|
||||
if (comptime std.meta.trait.hasFn("jsonFieldNameFor")(T)) {
|
||||
const name = value.jsonFieldNameFor(Field.name);
|
||||
if (comptime std.meta.trait.hasFn("fieldNameFor")(T)) {
|
||||
const name = value.fieldNameFor(Field.name);
|
||||
try stringify(name, options, out_stream);
|
||||
} else {
|
||||
try stringify(Field.name, options, out_stream);
|
||||
|
|
676
src/xml.zig
Normal file
676
src/xml.zig
Normal file
|
@ -0,0 +1,676 @@
|
|||
// File sourced from:
|
||||
// https://github.com/Snektron/vulkan-zig/blob/797ae8af88e84753af9640266de61a985b76b580/generator/xml.zig
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
const Allocator = mem.Allocator;
|
||||
const ArenaAllocator = std.heap.ArenaAllocator;
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
pub const Attribute = struct {
|
||||
name: []const u8,
|
||||
value: []const u8,
|
||||
};
|
||||
|
||||
pub const Content = union(enum) {
|
||||
CharData: []const u8,
|
||||
Comment: []const u8,
|
||||
Element: *Element,
|
||||
};
|
||||
|
||||
pub const Element = struct {
|
||||
pub const AttributeList = ArrayList(*Attribute);
|
||||
pub const ContentList = ArrayList(Content);
|
||||
|
||||
tag: []const u8,
|
||||
attributes: AttributeList,
|
||||
children: ContentList,
|
||||
|
||||
fn init(tag: []const u8, alloc: Allocator) Element {
|
||||
return .{
|
||||
.tag = tag,
|
||||
.attributes = AttributeList.init(alloc),
|
||||
.children = ContentList.init(alloc),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn getAttribute(self: *Element, attrib_name: []const u8) ?[]const u8 {
|
||||
for (self.attributes.items) |child| {
|
||||
if (mem.eql(u8, child.name, attrib_name)) {
|
||||
return child.value;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
pub fn getCharData(self: *Element, child_tag: []const u8) ?[]const u8 {
|
||||
const child = self.findChildByTag(child_tag) orelse return null;
|
||||
if (child.children.items.len != 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return switch (child.children.items[0]) {
|
||||
.CharData => |char_data| char_data,
|
||||
else => null,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn iterator(self: *Element) ChildIterator {
|
||||
return .{
|
||||
.items = self.children.items,
|
||||
.i = 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn elements(self: *Element) ChildElementIterator {
|
||||
return .{
|
||||
.inner = self.iterator(),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn findChildByTag(self: *Element, tag: []const u8) !?*Element {
|
||||
return try self.findChildrenByTag(tag).next();
|
||||
}
|
||||
|
||||
pub fn findChildrenByTag(self: *Element, tag: []const u8) FindChildrenByTagIterator {
|
||||
return .{
|
||||
.inner = self.elements(),
|
||||
.tag = tag,
|
||||
};
|
||||
}
|
||||
|
||||
pub const ChildIterator = struct {
|
||||
items: []Content,
|
||||
i: usize,
|
||||
|
||||
pub fn next(self: *ChildIterator) ?*Content {
|
||||
if (self.i < self.items.len) {
|
||||
self.i += 1;
|
||||
return &self.items[self.i - 1];
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
pub const ChildElementIterator = struct {
|
||||
inner: ChildIterator,
|
||||
|
||||
pub fn next(self: *ChildElementIterator) ?*Element {
|
||||
while (self.inner.next()) |child| {
|
||||
if (child.* != .Element) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return child.*.Element;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
fn strictEqual(a: []const u8, b: []const u8, _: PredicateOptions) !bool {
|
||||
return mem.eql(u8, a, b);
|
||||
}
|
||||
pub const FindChildrenByTagIterator = struct {
|
||||
inner: ChildElementIterator,
|
||||
tag: []const u8,
|
||||
predicate: fn (a: []const u8, b: []const u8, options: PredicateOptions) anyerror!bool = strictEqual,
|
||||
predicate_options: PredicateOptions = .{},
|
||||
|
||||
pub fn next(self: *FindChildrenByTagIterator) !?*Element {
|
||||
while (self.inner.next()) |child| {
|
||||
if (!try self.predicate(child.tag, self.tag, self.predicate_options)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return child;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
pub const PredicateOptions = struct {
|
||||
allocator: ?std.mem.Allocator = null,
|
||||
};
|
||||
pub const XmlDecl = struct {
|
||||
version: []const u8,
|
||||
encoding: ?[]const u8,
|
||||
standalone: ?bool,
|
||||
};
|
||||
|
||||
pub const Document = struct {
|
||||
arena: ArenaAllocator,
|
||||
xml_decl: ?*XmlDecl,
|
||||
root: *Element,
|
||||
|
||||
pub fn deinit(self: Document) void {
|
||||
var arena = self.arena; // Copy to stack so self can be taken by value.
|
||||
arena.deinit();
|
||||
}
|
||||
};
|
||||
|
||||
const ParseContext = struct {
|
||||
source: []const u8,
|
||||
offset: usize,
|
||||
line: usize,
|
||||
column: usize,
|
||||
|
||||
fn init(source: []const u8) ParseContext {
|
||||
return .{
|
||||
.source = source,
|
||||
.offset = 0,
|
||||
.line = 0,
|
||||
.column = 0,
|
||||
};
|
||||
}
|
||||
|
||||
fn peek(self: *ParseContext) ?u8 {
|
||||
return if (self.offset < self.source.len) self.source[self.offset] else null;
|
||||
}
|
||||
|
||||
fn consume(self: *ParseContext) !u8 {
|
||||
if (self.offset < self.source.len) {
|
||||
return self.consumeNoEof();
|
||||
}
|
||||
|
||||
return error.UnexpectedEof;
|
||||
}
|
||||
|
||||
fn consumeNoEof(self: *ParseContext) u8 {
|
||||
std.debug.assert(self.offset < self.source.len);
|
||||
const c = self.source[self.offset];
|
||||
self.offset += 1;
|
||||
|
||||
if (c == '\n') {
|
||||
self.line += 1;
|
||||
self.column = 0;
|
||||
} else {
|
||||
self.column += 1;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
fn eat(self: *ParseContext, char: u8) bool {
|
||||
self.expect(char) catch return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
fn expect(self: *ParseContext, expected: u8) !void {
|
||||
if (self.peek()) |actual| {
|
||||
if (expected != actual) {
|
||||
return error.UnexpectedCharacter;
|
||||
}
|
||||
|
||||
_ = self.consumeNoEof();
|
||||
return;
|
||||
}
|
||||
|
||||
return error.UnexpectedEof;
|
||||
}
|
||||
|
||||
fn eatStr(self: *ParseContext, text: []const u8) bool {
|
||||
self.expectStr(text) catch return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
fn expectStr(self: *ParseContext, text: []const u8) !void {
|
||||
if (self.source.len < self.offset + text.len) {
|
||||
return error.UnexpectedEof;
|
||||
} else if (std.mem.startsWith(u8, self.source[self.offset..], text)) {
|
||||
var i: usize = 0;
|
||||
while (i < text.len) : (i += 1) {
|
||||
_ = self.consumeNoEof();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
return error.UnexpectedCharacter;
|
||||
}
|
||||
|
||||
fn eatWs(self: *ParseContext) bool {
|
||||
var ws = false;
|
||||
|
||||
while (self.peek()) |ch| {
|
||||
switch (ch) {
|
||||
' ', '\t', '\n', '\r' => {
|
||||
ws = true;
|
||||
_ = self.consumeNoEof();
|
||||
},
|
||||
else => break,
|
||||
}
|
||||
}
|
||||
|
||||
return ws;
|
||||
}
|
||||
|
||||
fn expectWs(self: *ParseContext) !void {
|
||||
if (!self.eatWs()) return error.UnexpectedCharacter;
|
||||
}
|
||||
|
||||
fn currentLine(self: ParseContext) []const u8 {
|
||||
var begin: usize = 0;
|
||||
if (mem.lastIndexOfScalar(u8, self.source[0..self.offset], '\n')) |prev_nl| {
|
||||
begin = prev_nl + 1;
|
||||
}
|
||||
|
||||
var end = mem.indexOfScalarPos(u8, self.source, self.offset, '\n') orelse self.source.len;
|
||||
return self.source[begin..end];
|
||||
}
|
||||
};
|
||||
|
||||
test "ParseContext" {
|
||||
{
|
||||
var ctx = ParseContext.init("I like pythons");
|
||||
try testing.expectEqual(@as(?u8, 'I'), ctx.peek());
|
||||
try testing.expectEqual(@as(u8, 'I'), ctx.consumeNoEof());
|
||||
try testing.expectEqual(@as(?u8, ' '), ctx.peek());
|
||||
try testing.expectEqual(@as(u8, ' '), try ctx.consume());
|
||||
|
||||
try testing.expect(ctx.eat('l'));
|
||||
try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
|
||||
try testing.expectEqual(false, ctx.eat('a'));
|
||||
try testing.expectEqual(@as(?u8, 'i'), ctx.peek());
|
||||
|
||||
try ctx.expect('i');
|
||||
try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
|
||||
try testing.expectError(error.UnexpectedCharacter, ctx.expect('a'));
|
||||
try testing.expectEqual(@as(?u8, 'k'), ctx.peek());
|
||||
|
||||
try testing.expect(ctx.eatStr("ke"));
|
||||
try testing.expectEqual(@as(?u8, ' '), ctx.peek());
|
||||
|
||||
try testing.expect(ctx.eatWs());
|
||||
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
|
||||
try testing.expectEqual(false, ctx.eatWs());
|
||||
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
|
||||
|
||||
try testing.expectEqual(false, ctx.eatStr("aaaaaaaaa"));
|
||||
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
|
||||
|
||||
try testing.expectError(error.UnexpectedEof, ctx.expectStr("aaaaaaaaa"));
|
||||
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
|
||||
try testing.expectError(error.UnexpectedCharacter, ctx.expectStr("pytn"));
|
||||
try testing.expectEqual(@as(?u8, 'p'), ctx.peek());
|
||||
try ctx.expectStr("python");
|
||||
try testing.expectEqual(@as(?u8, 's'), ctx.peek());
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("");
|
||||
try testing.expectEqual(ctx.peek(), null);
|
||||
try testing.expectError(error.UnexpectedEof, ctx.consume());
|
||||
try testing.expectEqual(ctx.eat('p'), false);
|
||||
try testing.expectError(error.UnexpectedEof, ctx.expect('p'));
|
||||
}
|
||||
}
|
||||
|
||||
pub const ParseError = error{
|
||||
IllegalCharacter,
|
||||
UnexpectedEof,
|
||||
UnexpectedCharacter,
|
||||
UnclosedValue,
|
||||
UnclosedComment,
|
||||
InvalidName,
|
||||
InvalidEntity,
|
||||
InvalidStandaloneValue,
|
||||
NonMatchingClosingTag,
|
||||
InvalidDocument,
|
||||
OutOfMemory,
|
||||
};
|
||||
|
||||
pub fn parse(backing_allocator: Allocator, source: []const u8) !Document {
|
||||
var ctx = ParseContext.init(source);
|
||||
return try parseDocument(&ctx, backing_allocator);
|
||||
}
|
||||
|
||||
fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document {
|
||||
var doc = Document{
|
||||
.arena = ArenaAllocator.init(backing_allocator),
|
||||
.xml_decl = null,
|
||||
.root = undefined,
|
||||
};
|
||||
|
||||
errdefer doc.deinit();
|
||||
|
||||
const allocator = doc.arena.allocator();
|
||||
|
||||
try trySkipComments(ctx, allocator);
|
||||
|
||||
doc.xml_decl = try tryParseProlog(ctx, allocator);
|
||||
_ = ctx.eatWs();
|
||||
try trySkipComments(ctx, allocator);
|
||||
|
||||
doc.root = (try tryParseElement(ctx, allocator)) orelse return error.InvalidDocument;
|
||||
_ = ctx.eatWs();
|
||||
try trySkipComments(ctx, allocator);
|
||||
|
||||
if (ctx.peek() != null) return error.InvalidDocument;
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
fn parseAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
|
||||
const quote = try ctx.consume();
|
||||
if (quote != '"' and quote != '\'') return error.UnexpectedCharacter;
|
||||
|
||||
const begin = ctx.offset;
|
||||
|
||||
while (true) {
|
||||
const c = ctx.consume() catch return error.UnclosedValue;
|
||||
if (c == quote) break;
|
||||
}
|
||||
|
||||
const end = ctx.offset - 1;
|
||||
|
||||
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
|
||||
}
|
||||
|
||||
fn parseEqAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 {
|
||||
_ = ctx.eatWs();
|
||||
try ctx.expect('=');
|
||||
_ = ctx.eatWs();
|
||||
|
||||
return try parseAttrValue(ctx, alloc);
|
||||
}
|
||||
|
||||
fn parseNameNoDupe(ctx: *ParseContext) ![]const u8 {
|
||||
// XML's spec on names is very long, so to make this easier
|
||||
// we just take any character that is not special and not whitespace
|
||||
const begin = ctx.offset;
|
||||
|
||||
while (ctx.peek()) |ch| {
|
||||
switch (ch) {
|
||||
' ', '\t', '\n', '\r' => break,
|
||||
'&', '"', '\'', '<', '>', '?', '=', '/' => break,
|
||||
else => _ = ctx.consumeNoEof(),
|
||||
}
|
||||
}
|
||||
|
||||
const end = ctx.offset;
|
||||
if (begin == end) return error.InvalidName;
|
||||
|
||||
return ctx.source[begin..end];
|
||||
}
|
||||
|
||||
fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
|
||||
const begin = ctx.offset;
|
||||
|
||||
while (ctx.peek()) |ch| {
|
||||
switch (ch) {
|
||||
'<' => break,
|
||||
else => _ = ctx.consumeNoEof(),
|
||||
}
|
||||
}
|
||||
|
||||
const end = ctx.offset;
|
||||
if (begin == end) return null;
|
||||
|
||||
return try dupeAndUnescape(alloc, ctx.source[begin..end]);
|
||||
}
|
||||
|
||||
fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content {
|
||||
if (try tryParseCharData(ctx, alloc)) |cd| {
|
||||
return Content{ .CharData = cd };
|
||||
} else if (try tryParseComment(ctx, alloc)) |comment| {
|
||||
return Content{ .Comment = comment };
|
||||
} else if (try tryParseElement(ctx, alloc)) |elem| {
|
||||
return Content{ .Element = elem };
|
||||
} else {
|
||||
return error.UnexpectedCharacter;
|
||||
}
|
||||
}
|
||||
|
||||
fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute {
|
||||
const name = parseNameNoDupe(ctx) catch return null;
|
||||
_ = ctx.eatWs();
|
||||
try ctx.expect('=');
|
||||
_ = ctx.eatWs();
|
||||
const value = try parseAttrValue(ctx, alloc);
|
||||
|
||||
const attr = try alloc.create(Attribute);
|
||||
attr.name = try alloc.dupe(u8, name);
|
||||
attr.value = value;
|
||||
return attr;
|
||||
}
|
||||
|
||||
fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element {
|
||||
const start = ctx.offset;
|
||||
if (!ctx.eat('<')) return null;
|
||||
const tag = parseNameNoDupe(ctx) catch {
|
||||
ctx.offset = start;
|
||||
return null;
|
||||
};
|
||||
|
||||
const element = try alloc.create(Element);
|
||||
element.* = Element.init(try alloc.dupe(u8, tag), alloc);
|
||||
|
||||
while (ctx.eatWs()) {
|
||||
const attr = (try tryParseAttr(ctx, alloc)) orelse break;
|
||||
try element.attributes.append(attr);
|
||||
}
|
||||
|
||||
if (ctx.eatStr("/>")) {
|
||||
return element;
|
||||
}
|
||||
|
||||
try ctx.expect('>');
|
||||
|
||||
while (true) {
|
||||
if (ctx.peek() == null) {
|
||||
return error.UnexpectedEof;
|
||||
} else if (ctx.eatStr("</")) {
|
||||
break;
|
||||
}
|
||||
|
||||
const content = try parseContent(ctx, alloc);
|
||||
try element.children.append(content);
|
||||
}
|
||||
|
||||
const closing_tag = try parseNameNoDupe(ctx);
|
||||
if (!std.mem.eql(u8, tag, closing_tag)) {
|
||||
return error.NonMatchingClosingTag;
|
||||
}
|
||||
|
||||
_ = ctx.eatWs();
|
||||
try ctx.expect('>');
|
||||
return element;
|
||||
}
|
||||
|
||||
test "tryParseElement" {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
const alloc = arena.allocator();
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<= a='b'/>");
|
||||
try testing.expectEqual(@as(?*Element, null), try tryParseElement(&ctx, alloc));
|
||||
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
|
||||
const elem = try tryParseElement(&ctx, alloc);
|
||||
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
||||
|
||||
const size_attr = elem.?.attributes.items[0];
|
||||
try testing.expectEqualSlices(u8, size_attr.name, "size");
|
||||
try testing.expectEqualSlices(u8, size_attr.value, "15");
|
||||
|
||||
const color_attr = elem.?.attributes.items[1];
|
||||
try testing.expectEqualSlices(u8, color_attr.name, "color");
|
||||
try testing.expectEqualSlices(u8, color_attr.value, "green");
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<python>test</python>");
|
||||
const elem = try tryParseElement(&ctx, alloc);
|
||||
try testing.expectEqualSlices(u8, elem.?.tag, "python");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "test");
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
|
||||
const elem = try tryParseElement(&ctx, alloc);
|
||||
try testing.expectEqualSlices(u8, elem.?.tag, "a");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[0].CharData, "b");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[1].Element.tag, "c");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[2].CharData, "d");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[3].Element.tag, "e");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[4].CharData, "f");
|
||||
try testing.expectEqualSlices(u8, elem.?.children.items[5].Comment, "g");
|
||||
}
|
||||
}
|
||||
|
||||
fn tryParseProlog(ctx: *ParseContext, alloc: Allocator) !?*XmlDecl {
|
||||
const start = ctx.offset;
|
||||
if (!ctx.eatStr("<?") or !mem.eql(u8, try parseNameNoDupe(ctx), "xml")) {
|
||||
ctx.offset = start;
|
||||
return null;
|
||||
}
|
||||
|
||||
const decl = try alloc.create(XmlDecl);
|
||||
decl.encoding = null;
|
||||
decl.standalone = null;
|
||||
|
||||
// Version info is mandatory
|
||||
try ctx.expectWs();
|
||||
try ctx.expectStr("version");
|
||||
decl.version = try parseEqAttrValue(ctx, alloc);
|
||||
|
||||
if (ctx.eatWs()) {
|
||||
// Optional encoding and standalone info
|
||||
var require_ws = false;
|
||||
|
||||
if (ctx.eatStr("encoding")) {
|
||||
decl.encoding = try parseEqAttrValue(ctx, alloc);
|
||||
require_ws = true;
|
||||
}
|
||||
|
||||
if (require_ws == ctx.eatWs() and ctx.eatStr("standalone")) {
|
||||
const standalone = try parseEqAttrValue(ctx, alloc);
|
||||
if (std.mem.eql(u8, standalone, "yes")) {
|
||||
decl.standalone = true;
|
||||
} else if (std.mem.eql(u8, standalone, "no")) {
|
||||
decl.standalone = false;
|
||||
} else {
|
||||
return error.InvalidStandaloneValue;
|
||||
}
|
||||
}
|
||||
|
||||
_ = ctx.eatWs();
|
||||
}
|
||||
|
||||
try ctx.expectStr("?>");
|
||||
return decl;
|
||||
}
|
||||
|
||||
test "tryParseProlog" {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
const alloc = arena.allocator();
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<?xmla version='aa'?>");
|
||||
try testing.expectEqual(@as(?*XmlDecl, null), try tryParseProlog(&ctx, alloc));
|
||||
try testing.expectEqual(@as(?u8, '<'), ctx.peek());
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<?xml version='aa'?>");
|
||||
const decl = try tryParseProlog(&ctx, alloc);
|
||||
try testing.expectEqualSlices(u8, "aa", decl.?.version);
|
||||
try testing.expectEqual(@as(?[]const u8, null), decl.?.encoding);
|
||||
try testing.expectEqual(@as(?bool, null), decl.?.standalone);
|
||||
}
|
||||
|
||||
{
|
||||
var ctx = ParseContext.init("<?xml version=\"aa\" encoding = 'bbb' standalone \t = 'yes'?>");
|
||||
const decl = try tryParseProlog(&ctx, alloc);
|
||||
try testing.expectEqualSlices(u8, "aa", decl.?.version);
|
||||
try testing.expectEqualSlices(u8, "bbb", decl.?.encoding.?);
|
||||
try testing.expectEqual(@as(?bool, true), decl.?.standalone.?);
|
||||
}
|
||||
}
|
||||
|
||||
fn trySkipComments(ctx: *ParseContext, alloc: Allocator) !void {
|
||||
while (try tryParseComment(ctx, alloc)) |_| {
|
||||
_ = ctx.eatWs();
|
||||
}
|
||||
}
|
||||
|
||||
fn tryParseComment(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
|
||||
if (!ctx.eatStr("<!--")) return null;
|
||||
|
||||
const begin = ctx.offset;
|
||||
while (!ctx.eatStr("-->")) {
|
||||
_ = ctx.consume() catch return error.UnclosedComment;
|
||||
}
|
||||
|
||||
const end = ctx.offset - "-->".len;
|
||||
return try alloc.dupe(u8, ctx.source[begin..end]);
|
||||
}
|
||||
|
||||
fn unescapeEntity(text: []const u8) !u8 {
|
||||
const EntitySubstition = struct { text: []const u8, replacement: u8 };
|
||||
|
||||
const entities = [_]EntitySubstition{
|
||||
.{ .text = "<", .replacement = '<' },
|
||||
.{ .text = ">", .replacement = '>' },
|
||||
.{ .text = "&", .replacement = '&' },
|
||||
.{ .text = "'", .replacement = '\'' },
|
||||
.{ .text = """, .replacement = '"' },
|
||||
};
|
||||
|
||||
for (entities) |entity| {
|
||||
if (std.mem.eql(u8, text, entity.text)) return entity.replacement;
|
||||
}
|
||||
|
||||
return error.InvalidEntity;
|
||||
}
|
||||
|
||||
fn dupeAndUnescape(alloc: Allocator, text: []const u8) ![]const u8 {
|
||||
const str = try alloc.alloc(u8, text.len);
|
||||
|
||||
var j: usize = 0;
|
||||
var i: usize = 0;
|
||||
while (i < text.len) : (j += 1) {
|
||||
if (text[i] == '&') {
|
||||
const entity_end = 1 + (mem.indexOfScalarPos(u8, text, i, ';') orelse return error.InvalidEntity);
|
||||
str[j] = try unescapeEntity(text[i..entity_end]);
|
||||
i = entity_end;
|
||||
} else {
|
||||
str[j] = text[i];
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return alloc.shrink(str, j);
|
||||
}
|
||||
|
||||
test "dupeAndUnescape" {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
const alloc = arena.allocator();
|
||||
|
||||
try testing.expectEqualSlices(u8, "test", try dupeAndUnescape(alloc, "test"));
|
||||
try testing.expectEqualSlices(u8, "a<b&c>d\"e'f<", try dupeAndUnescape(alloc, "a<b&c>d"e'f<"));
|
||||
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&"));
|
||||
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&&"));
|
||||
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&test;"));
|
||||
try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&boa"));
|
||||
}
|
||||
|
||||
test "Top level comments" {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
const alloc = arena.allocator();
|
||||
|
||||
const doc = try parse(alloc, "<?xml version='aa'?><!--comment--><python color='green'/><!--another comment-->");
|
||||
try testing.expectEqualSlices(u8, "python", doc.root.tag);
|
||||
}
|
Loading…
Reference in New Issue
Block a user