Compare commits
	
		
			5 commits
		
	
	
		
			5bb382bda3
			...
			ca801799fc
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| ca801799fc | |||
| f374df3fa1 | |||
| 744d834cfd | |||
| c9369504fa | |||
| 4606205b82 | 
					 6 changed files with 717 additions and 40 deletions
				
			
		
							
								
								
									
										42
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										42
									
								
								README.md
									
										
									
									
									
								
							|  | @ -2,13 +2,11 @@ | |||
| 
 | ||||
| [](https://drone.lerch.org/api/badges/lobo/aws-sdk-for-zig/) | ||||
| 
 | ||||
| 
 | ||||
| ### NOTE: All tests pass, but credentials currently must be passed through environment | ||||
| 
 | ||||
| This SDK currently supports all AWS services except EC2 and S3. These two | ||||
| services only support XML, and zig 0.8.0 and master both trigger compile | ||||
| errors while incorporating the XML parser. S3 also requires some plumbing | ||||
| tweaks in the signature calculation. Examples of usage are in src/main.zig. | ||||
| services only support XML, and zig 0.9.0 and master both trigger compile | ||||
| errors while incorporating the XML parser in conjunction with a process | ||||
| to fill the types. S3 also requires some plumbing tweaks in the signature | ||||
| calculation. Examples of usage are in src/main.zig. | ||||
| 
 | ||||
| Current executable size for the demo is 953k (90k of which is the AWS PEM file) | ||||
| after compiling with -Drelease-safe and | ||||
|  | @ -43,37 +41,33 @@ supersede all other configuration. Note that an alternative endpoint may | |||
| require passing in a client option to specify a different TLS root certificate | ||||
| (pass null to disable certificate verification). | ||||
| 
 | ||||
| Given that credential handling is still very basic, you may want to look at | ||||
| the [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) if | ||||
| your needs include something more robust. Note that that branch supports | ||||
| x86_64 linux only. | ||||
| The [old branch](https://github.com/elerch/aws-sdk-for-zig/tree/aws-crt) exists | ||||
| for posterity, and supports x86_64 linux. This branch is recommended moving | ||||
| forward. | ||||
| 
 | ||||
| ## Limitations | ||||
| 
 | ||||
| There are many nuances of AWS V4 signature calculation. S3 is not supported | ||||
| because it uses many of these test cases. STS tokens using a session token | ||||
| are not yet implemented, though should be trivial. I have also seen a few | ||||
| service errors caused by discrepancies in signatures, though I don't know yet | ||||
| if this was an issue in the service itself (has not repro'd) or if there | ||||
| is a latent bug. | ||||
| 
 | ||||
| Only environment variable based credentials can be used at the moment. | ||||
| because it uses many of these test cases. WebIdentityToken is not yet | ||||
| implemented. | ||||
| 
 | ||||
| TODO List: | ||||
| 
 | ||||
| * Add option to cache signature keys | ||||
| * Implement credentials provider | ||||
| * Implement jitter/exponential backoff | ||||
| * Implement timeouts and other TODO's in the code | ||||
| * To work around compiler issues, the best option may be to convert from | ||||
|   Xml to json, then parse from there. This will be pursued first. It may need | ||||
|   to wait for zig 0.10.0 when self-hosted compiler is likely to be completed | ||||
|   (zig 0.10.0 eta May 2022). If we need to wait, S3, EC2 and other | ||||
|   restXml protocols will be blocked. | ||||
| * Implement [AWS restXml protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-restxml-protocol.html). | ||||
|   Includes S3. Total service count 4. This may be blocked due to the same issue as EC2. | ||||
| * Implement [AWS EC2 query protocol](https://awslabs.github.io/smithy/1.0/spec/aws/aws-ec2-query-protocol.html). | ||||
|   Includes EC2. Total service count 1. This may be blocked on a compiler bug, | ||||
|   though has not been tested with zig 0.9.0. It may need to wait for zig 0.10.0 | ||||
|   when self-hosted compiler is likely to be completed (zig 0.10.0 eta May 2022) | ||||
|   discovered. More details and llvm ir log can be found in the | ||||
|   though has not been tested with zig 0.9.0. More details and llvm ir log can be found in the | ||||
|   [XML branch](https://git.lerch.org/lobo/aws-sdk-for-zig/src/branch/xml). | ||||
| * Implement sigv4a signing | ||||
| * Implement jitter/exponential backoff | ||||
| * Implement timeouts and other TODO's in the code | ||||
| * Add option to cache signature keys | ||||
| 
 | ||||
| Compiler wishlist/watchlist: | ||||
| 
 | ||||
|  |  | |||
|  | @ -544,12 +544,12 @@ fn generateSimpleTypeFor(_: anytype, type_name: []const u8, writer: anytype) !vo | |||
| } | ||||
| fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, type_type_name: []const u8, writer: anytype, state: GenerationState) anyerror!void { | ||||
|     _ = shape_id; | ||||
|     const Mapping = struct { snake: []const u8, json: []const u8 }; | ||||
|     var json_field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len); | ||||
|     const Mapping = struct { snake: []const u8, original: []const u8 }; | ||||
|     var field_name_mappings = try std.ArrayList(Mapping).initCapacity(state.allocator, members.len); | ||||
|     defer { | ||||
|         for (json_field_name_mappings.items) |mapping| | ||||
|         for (field_name_mappings.items) |mapping| | ||||
|             state.allocator.free(mapping.snake); | ||||
|         json_field_name_mappings.deinit(); | ||||
|         field_name_mappings.deinit(); | ||||
|     } | ||||
|     // There is an httpQueryParams trait as well, but nobody is using it. API GW | ||||
|     // pretends to, but it's an empty map | ||||
|  | @ -591,15 +591,19 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty | |||
|             switch (trait) { | ||||
|                 .json_name => { | ||||
|                     found_name_trait = true; | ||||
|                     json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.json_name }); | ||||
|                     field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.json_name }); | ||||
|                 }, | ||||
|                 .http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_query }), | ||||
|                 .http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = trait.http_header }), | ||||
|                 .xml_name => { | ||||
|                     found_name_trait = true; | ||||
|                     field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.xml_name }); | ||||
|                 }, | ||||
|                 .http_query => http_query_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_query }), | ||||
|                 .http_header => http_header_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = trait.http_header }), | ||||
|                 else => {}, | ||||
|             } | ||||
|         } | ||||
|         if (!found_name_trait) | ||||
|             json_field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .json = member.name }); | ||||
|             field_name_mappings.appendAssumeCapacity(.{ .snake = try state.allocator.dupe(u8, snake_case_member), .original = member.name }); | ||||
|         defer state.allocator.free(snake_case_member); | ||||
|         try outputIndent(child_state, writer); | ||||
|         const member_name = avoidReserved(snake_case_member); | ||||
|  | @ -637,11 +641,11 @@ fn generateComplexTypeFor(shape_id: []const u8, members: []smithy.TypeMember, ty | |||
|     // | ||||
|     try writer.writeByte('\n'); | ||||
|     try outputIndent(child_state, writer); | ||||
|     _ = try writer.write("pub fn jsonFieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n"); | ||||
|     _ = try writer.write("pub fn fieldNameFor(_: @This(), comptime field_name: []const u8) []const u8 {\n"); | ||||
|     var grandchild_state = child_state; | ||||
|     grandchild_state.indent_level += 1; | ||||
|     // We need to force output here because we're referencing the field in the return statement below | ||||
|     try writeMappings(grandchild_state, "", "mappings", json_field_name_mappings, true, writer); | ||||
|     try writeMappings(grandchild_state, "", "mappings", field_name_mappings, true, writer); | ||||
|     try outputIndent(grandchild_state, writer); | ||||
|     _ = try writer.write("return @field(mappings, field_name);\n"); | ||||
|     try outputIndent(child_state, writer); | ||||
|  | @ -667,7 +671,7 @@ fn writeStringify(state: GenerationState, fields: [][]const u8, writer: anytype) | |||
|             try outputIndent(child_state, writer); | ||||
|             try writer.print("if (std.mem.eql(u8, \"{s}\", field_name))\n", .{field}); | ||||
|             try outputIndent(return_state, writer); | ||||
|             try writer.print("return try serializeMap(self.{s}, self.jsonFieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field }); | ||||
|             try writer.print("return try serializeMap(self.{s}, self.fieldNameFor(\"{s}\"), options, out_stream);\n", .{ field, field }); | ||||
|         } | ||||
|         try outputIndent(child_state, writer); | ||||
|         _ = try writer.write("return false;\n"); | ||||
|  | @ -690,7 +694,7 @@ fn writeMappings(state: GenerationState, @"pub": []const u8, mapping_name: []con | |||
|     child_state.indent_level += 1; | ||||
|     for (mappings.items) |mapping| { | ||||
|         try outputIndent(child_state, writer); | ||||
|         try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.json }); | ||||
|         try writer.print(".{s} = \"{s}\",\n", .{ avoidReserved(mapping.snake), mapping.original }); | ||||
|     } | ||||
|     try outputIndent(state, writer); | ||||
|     _ = try writer.write("};\n"); | ||||
|  |  | |||
|  | @ -96,6 +96,7 @@ pub const TraitType = enum { | |||
|     http_label, | ||||
|     http_query, | ||||
|     json_name, | ||||
|     xml_name, | ||||
|     required, | ||||
|     documentation, | ||||
|     pattern, | ||||
|  | @ -118,6 +119,7 @@ pub const Trait = union(TraitType) { | |||
|     aws_protocol: AwsProtocol, | ||||
|     ec2_query_name: []const u8, | ||||
|     json_name: []const u8, | ||||
|     xml_name: []const u8, | ||||
|     http: struct { | ||||
|         method: []const u8, | ||||
|         uri: []const u8, | ||||
|  | @ -565,6 +567,8 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra | |||
|     } | ||||
|     if (std.mem.eql(u8, trait_type, "smithy.api#jsonName")) | ||||
|         return Trait{ .json_name = value.String }; | ||||
|     if (std.mem.eql(u8, trait_type, "smithy.api#xmlName")) | ||||
|         return Trait{ .xml_name = value.String }; | ||||
|     if (std.mem.eql(u8, trait_type, "smithy.api#httpQuery")) | ||||
|         return Trait{ .http_query = value.String }; | ||||
|     if (std.mem.eql(u8, trait_type, "smithy.api#httpHeader")) | ||||
|  | @ -617,7 +621,6 @@ fn getTrait(trait_type: []const u8, value: std.json.Value) SmithyParseError!?Tra | |||
|         \\smithy.api#timestampFormat | ||||
|         \\smithy.api#xmlAttribute | ||||
|         \\smithy.api#xmlFlattened | ||||
|         \\smithy.api#xmlName | ||||
|         \\smithy.waiters#waitable | ||||
|     ; | ||||
|     var iterator = std.mem.split(u8, list, "\n"); | ||||
|  |  | |||
|  | @ -432,7 +432,7 @@ fn buildPath(allocator: std.mem.Allocator, raw_uri: []const u8, comptime ActionR | |||
|                 in_var = false; | ||||
|                 const replacement_var = raw_uri[start..inx]; | ||||
|                 inline for (std.meta.fields(ActionRequest)) |field| { | ||||
|                     if (std.mem.eql(u8, request.jsonFieldNameFor(field.name), replacement_var)) { | ||||
|                     if (std.mem.eql(u8, request.fieldNameFor(field.name), replacement_var)) { | ||||
|                         var replacement_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len); | ||||
|                         defer replacement_buffer.deinit(); | ||||
|                         var encoded_buffer = try std.ArrayList(u8).initCapacity(allocator, raw_uri.len); | ||||
|  |  | |||
|  | @ -2871,8 +2871,8 @@ pub fn stringify( | |||
|                     field_written = try value.jsonStringifyField(Field.name, child_options, out_stream); | ||||
| 
 | ||||
|                 if (!field_written) { | ||||
|                     if (comptime std.meta.trait.hasFn("jsonFieldNameFor")(T)) { | ||||
|                         const name = value.jsonFieldNameFor(Field.name); | ||||
|                     if (comptime std.meta.trait.hasFn("fieldNameFor")(T)) { | ||||
|                         const name = value.fieldNameFor(Field.name); | ||||
|                         try stringify(name, options, out_stream); | ||||
|                     } else { | ||||
|                         try stringify(Field.name, options, out_stream); | ||||
|  |  | |||
							
								
								
									
										676
									
								
								src/xml.zig
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										676
									
								
								src/xml.zig
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,676 @@ | |||
| // File sourced from: | ||||
| // https://github.com/Snektron/vulkan-zig/blob/797ae8af88e84753af9640266de61a985b76b580/generator/xml.zig | ||||
| const std = @import("std"); | ||||
| const mem = std.mem; | ||||
| const testing = std.testing; | ||||
| const Allocator = mem.Allocator; | ||||
| const ArenaAllocator = std.heap.ArenaAllocator; | ||||
| const ArrayList = std.ArrayList; | ||||
| 
 | ||||
| pub const Attribute = struct { | ||||
|     name: []const u8, | ||||
|     value: []const u8, | ||||
| }; | ||||
| 
 | ||||
| pub const Content = union(enum) { | ||||
|     CharData: []const u8, | ||||
|     Comment: []const u8, | ||||
|     Element: *Element, | ||||
| }; | ||||
| 
 | ||||
| pub const Element = struct { | ||||
|     pub const AttributeList = ArrayList(*Attribute); | ||||
|     pub const ContentList = ArrayList(Content); | ||||
| 
 | ||||
|     tag: []const u8, | ||||
|     attributes: AttributeList, | ||||
|     children: ContentList, | ||||
| 
 | ||||
|     fn init(tag: []const u8, alloc: Allocator) Element { | ||||
|         return .{ | ||||
|             .tag = tag, | ||||
|             .attributes = AttributeList.init(alloc), | ||||
|             .children = ContentList.init(alloc), | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     pub fn getAttribute(self: *Element, attrib_name: []const u8) ?[]const u8 { | ||||
|         for (self.attributes.items) |child| { | ||||
|             if (mem.eql(u8, child.name, attrib_name)) { | ||||
|                 return child.value; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         return null; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the text of the first child element named `child_tag`. | ||||
|     /// Yields null when no such child exists, when the lookup itself fails, | ||||
|     /// or when that child's content is anything other than exactly one | ||||
|     /// CharData node. | ||||
|     pub fn getCharData(self: *Element, child_tag: []const u8) ?[]const u8 { | ||||
|         // findChildByTag returns an error union wrapping an optional, so the | ||||
|         // error must be unwrapped before `orelse` can be applied. A failed | ||||
|         // lookup is reported as "no data" to keep this signature error-free. | ||||
|         const child = (self.findChildByTag(child_tag) catch return null) orelse return null; | ||||
|         if (child.children.items.len != 1) { | ||||
|             return null; | ||||
|         } | ||||
| 
 | ||||
|         return switch (child.children.items[0]) { | ||||
|             .CharData => |char_data| char_data, | ||||
|             else => null, | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     pub fn iterator(self: *Element) ChildIterator { | ||||
|         return .{ | ||||
|             .items = self.children.items, | ||||
|             .i = 0, | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     pub fn elements(self: *Element) ChildElementIterator { | ||||
|         return .{ | ||||
|             .inner = self.iterator(), | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the first direct child element accepted by the tag iterator | ||||
|     /// for `tag`, or null when no child is accepted. Any error raised by the | ||||
|     /// iterator's predicate is propagated to the caller. | ||||
|     pub fn findChildByTag(self: *Element, tag: []const u8) !?*Element { | ||||
|         // Bind the iterator to a local first: `next` takes a mutable pointer, | ||||
|         // and newer compilers refuse to form one from a temporary value. | ||||
|         var it = self.findChildrenByTag(tag); | ||||
|         return try it.next(); | ||||
|     } | ||||
| 
 | ||||
|     pub fn findChildrenByTag(self: *Element, tag: []const u8) FindChildrenByTagIterator { | ||||
|         return .{ | ||||
|             .inner = self.elements(), | ||||
|             .tag = tag, | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     pub const ChildIterator = struct { | ||||
|         items: []Content, | ||||
|         i: usize, | ||||
| 
 | ||||
|         pub fn next(self: *ChildIterator) ?*Content { | ||||
|             if (self.i < self.items.len) { | ||||
|                 self.i += 1; | ||||
|                 return &self.items[self.i - 1]; | ||||
|             } | ||||
| 
 | ||||
|             return null; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     pub const ChildElementIterator = struct { | ||||
|         inner: ChildIterator, | ||||
| 
 | ||||
|         pub fn next(self: *ChildElementIterator) ?*Element { | ||||
|             while (self.inner.next()) |child| { | ||||
|                 if (child.* != .Element) { | ||||
|                     continue; | ||||
|                 } | ||||
| 
 | ||||
|                 return child.*.Element; | ||||
|             } | ||||
| 
 | ||||
|             return null; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     fn strictEqual(a: []const u8, b: []const u8, _: PredicateOptions) !bool { | ||||
|         return mem.eql(u8, a, b); | ||||
|     } | ||||
|     pub const FindChildrenByTagIterator = struct { | ||||
|         inner: ChildElementIterator, | ||||
|         tag: []const u8, | ||||
|         predicate: fn (a: []const u8, b: []const u8, options: PredicateOptions) anyerror!bool = strictEqual, | ||||
|         predicate_options: PredicateOptions = .{}, | ||||
| 
 | ||||
|         pub fn next(self: *FindChildrenByTagIterator) !?*Element { | ||||
|             while (self.inner.next()) |child| { | ||||
|                 if (!try self.predicate(child.tag, self.tag, self.predicate_options)) { | ||||
|                     continue; | ||||
|                 } | ||||
| 
 | ||||
|                 return child; | ||||
|             } | ||||
| 
 | ||||
|             return null; | ||||
|         } | ||||
|     }; | ||||
| }; | ||||
| 
 | ||||
| pub const PredicateOptions = struct { | ||||
|     allocator: ?std.mem.Allocator = null, | ||||
| }; | ||||
| pub const XmlDecl = struct { | ||||
|     version: []const u8, | ||||
|     encoding: ?[]const u8, | ||||
|     standalone: ?bool, | ||||
| }; | ||||
| 
 | ||||
| pub const Document = struct { | ||||
|     arena: ArenaAllocator, | ||||
|     xml_decl: ?*XmlDecl, | ||||
|     root: *Element, | ||||
| 
 | ||||
|     pub fn deinit(self: Document) void { | ||||
|         var arena = self.arena; // Copy to stack so self can be taken by value. | ||||
|         arena.deinit(); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| const ParseContext = struct { | ||||
|     source: []const u8, | ||||
|     offset: usize, | ||||
|     line: usize, | ||||
|     column: usize, | ||||
| 
 | ||||
|     fn init(source: []const u8) ParseContext { | ||||
|         return .{ | ||||
|             .source = source, | ||||
|             .offset = 0, | ||||
|             .line = 0, | ||||
|             .column = 0, | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     fn peek(self: *ParseContext) ?u8 { | ||||
|         return if (self.offset < self.source.len) self.source[self.offset] else null; | ||||
|     } | ||||
| 
 | ||||
|     fn consume(self: *ParseContext) !u8 { | ||||
|         if (self.offset < self.source.len) { | ||||
|             return self.consumeNoEof(); | ||||
|         } | ||||
| 
 | ||||
|         return error.UnexpectedEof; | ||||
|     } | ||||
| 
 | ||||
|     fn consumeNoEof(self: *ParseContext) u8 { | ||||
|         std.debug.assert(self.offset < self.source.len); | ||||
|         const c = self.source[self.offset]; | ||||
|         self.offset += 1; | ||||
| 
 | ||||
|         if (c == '\n') { | ||||
|             self.line += 1; | ||||
|             self.column = 0; | ||||
|         } else { | ||||
|             self.column += 1; | ||||
|         } | ||||
| 
 | ||||
|         return c; | ||||
|     } | ||||
| 
 | ||||
|     fn eat(self: *ParseContext, char: u8) bool { | ||||
|         self.expect(char) catch return false; | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     fn expect(self: *ParseContext, expected: u8) !void { | ||||
|         if (self.peek()) |actual| { | ||||
|             if (expected != actual) { | ||||
|                 return error.UnexpectedCharacter; | ||||
|             } | ||||
| 
 | ||||
|             _ = self.consumeNoEof(); | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         return error.UnexpectedEof; | ||||
|     } | ||||
| 
 | ||||
|     fn eatStr(self: *ParseContext, text: []const u8) bool { | ||||
|         self.expectStr(text) catch return false; | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     fn expectStr(self: *ParseContext, text: []const u8) !void { | ||||
|         if (self.source.len < self.offset + text.len) { | ||||
|             return error.UnexpectedEof; | ||||
|         } else if (std.mem.startsWith(u8, self.source[self.offset..], text)) { | ||||
|             var i: usize = 0; | ||||
|             while (i < text.len) : (i += 1) { | ||||
|                 _ = self.consumeNoEof(); | ||||
|             } | ||||
| 
 | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         return error.UnexpectedCharacter; | ||||
|     } | ||||
| 
 | ||||
|     fn eatWs(self: *ParseContext) bool { | ||||
|         var ws = false; | ||||
| 
 | ||||
|         while (self.peek()) |ch| { | ||||
|             switch (ch) { | ||||
|                 ' ', '\t', '\n', '\r' => { | ||||
|                     ws = true; | ||||
|                     _ = self.consumeNoEof(); | ||||
|                 }, | ||||
|                 else => break, | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         return ws; | ||||
|     } | ||||
| 
 | ||||
|     fn expectWs(self: *ParseContext) !void { | ||||
|         if (!self.eatWs()) return error.UnexpectedCharacter; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the slice of `source` holding the line that contains the | ||||
|     /// current offset, without its terminating '\n'. | ||||
|     fn currentLine(self: ParseContext) []const u8 { | ||||
|         // The line starts just after the last newline before the offset, or | ||||
|         // at the start of the source when there is none. | ||||
|         const begin = if (mem.lastIndexOfScalar(u8, self.source[0..self.offset], '\n')) |prev_nl| | ||||
|             prev_nl + 1 | ||||
|         else | ||||
|             0; | ||||
| 
 | ||||
|         // It ends at the next newline at or after the offset, or at the end | ||||
|         // of the source. | ||||
|         const end = mem.indexOfScalarPos(u8, self.source, self.offset, '\n') orelse self.source.len; | ||||
|         return self.source[begin..end]; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| test "ParseContext" { | ||||
|     { | ||||
|         var ctx = ParseContext.init("I like pythons"); | ||||
|         try testing.expectEqual(@as(?u8, 'I'), ctx.peek()); | ||||
|         try testing.expectEqual(@as(u8, 'I'), ctx.consumeNoEof()); | ||||
|         try testing.expectEqual(@as(?u8, ' '), ctx.peek()); | ||||
|         try testing.expectEqual(@as(u8, ' '), try ctx.consume()); | ||||
| 
 | ||||
|         try testing.expect(ctx.eat('l')); | ||||
|         try testing.expectEqual(@as(?u8, 'i'), ctx.peek()); | ||||
|         try testing.expectEqual(false, ctx.eat('a')); | ||||
|         try testing.expectEqual(@as(?u8, 'i'), ctx.peek()); | ||||
| 
 | ||||
|         try ctx.expect('i'); | ||||
|         try testing.expectEqual(@as(?u8, 'k'), ctx.peek()); | ||||
|         try testing.expectError(error.UnexpectedCharacter, ctx.expect('a')); | ||||
|         try testing.expectEqual(@as(?u8, 'k'), ctx.peek()); | ||||
| 
 | ||||
|         try testing.expect(ctx.eatStr("ke")); | ||||
|         try testing.expectEqual(@as(?u8, ' '), ctx.peek()); | ||||
| 
 | ||||
|         try testing.expect(ctx.eatWs()); | ||||
|         try testing.expectEqual(@as(?u8, 'p'), ctx.peek()); | ||||
|         try testing.expectEqual(false, ctx.eatWs()); | ||||
|         try testing.expectEqual(@as(?u8, 'p'), ctx.peek()); | ||||
| 
 | ||||
|         try testing.expectEqual(false, ctx.eatStr("aaaaaaaaa")); | ||||
|         try testing.expectEqual(@as(?u8, 'p'), ctx.peek()); | ||||
| 
 | ||||
|         try testing.expectError(error.UnexpectedEof, ctx.expectStr("aaaaaaaaa")); | ||||
|         try testing.expectEqual(@as(?u8, 'p'), ctx.peek()); | ||||
|         try testing.expectError(error.UnexpectedCharacter, ctx.expectStr("pytn")); | ||||
|         try testing.expectEqual(@as(?u8, 'p'), ctx.peek()); | ||||
|         try ctx.expectStr("python"); | ||||
|         try testing.expectEqual(@as(?u8, 's'), ctx.peek()); | ||||
|     } | ||||
| 
 | ||||
|     { | ||||
|         var ctx = ParseContext.init(""); | ||||
|         try testing.expectEqual(ctx.peek(), null); | ||||
|         try testing.expectError(error.UnexpectedEof, ctx.consume()); | ||||
|         try testing.expectEqual(ctx.eat('p'), false); | ||||
|         try testing.expectError(error.UnexpectedEof, ctx.expect('p')); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub const ParseError = error{ | ||||
|     IllegalCharacter, | ||||
|     UnexpectedEof, | ||||
|     UnexpectedCharacter, | ||||
|     UnclosedValue, | ||||
|     UnclosedComment, | ||||
|     InvalidName, | ||||
|     InvalidEntity, | ||||
|     InvalidStandaloneValue, | ||||
|     NonMatchingClosingTag, | ||||
|     InvalidDocument, | ||||
|     OutOfMemory, | ||||
| }; | ||||
| 
 | ||||
| pub fn parse(backing_allocator: Allocator, source: []const u8) !Document { | ||||
|     var ctx = ParseContext.init(source); | ||||
|     return try parseDocument(&ctx, backing_allocator); | ||||
| } | ||||
| 
 | ||||
| fn parseDocument(ctx: *ParseContext, backing_allocator: Allocator) !Document { | ||||
|     var doc = Document{ | ||||
|         .arena = ArenaAllocator.init(backing_allocator), | ||||
|         .xml_decl = null, | ||||
|         .root = undefined, | ||||
|     }; | ||||
| 
 | ||||
|     errdefer doc.deinit(); | ||||
| 
 | ||||
|     const allocator = doc.arena.allocator(); | ||||
| 
 | ||||
|     try trySkipComments(ctx, allocator); | ||||
| 
 | ||||
|     doc.xml_decl = try tryParseProlog(ctx, allocator); | ||||
|     _ = ctx.eatWs(); | ||||
|     try trySkipComments(ctx, allocator); | ||||
| 
 | ||||
|     doc.root = (try tryParseElement(ctx, allocator)) orelse return error.InvalidDocument; | ||||
|     _ = ctx.eatWs(); | ||||
|     try trySkipComments(ctx, allocator); | ||||
| 
 | ||||
|     if (ctx.peek() != null) return error.InvalidDocument; | ||||
| 
 | ||||
|     return doc; | ||||
| } | ||||
| 
 | ||||
| fn parseAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 { | ||||
|     const quote = try ctx.consume(); | ||||
|     if (quote != '"' and quote != '\'') return error.UnexpectedCharacter; | ||||
| 
 | ||||
|     const begin = ctx.offset; | ||||
| 
 | ||||
|     while (true) { | ||||
|         const c = ctx.consume() catch return error.UnclosedValue; | ||||
|         if (c == quote) break; | ||||
|     } | ||||
| 
 | ||||
|     const end = ctx.offset - 1; | ||||
| 
 | ||||
|     return try dupeAndUnescape(alloc, ctx.source[begin..end]); | ||||
| } | ||||
| 
 | ||||
| fn parseEqAttrValue(ctx: *ParseContext, alloc: Allocator) ![]const u8 { | ||||
|     _ = ctx.eatWs(); | ||||
|     try ctx.expect('='); | ||||
|     _ = ctx.eatWs(); | ||||
| 
 | ||||
|     return try parseAttrValue(ctx, alloc); | ||||
| } | ||||
| 
 | ||||
| fn parseNameNoDupe(ctx: *ParseContext) ![]const u8 { | ||||
|     // XML's spec on names is very long, so to make this easier | ||||
|     // we just take any character that is not special and not whitespace | ||||
|     const begin = ctx.offset; | ||||
| 
 | ||||
|     while (ctx.peek()) |ch| { | ||||
|         switch (ch) { | ||||
|             ' ', '\t', '\n', '\r' => break, | ||||
|             '&', '"', '\'', '<', '>', '?', '=', '/' => break, | ||||
|             else => _ = ctx.consumeNoEof(), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     const end = ctx.offset; | ||||
|     if (begin == end) return error.InvalidName; | ||||
| 
 | ||||
|     return ctx.source[begin..end]; | ||||
| } | ||||
| 
 | ||||
| fn tryParseCharData(ctx: *ParseContext, alloc: Allocator) !?[]const u8 { | ||||
|     const begin = ctx.offset; | ||||
| 
 | ||||
|     while (ctx.peek()) |ch| { | ||||
|         switch (ch) { | ||||
|             '<' => break, | ||||
|             else => _ = ctx.consumeNoEof(), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     const end = ctx.offset; | ||||
|     if (begin == end) return null; | ||||
| 
 | ||||
|     return try dupeAndUnescape(alloc, ctx.source[begin..end]); | ||||
| } | ||||
| 
 | ||||
| fn parseContent(ctx: *ParseContext, alloc: Allocator) ParseError!Content { | ||||
|     if (try tryParseCharData(ctx, alloc)) |cd| { | ||||
|         return Content{ .CharData = cd }; | ||||
|     } else if (try tryParseComment(ctx, alloc)) |comment| { | ||||
|         return Content{ .Comment = comment }; | ||||
|     } else if (try tryParseElement(ctx, alloc)) |elem| { | ||||
|         return Content{ .Element = elem }; | ||||
|     } else { | ||||
|         return error.UnexpectedCharacter; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn tryParseAttr(ctx: *ParseContext, alloc: Allocator) !?*Attribute { | ||||
|     const name = parseNameNoDupe(ctx) catch return null; | ||||
|     _ = ctx.eatWs(); | ||||
|     try ctx.expect('='); | ||||
|     _ = ctx.eatWs(); | ||||
|     const value = try parseAttrValue(ctx, alloc); | ||||
| 
 | ||||
|     const attr = try alloc.create(Attribute); | ||||
|     attr.name = try alloc.dupe(u8, name); | ||||
|     attr.value = value; | ||||
|     return attr; | ||||
| } | ||||
| 
 | ||||
| /// Tries to parse one element at the current position: start tag, attributes, | ||||
| /// content and matching end tag (or a self-closing `/>`). | ||||
| /// | ||||
| /// Returns null, with `ctx` left exactly as it was on entry, when the input | ||||
| /// does not begin with `<` followed by a name. The element, its tag copy, its | ||||
| /// attributes and its children are all allocated from `alloc`. | ||||
| /// | ||||
| /// Fails with error.NonMatchingClosingTag when the end tag name differs from | ||||
| /// the start tag name, and with error.UnexpectedEof when the input ends before | ||||
| /// the element is closed. | ||||
| fn tryParseElement(ctx: *ParseContext, alloc: Allocator) !?*Element { | ||||
|     // Snapshot the whole context rather than only the offset, so that backing | ||||
|     // out also undoes the line/column bookkeeping done while consuming '<'. | ||||
|     const saved = ctx.*; | ||||
|     if (!ctx.eat('<')) return null; | ||||
|     const tag = parseNameNoDupe(ctx) catch { | ||||
|         ctx.* = saved; | ||||
|         return null; | ||||
|     }; | ||||
| 
 | ||||
|     const element = try alloc.create(Element); | ||||
|     element.* = Element.init(try alloc.dupe(u8, tag), alloc); | ||||
| 
 | ||||
|     // Attributes must each be preceded by whitespace; stop at the first | ||||
|     // position that does not start an attribute name. | ||||
|     while (ctx.eatWs()) { | ||||
|         const attr = (try tryParseAttr(ctx, alloc)) orelse break; | ||||
|         try element.attributes.append(attr); | ||||
|     } | ||||
| 
 | ||||
|     // Self-closing element: no content and no end tag to match. | ||||
|     if (ctx.eatStr("/>")) { | ||||
|         return element; | ||||
|     } | ||||
| 
 | ||||
|     try ctx.expect('>'); | ||||
| 
 | ||||
|     // Collect content items until the start of an end tag is seen. | ||||
|     while (true) { | ||||
|         if (ctx.peek() == null) { | ||||
|             return error.UnexpectedEof; | ||||
|         } else if (ctx.eatStr("</")) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         const content = try parseContent(ctx, alloc); | ||||
|         try element.children.append(content); | ||||
|     } | ||||
| 
 | ||||
|     const closing_tag = try parseNameNoDupe(ctx); | ||||
|     if (!std.mem.eql(u8, tag, closing_tag)) { | ||||
|         return error.NonMatchingClosingTag; | ||||
|     } | ||||
| 
 | ||||
|     _ = ctx.eatWs(); | ||||
|     try ctx.expect('>'); | ||||
|     return element; | ||||
| } | ||||
| 
 | ||||
// Exercises tryParseElement: rejection with cursor restore, attributes on a
// self-closing tag, plain text content, and mixed child content.
test "tryParseElement" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // '<' without a tag name: nothing is parsed and the cursor is back on '<'.
    {
        var ctx = ParseContext.init("<= a='b'/>");
        const parsed = try tryParseElement(&ctx, alloc);
        try testing.expectEqual(@as(?*Element, null), parsed);
        try testing.expectEqual(@as(?u8, '<'), ctx.peek());
    }

    // Self-closing tag with single- and double-quoted attribute values.
    {
        var ctx = ParseContext.init("<python size='15' color = \"green\"/>");
        const python = (try tryParseElement(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, python.tag, "python");

        const attrs = python.attributes.items;
        try testing.expectEqualSlices(u8, attrs[0].name, "size");
        try testing.expectEqualSlices(u8, attrs[0].value, "15");

        try testing.expectEqualSlices(u8, attrs[1].name, "color");
        try testing.expectEqualSlices(u8, attrs[1].value, "green");
    }

    // Element with a single text child.
    {
        var ctx = ParseContext.init("<python>test</python>");
        const python = (try tryParseElement(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, python.tag, "python");
        try testing.expectEqualSlices(u8, python.children.items[0].CharData, "test");
    }

    // Text, nested elements and a comment interleaved, in document order.
    {
        var ctx = ParseContext.init("<a>b<c/>d<e/>f<!--g--></a>");
        const root = (try tryParseElement(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, root.tag, "a");

        const kids = root.children.items;
        try testing.expectEqualSlices(u8, kids[0].CharData, "b");
        try testing.expectEqualSlices(u8, kids[1].Element.tag, "c");
        try testing.expectEqualSlices(u8, kids[2].CharData, "d");
        try testing.expectEqualSlices(u8, kids[3].Element.tag, "e");
        try testing.expectEqualSlices(u8, kids[4].CharData, "f");
        try testing.expectEqualSlices(u8, kids[5].Comment, "g");
    }
}
| 
 | ||||
/// Attempts to parse an XML declaration (`<?xml version=... ?>`) at the
/// current position.
///
/// Returns null, with the cursor restored, when the input does not start with
/// "<?" or the name that follows is not exactly "xml". A failure to read a
/// name after "<?" is propagated as an error.
///
/// `version` is mandatory; `encoding` and `standalone` are optional and stay
/// null when absent. `standalone` only accepts "yes" or "no"; anything else
/// yields `error.InvalidStandaloneValue`. The declaration and its strings are
/// allocated with `alloc`.
fn tryParseProlog(ctx: *ParseContext, alloc: Allocator) !?*XmlDecl {
    const rewind_to = ctx.offset;
    const is_xml_decl = ctx.eatStr("<?") and mem.eql(u8, try parseNameNoDupe(ctx), "xml");
    if (!is_xml_decl) {
        ctx.offset = rewind_to;
        return null;
    }

    const decl = try alloc.create(XmlDecl);
    decl.encoding = null;
    decl.standalone = null;

    // The version attribute must come first and cannot be omitted.
    try ctx.expectWs();
    try ctx.expectStr("version");
    decl.version = try parseEqAttrValue(ctx, alloc);

    if (ctx.eatWs()) {
        // Encoding, if present, comes directly after the version.
        const has_encoding = ctx.eatStr("encoding");
        if (has_encoding) {
            decl.encoding = try parseEqAttrValue(ctx, alloc);
        }

        // After an encoding, whitespace must separate it from "standalone".
        // Without one, the whitespace was already consumed above, so none
        // should remain here.
        const separator_ok = has_encoding == ctx.eatWs();
        if (separator_ok and ctx.eatStr("standalone")) {
            const raw = try parseEqAttrValue(ctx, alloc);
            decl.standalone = if (std.mem.eql(u8, raw, "yes"))
                true
            else if (std.mem.eql(u8, raw, "no"))
                false
            else
                return error.InvalidStandaloneValue;
        }

        // Trailing whitespace before "?>" is allowed.
        _ = ctx.eatWs();
    }

    try ctx.expectStr("?>");
    return decl;
}
| 
 | ||||
// Exercises tryParseProlog: a non-"xml" target is rejected with the cursor
// restored, a minimal declaration, and a declaration with every field set.
test "tryParseProlog" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // "xmla" is not "xml": no declaration, cursor back on '<'.
    {
        var ctx = ParseContext.init("<?xmla version='aa'?>");
        const parsed = try tryParseProlog(&ctx, alloc);
        try testing.expectEqual(@as(?*XmlDecl, null), parsed);
        try testing.expectEqual(@as(?u8, '<'), ctx.peek());
    }

    // Version only: the optional fields remain null.
    {
        var ctx = ParseContext.init("<?xml version='aa'?>");
        const prolog = (try tryParseProlog(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, "aa", prolog.version);
        try testing.expectEqual(@as(?[]const u8, null), prolog.encoding);
        try testing.expectEqual(@as(?bool, null), prolog.standalone);
    }

    // All three attributes, with irregular whitespace around '='.
    {
        var ctx = ParseContext.init("<?xml version=\"aa\" encoding = 'bbb' standalone   \t =   'yes'?>");
        const prolog = (try tryParseProlog(&ctx, alloc)).?;
        try testing.expectEqualSlices(u8, "aa", prolog.version);
        try testing.expectEqualSlices(u8, "bbb", prolog.encoding.?);
        try testing.expectEqual(@as(?bool, true), prolog.standalone.?);
    }
}
| 
 | ||||
/// Consumes any run of comments at the current position, together with the
/// whitespace that follows each one. The comment text is parsed (and
/// allocated with `alloc`) but discarded.
fn trySkipComments(ctx: *ParseContext, alloc: Allocator) !void {
    while (true) {
        // Stop as soon as the input no longer starts with a comment.
        _ = (try tryParseComment(ctx, alloc)) orelse return;
        _ = ctx.eatWs();
    }
}
| 
 | ||||
/// Attempts to parse a comment (`<!-- ... -->`) at the current position.
///
/// Returns null when the input does not start with "<!--". Otherwise returns
/// a copy, allocated with `alloc`, of the text between the delimiters, or
/// `error.UnclosedComment` if the input ends before "-->" is found.
fn tryParseComment(ctx: *ParseContext, alloc: Allocator) !?[]const u8 {
    const terminator = "-->";

    if (!ctx.eatStr("<!--")) return null;
    const body_start = ctx.offset;

    // Advance one byte at a time until the terminator has been consumed.
    while (true) {
        if (ctx.eatStr(terminator)) break;
        _ = ctx.consume() catch return error.UnclosedComment;
    }

    // The cursor now sits just past the terminator; exclude it from the body.
    const body_end = ctx.offset - terminator.len;
    return try alloc.dupe(u8, ctx.source[body_start..body_end]);
}
| 
 | ||||
/// Maps one of the five predefined XML entity references to the single
/// character it stands for.
///
/// `text` must be the complete reference, including the leading '&' and the
/// trailing ';' (for example "&lt;"). Any other input yields
/// `error.InvalidEntity`.
fn unescapeEntity(text: []const u8) !u8 {
    const EntitySubstitution = struct { text: []const u8, replacement: u8 };

    // Keys are the escaped forms exactly as they appear in the document.
    const entities = [_]EntitySubstitution{
        .{ .text = "&lt;", .replacement = '<' },
        .{ .text = "&gt;", .replacement = '>' },
        .{ .text = "&amp;", .replacement = '&' },
        .{ .text = "&apos;", .replacement = '\'' },
        .{ .text = "&quot;", .replacement = '"' },
    };

    for (entities) |entity| {
        if (std.mem.eql(u8, text, entity.text)) return entity.replacement;
    }

    return error.InvalidEntity;
}
| 
 | ||||
/// Returns a copy of `text`, allocated with `alloc`, in which every entity
/// reference (`&...;`) has been replaced by the character it denotes.
///
/// Returns `error.InvalidEntity` when an '&' has no terminating ';' or the
/// reference is not recognised by `unescapeEntity`. On error the intermediate
/// buffer is released, so nothing is leaked.
fn dupeAndUnescape(alloc: Allocator, text: []const u8) ![]const u8 {
    // Every reference collapses to a single byte, so the output is never
    // longer than the input.
    const str = try alloc.alloc(u8, text.len);
    // Free the scratch buffer if an invalid entity aborts the copy.
    errdefer alloc.free(str);

    var j: usize = 0; // write position in `str`
    var i: usize = 0; // read position in `text`
    while (i < text.len) : (j += 1) {
        if (text[i] == '&') {
            // The reference spans from '&' up to and including the next ';'.
            const entity_end = 1 + (mem.indexOfScalarPos(u8, text, i, ';') orelse return error.InvalidEntity);
            str[j] = try unescapeEntity(text[i..entity_end]);
            i = entity_end;
        } else {
            str[j] = text[i];
            i += 1;
        }
    }

    // Trim the allocation down to the bytes actually written.
    return alloc.shrink(str, j);
}
| 
 | ||||
// Exercises dupeAndUnescape: plain text is copied unchanged, each predefined
// entity is replaced, and malformed or unknown references are rejected.
test "dupeAndUnescape" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    try testing.expectEqualSlices(u8, "test", try dupeAndUnescape(alloc, "test"));
    // The input uses the escaped forms; the expected value holds the raw characters.
    try testing.expectEqualSlices(u8, "a<b&c>d\"e'f<", try dupeAndUnescape(alloc, "a&lt;b&amp;c&gt;d&quot;e&apos;f&lt;"));
    // '&' with no terminating ';'.
    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&"));
    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&&"));
    // Terminated, but not a known entity.
    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&test;"));
    try testing.expectError(error.InvalidEntity, dupeAndUnescape(alloc, "python&boa"));
}
| 
 | ||||
// A document with comments before and after the root element still parses,
// and the root is the element between them.
test "Top level comments" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const input = "<?xml version='aa'?><!--comment--><python color='green'/><!--another comment-->";
    const document = try parse(alloc, input);
    try testing.expectEqualSlices(u8, "python", document.root.tag);
}
		Loading…
	
	Add table
		
		Reference in a new issue