aws-sdk-for-zig/src/json.zig

3054 lines
113 KiB
Zig

// SPDX-License-Identifier: MIT
// Copyright (c) 2015-2021 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
// JSON parser conforming to RFC8259.
//
// https://tools.ietf.org/html/rfc8259
const std = @import("std");
const debug = std.debug;
const assert = debug.assert;
const testing = std.testing;
const mem = std.mem;
const maxInt = std.math.maxInt;
pub const WriteStream = @import("json/write_stream.zig").WriteStream;
pub const writeStream = @import("json/write_stream.zig").writeStream;
/// Records whether a string token contained any backslash escape sequences
/// and, if so, how the decoded length differs from the raw token length.
const StringEscapes = union(enum) {
/// The string contained no escape sequences.
None,
/// The string contained at least one escape sequence.
Some: struct {
/// Signed adjustment that `Token.String.decodedLength` adds to the raw byte
/// count of the token to obtain the decoded length.
size_diff: isize,
},
};
/// Checks whether `decoded` is exactly what the JSON string body `encoded`
/// (the bytes between the quotes) decodes to.
///
/// Assumes that `encoded` is a well-formed JSON string body: every backslash
/// is followed by a valid escape and every `\u` by four hex digits.
/// Returns `false` on any mismatch, including when `encoded` has bytes left
/// over after `decoded` is exhausted, or when `encoded` contains an unpaired
/// surrogate escape (which has no UTF-8 encoding to compare against).
fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
    var i: usize = 0;
    var j: usize = 0;
    while (i < decoded.len) {
        if (j >= encoded.len) return false;
        if (encoded[j] != '\\') {
            if (decoded[i] != encoded[j]) return false;
            j += 1;
            i += 1;
        } else {
            const escape_type = encoded[j + 1];
            if (escape_type != 'u') {
                const t: u8 = switch (escape_type) {
                    '\\' => '\\',
                    '/' => '/',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    'f' => 12,
                    'b' => 8,
                    '"' => '"',
                    else => unreachable,
                };
                if (decoded[i] != t) return false;
                j += 2;
                i += 1;
            } else {
                var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
                j += 6;
                if (codepoint >= 0xD800 and codepoint < 0xDC00) {
                    // High surrogate: it only forms a codepoint when a `\uXXXX`
                    // low surrogate follows immediately.
                    if (j + 6 > encoded.len) return false;
                    if (encoded[j] != '\\' or encoded[j + 1] != 'u') return false;
                    const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
                    if (low_surrogate < 0xDC00 or low_surrogate > 0xDFFF) return false;
                    codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
                    j += 6;
                }
                var buf: [4]u8 = undefined;
                // A lone low surrogate cannot be UTF-8 encoded; treat as a mismatch.
                const len = std.unicode.utf8Encode(codepoint, &buf) catch return false;
                if (i + len > decoded.len) return false;
                if (!mem.eql(u8, decoded[i .. i + len], buf[0..len])) return false;
                i += len;
            }
        }
    }
    assert(i == decoded.len);
    // `decoded` is exhausted; the strings only match if `encoded` is too.
    return j == encoded.len;
}
test "encodesTo" {
    // identical input
    try testing.expect(encodesTo("false", "false"));
    // completely different content
    try testing.expect(!encodesTo("false", "true"));
    // same length, different content
    try testing.expect(!encodesTo("false", "other"));
    // simple backslash escapes
    try testing.expect(encodesTo("\\", "\\\\"));
    try testing.expect(encodesTo("with\nescape", "with\\nescape"));
    // \u escapes, including a surrogate pair
    try testing.expect(encodesTo("ą", "\\u0105"));
    try testing.expect(encodesTo("😂", "\\ud83d\\ude02"));
    try testing.expect(encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
}
/// One lexical JSON token. String and Number tokens do not own their text;
/// they only record how many input bytes they span.
///
/// Call `slice()` on the payload with the input and the current position to
/// recover the token's bytes.
pub const Token = union(enum) {
    ObjectBegin,
    ObjectEnd,
    ArrayBegin,
    ArrayEnd,
    String: struct {
        /// Length of the token in bytes.
        count: usize,

        /// Whether the string contains an escape sequence (and therefore cannot be zero-copied).
        escapes: StringEscapes,

        /// Length of the string once its escape sequences are decoded.
        pub fn decodedLength(self: @This()) usize {
            // `size_diff` is signed; reinterpreting it as usize and adding with
            // wrap-around yields the same result as signed addition.
            const adjustment: usize = switch (self.escapes) {
                .None => 0,
                .Some => |s| @bitCast(usize, s.size_diff),
            };
            return self.count +% adjustment;
        }

        /// The `count` bytes of `input` that end at position `i`.
        pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 {
            const start = i - self.count;
            return input[start..i];
        }
    },
    Number: struct {
        /// Length of the token in bytes.
        count: usize,

        /// True when the number has no `.` or `e` and can be represented by an integer.
        is_integer: bool,

        /// The `count` bytes of `input` that end at position `i`.
        pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 {
            const start = i - self.count;
            return input[start..i];
        }
    },
    True,
    False,
    Null,
};
/// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
/// they are encountered. No copies or allocations are performed during parsing and the entire
/// parsing state requires ~40-50 bytes of stack space.
///
/// Conforms strictly to RFC8259.
///
/// For a non-byte based wrapper, consider using TokenStream instead.
pub const StreamingParser = struct {
    // Current state
    state: State,
    // How many bytes we have counted for the current token
    count: usize,
    // What state to follow after parsing a string (either property or value string)
    after_string_state: State,
    // What state to follow after parsing a value (either top-level or value end)
    after_value_state: State,
    // If we stopped now, would the complete parsed string to now be a valid json string
    complete: bool,
    // Current token flags to pass through to the next generated, see Token.
    string_escapes: StringEscapes,
    // When in .String states, was the previous character a high surrogate?
    string_last_was_high_surrogate: bool,
    // Used inside of StringEscapeHexUnicode* states
    string_unicode_codepoint: u21,
    // The first byte needs to be stored to validate 3- and 4-byte sequences.
    sequence_first_byte: u8 = undefined,
    // When in .Number states, is the number a (still) valid integer?
    number_is_integer: bool,
    // Bit-stack for nested object/map literals (max 255 nestings).
    stack: u256,
    stack_used: u8,

    const object_bit = 0;
    const array_bit = 1;
    const max_stack_size = maxInt(u8);

    /// Returns a parser positioned at the start of a new document.
    pub fn init() StreamingParser {
        var p: StreamingParser = undefined;
        p.reset();
        return p;
    }

    /// Puts the parser back into its initial state so it can parse a new document.
    pub fn reset(p: *StreamingParser) void {
        p.state = .TopLevelBegin;
        p.count = 0;
        // Set before ever read in main transition function
        p.after_string_state = undefined;
        p.after_value_state = .ValueEnd; // handle end of values normally
        p.stack = 0;
        p.stack_used = 0;
        p.complete = false;
        p.string_escapes = undefined;
        p.string_last_was_high_surrogate = undefined;
        p.string_unicode_codepoint = undefined;
        p.number_is_integer = undefined;
    }

    pub const State = enum {
        // These must be first with these explicit values as we rely on them for indexing the
        // bit-stack directly and avoiding a branch.
        ObjectSeparator = 0,
        ValueEnd = 1,
        TopLevelBegin,
        TopLevelEnd,
        ValueBegin,
        ValueBeginNoClosing,
        String,
        StringUtf8Byte2Of2,
        StringUtf8Byte2Of3,
        StringUtf8Byte3Of3,
        StringUtf8Byte2Of4,
        StringUtf8Byte3Of4,
        StringUtf8Byte4Of4,
        StringEscapeCharacter,
        StringEscapeHexUnicode4,
        StringEscapeHexUnicode3,
        StringEscapeHexUnicode2,
        StringEscapeHexUnicode1,
        Number,
        NumberMaybeDotOrExponent,
        NumberMaybeDigitOrDotOrExponent,
        NumberFractionalRequired,
        NumberFractional,
        NumberMaybeExponent,
        NumberExponent,
        NumberExponentDigitsRequired,
        NumberExponentDigits,
        TrueLiteral1,
        TrueLiteral2,
        TrueLiteral3,
        FalseLiteral1,
        FalseLiteral2,
        FalseLiteral3,
        FalseLiteral4,
        NullLiteral1,
        NullLiteral2,
        NullLiteral3,

        // Only call this function to generate array/object final state.
        pub fn fromInt(x: anytype) State {
            debug.assert(x == 0 or x == 1);
            const T = std.meta.Tag(State);
            return @intToEnum(State, @intCast(T, x));
        }
    };

    pub const Error = error{
        InvalidTopLevel,
        TooManyNestedItems,
        TooManyClosingItems,
        InvalidValueBegin,
        InvalidValueEnd,
        UnbalancedBrackets,
        UnbalancedBraces,
        UnexpectedClosingBracket,
        UnexpectedClosingBrace,
        InvalidNumber,
        InvalidSeparator,
        InvalidLiteral,
        InvalidEscapeCharacter,
        InvalidUnicodeHexSymbol,
        InvalidUtf8Byte,
        InvalidTopLevelTrailing,
        InvalidControlCharacter,
    };

    /// Give another byte to the parser and obtain any new tokens. This may (rarely) return two
    /// tokens. token2 is always null if token1 is null.
    ///
    /// There is currently no error recovery on a bad stream.
    pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
        token1.* = null;
        token2.* = null;
        p.count += 1;

        // unlikely
        // A `true` result means `c` terminated a number without being consumed,
        // so the same byte is run through the state machine a second time.
        if (try p.transition(c, token1)) {
            _ = try p.transition(c, token2);
        }
    }

    // Perform a single transition on the state machine and return any possible token.
    // Returns true when `c` ended the current token but still has to be processed
    // again in the new state.
    fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
        switch (p.state) {
            .TopLevelBegin => switch (c) {
                '{' => {
                    p.stack <<= 1;
                    p.stack |= object_bit;
                    p.stack_used += 1;

                    p.state = .ValueBegin;
                    p.after_string_state = .ObjectSeparator;

                    token.* = Token.ObjectBegin;
                },
                '[' => {
                    p.stack <<= 1;
                    p.stack |= array_bit;
                    p.stack_used += 1;

                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;

                    token.* = Token.ArrayBegin;
                },
                '-' => {
                    p.number_is_integer = true;
                    p.state = .Number;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                '0' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDotOrExponent;
                    p.after_value_state = .TopLevelEnd;
                    // A single digit is already a valid top-level document.
                    p.complete = true;
                    p.count = 0;
                },
                '1'...'9' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDigitOrDotOrExponent;
                    p.after_value_state = .TopLevelEnd;
                    // A single digit is already a valid top-level document.
                    p.complete = true;
                    p.count = 0;
                },
                '"' => {
                    p.state = .String;
                    p.after_value_state = .TopLevelEnd;
                    // We don't actually need the following since after_value_state should override.
                    p.after_string_state = .ValueEnd;
                    p.string_escapes = .None;
                    p.string_last_was_high_surrogate = false;
                    p.count = 0;
                },
                't' => {
                    p.state = .TrueLiteral1;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                'f' => {
                    p.state = .FalseLiteral1;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                'n' => {
                    p.state = .NullLiteral1;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidTopLevel;
                },
            },

            .TopLevelEnd => switch (c) {
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidTopLevelTrailing;
                },
            },

            .ValueBegin => switch (c) {
                // NOTE: These are shared in ValueEnd as well, think we can reorder states to
                // be a bit clearer and avoid this duplication.
                '}' => {
                    // unlikely
                    if (p.stack & 1 != object_bit) {
                        return error.UnexpectedClosingBrace;
                    }
                    if (p.stack_used == 0) {
                        return error.TooManyClosingItems;
                    }

                    p.state = .ValueBegin;
                    p.after_string_state = State.fromInt(p.stack & 1);

                    p.stack >>= 1;
                    p.stack_used -= 1;

                    switch (p.stack_used) {
                        0 => {
                            p.complete = true;
                            p.state = .TopLevelEnd;
                        },
                        else => {
                            p.state = .ValueEnd;
                        },
                    }

                    token.* = Token.ObjectEnd;
                },
                ']' => {
                    if (p.stack & 1 != array_bit) {
                        return error.UnexpectedClosingBracket;
                    }
                    if (p.stack_used == 0) {
                        return error.TooManyClosingItems;
                    }

                    p.state = .ValueBegin;
                    p.after_string_state = State.fromInt(p.stack & 1);

                    p.stack >>= 1;
                    p.stack_used -= 1;

                    switch (p.stack_used) {
                        0 => {
                            p.complete = true;
                            p.state = .TopLevelEnd;
                        },
                        else => {
                            p.state = .ValueEnd;
                        },
                    }

                    token.* = Token.ArrayEnd;
                },
                '{' => {
                    if (p.stack_used == max_stack_size) {
                        return error.TooManyNestedItems;
                    }

                    p.stack <<= 1;
                    p.stack |= object_bit;
                    p.stack_used += 1;

                    p.state = .ValueBegin;
                    p.after_string_state = .ObjectSeparator;

                    token.* = Token.ObjectBegin;
                },
                '[' => {
                    if (p.stack_used == max_stack_size) {
                        return error.TooManyNestedItems;
                    }

                    p.stack <<= 1;
                    p.stack |= array_bit;
                    p.stack_used += 1;

                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;

                    token.* = Token.ArrayBegin;
                },
                '-' => {
                    p.number_is_integer = true;
                    p.state = .Number;
                    p.count = 0;
                },
                '0' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDotOrExponent;
                    p.count = 0;
                },
                '1'...'9' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDigitOrDotOrExponent;
                    p.count = 0;
                },
                '"' => {
                    p.state = .String;
                    p.string_escapes = .None;
                    p.string_last_was_high_surrogate = false;
                    p.count = 0;
                },
                't' => {
                    p.state = .TrueLiteral1;
                    p.count = 0;
                },
                'f' => {
                    p.state = .FalseLiteral1;
                    p.count = 0;
                },
                'n' => {
                    p.state = .NullLiteral1;
                    p.count = 0;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidValueBegin;
                },
            },

            // TODO: A bit of duplication here and in the following state, redo.
            .ValueBeginNoClosing => switch (c) {
                '{' => {
                    if (p.stack_used == max_stack_size) {
                        return error.TooManyNestedItems;
                    }

                    p.stack <<= 1;
                    p.stack |= object_bit;
                    p.stack_used += 1;

                    p.state = .ValueBegin;
                    p.after_string_state = .ObjectSeparator;

                    token.* = Token.ObjectBegin;
                },
                '[' => {
                    if (p.stack_used == max_stack_size) {
                        return error.TooManyNestedItems;
                    }

                    p.stack <<= 1;
                    p.stack |= array_bit;
                    p.stack_used += 1;

                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;

                    token.* = Token.ArrayBegin;
                },
                '-' => {
                    p.number_is_integer = true;
                    p.state = .Number;
                    p.count = 0;
                },
                '0' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDotOrExponent;
                    p.count = 0;
                },
                '1'...'9' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDigitOrDotOrExponent;
                    p.count = 0;
                },
                '"' => {
                    p.state = .String;
                    p.string_escapes = .None;
                    p.string_last_was_high_surrogate = false;
                    p.count = 0;
                },
                't' => {
                    p.state = .TrueLiteral1;
                    p.count = 0;
                },
                'f' => {
                    p.state = .FalseLiteral1;
                    p.count = 0;
                },
                'n' => {
                    p.state = .NullLiteral1;
                    p.count = 0;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidValueBegin;
                },
            },

            .ValueEnd => switch (c) {
                ',' => {
                    p.after_string_state = State.fromInt(p.stack & 1);
                    p.state = .ValueBeginNoClosing;
                },
                ']' => {
                    if (p.stack & 1 != array_bit) {
                        return error.UnexpectedClosingBracket;
                    }
                    if (p.stack_used == 0) {
                        return error.TooManyClosingItems;
                    }

                    p.state = .ValueEnd;
                    p.after_string_state = State.fromInt(p.stack & 1);

                    p.stack >>= 1;
                    p.stack_used -= 1;

                    if (p.stack_used == 0) {
                        p.complete = true;
                        p.state = .TopLevelEnd;
                    }

                    token.* = Token.ArrayEnd;
                },
                '}' => {
                    // unlikely
                    if (p.stack & 1 != object_bit) {
                        return error.UnexpectedClosingBrace;
                    }
                    if (p.stack_used == 0) {
                        return error.TooManyClosingItems;
                    }

                    p.state = .ValueEnd;
                    p.after_string_state = State.fromInt(p.stack & 1);

                    p.stack >>= 1;
                    p.stack_used -= 1;

                    if (p.stack_used == 0) {
                        p.complete = true;
                        p.state = .TopLevelEnd;
                    }

                    token.* = Token.ObjectEnd;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidValueEnd;
                },
            },

            .ObjectSeparator => switch (c) {
                ':' => {
                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidSeparator;
                },
            },

            .String => switch (c) {
                0x00...0x1F => {
                    return error.InvalidControlCharacter;
                },
                '"' => {
                    p.state = p.after_string_state;
                    if (p.after_value_state == .TopLevelEnd) {
                        p.state = .TopLevelEnd;
                        p.complete = true;
                    }

                    token.* = .{
                        .String = .{
                            .count = p.count - 1,
                            .escapes = p.string_escapes,
                        },
                    };
                    p.string_escapes = undefined;
                    p.string_last_was_high_surrogate = undefined;
                },
                '\\' => {
                    p.state = .StringEscapeCharacter;
                    switch (p.string_escapes) {
                        .None => {
                            p.string_escapes = .{ .Some = .{ .size_diff = 0 } };
                        },
                        .Some => {},
                    }
                },
                0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => {
                    // non-control ascii
                    p.string_last_was_high_surrogate = false;
                },
                // A raw multi-byte character also separates a preceding high surrogate
                // escape from whatever follows, exactly like a raw ASCII character does.
                0xC2...0xDF => {
                    p.state = .StringUtf8Byte2Of2;
                    p.string_last_was_high_surrogate = false;
                },
                0xE0...0xEF => {
                    p.state = .StringUtf8Byte2Of3;
                    p.sequence_first_byte = c;
                    p.string_last_was_high_surrogate = false;
                },
                0xF0...0xF4 => {
                    p.state = .StringUtf8Byte2Of4;
                    p.sequence_first_byte = c;
                    p.string_last_was_high_surrogate = false;
                },
                else => {
                    return error.InvalidUtf8Byte;
                },
            },

            .StringUtf8Byte2Of2 => switch (c >> 6) {
                0b10 => p.state = .String,
                else => return error.InvalidUtf8Byte,
            },
            .StringUtf8Byte2Of3 => {
                // The valid range of the second byte depends on the first byte
                // (rejects overlong encodings).
                switch (p.sequence_first_byte) {
                    0xE0 => switch (c) {
                        0xA0...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    0xE1...0xEF => switch (c) {
                        0x80...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    else => return error.InvalidUtf8Byte,
                }
                p.state = .StringUtf8Byte3Of3;
            },
            .StringUtf8Byte3Of3 => switch (c) {
                0x80...0xBF => p.state = .String,
                else => return error.InvalidUtf8Byte,
            },
            .StringUtf8Byte2Of4 => {
                // The valid range of the second byte depends on the first byte
                // (rejects overlong encodings and values above U+10FFFF).
                switch (p.sequence_first_byte) {
                    0xF0 => switch (c) {
                        0x90...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    0xF1...0xF3 => switch (c) {
                        0x80...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    0xF4 => switch (c) {
                        0x80...0x8F => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    else => return error.InvalidUtf8Byte,
                }
                p.state = .StringUtf8Byte3Of4;
            },
            .StringUtf8Byte3Of4 => switch (c) {
                0x80...0xBF => p.state = .StringUtf8Byte4Of4,
                else => return error.InvalidUtf8Byte,
            },
            .StringUtf8Byte4Of4 => switch (c) {
                0x80...0xBF => p.state = .String,
                else => return error.InvalidUtf8Byte,
            },

            .StringEscapeCharacter => switch (c) {
                // NOTE: '/' is allowed as an escaped character but it also is allowed
                // as unescaped according to the RFC. There is a reported errata which suggests
                // removing the non-escaped variant but it makes more sense to simply disallow
                // it as an escape code here.
                //
                // The current JSONTestSuite tests rely on both of this behaviour being present
                // however, so we default to the status quo where both are accepted until this
                // is further clarified.
                '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
                    // Two input bytes decode to one output byte.
                    p.string_escapes.Some.size_diff -= 1;
                    p.state = .String;
                    p.string_last_was_high_surrogate = false;
                },
                'u' => {
                    p.state = .StringEscapeHexUnicode4;
                },
                else => {
                    return error.InvalidEscapeCharacter;
                },
            },

            .StringEscapeHexUnicode4 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .StringEscapeHexUnicode3;
                p.string_unicode_codepoint = codepoint << 12;
            },

            .StringEscapeHexUnicode3 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .StringEscapeHexUnicode2;
                p.string_unicode_codepoint |= codepoint << 8;
            },

            .StringEscapeHexUnicode2 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .StringEscapeHexUnicode1;
                p.string_unicode_codepoint |= codepoint << 4;
            },

            .StringEscapeHexUnicode1 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .String;
                p.string_unicode_codepoint |= codepoint;
                if (p.string_unicode_codepoint < 0xD800 or p.string_unicode_codepoint >= 0xE000) {
                    // not part of surrogate pair
                    p.string_escapes.Some.size_diff -= @as(isize, 6 - (std.unicode.utf8CodepointSequenceLength(p.string_unicode_codepoint) catch unreachable));
                    p.string_last_was_high_surrogate = false;
                } else if (p.string_unicode_codepoint < 0xDC00) {
                    // 'high' surrogate
                    // takes 3 bytes to encode a half surrogate pair into wtf8
                    p.string_escapes.Some.size_diff -= 6 - 3;
                    p.string_last_was_high_surrogate = true;
                } else {
                    // 'low' surrogate
                    p.string_escapes.Some.size_diff -= 6;
                    if (p.string_last_was_high_surrogate) {
                        // takes 4 bytes to encode a full surrogate pair into utf8
                        // 3 bytes are already reserved by high surrogate
                        p.string_escapes.Some.size_diff -= -1;
                    } else {
                        // takes 3 bytes to encode a half surrogate pair into wtf8
                        p.string_escapes.Some.size_diff -= -3;
                    }
                    p.string_last_was_high_surrogate = false;
                }
                p.string_unicode_codepoint = undefined;
            },

            .Number => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0' => {
                        p.state = .NumberMaybeDotOrExponent;
                    },
                    '1'...'9' => {
                        p.state = .NumberMaybeDigitOrDotOrExponent;
                    },
                    else => {
                        return error.InvalidNumber;
                    },
                }
            },

            .NumberMaybeDotOrExponent => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '.' => {
                        // "0." is not a number until a fractional digit follows.
                        p.complete = false;
                        p.number_is_integer = false;
                        p.state = .NumberFractionalRequired;
                    },
                    'e', 'E' => {
                        // "0e" is not a number until an exponent digit follows.
                        p.complete = false;
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        p.number_is_integer = undefined;
                        return true;
                    },
                }
            },

            .NumberMaybeDigitOrDotOrExponent => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '.' => {
                        // A trailing '.' needs at least one fractional digit.
                        p.complete = false;
                        p.number_is_integer = false;
                        p.state = .NumberFractionalRequired;
                    },
                    'e', 'E' => {
                        // A trailing 'e' needs at least one exponent digit.
                        p.complete = false;
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    '0'...'9' => {
                        // another digit
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .NumberFractionalRequired => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0'...'9' => {
                        p.state = .NumberFractional;
                    },
                    else => {
                        return error.InvalidNumber;
                    },
                }
            },

            .NumberFractional => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0'...'9' => {
                        // another digit
                    },
                    'e', 'E' => {
                        // A trailing 'e' needs at least one exponent digit.
                        p.complete = false;
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .NumberMaybeExponent => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    'e', 'E' => {
                        // A trailing 'e' needs at least one exponent digit.
                        p.complete = false;
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .NumberExponent => switch (c) {
                '-', '+' => {
                    p.complete = false;
                    p.state = .NumberExponentDigitsRequired;
                },
                '0'...'9' => {
                    p.complete = p.after_value_state == .TopLevelEnd;
                    p.state = .NumberExponentDigits;
                },
                else => {
                    return error.InvalidNumber;
                },
            },

            .NumberExponentDigitsRequired => switch (c) {
                '0'...'9' => {
                    p.complete = p.after_value_state == .TopLevelEnd;
                    p.state = .NumberExponentDigits;
                },
                else => {
                    return error.InvalidNumber;
                },
            },

            .NumberExponentDigits => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0'...'9' => {
                        // another digit
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .TrueLiteral1 => switch (c) {
                'r' => p.state = .TrueLiteral2,
                else => return error.InvalidLiteral,
            },
            .TrueLiteral2 => switch (c) {
                'u' => p.state = .TrueLiteral3,
                else => return error.InvalidLiteral,
            },
            .TrueLiteral3 => switch (c) {
                'e' => {
                    p.state = p.after_value_state;
                    p.complete = p.state == .TopLevelEnd;
                    token.* = Token.True;
                },
                else => {
                    return error.InvalidLiteral;
                },
            },

            .FalseLiteral1 => switch (c) {
                'a' => p.state = .FalseLiteral2,
                else => return error.InvalidLiteral,
            },
            .FalseLiteral2 => switch (c) {
                'l' => p.state = .FalseLiteral3,
                else => return error.InvalidLiteral,
            },
            .FalseLiteral3 => switch (c) {
                's' => p.state = .FalseLiteral4,
                else => return error.InvalidLiteral,
            },
            .FalseLiteral4 => switch (c) {
                'e' => {
                    p.state = p.after_value_state;
                    p.complete = p.state == .TopLevelEnd;
                    token.* = Token.False;
                },
                else => {
                    return error.InvalidLiteral;
                },
            },

            .NullLiteral1 => switch (c) {
                'u' => p.state = .NullLiteral2,
                else => return error.InvalidLiteral,
            },
            .NullLiteral2 => switch (c) {
                'l' => p.state = .NullLiteral3,
                else => return error.InvalidLiteral,
            },
            .NullLiteral3 => switch (c) {
                'l' => {
                    p.state = p.after_value_state;
                    p.complete = p.state == .TopLevelEnd;
                    token.* = Token.Null;
                },
                else => {
                    return error.InvalidLiteral;
                },
            },
        }

        return false;
    }
};
/// Pull-style tokenizer over a complete in-memory slice. Drives a
/// StreamingParser one byte at a time and hands out one Token per `next()` call.
pub const TokenStream = struct {
    i: usize,
    slice: []const u8,
    parser: StreamingParser,
    token: ?Token,

    pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson};

    /// Creates a stream positioned at the start of `slice`.
    pub fn init(slice: []const u8) TokenStream {
        return .{
            .i = 0,
            .slice = slice,
            .parser = StreamingParser.init(),
            .token = null,
        };
    }

    /// Returns the next token, `null` once the document has ended cleanly, or
    /// an error if the input is malformed or ends prematurely.
    pub fn next(self: *TokenStream) Error!?Token {
        // A previous byte may have produced two tokens; hand out the stashed one first.
        if (self.token) |pending| {
            self.token = null;
            return pending;
        }

        var first: ?Token = undefined;
        var second: ?Token = undefined;

        while (self.i < self.slice.len) {
            const byte = self.slice[self.i];
            try self.parser.feed(byte, &first, &second);
            self.i += 1;

            if (first) |produced| {
                self.token = second;
                return produced;
            }
        }

        // Without this a bare number fails, the streaming parser doesn't know the input ended
        try self.parser.feed(' ', &first, &second);
        self.i += 1;

        if (first) |produced| return produced;
        if (self.parser.complete) return null;
        return error.UnexpectedEndOfJson;
    }
};
/// Test helper: pulls the next token from `p` and expects its tag to be `id`.
/// The stream must neither fail nor be exhausted.
fn checkNext(p: *TokenStream, id: std.meta.Tag(Token)) !void {
    const maybe_token = p.next() catch unreachable;
    const actual_tag = std.meta.activeTag(maybe_token.?);
    try testing.expect(actual_tag == id);
}
test "json.token" {
    const s =
        \\{
        \\ "Image": {
        \\ "Width": 800,
        \\ "Height": 600,
        \\ "Title": "View from 15th Floor",
        \\ "Thumbnail": {
        \\ "Url": "http://www.example.com/image/481989943",
        \\ "Height": 125,
        \\ "Width": 100
        \\ },
        \\ "Animated" : false,
        \\ "IDs": [116, 943, 234, 38793]
        \\ }
        \\}
    ;

    // Token tags in the exact order the document above must produce them.
    const expected = [_]std.meta.Tag(Token){
        .ObjectBegin,
        .String, // Image
        .ObjectBegin,
        .String, // Width
        .Number,
        .String, // Height
        .Number,
        .String, // Title
        .String,
        .String, // Thumbnail
        .ObjectBegin,
        .String, // Url
        .String,
        .String, // Height
        .Number,
        .String, // Width
        .Number,
        .ObjectEnd,
        .String, // Animated
        .False,
        .String, // IDs
        .ArrayBegin,
        .Number,
        .Number,
        .Number,
        .Number,
        .ArrayEnd,
        .ObjectEnd,
        .ObjectEnd,
    };

    var p = TokenStream.init(s);
    for (expected) |id| {
        try checkNext(&p, id);
    }

    // The stream must report a clean end of document afterwards.
    try testing.expect((try p.next()) == null);
}
test "json.token mismatched close" {
    var p = TokenStream.init("[102, 111, 111 }");

    // The opening bracket and the three numbers tokenize normally...
    const expected = [_]std.meta.Tag(Token){ .ArrayBegin, .Number, .Number, .Number };
    for (expected) |id| {
        try checkNext(&p, id);
    }

    // ...but closing an array with '}' must be rejected.
    try testing.expectError(error.UnexpectedClosingBrace, p.next());
}
/// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
/// be able to decode the string even if this returns true.
///
/// Returns `true` only if `s` is one complete, well-formed JSON document.
pub fn validate(s: []const u8) bool {
    var p = StreamingParser.init();
    var token1: ?Token = undefined;
    var token2: ?Token = undefined;

    for (s) |c| {
        p.feed(c, &token1, &token2) catch return false;
    }

    // The streaming parser cannot know the input ended. Feed one trailing
    // whitespace byte (as TokenStream.next does) so that a bare top-level
    // number is terminated and an unfinished one such as "1." or "1e" is rejected.
    p.feed(' ', &token1, &token2) catch return false;

    return p.complete;
}
test "json.validate" {
    // balanced containers are accepted
    try testing.expect(validate("{}"));
    try testing.expect(validate("[]"));
    try testing.expect(validate("[{[[[[{}]]]]}]"));

    // mismatched or unbalanced containers are rejected
    try testing.expect(!validate("{]"));
    try testing.expect(!validate("[}"));
    try testing.expect(!validate("{{{{[]}}}]"));
}
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const ArrayList = std.ArrayList;
const StringArrayHashMap = std.StringArrayHashMap;
/// A `Value` paired with the arena allocator held alongside it.
/// Call `deinit` to release the arena.
pub const ValueTree = struct {
/// Arena released by `deinit`.
arena: ArenaAllocator,
/// Root value of the tree.
root: Value,
/// Frees everything allocated from `arena`.
pub fn deinit(self: *ValueTree) void {
self.arena.deinit();
}
};
/// Container type used for the `Value.Object` payload: string keys mapped to values.
pub const ObjectMap = StringArrayHashMap(Value);
/// Container type used for the `Value.Array` payload.
pub const Array = ArrayList(Value);
/// Represents a JSON value
/// Currently only supports numbers that fit into i64 or f64.
pub const Value = union(enum) {
    Null,
    Bool: bool,
    Integer: i64,
    Float: f64,
    /// A number kept as text; it is written out verbatim by `jsonStringify`.
    NumberString: []const u8,
    String: []const u8,
    Array: Array,
    Object: ObjectMap,

    /// Serializes `value` as JSON to `out_stream`, honoring the whitespace
    /// settings in `options`. Returns any error produced by `out_stream`.
    pub fn jsonStringify(
        value: @This(),
        options: StringifyOptions,
        out_stream: anytype,
    ) @TypeOf(out_stream).Error!void {
        switch (value) {
            .Null => try stringify(null, options, out_stream),
            .Bool => |inner| try stringify(inner, options, out_stream),
            .Integer => |inner| try stringify(inner, options, out_stream),
            .Float => |inner| try stringify(inner, options, out_stream),
            .NumberString => |inner| try out_stream.writeAll(inner),
            .String => |inner| try stringify(inner, options, out_stream),
            .Array => |inner| try stringify(inner.items, options, out_stream),
            .Object => |inner| {
                try out_stream.writeByte('{');
                var field_output = false;
                // Members are written one indentation level deeper than the braces.
                var child_options = options;
                if (child_options.whitespace) |*child_whitespace| {
                    child_whitespace.indent_level += 1;
                }
                var it = inner.iterator();
                while (it.next()) |entry| {
                    // A comma goes before every member except the first.
                    if (!field_output) {
                        field_output = true;
                    } else {
                        try out_stream.writeByte(',');
                    }
                    if (child_options.whitespace) |child_whitespace| {
                        try out_stream.writeByte('\n');
                        try child_whitespace.outputIndent(out_stream);
                    }

                    try stringify(entry.key, options, out_stream);
                    try out_stream.writeByte(':');
                    if (child_options.whitespace) |child_whitespace| {
                        if (child_whitespace.separator) {
                            try out_stream.writeByte(' ');
                        }
                    }
                    try stringify(entry.value, child_options, out_stream);
                }
                // The closing brace goes on its own line only for non-empty objects.
                if (field_output) {
                    if (options.whitespace) |whitespace| {
                        try out_stream.writeByte('\n');
                        try whitespace.outputIndent(out_stream);
                    }
                }
                try out_stream.writeByte('}');
            },
        }
    }

    /// Writes the value as compact JSON to stderr while holding the stderr
    /// mutex. Write errors are ignored; this is a best-effort debugging aid.
    pub fn dump(self: Value) void {
        var held = std.debug.getStderrMutex().acquire();
        defer held.release();

        const stderr = std.io.getStdErr().writer();
        // Use this file's `stringify`/`StringifyOptions` (the ones `jsonStringify`
        // is declared against) rather than the std.json counterparts, whose
        // options type does not match `jsonStringify`'s parameter.
        stringify(self, StringifyOptions{ .whitespace = null }, stderr) catch return;
    }
};
// Each case serializes one Value variant into a small fixed buffer and checks
// the exact bytes written. `expectEqualSlices` takes the expected slice first.
test "Value.jsonStringify" {
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        try @as(Value, .Null).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "null", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        try (Value{ .Bool = true }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "true", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        try (Value{ .Integer = 42 }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "42", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        try (Value{ .NumberString = "43" }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "43", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        try (Value{ .Float = 42 }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "4.2e+01", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        try (Value{ .String = "weeee" }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "\"weeee\"", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        var vals = [_]Value{
            .{ .Integer = 1 },
            .{ .Integer = 2 },
            .{ .NumberString = "3" },
        };
        try (Value{
            .Array = Array.fromOwnedSlice(undefined, &vals),
        }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "[1,2,3]", fbs.getWritten());
    }
    {
        var buffer: [10]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buffer);
        var obj = ObjectMap.init(testing.allocator);
        defer obj.deinit();
        try obj.putNoClobber("a", .{ .String = "b" });
        try (Value{ .Object = obj }).jsonStringify(.{}, fbs.writer());
        try testing.expectEqualSlices(u8, "{\"a\":\"b\"}", fbs.getWritten());
    }
}
/// Parses a `T` from `tokens` and reports whether the result is deep-equal to
/// `value`. Parse errors are propagated to the caller.
fn parsesTo(comptime T: type, value: T, tokens: *TokenStream, options: ParseOptions) !bool {
    // TODO: should be able to write this function to not require an allocator
    const parsed = try parse(T, tokens, options);
    defer parseFree(T, parsed, options);

    const matches = parsedEqual(parsed, value);
    return matches;
}
/// Returns whether a value returned by `parse` is deep-equal to another value
/// of the same type.
///
/// Optionals, tagged unions, arrays, structs, single-item pointers and slices
/// are compared recursively; every other type is compared with `==`.
/// Untagged unions and many-item/C pointers are not supported.
fn parsedEqual(a: anytype, b: @TypeOf(a)) bool {
    switch (@typeInfo(@TypeOf(a))) {
        .Optional => {
            if (a == null and b == null) return true;
            if (a == null or b == null) return false;
            return parsedEqual(a.?, b.?);
        },
        .Union => |info| {
            if (info.tag_type) |UnionTag| {
                const tag_a = std.meta.activeTag(a);
                const tag_b = std.meta.activeTag(b);
                if (tag_a != tag_b) return false;

                // Find the active field at runtime and compare its payload.
                inline for (info.fields) |field_info| {
                    if (@field(UnionTag, field_info.name) == tag_a) {
                        return parsedEqual(@field(a, field_info.name), @field(b, field_info.name));
                    }
                }
                return false;
            } else {
                unreachable;
            }
        },
        .Array => {
            // Both operands have the same array type, hence the same length.
            for (a) |e, i|
                if (!parsedEqual(e, b[i])) return false;
            return true;
        },
        .Struct => |info| {
            inline for (info.fields) |field_info| {
                if (!parsedEqual(@field(a, field_info.name), @field(b, field_info.name))) return false;
            }
            return true;
        },
        .Pointer => |ptrInfo| switch (ptrInfo.size) {
            .One => return parsedEqual(a.*, b.*),
            .Slice => {
                if (a.len != b.len) return false;
                for (a) |e, i|
                    if (!parsedEqual(e, b[i])) return false;
                return true;
            },
            .Many, .C => unreachable,
        },
        else => return a == b,
    }
    unreachable;
}
/// Options accepted by the parsing functions in this file.
pub const ParseOptions = struct {
/// Optional allocator. The key-comparison helpers `camelCaseComp` and
/// `snakeCaseComp` return `error.AllocatorRequired` when they need to
/// allocate and this is null.
allocator: ?*Allocator = null,
/// Behaviour when a duplicate field is encountered.
duplicate_field_behavior: enum {
UseFirst,
Error,
UseLast,
} = .Error,
// NOTE(review): the four flags below are not read anywhere in the visible part of
// the file; their names suggest they relax key/field matching — confirm against
// the parse implementation.
allow_camel_case_conversion: bool = false,
allow_snake_case_conversion: bool = false,
allow_unknown_fields: bool = false,
allow_missing_fields: bool = false,
};
/// Compares a struct field name with a JSON key, treating a key whose first
/// byte is an uppercase ASCII letter as if that letter were lowercase
/// (`UserId` matches the camelCase field `userId`). Any other key must match
/// the field exactly.
///
/// The comparison is done in place, so no allocator is needed; `options` is
/// kept only so the signature matches `snakeCaseComp`.
fn camelCaseComp(field: []const u8, key: []const u8, options: ParseOptions) !bool {
    _ = options;
    // Only an uppercase Latin first character is handled at the moment.
    if (key.len > 0 and key[0] >= 'A' and key[0] <= 'Z') {
        if (field.len != key.len) return false;
        // We will assume the target field is in camelCase
        if (field[0] != key[0] + ('a' - 'A')) return false;
        return std.mem.eql(u8, field[1..], key[1..]);
    }
    return std.mem.eql(u8, field, key);
}
/// Compares a snake_case struct field name with a JSON key.
///
/// Example: field `user_id` matches key `UserId`. The field is read with its
/// `_` characters removed and the key with its ASCII uppercase letters
/// lowercased; the two must then be byte-for-byte equal. A field without any
/// `_` must match the key exactly.
///
/// Returns `error.InvalidLiteral` when the field contains `_` and the key
/// contains a code point above U+00FF (only Latin keys are handled).
/// The comparison is done in place, so no allocator is needed; `options` is
/// kept only so the signature matches `camelCaseComp`.
fn snakeCaseComp(field: []const u8, key: []const u8, options: ParseOptions) !bool {
    _ = options;
    // Not a snake_case field: only an exact match counts.
    if (std.mem.indexOfScalar(u8, field, '_') == null)
        return std.mem.eql(u8, field, key);

    // Reject non-Latin keys before comparing anything.
    var codepoints = (std.unicode.Utf8View.init(key) catch unreachable).iterator();
    while (codepoints.nextCodepoint()) |codepoint| {
        if (codepoint > 255) return error.InvalidLiteral;
    }

    // Walk both strings by byte. Bytes of multi-byte UTF-8 sequences are all
    // >= 0x80, so ASCII case folding can never touch them.
    var key_index: usize = 0;
    for (field) |ch| {
        if (ch == '_') continue;
        if (key_index >= key.len) return false;
        const key_byte = key[key_index];
        const folded = if (key_byte >= 'A' and key_byte <= 'Z') key_byte + ('a' - 'A') else key_byte;
        if (ch != folded) return false;
        key_index += 1;
    }
    // field = 'user_id' must consume the whole key, e.g. 'UserId'
    return key_index == key.len;
}
/// Parses the JSON value that begins with `token` (already taken from
/// `tokens`) into a value of type `T`, pulling further tokens as needed.
///
/// Supported targets: bool, floats, integers (including integral numbers
/// written with a fraction/exponent), optionals, enums (by name or integer
/// value), tagged unions (first member that parses wins), structs, fixed-size
/// arrays, single-item pointers and slices. Pointer and slice targets need
/// `options.allocator`, otherwise `error.AllocatorRequired` is returned.
///
/// On error everything allocated so far for the partially-built value is
/// released. On success the caller owns the result and releases it with
/// `parseFree`.
fn parseInternal(comptime T: type, token: Token, tokens: *TokenStream, options: ParseOptions) !T {
    switch (@typeInfo(T)) {
        .Bool => {
            return switch (token) {
                .True => true,
                .False => false,
                else => error.UnexpectedToken,
            };
        },
        .Float, .ComptimeFloat => {
            const numberToken = switch (token) {
                .Number => |n| n,
                else => return error.UnexpectedToken,
            };
            return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1));
        },
        .Int, .ComptimeInt => {
            const numberToken = switch (token) {
                .Number => |n| n,
                else => return error.UnexpectedToken,
            };
            // A number written with a fraction or exponent (e.g. `4.2e2`) is
            // still accepted as long as its value is integral.
            if (numberToken.is_integer)
                return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
            const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1));
            if (std.math.round(float) != float) return error.InvalidNumber;
            // `@floatToInt` is undefined for values outside the range of `T`.
            const upper = @intToFloat(f128, @as(T, std.math.maxInt(T)));
            const lower = @intToFloat(f128, @as(T, std.math.minInt(T)));
            if (float > upper or float < lower) return error.Overflow;
            return @floatToInt(T, float);
        },
        .Optional => |optionalInfo| {
            if (token == .Null) {
                return null;
            } else {
                return try parseInternal(optionalInfo.child, token, tokens, options);
            }
        },
        .Enum => |enumInfo| {
            switch (token) {
                .Number => |numberToken| {
                    if (!numberToken.is_integer) return error.UnexpectedToken;
                    const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
                    return try std.meta.intToEnum(T, n);
                },
                .String => |stringToken| {
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    switch (stringToken.escapes) {
                        .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
                        .Some => {
                            inline for (enumInfo.fields) |field| {
                                if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
                                    return @field(T, field.name);
                                }
                            }
                            return error.InvalidEnumTag;
                        },
                    }
                },
                else => return error.UnexpectedToken,
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                // try each of the union fields until we find one that matches
                inline for (unionInfo.fields) |u_field| {
                    // take a copy of tokens so we can withhold mutations until success
                    var tokens_copy = tokens.*;
                    if (parseInternal(u_field.field_type, token, &tokens_copy, options)) |value| {
                        tokens.* = tokens_copy;
                        return @unionInit(T, u_field.name, value);
                    } else |err| {
                        // Bubble up error.OutOfMemory
                        // Parsing some types won't have OutOfMemory in their
                        // error-sets, for the condition to be valid, merge it in.
                        if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
                        // Bubble up AllocatorRequired, as it indicates missing option
                        if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err;
                        // otherwise continue through the `inline for`
                    }
                }
                return error.NoUnionMembersMatched;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            switch (token) {
                .ObjectBegin => {},
                else => return error.UnexpectedToken,
            }
            var r: T = undefined;
            var fields_seen = [_]bool{false} ** structInfo.fields.len;
            errdefer {
                // Release only the fields that were actually parsed.
                inline for (structInfo.fields) |field, i| {
                    if (fields_seen[i] and !field.is_comptime) {
                        parseFree(field.field_type, @field(r, field.name), options);
                    }
                }
            }
            while (true) {
                switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
                    .ObjectEnd => break,
                    .String => |stringToken| {
                        const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                        var found = false;
                        inline for (structInfo.fields) |field, i| {
                            // TODO: using switches here segfault the compiler (#2727?)
                            // A key matches when it equals the field name exactly
                            // (comparing the decoded form if it contains escapes), or
                            // through the optional camelCase / snake_case conversions.
                            if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or
                                (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice))) or
                                (stringToken.escapes == .None and options.allow_camel_case_conversion and try camelCaseComp(field.name, key_source_slice, options)) or
                                (stringToken.escapes == .None and options.allow_snake_case_conversion and try snakeCaseComp(field.name, key_source_slice, options)))
                            {
                                if (fields_seen[i]) {
                                    if (options.duplicate_field_behavior == .UseFirst) {
                                        // Keep the first value; the redundant one still has
                                        // to be consumed so the next token is a key again.
                                        try skipUnparsedValue(tokens);
                                        found = true;
                                        break;
                                    } else if (options.duplicate_field_behavior == .Error) {
                                        return error.DuplicateJSONField;
                                    } else if (options.duplicate_field_behavior == .UseLast) {
                                        // Comptime fields hold constants that were never allocated.
                                        if (!field.is_comptime) {
                                            parseFree(field.field_type, @field(r, field.name), options);
                                        }
                                        fields_seen[i] = false;
                                    }
                                }
                                if (field.is_comptime) {
                                    if (!try parsesTo(field.field_type, field.default_value.?, tokens, options)) {
                                        return error.UnexpectedValue;
                                    }
                                } else {
                                    @field(r, field.name) = try parse(field.field_type, tokens, options);
                                }
                                fields_seen[i] = true;
                                found = true;
                                break;
                            }
                        }
                        if (!found) {
                            if (!options.allow_unknown_fields) return error.UnknownField;
                            // We do not care about this field: fast forward past its
                            // value, whatever its type.
                            try skipUnparsedValue(tokens);
                        }
                    },
                    else => return error.UnexpectedToken,
                }
            }
            inline for (structInfo.fields) |field, i| {
                if (!fields_seen[i]) {
                    if (field.default_value) |default| {
                        if (!field.is_comptime) {
                            @field(r, field.name) = default;
                        }
                    } else {
                        if (!options.allow_missing_fields)
                            return error.MissingField;
                    }
                }
            }
            return r;
        },
        .Array => |arrayInfo| {
            switch (token) {
                .ArrayBegin => {
                    var r: T = undefined;
                    var i: usize = 0;
                    errdefer {
                        // Only elements [0, i) have been initialised; the element
                        // whose parse failed (or the one-past-the-end slot) must
                        // not be touched.
                        while (i > 0) {
                            i -= 1;
                            parseFree(arrayInfo.child, r[i], options);
                        }
                    }
                    while (i < r.len) : (i += 1) {
                        r[i] = try parse(arrayInfo.child, tokens, options);
                    }
                    const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                    switch (tok) {
                        .ArrayEnd => {},
                        else => return error.UnexpectedToken,
                    }
                    return r;
                },
                .String => |stringToken| {
                    if (arrayInfo.child != u8) return error.UnexpectedToken;
                    var r: T = undefined;
                    // The decoded string has to fill the array exactly: a longer one
                    // would overrun it and a shorter one would leave bytes undefined.
                    if (r.len != stringToken.decodedLength()) return error.LengthMismatch;
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    switch (stringToken.escapes) {
                        .None => mem.copy(u8, &r, source_slice),
                        .Some => try unescapeValidString(&r, source_slice),
                    }
                    return r;
                },
                else => return error.UnexpectedToken,
            }
        },
        .Pointer => |ptrInfo| {
            const allocator = options.allocator orelse return error.AllocatorRequired;
            switch (ptrInfo.size) {
                .One => {
                    const r: T = try allocator.create(ptrInfo.child);
                    errdefer allocator.destroy(r);
                    r.* = try parseInternal(ptrInfo.child, token, tokens, options);
                    return r;
                },
                .Slice => {
                    switch (token) {
                        .ArrayBegin => {
                            var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
                            errdefer {
                                while (arraylist.popOrNull()) |v| {
                                    parseFree(ptrInfo.child, v, options);
                                }
                                arraylist.deinit();
                            }
                            while (true) {
                                const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                                switch (tok) {
                                    .ArrayEnd => break,
                                    else => {},
                                }
                                // Reserve the slot first so that appending the parsed
                                // element cannot fail and leak it.
                                try arraylist.ensureCapacity(arraylist.items.len + 1);
                                const v = try parseInternal(ptrInfo.child, tok, tokens, options);
                                arraylist.appendAssumeCapacity(v);
                            }
                            return arraylist.toOwnedSlice();
                        },
                        .String => |stringToken| {
                            if (ptrInfo.child != u8) return error.UnexpectedToken;
                            const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                            switch (stringToken.escapes) {
                                .None => return allocator.dupe(u8, source_slice),
                                .Some => {
                                    const output = try allocator.alloc(u8, stringToken.decodedLength());
                                    errdefer allocator.free(output);
                                    try unescapeValidString(output, source_slice);
                                    return output;
                                },
                            }
                        },
                        else => return error.UnexpectedToken,
                    }
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
    }
    unreachable;
}

/// Consumes exactly one complete JSON value from `tokens` without decoding
/// it: either a single scalar token, or an object/array together with
/// everything nested inside it.
fn skipUnparsedValue(tokens: *TokenStream) !void {
    var depth: usize = 0;
    while (true) {
        switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
            .ObjectBegin, .ArrayBegin => depth += 1,
            .ObjectEnd, .ArrayEnd => {
                // A closing token where a value should start is malformed input.
                if (depth == 0) return error.UnexpectedToken;
                depth -= 1;
            },
            else => {},
        }
        if (depth == 0) return;
    }
}
/// Parses the next JSON value in `tokens` into a `T`.
/// Returns `error.UnexpectedEndOfJson` when the stream holds no further
/// token. Release the result with `parseFree`, passing the same `T` and
/// `options`.
pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) !T {
    if (try tokens.next()) |first_token| {
        return parseInternal(T, first_token, tokens, options);
    }
    return error.UnexpectedEndOfJson;
}
/// Releases resources created by `parse`.
/// Should be called with the same type and `ParseOptions` that were passed to `parse`.
///
/// Walks `value` recursively and frees every single-item pointer and slice
/// it contains through `options.allocator`. Comptime struct fields are
/// skipped: they hold constants that `parse` never allocated.
pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
    switch (@typeInfo(T)) {
        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
        .Optional => {
            if (value) |v| {
                return parseFree(@TypeOf(v), v, options);
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |UnionTagType| {
                // Only the active member owns anything.
                inline for (unionInfo.fields) |u_field| {
                    if (value == @field(UnionTagType, u_field.name)) {
                        parseFree(u_field.field_type, @field(value, u_field.name), options);
                        break;
                    }
                }
            } else {
                unreachable;
            }
        },
        .Struct => |structInfo| {
            inline for (structInfo.fields) |field| {
                if (!field.is_comptime) {
                    parseFree(field.field_type, @field(value, field.name), options);
                }
            }
        },
        .Array => |arrayInfo| {
            for (value) |v| {
                parseFree(arrayInfo.child, v, options);
            }
        },
        .Pointer => |ptrInfo| {
            // A pointer can only have been produced by `parse` with an allocator set.
            const allocator = options.allocator orelse unreachable;
            switch (ptrInfo.size) {
                .One => {
                    parseFree(ptrInfo.child, value.*, options);
                    allocator.destroy(value);
                },
                .Slice => {
                    for (value) |v| {
                        parseFree(ptrInfo.child, v, options);
                    }
                    allocator.free(value);
                },
                else => unreachable,
            }
        },
        else => unreachable,
    }
}
test "parse" {
    // None of these targets allocate, so default options are enough.
    const no_options = ParseOptions{};

    // booleans
    try testing.expectEqual(false, try parse(bool, &TokenStream.init("false"), no_options));
    try testing.expectEqual(true, try parse(bool, &TokenStream.init("true"), no_options));

    // integers, including a value too large for the target type
    try testing.expectEqual(@as(u1, 1), try parse(u1, &TokenStream.init("1"), no_options));
    try testing.expectError(error.Overflow, parse(u1, &TokenStream.init("50"), no_options));
    try testing.expectEqual(@as(u64, 42), try parse(u64, &TokenStream.init("42"), no_options));

    // floats
    try testing.expectEqual(@as(f64, 42), try parse(f64, &TokenStream.init("42.0"), no_options));

    // optionals
    try testing.expectEqual(@as(?bool, null), try parse(?bool, &TokenStream.init("null"), no_options));
    try testing.expectEqual(@as(?bool, true), try parse(?bool, &TokenStream.init("true"), no_options));

    // fixed-size byte arrays accept both a string and an array of numbers
    try testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("\"foo\""), no_options));
    try testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("[102, 111, 111]"), no_options));
}
test "parse into enum" {
    const Choice = extern enum {
        Foo = 42,
        Bar,
        @"with\\escape",
    };
    const no_options = ParseOptions{};

    // by name, by integer value, and by a name that needs unescaping
    try testing.expectEqual(@as(Choice, .Foo), try parse(Choice, &TokenStream.init("\"Foo\""), no_options));
    try testing.expectEqual(@as(Choice, .Foo), try parse(Choice, &TokenStream.init("42"), no_options));
    try testing.expectEqual(@as(Choice, .@"with\\escape"), try parse(Choice, &TokenStream.init("\"with\\\\escape\""), no_options));

    // values and names that are not part of the enum
    try testing.expectError(error.InvalidEnumTag, parse(Choice, &TokenStream.init("5"), no_options));
    try testing.expectError(error.InvalidEnumTag, parse(Choice, &TokenStream.init("\"Qux\""), no_options));
}
test "parse into that allocates a slice" {
    // Without an allocator a slice target cannot be produced.
    try testing.expectError(error.AllocatorRequired, parse([]u8, &TokenStream.init("\"foo\""), ParseOptions{}));

    const options = ParseOptions{ .allocator = testing.allocator };
    {
        // plain string
        const bytes = try parse([]u8, &TokenStream.init("\"foo\""), options);
        defer parseFree([]u8, bytes, options);
        try testing.expectEqualSlices(u8, "foo", bytes);
    }
    {
        // array of numbers
        const bytes = try parse([]u8, &TokenStream.init("[102, 111, 111]"), options);
        defer parseFree([]u8, bytes, options);
        try testing.expectEqualSlices(u8, "foo", bytes);
    }
    {
        // string containing an escape sequence
        const bytes = try parse([]u8, &TokenStream.init("\"with\\\\escape\""), options);
        defer parseFree([]u8, bytes, options);
        try testing.expectEqualSlices(u8, "with\\escape", bytes);
    }
}
test "parse into tagged union" {
    {
        // the first member able to hold the value is selected
        const Scalar = union(enum) {
            int: i32,
            float: f64,
            string: []const u8,
        };
        const parsed = try parse(Scalar, &TokenStream.init("1.5"), ParseOptions{});
        try testing.expectEqual(Scalar{ .float = 1.5 }, parsed);
    }
    { // failing allocations should be bubbled up instantly without trying next member
        var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0);
        const options = ParseOptions{ .allocator = &fail_alloc.allocator };
        const Bytes = union(enum) {
            // both fields here match the input
            string: []const u8,
            array: [3]u8,
        };
        try testing.expectError(error.OutOfMemory, parse(Bytes, &TokenStream.init("[1,2,3]"), options));
    }
    {
        // if multiple matches possible, takes first option
        const Twins = union(enum) {
            x: u8,
            y: u8,
        };
        const parsed = try parse(Twins, &TokenStream.init("42"), ParseOptions{});
        try testing.expectEqual(Twins{ .x = 42 }, parsed);
    }
    { // needs to back out when first union member doesn't match
        const Shapes = union(enum) {
            A: struct { x: u32 },
            B: struct { y: u32 },
        };
        const parsed = try parse(Shapes, &TokenStream.init("{\"y\":42}"), ParseOptions{});
        try testing.expectEqual(Shapes{ .B = .{ .y = 42 } }, parsed);
    }
}
test "parse union bubbles up AllocatorRequired" {
    const no_allocator = ParseOptions{};
    { // string member first in union (and not matching)
        const StringFirst = union(enum) {
            string: []const u8,
            int: i32,
        };
        try testing.expectError(error.AllocatorRequired, parse(StringFirst, &TokenStream.init("42"), no_allocator));
    }
    { // string member not first in union (and matching)
        const StringLast = union(enum) {
            int: i32,
            float: f64,
            string: []const u8,
        };
        try testing.expectError(error.AllocatorRequired, parse(StringLast, &TokenStream.init("\"foo\""), no_allocator));
    }
}
test "parseFree descends into tagged union" {
    var fail_alloc = testing.FailingAllocator.init(testing.allocator, 1);
    const options = ParseOptions{ .allocator = &fail_alloc.allocator };
    const Scalar = union(enum) {
        int: i32,
        float: f64,
        string: []const u8,
    };
    const ScalarTag = std.meta.Tag(Scalar);

    // use a string with unicode escape so we know result can't be a reference to global constant
    const parsed = try parse(Scalar, &TokenStream.init("\"with\\u0105unicode\""), options);
    try testing.expectEqual(ScalarTag.string, @as(ScalarTag, parsed));
    try testing.expectEqualSlices(u8, "withąunicode", parsed.string);

    // the string is released only once parseFree reaches the active member
    try testing.expectEqual(@as(usize, 0), fail_alloc.deallocations);
    parseFree(Scalar, parsed, options);
    try testing.expectEqual(@as(usize, 1), fail_alloc.deallocations);
}
test "parse with comptime field" {
    {
        // a comptime field is accepted when the JSON carries its exact value
        const Flagged = struct {
            comptime a: i32 = 0,
            b: bool,
        };
        const parsed = try parse(Flagged, &TokenStream.init(
            \\{
            \\ "a": 0,
            \\ "b": true
            \\}
        ), ParseOptions{});
        try testing.expectEqual(Flagged{ .a = 0, .b = true }, parsed);
    }
    { // string comptime values currently require an allocator
        const Kinded = union(enum) {
            foo: struct {
                comptime kind: []const u8 = "boolean",
                b: bool,
            },
            bar: struct {
                comptime kind: []const u8 = "float",
                b: f64,
            },
        };
        // NOTE(review): this case goes through `std.json`, not the `parse`
        // defined in this file - confirm that is intended.
        var stream = std.json.TokenStream.init(
            \\{
            \\ "kind": "float",
            \\ "b": 1.0
            \\}
        );
        const r = try std.json.parse(Kinded, &stream, .{
            .allocator = std.testing.allocator,
        });
    }
}
test "parse into struct with no fields" {
    // An empty JSON object is the only input an empty struct can take.
    const Empty = struct {};
    const parsed = try parse(Empty, &TokenStream.init("{}"), ParseOptions{});
    try testing.expectEqual(Empty{}, parsed);
}
test "parse exponential into int" {
    // 4.2e2 is written with an exponent but its value (420) is integral.
    const Holder = struct { int: i64 };
    const parsed = try parse(Holder, &TokenStream.init("{ \"int\": 4.2e2 }"), ParseOptions{});
    try testing.expectEqual(@as(i64, 420), parsed.int);
}
test "parse into struct with misc fields" {
    @setEvalBranchQuota(10000);
    const options = ParseOptions{ .allocator = testing.allocator };
    const Misc = struct {
        int: i64,
        float: f64,
        @"with\\escape": bool,
        @"withąunicode😂": bool,
        language: []const u8,
        optional: ?bool,
        default_field: i32 = 42,
        static_array: [3]f64,
        dynamic_array: []f64,
        complex: struct {
            nested: []const u8,
        },
        veryComplex: []struct {
            foo: []const u8,
        },
        a_union: Union,
        const Union = union(enum) {
            x: u8,
            float: f64,
            string: []const u8,
        };
    };
    const parsed = try parse(Misc, &TokenStream.init(
        \\{
        \\ "int": 420,
        \\ "float": 3.14,
        \\ "with\\escape": true,
        \\ "with\u0105unicode\ud83d\ude02": false,
        \\ "language": "zig",
        \\ "optional": null,
        \\ "static_array": [66.6, 420.420, 69.69],
        \\ "dynamic_array": [66.6, 420.420, 69.69],
        \\ "complex": {
        \\ "nested": "zig"
        \\ },
        \\ "veryComplex": [
        \\ {
        \\ "foo": "zig"
        \\ }, {
        \\ "foo": "rocks"
        \\ }
        \\ ],
        \\ "a_union": 100000
        \\}
    ), options);
    defer parseFree(Misc, parsed, options);

    // scalars, including keys that need unescaping
    try testing.expectEqual(@as(i64, 420), parsed.int);
    try testing.expectEqual(@as(f64, 3.14), parsed.float);
    try testing.expectEqual(true, parsed.@"with\\escape");
    try testing.expectEqual(false, parsed.@"withąunicode😂");
    try testing.expectEqualSlices(u8, "zig", parsed.language);
    try testing.expectEqual(@as(?bool, null), parsed.optional);
    // absent from the input, so the declared default applies
    try testing.expectEqual(@as(i32, 42), parsed.default_field);

    // fixed-size and heap-allocated arrays
    try testing.expectEqual(@as(f64, 66.6), parsed.static_array[0]);
    try testing.expectEqual(@as(f64, 420.420), parsed.static_array[1]);
    try testing.expectEqual(@as(f64, 69.69), parsed.static_array[2]);
    try testing.expectEqual(@as(usize, 3), parsed.dynamic_array.len);
    try testing.expectEqual(@as(f64, 66.6), parsed.dynamic_array[0]);
    try testing.expectEqual(@as(f64, 420.420), parsed.dynamic_array[1]);
    try testing.expectEqual(@as(f64, 69.69), parsed.dynamic_array[2]);

    // nested structs
    try testing.expectEqualSlices(u8, parsed.complex.nested, "zig");
    try testing.expectEqualSlices(u8, "zig", parsed.veryComplex[0].foo);
    try testing.expectEqualSlices(u8, "rocks", parsed.veryComplex[1].foo);

    // 100000 does not fit `x: u8`, so the next member (`float`) is chosen
    try testing.expectEqual(Misc.Union{ .float = 100000 }, parsed.a_union);
}
test "parse into struct with duplicate field" {
    // allow allocator to detect double frees by keeping bucket in use
    const ballast = try testing.allocator.alloc(u64, 1);
    defer testing.allocator.free(ballast);
    const options = ParseOptions{
        .allocator = testing.allocator,
        .duplicate_field_behavior = .UseLast,
    };
    const str = "{ \"a\": 1, \"a\": 0.25 }";

    // The second "a" replaces the first one. 0.25 is a number, but it is not
    // integral, so parsing it into a u64 fails with InvalidNumber (integer
    // targets accept any number token whose value is integral). The first
    // value must have been freed exactly once by then.
    const T1 = struct { a: *u64 };
    try testing.expectError(error.InvalidNumber, parse(T1, &TokenStream.init(str), options));

    const T2 = struct { a: f64 };
    try testing.expectEqual(T2{ .a = 0.25 }, try parse(T2, &TokenStream.init(str), options));
}
/// A non-stream JSON parser which constructs a tree of Value's.
///
/// `parse` tokenizes the whole input and feeds every token to `transition`,
/// which builds the tree on `stack`. All tree memory comes from an arena that
/// is returned inside the `ValueTree`.
pub const Parser = struct {
/// Allocator backing `stack` and the arena created by each `parse` call.
allocator: *Allocator,
/// Which kind of token `transition` expects next.
state: State,
/// When true, strings without escapes are duplicated instead of referencing
/// the input buffer (see `parseString`).
copy_strings: bool,
// Stores parent nodes and un-combined Values.
stack: Array,
const State = enum {
ObjectKey,
ObjectValue,
ArrayValue,
Simple,
};
/// Creates a parser in the `.Simple` state with an empty stack.
pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
return Parser{
.allocator = allocator,
.state = .Simple,
.copy_strings = copy_strings,
.stack = Array.init(allocator),
};
}
/// Frees the stack. Trees already returned by `parse` live in their own arena.
pub fn deinit(p: *Parser) void {
p.stack.deinit();
}
/// Returns the parser to its initial state, keeping the stack's capacity.
pub fn reset(p: *Parser) void {
p.state = .Simple;
p.stack.shrinkRetainingCapacity(0);
}
/// Parses `input` into a `ValueTree` that owns a fresh arena.
/// On error the arena is released before the error is returned.
pub fn parse(p: *Parser, input: []const u8) !ValueTree {
var s = TokenStream.init(input);
var arena = ArenaAllocator.init(p.allocator);
errdefer arena.deinit();
while (try s.next()) |token| {
try p.transition(&arena.allocator, input, s.i - 1, token);
}
// After the last token only the root value is left on the stack.
debug.assert(p.stack.items.len == 1);
return ValueTree{
.arena = arena,
.root = p.stack.items[0],
};
}
// Even though p.allocator exists, we take an explicit allocator so that allocation state
// can be cleaned up on error correctly during a `parse` on call.
fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: Token) !void {
switch (p.state) {
// Inside an object, expecting either a key or the end of the object.
.ObjectKey => switch (token) {
.ObjectEnd => {
// The root object stays on the stack as the result.
if (p.stack.items.len == 1) {
return;
}
var value = p.stack.pop();
try p.pushToParent(&value);
},
.String => |s| {
// The key stays on the stack until its value has been seen.
try p.stack.append(try p.parseString(allocator, s, input, i));
p.state = .ObjectValue;
},
else => {
// The streaming parser would return an error eventually.
// To prevent invalid state we return an error now.
// TODO make the streaming parser return an error as soon as it encounters an invalid object key
return error.InvalidLiteral;
},
},
// Stack layout here: [ ..., object, key ]
.ObjectValue => {
var object = &p.stack.items[p.stack.items.len - 2].Object;
var key = p.stack.items[p.stack.items.len - 1].String;
switch (token) {
// Containers are pushed and attached to the object once they end.
.ObjectBegin => {
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
p.state = .ObjectKey;
},
.ArrayBegin => {
try p.stack.append(Value{ .Array = Array.init(allocator) });
p.state = .ArrayValue;
},
// Scalars are stored right away, then the key is popped.
.String => |s| {
try object.put(key, try p.parseString(allocator, s, input, i));
_ = p.stack.pop();
p.state = .ObjectKey;
},
.Number => |n| {
try object.put(key, try p.parseNumber(n, input, i));
_ = p.stack.pop();
p.state = .ObjectKey;
},
.True => {
try object.put(key, Value{ .Bool = true });
_ = p.stack.pop();
p.state = .ObjectKey;
},
.False => {
try object.put(key, Value{ .Bool = false });
_ = p.stack.pop();
p.state = .ObjectKey;
},
.Null => {
try object.put(key, Value.Null);
_ = p.stack.pop();
p.state = .ObjectKey;
},
.ObjectEnd, .ArrayEnd => {
unreachable;
},
}
},
// Stack layout here: [ ..., array ]
.ArrayValue => {
var array = &p.stack.items[p.stack.items.len - 1].Array;
switch (token) {
.ArrayEnd => {
// The root array stays on the stack as the result.
if (p.stack.items.len == 1) {
return;
}
var value = p.stack.pop();
try p.pushToParent(&value);
},
.ObjectBegin => {
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
p.state = .ObjectKey;
},
.ArrayBegin => {
try p.stack.append(Value{ .Array = Array.init(allocator) });
p.state = .ArrayValue;
},
.String => |s| {
try array.append(try p.parseString(allocator, s, input, i));
},
.Number => |n| {
try array.append(try p.parseNumber(n, input, i));
},
.True => {
try array.append(Value{ .Bool = true });
},
.False => {
try array.append(Value{ .Bool = false });
},
.Null => {
try array.append(Value.Null);
},
.ObjectEnd => {
unreachable;
},
}
},
// Top level: the token starts (or is) the root value.
.Simple => switch (token) {
.ObjectBegin => {
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
p.state = .ObjectKey;
},
.ArrayBegin => {
try p.stack.append(Value{ .Array = Array.init(allocator) });
p.state = .ArrayValue;
},
.String => |s| {
try p.stack.append(try p.parseString(allocator, s, input, i));
},
.Number => |n| {
try p.stack.append(try p.parseNumber(n, input, i));
},
.True => {
try p.stack.append(Value{ .Bool = true });
},
.False => {
try p.stack.append(Value{ .Bool = false });
},
.Null => {
try p.stack.append(Value.Null);
},
.ObjectEnd, .ArrayEnd => {
unreachable;
},
},
}
}
// Attaches a finished container (already popped by the caller) to the parent
// on top of the stack and restores the state that matches that parent.
fn pushToParent(p: *Parser, value: *const Value) !void {
switch (p.stack.items[p.stack.items.len - 1]) {
// Object Parent -> [ ..., object, <key>, value ]
Value.String => |key| {
_ = p.stack.pop();
var object = &p.stack.items[p.stack.items.len - 1].Object;
try object.put(key, value.*);
p.state = .ObjectKey;
},
// Array Parent -> [ ..., <array>, value ]
Value.Array => |*array| {
try array.append(value.*);
p.state = .ArrayValue;
},
else => {
unreachable;
},
}
}
// Turns a string token into a `Value.String`. Strings without escapes
// reference `input` directly unless `copy_strings` is set; strings with
// escapes are always decoded into newly allocated memory.
fn parseString(p: *Parser, allocator: *Allocator, s: std.meta.TagPayload(Token, Token.String), input: []const u8, i: usize) !Value {
const slice = s.slice(input, i);
switch (s.escapes) {
.None => return Value{ .String = if (p.copy_strings) try allocator.dupe(u8, slice) else slice },
.Some => |some_escapes| {
const output = try allocator.alloc(u8, s.decodedLength());
errdefer allocator.free(output);
try unescapeValidString(output, slice);
return Value{ .String = output };
},
}
}
// Turns a number token into a `Value`: `.Integer` when it is an integer that
// fits an i64, `.NumberString` (the raw text) when such an integer
// overflows, and `.Float` otherwise.
fn parseNumber(p: *Parser, n: std.meta.TagPayload(Token, Token.Number), input: []const u8, i: usize) !Value {
return if (n.is_integer)
Value{
.Integer = std.fmt.parseInt(i64, n.slice(input, i), 10) catch |e| switch (e) {
error.Overflow => return Value{ .NumberString = n.slice(input, i) },
error.InvalidCharacter => |err| return err,
},
}
else
Value{ .Float = try std.fmt.parseFloat(f64, n.slice(input, i)) };
}
};
/// Decodes the JSON escape sequences of `input` into `output`.
///
/// Meant only for strings the parser has already validated: malformed escapes
/// hit `unreachable` and indices are not bounds-checked. `output` must be
/// exactly as long as the decoded string. Returns
/// `error.InvalidUnicodeHexSymbol` when a `\u` escape cannot be encoded,
/// e.g. a surrogate half that is not followed by a second `\u` escape.
pub fn unescapeValidString(output: []u8, input: []const u8) !void {
    var src: usize = 0;
    var dst: usize = 0;
    while (src < input.len) {
        // ordinary byte: copy as is
        if (input[src] != '\\') {
            output[dst] = input[src];
            src += 1;
            dst += 1;
            continue;
        }

        // two-character escape such as `\n`
        const escape_char = input[src + 1];
        if (escape_char != 'u') {
            const decoded: u8 = switch (escape_char) {
                '\\' => '\\',
                '/' => '/',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                'f' => 12,
                'b' => 8,
                '"' => '"',
                else => unreachable,
            };
            output[dst] = decoded;
            src += 2;
            dst += 1;
            continue;
        }

        // `\uXXXX`: optimistically treat it as a complete code point
        const first_unit = std.fmt.parseInt(u16, input[src + 2 .. src + 6], 16) catch unreachable;
        if (std.unicode.utf8Encode(first_unit, output[dst..])) |written| {
            dst += written;
            src += 6;
            continue;
        } else |err| {
            // only a surrogate half can be rescued by a following escape
            if (err != error.Utf8CannotEncodeSurrogateHalf) {
                return error.InvalidUnicodeHexSymbol;
            }
        }

        // surrogate pair: a second `\uXXXX` has to follow immediately
        const has_second_unit = src + 7 < input.len and input[src + 6] == '\\' and input[src + 7] == 'u';
        if (!has_second_unit) {
            return error.InvalidUnicodeHexSymbol;
        }
        const second_unit = std.fmt.parseInt(u16, input[src + 8 .. src + 12], 16) catch unreachable;
        const pair = [2]u16{
            mem.nativeToLittle(u16, first_unit),
            mem.nativeToLittle(u16, second_unit),
        };
        const written = std.unicode.utf16leToUtf8(output[dst..], &pair) catch {
            return error.InvalidUnicodeHexSymbol;
        };
        dst += written;
        src += 12;
    }
    assert(dst == output.len);
}
test "json.parser.dynamic" {
    var parser = Parser.init(testing.allocator, false);
    defer parser.deinit();
    const document =
        \\{
        \\ "Image": {
        \\ "Width": 800,
        \\ "Height": 600,
        \\ "Title": "View from 15th Floor",
        \\ "Thumbnail": {
        \\ "Url": "http://www.example.com/image/481989943",
        \\ "Height": 125,
        \\ "Width": 100
        \\ },
        \\ "Animated" : false,
        \\ "IDs": [116, 943, 234, 38793],
        \\ "ArrayOfObject": [{"n": "m"}],
        \\ "double": 1.3412,
        \\ "LargeInt": 18446744073709551615
        \\ }
        \\}
    ;
    var tree = try parser.parse(document);
    defer tree.deinit();
    var root = tree.root;
    var image = root.Object.get("Image").?;

    // integers
    const width = image.Object.get("Width").?;
    try testing.expect(width.Integer == 800);
    const height = image.Object.get("Height").?;
    try testing.expect(height.Integer == 600);

    // string and bool
    const title = image.Object.get("Title").?;
    try testing.expect(mem.eql(u8, title.String, "View from 15th Floor"));
    const animated = image.Object.get("Animated").?;
    try testing.expect(animated.Bool == false);

    // object nested in an array
    const array_of_object = image.Object.get("ArrayOfObject").?;
    try testing.expect(array_of_object.Array.items.len == 1);
    const first_object = array_of_object.Array.items[0].Object.get("n").?;
    try testing.expect(mem.eql(u8, first_object.String, "m"));

    // float
    const double = image.Object.get("double").?;
    try testing.expect(double.Float == 1.3412);

    // an integer too large for i64 is kept as its original text
    const large_int = image.Object.get("LargeInt").?;
    try testing.expect(mem.eql(u8, large_int.NumberString, "18446744073709551615"));
}
// Importing these files makes them part of this test compilation, so the
// test blocks they contain are picked up together with this file's tests.
test "import more json tests" {
_ = @import("json/test.zig");
_ = @import("json/write_stream.zig");
}
test "write json then parse it" {
    var storage: [1000]u8 = undefined;
    var buffer_stream = std.io.fixedBufferStream(&storage);
    const writer = buffer_stream.writer();

    // Emit {"f": false, "t": true, "int": 1234, "array": [null, 12.34], "str": "hello"}
    var jw = writeStream(writer, 4);
    try jw.beginObject();
    try jw.objectField("f");
    try jw.emitBool(false);
    try jw.objectField("t");
    try jw.emitBool(true);
    try jw.objectField("int");
    try jw.emitNumber(1234);
    try jw.objectField("array");
    try jw.beginArray();
    try jw.arrayElem();
    try jw.emitNull();
    try jw.arrayElem();
    try jw.emitNumber(12.34);
    try jw.endArray();
    try jw.objectField("str");
    try jw.emitString("hello");
    try jw.endObject();

    // Read the document back and check every member.
    var parser = Parser.init(testing.allocator, false);
    defer parser.deinit();
    var tree = try parser.parse(buffer_stream.getWritten());
    defer tree.deinit();
    const members = tree.root.Object;
    try testing.expect(members.get("f").?.Bool == false);
    try testing.expect(members.get("t").?.Bool == true);
    try testing.expect(members.get("int").?.Integer == 1234);
    try testing.expect(members.get("array").?.Array.items[0].Null == {});
    try testing.expect(members.get("array").?.Array.items[1].Float == 12.34);
    try testing.expect(mem.eql(u8, members.get("str").?.String, "hello"));
}
/// Test helper: parses `json_str` with a throwaway `Parser` (strings are not
/// copied) and returns the root value. Neither the parser nor the tree is
/// deinitialised here; the parameter name indicates callers pass an arena
/// and release everything through it.
fn test_parse(arena_allocator: *std.mem.Allocator, json_str: []const u8) !Value {
    var parser = Parser.init(arena_allocator, false);
    const tree = try parser.parse(json_str);
    return tree.root;
}
test "parsing empty string gives appropriate error" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    // An empty document ends before any value was seen.
    const outcome = test_parse(&arena.allocator, "");
    try testing.expectError(error.UnexpectedEndOfJson, outcome);
}
test "integer after float has proper type" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const document =
        \\{
        \\ "float": 3.14,
        \\ "ints": [1, 2, 3]
        \\}
    ;
    const root = try test_parse(&arena.allocator, document);
    // A float seen earlier must not affect how later integers are typed.
    const first_int = root.Object.get("ints").?.Array.items[0];
    try std.testing.expect(first_int == .Integer);
}
test "escaped characters" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const document =
        \\{
        \\ "backslash": "\\",
        \\ "forwardslash": "\/",
        \\ "newline": "\n",
        \\ "carriagereturn": "\r",
        \\ "tab": "\t",
        \\ "formfeed": "\f",
        \\ "backspace": "\b",
        \\ "doublequote": "\"",
        \\ "unicode": "\u0105",
        \\ "surrogatepair": "\ud83d\ude02"
        \\}
    ;
    const root = try test_parse(&arena.allocator, document);
    const members = root.Object;

    // two-character escapes
    try testing.expectEqualSlices(u8, members.get("backslash").?.String, "\\");
    try testing.expectEqualSlices(u8, members.get("forwardslash").?.String, "/");
    try testing.expectEqualSlices(u8, members.get("newline").?.String, "\n");
    try testing.expectEqualSlices(u8, members.get("carriagereturn").?.String, "\r");
    try testing.expectEqualSlices(u8, members.get("tab").?.String, "\t");
    try testing.expectEqualSlices(u8, members.get("formfeed").?.String, "\x0C");
    try testing.expectEqualSlices(u8, members.get("backspace").?.String, "\x08");
    try testing.expectEqualSlices(u8, members.get("doublequote").?.String, "\"");

    // \u escapes: a single code unit and a surrogate pair
    try testing.expectEqualSlices(u8, members.get("unicode").?.String, "ą");
    try testing.expectEqualSlices(u8, members.get("surrogatepair").?.String, "😂");
}
test "string copy option" {
    const input =
        \\{
        \\ "noescape": "aą😂",
        \\ "simple": "\\\/\n\r\t\f\b\"",
        \\ "unicode": "\u0105",
        \\ "surrogatepair": "\ud83d\ude02"
        \\}
    ;
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    // Parse the same document once referencing the input and once copying.
    var referencing_parser = Parser.init(&arena.allocator, false);
    const tree_nocopy = try referencing_parser.parse(input);
    const obj_nocopy = tree_nocopy.root.Object;
    var copying_parser = Parser.init(&arena.allocator, true);
    const tree_copy = try copying_parser.parse(input);
    const obj_copy = tree_copy.root.Object;

    // Both modes must decode to the same text.
    const field_names = [_][]const u8{ "noescape", "simple", "unicode", "surrogatepair" };
    for (field_names) |field_name| {
        try testing.expectEqualSlices(u8, obj_nocopy.get(field_name).?.String, obj_copy.get(field_name).?.String);
    }

    // The escape-free string points into `input` only when it was not copied.
    const nocopy_addr = &obj_nocopy.get("noescape").?.String[0];
    const copy_addr = &obj_copy.get("noescape").?.String[0];
    var found_nocopy = false;
    var index: usize = 0;
    while (index < input.len) : (index += 1) {
        const input_addr = &input[index];
        try testing.expect(copy_addr != input_addr);
        if (nocopy_addr == input_addr) {
            found_nocopy = true;
        }
    }
    try testing.expect(found_nocopy);
}
/// Options accepted by `stringify`.
pub const StringifyOptions = struct {
    /// Pretty-printing settings.
    pub const Whitespace = struct {
        /// Current nesting depth, counted in indentation levels.
        indent_level: usize = 0,

        /// What one indentation level consists of: a number of spaces, or a
        /// single tab.
        indent: union(enum) {
            Space: u8,
            Tab: void,
        } = .{ .Space = 4 },

        /// Whether whitespace is inserted after a colon.
        separator: bool = true,

        /// Writes the indentation for the current `indent_level` to
        /// `out_stream`: `indent_level` tabs, or `indent_level` times the
        /// configured number of spaces.
        pub fn outputIndent(
            whitespace: @This(),
            out_stream: anytype,
        ) @TypeOf(out_stream).Error!void {
            switch (whitespace.indent) {
                .Space => |width| try out_stream.writeByteNTimes(' ', @as(usize, width) * whitespace.indent_level),
                .Tab => try out_stream.writeByteNTimes('\t', whitespace.indent_level),
            }
        }
    };

    /// Controls the whitespace emitted; `null` disables it.
    whitespace: ?Whitespace = null,

    /// How `[]u8` values are rendered.
    string: StringOptions = .{ .String = .{} },

    /// Should []u8 be serialised as a string? or an array?
    pub const StringOptions = union(enum) {
        Array,
        String: StringOutputOptions,

        /// String output options
        const StringOutputOptions = struct {
            /// Should '/' be escaped in strings?
            escape_solidus: bool = false,

            /// Should unicode characters be escaped in strings?
            escape_unicode: bool = false,
        };
    };
};
/// Writes `codepoint` to `out_stream` as a JSON `\uXXXX` escape using
/// lowercase hexadecimal digits. Code points above U+FFFF are written as two
/// escapes holding the UTF-16 surrogate pair.
fn outputUnicodeEscape(
    codepoint: u21,
    out_stream: anytype,
) !void {
    // Four hexadecimal digits, zero padded.
    const four_hex_digits = std.fmt.FormatOptions{ .width = 4, .fill = '0' };

    if (codepoint <= 0xFFFF) {
        // A character in the Basic Multilingual Plane (U+0000 through U+FFFF)
        // fits in one six-character sequence: a reverse solidus, the
        // lowercase letter u, and the four hex digits of its code point.
        try out_stream.writeAll("\\u");
        try std.fmt.formatIntValue(codepoint, "x", four_hex_digits, out_stream);
        return;
    }

    assert(codepoint <= 0x10FFFF);
    // A character outside the Basic Multilingual Plane needs a 12-character
    // sequence that encodes its UTF-16 surrogate pair.
    const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
    const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
    try out_stream.writeAll("\\u");
    try std.fmt.formatIntValue(high, "x", four_hex_digits, out_stream);
    try out_stream.writeAll("\\u");
    try std.fmt.formatIntValue(low, "x", four_hex_digits, out_stream);
}
/// Serialises `value` as JSON text into `out_stream`.
///
/// The output is chosen at compile time from the type of `value`:
/// - floats are written in scientific notation, integers in decimal;
/// - `bool` and `null` become the literals `true`, `false` and `null`;
/// - optionals are written as their payload, or `null` when empty;
/// - enums, unions and structs that declare `jsonStringify` delegate to it;
///   other enums and untagged unions are compile errors;
/// - tagged unions are written as their active payload;
/// - structs become objects with one member per non-`void` field; member
///   names are always written as strings;
/// - error values are written as a string holding the error name;
/// - slices of `u8` become strings when `options.string` is `.String` and the
///   bytes are valid UTF-8; every other slice becomes an array;
/// - arrays, pointers to arrays and vectors are written like slices; other
///   single-item pointers are written as the value they point to.
///
/// Setting `options.whitespace` enables newlines and indentation inside
/// objects and arrays. Only errors produced by `out_stream` are returned.
pub fn stringify(
    value: anytype,
    options: StringifyOptions,
    out_stream: anytype,
) @TypeOf(out_stream).Error!void {
    const T = @TypeOf(value);
    switch (@typeInfo(T)) {
        .Float, .ComptimeFloat => {
            return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream);
        },
        .Int, .ComptimeInt => {
            return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream);
        },
        .Bool => {
            return out_stream.writeAll(if (value) "true" else "false");
        },
        .Null => {
            return out_stream.writeAll("null");
        },
        .Optional => {
            if (value) |payload| {
                return try stringify(payload, options, out_stream);
            } else {
                return try stringify(null, options, out_stream);
            }
        },
        .Enum => {
            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
                return value.jsonStringify(options, out_stream);
            }

            @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'");
        },
        .Union => {
            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
                return value.jsonStringify(options, out_stream);
            }

            const info = @typeInfo(T).Union;
            if (info.tag_type) |UnionTagType| {
                // Find the active field and write its payload.
                inline for (info.fields) |u_field| {
                    if (value == @field(UnionTagType, u_field.name)) {
                        return try stringify(@field(value, u_field.name), options, out_stream);
                    }
                }
            } else {
                @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |S| {
            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
                return value.jsonStringify(options, out_stream);
            }

            try out_stream.writeByte('{');
            comptime var field_output = false;
            // Members are indented one level deeper than the braces.
            var child_options = options;
            if (child_options.whitespace) |*child_whitespace| {
                child_whitespace.indent_level += 1;
            }
            // Object member names must be JSON strings, even when the caller
            // asked for []u8 values to be written as arrays. In that case the
            // names use the default string output options.
            var key_options = options;
            if (key_options.string != .String) {
                key_options.string = StringifyOptions.StringOptions{ .String = .{} };
            }
            inline for (S.fields) |Field| {
                // don't include void fields
                if (Field.field_type == void) continue;

                if (!field_output) {
                    field_output = true;
                } else {
                    try out_stream.writeByte(',');
                }
                if (child_options.whitespace) |child_whitespace| {
                    try out_stream.writeByte('\n');
                    try child_whitespace.outputIndent(out_stream);
                }
                try stringify(Field.name, key_options, out_stream);
                try out_stream.writeByte(':');
                if (child_options.whitespace) |child_whitespace| {
                    if (child_whitespace.separator) {
                        try out_stream.writeByte(' ');
                    }
                }
                try stringify(@field(value, Field.name), child_options, out_stream);
            }
            // The closing brace moves to its own line only when at least one
            // member was written.
            if (field_output) {
                if (options.whitespace) |whitespace| {
                    try out_stream.writeByte('\n');
                    try whitespace.outputIndent(out_stream);
                }
            }
            try out_stream.writeByte('}');
            return;
        },
        .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream),
        .Pointer => |ptr_info| switch (ptr_info.size) {
            .One => switch (@typeInfo(ptr_info.child)) {
                .Array => {
                    // A pointer to an array is written like a slice.
                    const Slice = []const std.meta.Elem(ptr_info.child);
                    return stringify(@as(Slice, value), options, out_stream);
                },
                else => {
                    // TODO: avoid loops?
                    return stringify(value.*, options, out_stream);
                },
            },
            // TODO: .Many when there is a sentinel (waiting for https://github.com/ziglang/zig/pull/3972)
            .Slice => {
                if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(value)) {
                    try out_stream.writeByte('\"');
                    var i: usize = 0;
                    while (i < value.len) : (i += 1) {
                        switch (value[i]) {
                            // normal ascii character
                            0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try out_stream.writeByte(c),
                            // only 2 characters that *must* be escaped
                            '\\' => try out_stream.writeAll("\\\\"),
                            '\"' => try out_stream.writeAll("\\\""),
                            // solidus is optional to escape
                            '/' => {
                                if (options.string.String.escape_solidus) {
                                    try out_stream.writeAll("\\/");
                                } else {
                                    try out_stream.writeByte('/');
                                }
                            },
                            // control characters with short escapes
                            // TODO: option to switch between unicode and 'short' forms?
                            0x8 => try out_stream.writeAll("\\b"),
                            0xC => try out_stream.writeAll("\\f"),
                            '\n' => try out_stream.writeAll("\\n"),
                            '\r' => try out_stream.writeAll("\\r"),
                            '\t' => try out_stream.writeAll("\\t"),
                            else => {
                                // The slice was validated as UTF-8 above, so
                                // the sequence length and decode cannot fail.
                                const ulen = std.unicode.utf8ByteSequenceLength(value[i]) catch unreachable;
                                // control characters (only things left with 1 byte length) should always be printed as unicode escapes
                                if (ulen == 1 or options.string.String.escape_unicode) {
                                    const codepoint = std.unicode.utf8Decode(value[i .. i + ulen]) catch unreachable;
                                    try outputUnicodeEscape(codepoint, out_stream);
                                } else {
                                    try out_stream.writeAll(value[i .. i + ulen]);
                                }
                                // Skip the continuation bytes; the loop adds
                                // the final 1.
                                i += ulen - 1;
                            },
                        }
                    }
                    try out_stream.writeByte('\"');
                    return;
                }

                try out_stream.writeByte('[');
                // Elements are indented one level deeper than the brackets.
                var child_options = options;
                if (child_options.whitespace) |*whitespace| {
                    whitespace.indent_level += 1;
                }
                for (value) |x, i| {
                    if (i != 0) {
                        try out_stream.writeByte(',');
                    }
                    if (child_options.whitespace) |child_whitespace| {
                        try out_stream.writeByte('\n');
                        try child_whitespace.outputIndent(out_stream);
                    }
                    try stringify(x, child_options, out_stream);
                }
                if (value.len != 0) {
                    if (options.whitespace) |whitespace| {
                        try out_stream.writeByte('\n');
                        try whitespace.outputIndent(out_stream);
                    }
                }
                try out_stream.writeByte(']');
                return;
            },
            else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
        },
        .Array => return stringify(&value, options, out_stream),
        .Vector => |info| {
            // Copy the vector into an array and write that.
            const array: [info.len]info.child = value;
            return stringify(&array, options, out_stream);
        },
        else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
    }
    unreachable;
}
/// Test helper: stringifies `value` with `options` and checks that the
/// output is exactly `expected`. The output is compared chunk by chunk as it
/// is written, so no buffer is needed.
///
/// Fails with `error.TooMuchData` when a write is longer than the expected
/// text still remaining, `error.DifferentData` when written bytes differ
/// from the expected ones, and `error.NotEnoughData` when expected text is
/// left over after stringifying.
fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void {
    const ValidationWriter = struct {
        const Self = @This();
        pub const Writer = std.io.Writer(*Self, Error, write);
        pub const Error = error{
            TooMuchData,
            DifferentData,
        };

        /// The part of the expected output that has not been matched yet.
        expected_remaining: []const u8,

        fn init(exp: []const u8) Self {
            return .{ .expected_remaining = exp };
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        /// Prints the expected and the actually written text.
        fn reportMismatch(wanted: []const u8, found: []const u8) void {
            std.debug.warn(
                \\====== expected this output: =========
                \\{s}
                \\======== instead found this: =========
                \\{s}
                \\======================================
            , .{
                wanted,
                found,
            });
        }

        fn write(self: *Self, bytes: []const u8) Error!usize {
            const remaining = self.expected_remaining;
            if (bytes.len > remaining.len) {
                reportMismatch(remaining, bytes);
                return error.TooMuchData;
            }

            const wanted = remaining[0..bytes.len];
            if (!mem.eql(u8, wanted, bytes)) {
                reportMismatch(wanted, bytes);
                return error.DifferentData;
            }

            // Consume the matched prefix.
            self.expected_remaining = remaining[bytes.len..];
            return bytes.len;
        }
    };

    var validator = ValidationWriter.init(expected);
    try stringify(value, options, validator.writer());
    if (validator.expected_remaining.len != 0) return error.NotEnoughData;
}
test "stringify basic types" {
    const defaults = StringifyOptions{};

    // booleans
    try teststringify("false", false, defaults);
    try teststringify("true", true, defaults);

    // empty optionals
    try teststringify("null", @as(?u8, null), defaults);
    try teststringify("null", @as(?*u32, null), defaults);

    // integers, comptime and sized
    try teststringify("42", 42, defaults);
    try teststringify("42", @as(u8, 42), defaults);
    try teststringify("42", @as(u128, 42), defaults);

    // floats, comptime and sized
    try teststringify("4.2e+01", 42.0, defaults);
    try teststringify("4.2e+01", @as(f32, 42), defaults);
    try teststringify("4.2e+01", @as(f64, 42), defaults);

    // errors are written as their name
    try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), defaults);
}
test "stringify string" {
    const plain = StringifyOptions{};
    const escape_unicode = StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } };
    const escape_solidus = StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } };

    try teststringify("\"hello\"", "hello", plain);

    // Short escapes and control characters are escaped regardless of
    // `escape_unicode`.
    try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", plain);
    try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", escape_unicode);
    try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", plain);
    try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", escape_unicode);

    // Non-ASCII code points pass through unchanged unless `escape_unicode`
    // is set.
    try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", plain);
    try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", escape_unicode);
    try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", plain);
    try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", escape_unicode);
    try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", plain);
    try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", escape_unicode);
    try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", plain);
    try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", escape_unicode);
    try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", plain);
    try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", escape_unicode);
    try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", plain);
    try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", escape_unicode);

    // Code points above U+FFFF are escaped as surrogate pairs.
    try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", plain);
    try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", escape_unicode);
    try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", plain);
    try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", escape_unicode);

    // The solidus is escaped only on request.
    try teststringify("\"/\"", "/", plain);
    try teststringify("\"\\/\"", "/", escape_solidus);
}
test "stringify tagged unions" {
    const FooOrBar = union(enum) {
        Foo: u32,
        Bar: bool,
    };

    // A tagged union is written as its active payload only.
    try teststringify("42", FooOrBar{ .Foo = 42 }, StringifyOptions{});
}
test "stringify struct" {
    const OneField = struct {
        foo: u32,
    };

    // Without whitespace options the object is written compactly.
    try teststringify("{\"foo\":42}", OneField{ .foo = 42 }, StringifyOptions{});
}
test "stringify struct with indentation" {
    const Sample = struct {
        foo: u32,
        bar: [3]u32,
    };
    const sample = Sample{
        .foo = 42,
        .bar = .{ 1, 2, 3 },
    };

    // Default whitespace: four spaces per indentation level and a space
    // after each colon.
    try teststringify(
        \\{
        \\    "foo": 42,
        \\    "bar": [
        \\        1,
        \\        2,
        \\        3
        \\    ]
        \\}
    ,
        sample,
        StringifyOptions{
            .whitespace = .{},
        },
    );

    // Tab indentation with no space after the colon.
    try teststringify(
        "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
        sample,
        StringifyOptions{
            .whitespace = .{
                .indent = .Tab,
                .separator = false,
            },
        },
    );
}
test "stringify struct with void field" {
    const WithVoid = struct {
        foo: u32,
        bar: void = {},
    };

    // The `void` field is left out of the output.
    try teststringify("{\"foo\":42}", WithVoid{ .foo = 42 }, StringifyOptions{});
}
test "stringify array of structs" {
    const MyStruct = struct {
        foo: u32,
    };
    const items = [_]MyStruct{
        MyStruct{ .foo = 42 },
        MyStruct{ .foo = 100 },
        MyStruct{ .foo = 1000 },
    };

    // Each element is written as its own object inside one array.
    try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", items, StringifyOptions{});
}
test "stringify struct with custom stringifier" {
    const Custom = struct {
        foo: u32,

        const Self = @This();

        /// Replaces the default object output with a fixed two-element
        /// array; the struct's own fields are not used.
        pub fn jsonStringify(
            value: Self,
            options: StringifyOptions,
            out_stream: anytype,
        ) !void {
            try out_stream.writeAll("[\"something special\",");
            try stringify(42, options, out_stream);
            try out_stream.writeByte(']');
        }
    };

    // `stringify` must call `jsonStringify` instead of writing the fields.
    try teststringify("[\"something special\",42]", Custom{ .foo = 42 }, StringifyOptions{});
}
test "stringify vector" {
    // A two-lane vector of ones is written like an array.
    const ones = @splat(2, @as(u32, 1));
    try teststringify("[1,1]", ones, StringifyOptions{});
}