Add benchmarking (we are faster than JSON when parsing with a FixedBufferAllocator)
This commit is contained in:
parent
6acf4f4fb1
commit
0aacc8b37b
3 changed files with 334 additions and 1 deletions
151
build.zig
151
build.zig
|
|
@ -153,4 +153,155 @@ pub fn build(b: *std.Build) void {
|
||||||
//
|
//
|
||||||
// Lastly, the Zig build system is relatively simple and self-contained,
|
// Lastly, the Zig build system is relatively simple and self-contained,
|
||||||
// and reading its source code will allow you to master it.
|
// and reading its source code will allow you to master it.
|
||||||
|
|
||||||
|
    // ---- Benchmark step -------------------------------------------------
    // Exposed as `zig build benchmark`. The step shells out to the external
    // `hyperfine` tool, so availability is checked before anything runs.
    const benchmark_step = b.step("benchmark", "Run benchmarks with hyperfine");
    // Benchmarking a Debug build would be meaningless; promote to ReleaseSafe
    // while still honoring an explicitly requested release mode.
    const benchmark_optimize = if (optimize == .Debug) .ReleaseSafe else optimize;
    // Number of records each generated test file contains.
    const benchmark_record_count = 100_000;
    // JSONL benchmarking is opt-in (-Dbenchmark-jsonl) because it is slow,
    // per the option's help text.
    const include_jsonl = b.option(bool, "benchmark-jsonl", "Include JSONL in benchmarks (slow)") orelse false;

    // Check for hyperfine up front so the user gets an actionable error
    // instead of a mid-benchmark spawn failure.
    const check_hyperfine = b.addSystemCommand(&.{ "sh", "-c", "command -v hyperfine >/dev/null 2>&1 || (echo 'Error: hyperfine not found. Install it with: cargo install hyperfine' >&2 && exit 1)" });
    benchmark_step.dependOn(&check_hyperfine.step);

    // Build the test-data generator with the benchmark optimization level.
    const gen_exe = b.addExecutable(.{
        .name = "generate_test_data",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/generate_test_data.zig"),
            .target = target,
            .optimize = benchmark_optimize,
        }),
    });
    const install_gen = b.addInstallArtifact(gen_exe, .{});
    // Installing the generator is chained onto the hyperfine check so that
    // BenchmarkStep.make can rely on the installed binary path.
    check_hyperfine.step.dependOn(&install_gen.step);

    // Rebuild the main executable with the benchmark optimization level
    // (the regular install may have been built as Debug).
    const benchmark_exe = b.addExecutable(.{
        .name = "srf",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = target,
            .optimize = benchmark_optimize,
            .imports = &.{
                .{ .name = "srf", .module = mod },
            },
        }),
    });
    const install_benchmark_exe = b.addInstallArtifact(benchmark_exe, .{});
    check_hyperfine.step.dependOn(&install_benchmark_exe.step);

    // The custom step that generates cached input files and runs hyperfine.
    const run_benchmark = BenchmarkStep.create(b, .{
        .gen_exe = gen_exe,
        .srf_exe = benchmark_exe,
        .record_count = benchmark_record_count,
        .include_jsonl = include_jsonl,
    });
    run_benchmark.step.dependOn(&check_hyperfine.step);
    benchmark_step.dependOn(&run_benchmark.step);
}
|
||||||
|
|
||||||
|
/// Custom build step that (1) lazily generates cached benchmark input files
/// by running the `generate_test_data` tool, then (2) runs `hyperfine` over
/// the installed `srf` binary, feeding each input file via shell redirection.
const BenchmarkStep = struct {
    step: std.Build.Step,
    /// Compiled test-data generator; its installed binary is invoked in make().
    gen_exe: *std.Build.Step.Compile,
    /// Compiled `srf` binary built at benchmark optimization level.
    srf_exe: *std.Build.Step.Compile,
    /// Number of records per generated test file.
    record_count: usize,
    /// JSONL parsing is slow, so it is only benchmarked on request.
    include_jsonl: bool,

    /// Allocate and wire up a BenchmarkStep. The returned pointer is owned by
    /// the build graph (allocated from `owner.allocator`, never freed).
    pub fn create(owner: *std.Build, options: struct {
        gen_exe: *std.Build.Step.Compile,
        srf_exe: *std.Build.Step.Compile,
        record_count: usize,
        include_jsonl: bool,
    }) *BenchmarkStep {
        const self = owner.allocator.create(BenchmarkStep) catch @panic("OOM");
        self.* = .{
            .step = std.Build.Step.init(.{
                .id = .custom,
                .name = "run benchmark",
                .owner = owner,
                .makeFn = make,
            }),
            .gen_exe = options.gen_exe,
            .srf_exe = options.srf_exe,
            .record_count = options.record_count,
            .include_jsonl = options.include_jsonl,
        };
        return self;
    }

    /// Step body: ensure all four fixture files exist (cached under the build
    /// cache root), then spawn hyperfine with one command per format.
    fn make(step: *std.Build.Step, _: std.Build.Step.MakeOptions) !void {
        const b = step.owner;
        const self: *BenchmarkStep = @fieldParentPtr("step", step);

        // Installed binary paths; the install steps are dependencies of this
        // step (wired up in build()), so the files exist by the time we run.
        const gen_path = b.getInstallPath(.bin, self.gen_exe.name);
        const exe_path = b.getInstallPath(.bin, self.srf_exe.name);
        const count_str = b.fmt("{d}", .{self.record_count});

        const formats = [_]struct { name: []const u8, ext: []const u8 }{
            .{ .name = "srf-compact", .ext = "srf" },
            .{ .name = "srf-long", .ext = "srf" },
            .{ .name = "jsonl", .ext = "jsonl" },
            .{ .name = "json", .ext = "json" },
        };

        var test_files: [4][]const u8 = undefined;
        for (formats, 0..) |fmt, i| {
            // Cache key = hash(format name, record count), so each
            // configuration gets its own directory under <cache>/o/.
            // NOTE(review): the generator's own version is not part of the
            // key, so fixtures survive generator changes; delete the cache
            // directory to force regeneration.
            var hasher = std.hash.Wyhash.init(0);
            hasher.update(fmt.name);
            hasher.update(count_str);
            const hash = hasher.final();

            const hash_str = b.fmt("{x}", .{hash});
            const cache_dir = b.cache_root.join(b.allocator, &.{ "o", hash_str }) catch @panic("OOM");
            // Fix: previously `catch {}` swallowed makePath failures, which
            // only deferred the error to writeFile below with a far less
            // helpful message. makePath succeeds if the path already exists,
            // so propagating is safe.
            try std.fs.cwd().makePath(cache_dir);

            const filename = b.fmt("test-{s}.{s}", .{ fmt.name, fmt.ext });
            const filepath = b.pathJoin(&.{ cache_dir, filename });
            test_files[i] = filepath;

            // Reuse a previously generated file if it is already present.
            if (std.fs.cwd().access(filepath, .{})) {
                continue; // File exists, skip generation
            } else |_| {}

            // Generate the fixture by capturing the generator's stdout.
            var child = std.process.Child.init(&.{ gen_path, fmt.name, count_str }, b.allocator);
            child.stdout_behavior = .Pipe;
            try child.spawn();

            // Read before wait() so the pipe cannot fill up and deadlock.
            const output = try child.stdout.?.readToEndAlloc(b.allocator, 100 * 1024 * 1024);
            defer b.allocator.free(output);

            const term = try child.wait();
            if (term != .Exited or term.Exited != 0) return error.GenerationFailed;

            try std.fs.cwd().writeFile(.{ .sub_path = filepath, .data = output });
        }

        // Assemble the hyperfine invocation: 2 warmup runs, both SRF variants
        // and JSON always, JSONL only when requested.
        var argv: std.ArrayList([]const u8) = .empty;
        defer argv.deinit(b.allocator);

        try argv.appendSlice(b.allocator, &.{ "hyperfine", "-w", "2" });
        try argv.append(b.allocator, b.fmt("{s} srf <{s}", .{ exe_path, test_files[0] }));
        try argv.append(b.allocator, b.fmt("{s} srf <{s}", .{ exe_path, test_files[1] }));
        try argv.append(b.allocator, b.fmt("{s} json <{s}", .{ exe_path, test_files[3] }));
        if (self.include_jsonl) {
            try argv.append(b.allocator, b.fmt("{s} jsonl <{s}", .{ exe_path, test_files[2] }));
        }

        var child = std.process.Child.init(argv.items, b.allocator);

        // We need to lock stderr so hyperfine can output progress in place
        std.debug.lockStdErr();
        defer std.debug.unlockStdErr();

        try child.spawn();
        const term = try child.wait();

        if (term != .Exited or term.Exited != 0)
            return error.BenchmarkFailed;
    }
};
|
||||||
|
|
|
||||||
61
src/generate_test_data.zig
Normal file
61
src/generate_test_data.zig
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
const std = @import("std");

// Default number of records when no count argument is supplied.
const record_count = 100_000;

/// Emit a benchmark fixture in one of the supported formats to stdout.
/// Usage: generate_test_data <srf-compact|srf-long|jsonl|json> [record_count]
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const gpa_allocator = gpa.allocator();

    const argv = try std.process.argsAlloc(gpa_allocator);
    defer std.process.argsFree(gpa_allocator, argv);

    if (argv.len < 2) {
        std.debug.print("Usage: {s} <srf-compact|srf-long|jsonl|json> [record_count]\n", .{argv[0]});
        std.process.exit(1);
    }

    const format = argv[1];
    const count: usize = if (argv.len >= 3) try std.fmt.parseInt(usize, argv[2], 10) else record_count;

    var out_buffer: [1024]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&out_buffer);
    const out = &file_writer.interface;

    if (std.mem.eql(u8, format, "srf-compact")) {
        try emitSrfCompact(out, count);
    } else if (std.mem.eql(u8, format, "srf-long")) {
        try emitSrfLong(out, count);
    } else if (std.mem.eql(u8, format, "jsonl")) {
        try emitJsonl(out, count);
    } else if (std.mem.eql(u8, format, "json")) {
        try emitJson(out, count);
    } else {
        std.debug.print("Unknown format: {s}\n", .{format});
        std.process.exit(1);
    }

    // The writer is buffered; push out whatever remains.
    try out.flush();
}

/// One record per line, header first.
fn emitSrfCompact(out: *std.Io.Writer, count: usize) !void {
    try out.writeAll("#!srfv1\n");
    for (0..count) |i| {
        try out.print("id:num:{d},name::User {d},email::user{d}@example.com,active:bool:true,score:num:{d}.5,bio:49:A \"complex\" string with\nnewlines and \\backslashes,status::active\n", .{ i, i, i, i });
    }
}

/// One field per line, blank line between records.
fn emitSrfLong(out: *std.Io.Writer, count: usize) !void {
    try out.writeAll("#!srfv1\n#!long\n");
    for (0..count) |i| {
        try out.print("id:num:{d}\n", .{i});
        try out.print("name::User {d}\n", .{i});
        try out.print("email::user{d}@example.com\n", .{i});
        try out.writeAll("active:bool:true\n");
        try out.print("score:num:{d}.5\n", .{i});
        try out.writeAll("bio:49:A \"complex\" string with\nnewlines and \\backslashes\n");
        try out.writeAll("status::active\n\n");
    }
}

/// One JSON object per line.
fn emitJsonl(out: *std.Io.Writer, count: usize) !void {
    for (0..count) |i| {
        try out.print("{{\"id\":{d},\"name\":\"User {d}\",\"email\":\"user{d}@example.com\",\"active\":true,\"score\":{d}.5,\"bio\":\"A \\\"complex\\\" string with\\nnewlines and \\\\backslashes\",\"status\":\"active\"}}\n", .{ i, i, i, i });
    }
}

/// A single JSON array of objects.
fn emitJson(out: *std.Io.Writer, count: usize) !void {
    try out.writeAll("[\n");
    for (0..count) |i| {
        if (i > 0) try out.writeAll(",\n");
        try out.print("{{\"id\":{d},\"name\":\"User {d}\",\"email\":\"user{d}@example.com\",\"active\":true,\"score\":{d}.5,\"bio\":\"A \\\"complex\\\" string with\\nnewlines and \\\\backslashes\",\"status\":\"active\"}}", .{ i, i, i, i });
    }
    try out.writeAll("\n]\n");
}
|
||||||
123
src/main.zig
123
src/main.zig
|
|
@ -1,3 +1,124 @@
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
const srf = @import("srf.zig");
|
||||||
|
|
||||||
pub fn main() !void {}
|
/// Allocator wrapper that forwards every request to `child_allocator` while
/// tallying allocation/free call counts and total bytes requested. The first
/// 25 allocation sizes are printed to stderr for quick inspection.
const CountingAllocator = struct {
    child_allocator: std.mem.Allocator,
    alloc_count: usize = 0,
    free_count: usize = 0,
    bytes_allocated: usize = 0,

    /// Hand out an std.mem.Allocator interface backed by this counter.
    fn allocator(self: *CountingAllocator) std.mem.Allocator {
        return .{
            .ptr = self,
            .vtable = &.{
                .alloc = alloc,
                .resize = resize,
                .free = free,
                .remap = remap,
            },
        };
    }

    fn alloc(context: *anyopaque, len: usize, ptr_align: std.mem.Alignment, ret_addr: usize) ?[*]u8 {
        const counter: *CountingAllocator = @ptrCast(@alignCast(context));
        counter.alloc_count += 1;
        counter.bytes_allocated += len;
        // Only log the first 25 allocations to avoid drowning stderr.
        if (counter.alloc_count <= 25)
            std.debug.print("Alloc #{}: {} bytes\n", .{ counter.alloc_count, len });
        return counter.child_allocator.rawAlloc(len, ptr_align, ret_addr);
    }

    fn resize(context: *anyopaque, buf: []u8, buf_align: std.mem.Alignment, new_len: usize, ret_addr: usize) bool {
        // Resizes are forwarded untallied.
        const counter: *CountingAllocator = @ptrCast(@alignCast(context));
        return counter.child_allocator.rawResize(buf, buf_align, new_len, ret_addr);
    }

    fn free(context: *anyopaque, buf: []u8, buf_align: std.mem.Alignment, ret_addr: usize) void {
        const counter: *CountingAllocator = @ptrCast(@alignCast(context));
        counter.free_count += 1;
        return counter.child_allocator.rawFree(buf, buf_align, ret_addr);
    }

    fn remap(context: *anyopaque, buf: []u8, buf_align: std.mem.Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
        // Remaps are forwarded untallied.
        const counter: *CountingAllocator = @ptrCast(@alignCast(context));
        return counter.child_allocator.rawRemap(buf, buf_align, new_len, ret_addr);
    }
};
|
||||||
|
|
||||||
|
/// Benchmark driver: reads one of <srf|json|jsonl> from argv[1], loads all of
/// stdin into memory, then parses it with the selected parser. Set the
/// DEBUG_ALLOCATIONS environment variable to print allocation statistics.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const base_allocator = gpa.allocator();

    const args = try std.process.argsAlloc(base_allocator);
    defer std.process.argsFree(base_allocator, args);

    if (args.len < 2) {
        std.debug.print("Usage: {s} <srf|json|jsonl>\n", .{args[0]});
        std.process.exit(1);
    }

    const format = args[1];

    const debug_allocs = std.process.hasEnvVarConstant("DEBUG_ALLOCATIONS");

    // When DEBUG_ALLOCATIONS is set, route parser allocations through the
    // counting wrapper; otherwise use the GPA directly.
    var counting = CountingAllocator{ .child_allocator = base_allocator };
    const allocator = if (debug_allocs) counting.allocator() else base_allocator;

    var stdin_buffer: [1024]u8 = undefined;
    var stdin_reader = std.fs.File.stdin().reader(&stdin_buffer);
    const stdin = &stdin_reader.interface;

    // Load all data into memory first for fair comparison (so I/O speed does
    // not affect the parse timing). Capped at 100 MiB.
    var data: std.ArrayList(u8) = .empty;
    defer data.deinit(base_allocator);
    try stdin.appendRemaining(base_allocator, &data, @enumFromInt(100 * 1024 * 1024));

    if (std.mem.eql(u8, format, "srf")) {
        // TODO: Remove this code. SRF should be using an Arena allocator instead
        // 200 MiB fixed buffer backing the parse; note the FBA is deliberately
        // NOT wrapped by the counting allocator, so DEBUG_ALLOCATIONS stats
        // exclude the srf path.
        const buffer = try base_allocator.alloc(u8, 200 * 1024 * 1024);
        defer base_allocator.free(buffer);
        var fba = std.heap.FixedBufferAllocator.init(buffer);
        const srf_allocator = fba.allocator();
        // remove ^^

        var reader = std.Io.Reader.fixed(data.items);
        const records = try srf.parse(&reader, srf_allocator, .{});
        // Frees via the FBA are mostly no-ops (only the last allocation can
        // be reclaimed); the backing buffer's defer above releases everything.
        defer {
            for (records.items) |r| r.deinit(srf_allocator);
            srf_allocator.free(records.items);
        }
    } else if (std.mem.eql(u8, format, "jsonl")) {
        // Parse each non-empty line as an independent JSON document,
        // freeing it immediately (per-iteration defer).
        var lines = std.mem.splitScalar(u8, data.items, '\n');
        while (lines.next()) |line| {
            if (line.len == 0) continue;
            const parsed = try std.json.parseFromSlice(std.json.Value, allocator, line, .{});
            defer parsed.deinit();
        }
    } else if (std.mem.eql(u8, format, "json")) {
        const parsed = try std.json.parseFromSlice(std.json.Value, allocator, data.items, .{});
        defer parsed.deinit();
        // Touch every expected field of every record so the parse result is
        // actually consumed and cannot be optimized away.
        var count: usize = 0;
        for (parsed.value.array.items) |item| {
            _ = item.object.get("id");
            _ = item.object.get("name");
            _ = item.object.get("email");
            _ = item.object.get("active");
            _ = item.object.get("score");
            _ = item.object.get("bio");
            _ = item.object.get("status");
            count += 1;
        }
        std.mem.doNotOptimizeAway(&count);
    } else {
        std.debug.print("Unknown format: {s}\n", .{format});
        std.process.exit(1);
    }

    // Allocation statistics only cover calls routed through `allocator`
    // (json/jsonl paths); see the FBA note above for srf.
    if (debug_allocs) {
        std.debug.print("Allocations: {}\n", .{counting.alloc_count});
        std.debug.print("Frees: {}\n", .{counting.free_count});
        std.debug.print("Bytes allocated: {}\n", .{counting.bytes_allocated});
    }
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue