Compare commits

..

No commits in common. "fb36dc83d57f413eb22fcbd20322c465a8e60b23" and "db4037111f378c2de0d3e0be153489b1caf5bcd8" have entirely different histories.

4 changed files with 217 additions and 327 deletions

View File

@ -1,11 +1,56 @@
const builtin = @import("builtin"); const builtin = @import("builtin");
const std = @import("std"); const std = @import("std");
const testing = std.testing; const testing = std.testing;
const Hasher = @import("codegen/src/Hasher.zig");
/// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6 /// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6
const bin_digest_len = 16; const bin_digest_len = 16;
const hex_digest_len = bin_digest_len * 2; const hex_digest_len = bin_digest_len * 2;
/// Number of characters in a hex-encoded multihash (two characters per byte).
const hex_multihash_len = 2 * multihash_len;
/// Fixed-size buffer type sized to hold one hex-encoded multihash.
const MultiHashHexDigest = [hex_multihash_len]u8;
/// Lookup table from a 4-bit value to its lowercase hexadecimal digit.
const hex_charset = "0123456789abcdef";
/// Hash algorithm used for the per-file digests and the combined package digest.
const Hash = std.crypto.hash.sha2.Sha256;
/// Byte length of a multihash: one byte for the function code, one byte for
/// the digest length, followed by the digest itself.
const multihash_len = 1 + 1 + Hash.digest_length;
/// Numeric codes identifying the hash function in a multihash prefix.
/// NOTE(review): the values look like the multiformats multihash table —
/// confirm against that table before adding entries.
const MultihashFunction = enum(u16) {
identity = 0x00,
sha1 = 0x11,
@"sha2-256" = 0x12,
@"sha2-512" = 0x13,
@"sha3-512" = 0x14,
@"sha3-384" = 0x15,
@"sha3-256" = 0x16,
@"sha3-224" = 0x17,
@"sha2-384" = 0x20,
@"sha2-256-trunc254-padded" = 0x1012,
@"sha2-224" = 0x1013,
@"sha2-512-224" = 0x1014,
@"sha2-512-256" = 0x1015,
@"blake2b-256" = 0xb220,
// Non-exhaustive: any other u16 value is still a valid (unnamed) member.
_,
};
/// Bookkeeping record for one file whose digest is computed by a worker.
const HashedFile = struct {
    /// Path as produced by the directory walk; used to open the file.
    fs_path: []const u8,
    /// `fs_path` with the platform separator replaced by '/'; used as the
    /// sort key and mixed into the file's digest.
    normalized_path: []const u8,
    /// Digest of the file; only written when hashing succeeded.
    hash: [Hash.digest_length]u8,
    /// Outcome of the hashing attempt, filled in by the worker.
    failure: Error!void,

    const Error = std.fs.File.OpenError || std.fs.File.ReadError || std.fs.File.StatError;

    /// Sort predicate: orders records by the bytes of their normalized path.
    fn lessThan(_: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
        return std.mem.order(u8, lhs.normalized_path, rhs.normalized_path) == .lt;
    }
};
/// Multihash code matching `Hash`, resolved at compile time. Switching
/// `Hash` to an algorithm not listed here is a compile error.
const multihash_function: MultihashFunction = switch (Hash) {
std.crypto.hash.sha2.Sha256 => .@"sha2-256",
else => @compileError("unreachable"),
};
comptime {
// We avoid unnecessary uleb128 code in hexDigest by asserting here the
// values are small enough to be contained in the one-byte encoding.
// hexDigest emits exactly two hex characters for each of these values.
std.debug.assert(@intFromEnum(multihash_function) < 127);
std.debug.assert(Hash.digest_length < 127);
}
const Package = @This(); const Package = @This();
@ -67,7 +112,7 @@ pub fn fetchAndUnpack(
// Check if the expected_hash is already present in the global package // Check if the expected_hash is already present in the global package
// cache, and thereby avoid both fetching and unpacking. // cache, and thereby avoid both fetching and unpacking.
if (dep.hash) |h| cached: { if (dep.hash) |h| cached: {
const hex_digest = h[0..Hasher.hex_multihash_len]; const hex_digest = h[0..hex_multihash_len];
const pkg_dir_sub_path = "p" ++ s ++ hex_digest; const pkg_dir_sub_path = "p" ++ s ++ hex_digest;
const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path}); const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path});
@ -86,7 +131,7 @@ pub fn fetchAndUnpack(
ptr.* = .{ ptr.* = .{
.root_src_directory = .{ .root_src_directory = .{
.path = build_root, // TODO: This leaks memory somehow (should be cleaned in deinit() .path = build_root,
.handle = pkg_dir, .handle = pkg_dir,
}, },
.root_src_directory_owned = true, .root_src_directory_owned = true,
@ -104,7 +149,7 @@ pub fn fetchAndUnpack(
const uri = try std.Uri.parse(dep.url); const uri = try std.Uri.parse(dep.url);
const rand_int = std.crypto.random.int(u64); const rand_int = std.crypto.random.int(u64);
const tmp_dir_sub_path = "tmp" ++ s ++ Hasher.hex64(rand_int); const tmp_dir_sub_path = "tmp" ++ s ++ hex64(rand_int);
const actual_hash = a: { const actual_hash = a: {
var tmp_directory: std.Build.Cache.Directory = d: { var tmp_directory: std.Build.Cache.Directory = d: {
@ -202,13 +247,13 @@ pub fn fetchAndUnpack(
// Of course, if the ignore rules above omit the file from the package, then everything // Of course, if the ignore rules above omit the file from the package, then everything
// is fine and no error should be raised. // is fine and no error should be raised.
break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle }, &.{}); break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
}; };
const pkg_dir_sub_path = "p" ++ s ++ Hasher.hexDigest(actual_hash); const pkg_dir_sub_path = "p" ++ s ++ hexDigest(actual_hash);
try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path); try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path);
const actual_hex = Hasher.hexDigest(actual_hash); const actual_hex = hexDigest(actual_hash);
if (dep.hash) |h| { if (dep.hash) |h| {
if (!std.mem.eql(u8, h, &actual_hex)) { if (!std.mem.eql(u8, h, &actual_hex)) {
std.log.err("hash mismatch: expected: {s}, found: {s}", .{ std.log.err("hash mismatch: expected: {s}, found: {s}", .{
@ -227,6 +272,16 @@ pub fn fetchAndUnpack(
const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path); const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path);
return mod; return mod;
} }
/// Renders `x` as 16 lowercase hex characters, least-significant byte first
/// (each byte is written high nibble, then low nibble).
fn hex64(x: u64) [16]u8 {
    var out: [16]u8 = undefined;
    var remaining = x;
    for (0..8) |byte_index| {
        const byte: u8 = @truncate(remaining);
        out[2 * byte_index] = hex_charset[byte >> 4];
        out[2 * byte_index + 1] = hex_charset[byte & 0xf];
        remaining >>= 8;
    }
    return out;
}
fn ProgressReader(comptime ReaderType: type) type { fn ProgressReader(comptime ReaderType: type) type {
return struct { return struct {
child_reader: ReaderType, child_reader: ReaderType,
@ -285,6 +340,81 @@ fn isTarAttachment(content_disposition: []const u8) bool {
} }
return std.ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz"); return std.ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz");
} }
/// Computes one digest over every regular file found under `pkg_dir`.
///
/// Each file is hashed independently on `thread_pool` (see `workerHashFile`).
/// The per-file digests are then ordered by normalized path and fed into a
/// final hash, so the result does not depend on directory iteration order.
///
/// Errors:
/// * `error.IllegalFileTypeInPackage` when the walk yields an entry that is
///   neither a directory nor a regular file.
/// * `error.PackageHashUnavailable` when at least one file could not be
///   hashed; every such failure is logged first.
/// * Allocation, directory-walk and task-spawn errors are propagated.
fn computePackageHash(
    thread_pool: *std.Thread.Pool,
    pkg_dir: std.fs.IterableDir,
) ![Hash.digest_length]u8 {
    const gpa = thread_pool.allocator;

    // Path strings and file records must all stay alive until the sort
    // below, so they share one arena that is released in a single step.
    var arena_instance = std.heap.ArenaAllocator.init(gpa);
    defer arena_instance.deinit();
    const arena = arena_instance.allocator();

    // Collect all files, recursively, then sort.
    var all_files = std.ArrayList(*HashedFile).init(gpa);
    defer all_files.deinit();

    var walker = try pkg_dir.walk(gpa);
    defer walker.deinit();

    {
        // The final hash is a hash of each file hashed independently, which
        // allows the files to be hashed in parallel.
        var wait_group: std.Thread.WaitGroup = .{};
        // Runs on every exit from this block, error returns included, so no
        // worker is still writing into a record when the arena is freed.
        defer wait_group.wait();

        while (try walker.next()) |entry| {
            switch (entry.kind) {
                .directory => continue,
                .file => {},
                else => return error.IllegalFileTypeInPackage,
            }
            const hashed_file = try arena.create(HashedFile);
            const fs_path = try arena.dupe(u8, entry.path);
            hashed_file.* = .{
                .fs_path = fs_path,
                .normalized_path = try normalizePath(arena, fs_path),
                .hash = undefined, // to be populated by the worker
                .failure = undefined, // to be populated by the worker
            };
            // Record the file before scheduling it, so a failed append
            // never leaves behind a task whose result nobody will read.
            try all_files.append(hashed_file);

            wait_group.start();
            thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group }) catch |err| {
                // No worker exists to balance the start() above; undo it,
                // otherwise the deferred wait() would block forever.
                wait_group.finish();
                return err;
            };
        }
    }

    std.mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);

    var hasher = Hash.init(.{});
    var any_failures = false;
    for (all_files.items) |hashed_file| {
        hashed_file.failure catch |err| {
            any_failures = true;
            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
            // The digest of a failed file was never written; do not read it.
            continue;
        };
        hasher.update(&hashed_file.hash);
    }
    if (any_failures) return error.PackageHashUnavailable;
    return hasher.finalResult();
}
/// Encodes `digest` as a lowercase hex multihash: two characters for the
/// hash-function code, two for the digest length, then two per digest byte.
fn hexDigest(digest: [Hash.digest_length]u8) [multihash_len * 2]u8 {
    const function_code = @intFromEnum(multihash_function);
    const length_code = Hash.digest_length;
    const prefix = [4]u8{
        hex_charset[function_code >> 4],
        hex_charset[function_code & 15],
        hex_charset[length_code >> 4],
        hex_charset[length_code & 15],
    };

    var out: [multihash_len * 2]u8 = undefined;
    out[0..prefix.len].* = prefix;

    var pos: usize = prefix.len;
    for (digest) |byte| {
        out[pos] = hex_charset[byte >> 4];
        out[pos + 1] = hex_charset[byte & 15];
        pos += 2;
    }
    return out;
}
fn renameTmpIntoCache( fn renameTmpIntoCache(
cache_dir: std.fs.Dir, cache_dir: std.fs.Dir,
tmp_dir_sub_path: []const u8, tmp_dir_sub_path: []const u8,
@ -345,6 +475,57 @@ fn createWithDir(
} }
return ptr; return ptr;
} }
/// Produces a path spelling that is the same on every operating system by
/// replacing the platform path separator with '/'.
///
/// When the platform separator already is '/', `fs_path` itself is returned
/// and nothing is allocated; otherwise the result is a copy owned by `arena`.
fn normalizePath(arena: std.mem.Allocator, fs_path: []const u8) ![]const u8 {
    const canonical_sep = '/';
    if (std.fs.path.sep == canonical_sep) return fs_path;

    const copy = try arena.dupe(u8, fs_path);
    var i: usize = 0;
    while (i < copy.len) : (i += 1) {
        if (copy[i] == std.fs.path.sep) copy[i] = canonical_sep;
    }
    return copy;
}
/// Thread-pool task: hashes one file, stores the outcome in
/// `hashed_file.failure`, then marks the task finished on `wg`.
fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
    const outcome = hashFileFallible(dir, hashed_file);
    hashed_file.failure = outcome;
    wg.finish();
}
/// Hashes one file into `hashed_file.hash`.
///
/// The digest covers, in order: the normalized path, a zero byte, one byte
/// holding the executable flag, and the file contents. `hashed_file.hash` is
/// only written when every step succeeded.
fn hashFileFallible(dir: std.fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
    var file = try dir.openFile(hashed_file.fs_path, .{});
    defer file.close();

    const exec_flag: u8 = @intFromBool(try isExecutable(file));

    var hasher = Hash.init(.{});
    hasher.update(hashed_file.normalized_path);
    hasher.update(&[2]u8{ 0, exec_flag });

    // Stream the contents through a fixed buffer; a zero-length read ends it.
    var chunk_buf: [8000]u8 = undefined;
    while (true) {
        const filled = chunk_buf[0..try file.read(&chunk_buf)];
        if (filled.len == 0) break;
        hasher.update(filled);
    }
    hasher.final(&hashed_file.hash);
}
/// Reports whether the owner-execute permission bit is set on `file`.
/// Always returns false on Windows.
fn isExecutable(file: std.fs.File) !bool {
    switch (builtin.os.tag) {
        .windows => {
            // TODO check the ACL on Windows.
            // Until this is implemented, this could be a false negative on
            // Windows, which is why executable_bit_only is not yet set when
            // unpacking the tarball.
            return false;
        },
        else => {
            const stat = try file.stat();
            const owner_exec = stat.mode & std.os.S.IXUSR;
            return owner_exec != 0;
        },
    }
}
// Create/Write a file, close it, then grab its stat.mtime timestamp. // Create/Write a file, close it, then grab its stat.mtime timestamp.
fn testGetCurrentFileTimestamp(dir: std.fs.Dir) !i128 { fn testGetCurrentFileTimestamp(dir: std.fs.Dir) !i128 {
const test_out_file = "test-filetimestamp.tmp"; const test_out_file = "test-filetimestamp.tmp";

View File

@ -7,18 +7,19 @@ as they do in other languages. We can combine all models from AWS into a single
comptime constant even, however, we're keeping zig files 1:1 with json files comptime constant even, however, we're keeping zig files 1:1 with json files
for now. for now.
Optimization plan will be done by the placing of a json file in the output The main executable, run with a "-s" first argument, will simply parse the
directory. The json file will contain a mapping between input files and generated Smithy json files passed by the rest of the arguments and save each one as
outputs, as well as a top level directory hash. We can skip the output generation its own file.json.zig. We will rely on shell commands to do the rest of the
entirely if the top level hash matches, otherwise, individual hashes will be renaming (and moving if necessary).
compared and output files will only regenerate if the input or output has changed.
To run this, we can use `codegen -s models/*.json`, which takes 20 seconds
Todo or so on my i5 chromebook and probably significantly faster on a real machine.
---- No attempt has been made to optimize. Also, there are several bugs:
* I do not think all the optional types have been sorted. * I do not think all the optional types have been sorted.
* I think there is necessary metadata missing from EC2Query style services * I think there is necessary metadata missing from EC2Query style services
* The output will compile and is close to what `zig fmt` likes to see, but it
has not yet been functionally tested
* It handles all the types in existing AWS services, but it does not handle * It handles all the types in existing AWS services, but it does not handle
all known Smithy types (e.g. blob and document are missing) all known Smithy types (e.g. blob and document are missing)
* It would be awesome to bring over the documentation from the model into * It would be awesome to bring over the documentation from the model into
@ -28,7 +29,13 @@ Todo
realistically I'm not sure if that will matter long term, and it's a fair realistically I'm not sure if that will matter long term, and it's a fair
amount of work as everything now can be done in a single pass without post amount of work as everything now can be done in a single pass without post
processing. processing.
* This doesn't seem to build on 0.7.1 - you need master branch. I hope that
0.8.0 will be out soon. If not, a few syntax changes need to be accommodated.
Some of these will likely be addressed as I integrate the code generated files into
the SDK engine.
The models are Smithy json files, sourced from the AWS v2 go sdk The models are Smithy json files, sourced from the AWS v2 go sdk
for lack of a better place. Details are in build.zig of the parent project for lack of a better place. I've just downloaded the main branch and copied
that is now responsible for downloading/caching the project. the files from the tree in place.

View File

@ -1,211 +0,0 @@
const builtin = @import("builtin");
const std = @import("std");
const Hash = std.crypto.hash.sha2.Sha256;
/// Bookkeeping record for one file whose digest is computed by a worker.
pub const HashedFile = struct {
    /// Path as produced by the directory walk; used to open the file.
    fs_path: []const u8,
    /// `fs_path` with the platform separator replaced by '/'; used as the
    /// sort key and mixed into the file's digest.
    normalized_path: []const u8,
    /// Digest of the file; only written when hashing succeeded.
    hash: [Hash.digest_length]u8,
    /// Outcome of the hashing attempt, filled in by the worker.
    failure: Error!void,

    const Error = std.fs.File.OpenError || std.fs.File.ReadError || std.fs.File.StatError;

    /// Sort predicate: orders records by the bytes of their normalized path.
    fn lessThan(_: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
        const ordering = std.mem.order(u8, lhs.normalized_path, rhs.normalized_path);
        return ordering == .lt;
    }
};
/// Byte length of a multihash: one byte for the function code, one byte for
/// the digest length, followed by the digest itself.
const multihash_len = 1 + 1 + Hash.digest_length;
/// Number of characters in a hex-encoded multihash (two characters per byte).
pub const hex_multihash_len = 2 * multihash_len;
/// Byte length of a raw digest produced by `Hash`.
pub const digest_len = Hash.digest_length;
/// Fixed-size buffer type sized to hold one hex-encoded multihash.
const MultiHashHexDigest = [hex_multihash_len]u8;
/// Numeric codes identifying the hash function in a multihash prefix.
/// NOTE(review): the values look like the multiformats multihash table —
/// confirm against that table before adding entries.
const MultihashFunction = enum(u16) {
identity = 0x00,
sha1 = 0x11,
@"sha2-256" = 0x12,
@"sha2-512" = 0x13,
@"sha3-512" = 0x14,
@"sha3-384" = 0x15,
@"sha3-256" = 0x16,
@"sha3-224" = 0x17,
@"sha2-384" = 0x20,
@"sha2-256-trunc254-padded" = 0x1012,
@"sha2-224" = 0x1013,
@"sha2-512-224" = 0x1014,
@"sha2-512-256" = 0x1015,
@"blake2b-256" = 0xb220,
// Non-exhaustive: any other u16 value is still a valid (unnamed) member.
_,
};
/// Multihash code matching `Hash`, resolved at compile time. Switching
/// `Hash` to an algorithm not listed here is a compile error.
const multihash_function: MultihashFunction = switch (Hash) {
std.crypto.hash.sha2.Sha256 => .@"sha2-256",
else => @compileError("unreachable"),
};
comptime {
// We avoid unnecessary uleb128 code in hexDigest by asserting here the
// values are small enough to be contained in the one-byte encoding.
// hexDigest emits exactly two hex characters for each of these values.
std.debug.assert(@intFromEnum(multihash_function) < 127);
std.debug.assert(Hash.digest_length < 127);
}
/// Lookup table from a 4-bit value to its lowercase hexadecimal digit.
const hex_charset = "0123456789abcdef";

/// Encodes `digest` as a lowercase hex multihash: two characters for the
/// hash-function code, two for the digest length, then two per digest byte.
pub fn hexDigest(digest: [Hash.digest_length]u8) [multihash_len * 2]u8 {
    const function_code = @intFromEnum(multihash_function);
    const length_code = Hash.digest_length;

    var out: [multihash_len * 2]u8 = undefined;
    out[0] = hex_charset[function_code >> 4];
    out[1] = hex_charset[function_code & 15];
    out[2] = hex_charset[length_code >> 4];
    out[3] = hex_charset[length_code & 15];

    // Digest bytes follow the four header characters, two characters each.
    var pos: usize = 4;
    for (digest) |byte| {
        out[pos] = hex_charset[byte >> 4];
        out[pos + 1] = hex_charset[byte & 15];
        pos += 2;
    }
    return out;
}
/// Renders `x` as 16 lowercase hex characters, least-significant byte first
/// (each byte is written high nibble, then low nibble).
pub fn hex64(x: u64) [16]u8 {
    var out: [16]u8 = undefined;
    var remaining = x;
    var pos: usize = 0;
    while (pos < out.len) : (pos += 2) {
        const byte: u8 = @truncate(remaining);
        out[pos] = hex_charset[byte >> 4];
        out[pos + 1] = hex_charset[byte & 0xf];
        remaining >>= 8;
    }
    return out;
}
/// Signature of a predicate that is handed one directory-walk entry and
/// answers yes or no for it.
pub const walkerFn = *const fn (std.fs.IterableDir.Walker.WalkerEntry) bool;

/// Predicate that answers true for every entry.
fn included(_: std.fs.IterableDir.Walker.WalkerEntry) bool {
    return true;
}

/// Predicate that answers false for every entry.
fn excluded(_: std.fs.IterableDir.Walker.WalkerEntry) bool {
    return false;
}
/// Settings for, and optional output of, `computeDirectoryHash`.
pub const ComputeDirectoryOptions = struct {
    /// A file is hashed only when this predicate returns true for it.
    isIncluded: walkerFn = included,
    /// A file is skipped when this predicate returns true for it.
    isExcluded: walkerFn = excluded,
    /// Output: set by `computeDirectoryHash` on success when
    /// `needFileHashes` is true. Defaults to an empty slice so that reading
    /// it is always well-defined, including after a failed or skipped run.
    fileHashes: []*HashedFile = &[_]*HashedFile{},
    /// Ask `computeDirectoryHash` to hand back the per-file records.
    needFileHashes: bool = false,
};
/// Computes one digest over the regular files under `dir` that pass the
/// predicates in `options`.
///
/// Each selected file is hashed independently on `thread_pool` (see
/// `workerHashFile`). The per-file digests are then ordered by normalized
/// path and fed into a final hash, so the result does not depend on
/// directory iteration order.
///
/// When `options.needFileHashes` is set, the file records are allocated with
/// the pool's allocator instead of the temporary arena and, on success, the
/// sorted slice is stored in `options.fileHashes`.
/// NOTE(review): nothing in this function frees those records afterwards, so
/// the caller presumably owns them; on error paths they are not released.
///
/// Errors:
/// * `error.IllegalFileTypeInPackage` when the walk yields an entry that is
///   neither a directory nor a regular file.
/// * `error.DirectoryHashUnavailable` when at least one file could not be
///   hashed; every such failure is logged first.
/// * Allocation, directory-walk and task-spawn errors are propagated.
pub fn computeDirectoryHash(
    thread_pool: *std.Thread.Pool,
    dir: std.fs.IterableDir,
    options: *ComputeDirectoryOptions,
) ![Hash.digest_length]u8 {
    const gpa = thread_pool.allocator;

    // Path strings and file records must all stay alive until the sort
    // below, so by default they share one arena released in a single step.
    var arena_instance = std.heap.ArenaAllocator.init(gpa);
    defer arena_instance.deinit();
    const arena = arena_instance.allocator();

    // Collect all files, recursively, then sort.
    var all_files = std.ArrayList(*HashedFile).init(gpa);
    defer all_files.deinit();

    var walker = try dir.walk(gpa);
    defer walker.deinit();

    {
        // The final hash is a hash of each file hashed independently, which
        // allows the files to be hashed in parallel.
        var wait_group: std.Thread.WaitGroup = .{};
        // Runs on every exit from this block, error returns included, so no
        // worker is still writing into a record when the arena is freed.
        defer wait_group.wait();

        while (try walker.next()) |entry| {
            switch (entry.kind) {
                .directory => continue,
                .file => {},
                else => return error.IllegalFileTypeInPackage,
            }
            if (options.isExcluded(entry) or !options.isIncluded(entry))
                continue;

            // Records that are handed back to the caller must outlive the arena.
            const alloc = if (options.needFileHashes) gpa else arena;
            const hashed_file = try alloc.create(HashedFile);
            const fs_path = try alloc.dupe(u8, entry.path);
            hashed_file.* = .{
                .fs_path = fs_path,
                .normalized_path = try normalizePath(alloc, fs_path),
                .hash = undefined, // to be populated by the worker
                .failure = undefined, // to be populated by the worker
            };
            // Record the file before scheduling it, so a failed append
            // never leaves behind a task whose result nobody will read.
            try all_files.append(hashed_file);

            wait_group.start();
            thread_pool.spawn(workerHashFile, .{ dir.dir, hashed_file, &wait_group }) catch |err| {
                // No worker exists to balance the start() above; undo it,
                // otherwise the deferred wait() would block forever.
                wait_group.finish();
                return err;
            };
        }
    }

    std.mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);

    var hasher = Hash.init(.{});
    var any_failures = false;
    for (all_files.items) |hashed_file| {
        hashed_file.failure catch |err| {
            any_failures = true;
            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
            // The digest of a failed file was never written; do not read it.
            continue;
        };
        hasher.update(&hashed_file.hash);
    }
    if (any_failures) return error.DirectoryHashUnavailable;
    if (options.needFileHashes) options.fileHashes = try all_files.toOwnedSlice();
    return hasher.finalResult();
}
/// Thread-pool task: hashes one file, stores the outcome in
/// `hashed_file.failure`, then marks the task finished on `wg`.
fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
    const outcome = hashFileFallible(dir, hashed_file);
    hashed_file.failure = outcome;
    wg.finish();
}
/// Hashes one file into `hashed_file.hash`.
///
/// The digest covers, in order: the normalized path, a zero byte, one byte
/// holding the executable flag, and the file contents. `hashed_file.hash` is
/// only written when every step succeeded.
fn hashFileFallible(dir: std.fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
    var file = try dir.openFile(hashed_file.fs_path, .{});
    defer file.close();

    const exec_flag: u8 = @intFromBool(try isExecutable(file));

    var hasher = Hash.init(.{});
    hasher.update(hashed_file.normalized_path);
    hasher.update(&[2]u8{ 0, exec_flag });

    // Stream the contents through a fixed buffer; a zero-length read ends it.
    var chunk_buf: [8000]u8 = undefined;
    while (true) {
        const filled = chunk_buf[0..try file.read(&chunk_buf)];
        if (filled.len == 0) break;
        hasher.update(filled);
    }
    hasher.final(&hashed_file.hash);
}
/// Produces a path spelling that is the same on every operating system by
/// replacing the platform path separator with '/'.
///
/// When the platform separator already is '/', `fs_path` itself is returned
/// and nothing is allocated; otherwise the result is a copy owned by `arena`.
fn normalizePath(arena: std.mem.Allocator, fs_path: []const u8) ![]const u8 {
    const canonical_sep = '/';
    if (std.fs.path.sep == canonical_sep) return fs_path;

    const copy = try arena.dupe(u8, fs_path);
    var i: usize = 0;
    while (i < copy.len) : (i += 1) {
        if (copy[i] == std.fs.path.sep) copy[i] = canonical_sep;
    }
    return copy;
}
/// Reports whether the owner-execute permission bit is set on `file`.
/// Always returns false on Windows.
fn isExecutable(file: std.fs.File) !bool {
    switch (builtin.os.tag) {
        .windows => {
            // TODO check the ACL on Windows.
            // Until this is implemented, this could be a false negative on
            // Windows, which is why executable_bit_only is not yet set when
            // unpacking the tarball.
            return false;
        },
        else => {
            const stat = try file.stat();
            const owner_exec = stat.mode & std.os.S.IXUSR;
            return owner_exec != 0;
        },
    }
}

View File

@ -1,7 +1,6 @@
const std = @import("std"); const std = @import("std");
const smithy = @import("smithy"); const smithy = @import("smithy");
const snake = @import("snake.zig"); const snake = @import("snake.zig");
const Hasher = @import("Hasher.zig");
const json_zig = @embedFile("json.zig"); const json_zig = @embedFile("json.zig");
var verbose = false; var verbose = false;
@ -34,13 +33,12 @@ pub fn main() anyerror!void {
models_dir = try std.fs.cwd().openIterableDir(args[i + 1], .{}); models_dir = try std.fs.cwd().openIterableDir(args[i + 1], .{});
} }
// TODO: Seems like we should remove this in favor of a package // TODO: Seems like we should remove this in favor of a package
try output_dir.writeFile("json.zig", json_zig); const json_file = try output_dir.createFile("json.zig", .{});
defer json_file.close();
// TODO: We need a different way to handle this file... try json_file.writer().writeAll(json_zig);
var manifest_file_started = false; const manifest_file = try output_dir.createFile("service_manifest.zig", .{});
var manifest_file: std.fs.File = undefined; defer manifest_file.close();
defer if (manifest_file_started) manifest_file.close(); const manifest = manifest_file.writer();
var manifest: std.fs.File.Writer = undefined;
var files_processed: usize = 0; var files_processed: usize = 0;
var skip_next = true; var skip_next = true;
for (args) |arg| { for (args) |arg| {
@ -59,115 +57,30 @@ pub fn main() anyerror!void {
skip_next = true; skip_next = true;
continue; continue;
} }
if (!manifest_file_started) {
manifest_file = try output_dir.createFile("service_manifest.zig", .{});
manifest = manifest_file.writer();
}
try processFile(arg, stdout, output_dir, manifest); try processFile(arg, stdout, output_dir, manifest);
files_processed += 1; files_processed += 1;
} }
if (files_processed == 0) { if (files_processed == 0) {
// no files specified, look for json files in models directory or cwd // no files specified, look for json files in models directory or cwd
// this is our normal mode of operation and where initial optimizations
// can be made
if (models_dir) |m| { if (models_dir) |m| {
var cwd = try std.fs.cwd().openDir(".", .{}); var cwd = try std.fs.cwd().openDir(".", .{});
defer cwd.close(); defer cwd.close();
defer cwd.setAsCwd() catch unreachable; defer cwd.setAsCwd() catch unreachable;
try m.dir.setAsCwd(); try m.dir.setAsCwd();
try processDirectories(m, output_dir, stdout); var mi = m.iterate();
while (try mi.next()) |e| {
if ((e.kind == .file or e.kind == .sym_link) and
std.mem.endsWith(u8, e.name, ".json"))
try processFile(e.name, stdout, output_dir, manifest);
}
} }
} }
if (args.len == 0) if (args.len == 0)
_ = try generateServices(allocator, ";", std.io.getStdIn(), stdout); _ = try generateServices(allocator, ";", std.io.getStdIn(), stdout);
} }
/// Shape of `output_manifest.json`: the pair of directory digests that
/// `processDirectories` stores after a generation run and compares on the
/// next run to decide whether anything has to be regenerated.
const OutputManifest = struct {
/// Hex multihash of the models (input) directory.
model_dir_hash_digest: [Hasher.hex_multihash_len]u8,
/// Hex multihash of the generated output directory.
output_dir_hash_digest: [Hasher.hex_multihash_len]u8,
};
/// Regenerates the output for every `.json` model in `models_dir`, unless
/// the digests stored in `output_manifest.json` show that neither the models
/// nor the generated files have changed since the last run.
///
/// After a regeneration the manifest is rewritten with fresh digests.
/// `stdout` is forwarded to `processFile` unchanged.
fn processDirectories(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, stdout: anytype) !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    // The allocator is shared with the thread pool: worker threads release
    // their task memory through it while this thread keeps allocating. An
    // arena is not thread-safe by itself, so all access goes through a
    // mutex-guarded wrapper.
    var guarded_arena = std.heap.ThreadSafeAllocator{ .child_allocator = arena.allocator() };
    const allocator = guarded_arena.allocator();

    var thread_pool: std.Thread.Pool = undefined;
    try thread_pool.init(.{ .allocator = allocator });
    defer thread_pool.deinit();

    var calculated_manifest = try calculateDigests(models_dir, output_dir, &thread_pool);

    // Best effort: a missing or unreadable manifest simply means "no cache".
    const stored_bytes = output_dir.readFileAlloc(allocator, "output_manifest.json", std.math.maxInt(usize)) catch null;
    if (stored_bytes) |bytes| {
        // The manifest is only an optimization, so a corrupt one is treated
        // as a cache miss (and overwritten below) rather than a fatal error.
        // Leaking the parse result is fine: everything lives in the arena.
        const parsed: ?OutputManifest = std.json.parseFromSliceLeaky(OutputManifest, allocator, bytes, .{}) catch |err| blk: {
            if (err == error.OutOfMemory) return err;
            std.log.warn("ignoring unparseable output_manifest.json: {s}", .{@errorName(err)});
            break :blk null;
        };
        if (parsed) |stored_manifest| {
            const models_unchanged = std.mem.eql(u8, &stored_manifest.model_dir_hash_digest, &calculated_manifest.model_dir_hash_digest);
            const output_unchanged = std.mem.eql(u8, &stored_manifest.output_dir_hash_digest, &calculated_manifest.output_dir_hash_digest);
            if (models_unchanged and output_unchanged) {
                // hashes all match, we can end now
                if (verbose)
                    std.log.info("calculated hashes match output_manifest.json. Nothing to do", .{});
                return;
            }
        }
    }

    // No per-file optimization from here on: regenerate every model.
    const manifest_file = try output_dir.createFile("service_manifest.zig", .{});
    defer manifest_file.close();
    const manifest = manifest_file.writer();
    var mi = models_dir.iterate();
    while (try mi.next()) |e| {
        if ((e.kind == .file or e.kind == .sym_link) and
            std.mem.endsWith(u8, e.name, ".json"))
            try processFile(e.name, stdout, output_dir, manifest);
    }

    // The models were not touched, so remember their digest and only the
    // output directory gets re-hashed for the manifest we store.
    model_digest = calculated_manifest.model_dir_hash_digest;
    calculated_manifest = try calculateDigests(models_dir, output_dir, &thread_pool);
    try output_dir.writeFile("output_manifest.json", try std.json.stringifyAlloc(
        allocator,
        calculated_manifest,
        .{ .whitespace = .indent_2 },
    ));
}
/// Hex digest of the models directory, cached by `processDirectories` so the
/// second `calculateDigests` call of a run does not re-hash the inputs.
var model_digest: ?[Hasher.hex_multihash_len]u8 = null;

/// Computes the hex digests of the models directory (its `.json` files) and
/// of the output directory (its `.zig` files).
///
/// If `model_digest` is already set, it is reused as the model digest and
/// the models directory is not hashed again.
fn calculateDigests(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, thread_pool: *std.Thread.Pool) !OutputManifest {
    const model_hex = model_digest orelse blk: {
        var model_options = Hasher.ComputeDirectoryOptions{
            .isIncluded = struct {
                pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
                    return std.mem.endsWith(u8, entry.basename, ".json");
                }
            }.include,
            .needFileHashes = false,
        };
        const model_hash = try Hasher.computeDirectoryHash(thread_pool, models_dir, &model_options);
        break :blk Hasher.hexDigest(model_hash);
    };
    if (verbose) std.log.info("Model directory hash: {s}", .{model_hex});

    // Opening "." yields a new directory handle that this function owns and
    // therefore has to close again.
    var output_iterable = try output_dir.openIterableDir(".", .{});
    defer output_iterable.close();

    var output_options = Hasher.ComputeDirectoryOptions{
        .isIncluded = struct {
            pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
                return std.mem.endsWith(u8, entry.basename, ".zig");
            }
        }.include,
        .needFileHashes = false,
    };
    const output_hash = try Hasher.computeDirectoryHash(thread_pool, output_iterable, &output_options);
    const output_hex = Hasher.hexDigest(output_hash);
    if (verbose) std.log.info("Output directory hash: {s}", .{output_hex});

    return .{
        .model_dir_hash_digest = model_hex,
        .output_dir_hash_digest = output_hex,
    };
}
fn processFile(file_name: []const u8, stdout: anytype, output_dir: std.fs.Dir, manifest: anytype) !void { fn processFile(file_name: []const u8, stdout: anytype, output_dir: std.fs.Dir, manifest: anytype) !void {
// It's probably best to create our own allocator here so we can deinit at the end and // It's probably best to create our own allocator here so we can deinit at the end and
// toss all allocations related to the services in this file // toss all allocations related to the services in this file