Compare commits
db4037111f ... fb36dc83d5

5 Commits:

- fb36dc83d5
- a58a2cba41
- 26daedbd72
- 6a109f2a5a
- 2f36f82363

Package.zig (195)
```diff
@@ -1,56 +1,11 @@
 const builtin = @import("builtin");
 const std = @import("std");
 const testing = std.testing;
+const Hasher = @import("codegen/src/Hasher.zig");
 
 /// This is 128 bits - Even with 2^54 cache entries, the probably of a collision would be under 10^-6
 const bin_digest_len = 16;
 const hex_digest_len = bin_digest_len * 2;
-const hex_multihash_len = 2 * multihash_len;
-const MultiHashHexDigest = [hex_multihash_len]u8;
-const hex_charset = "0123456789abcdef";
-const Hash = std.crypto.hash.sha2.Sha256;
-const multihash_len = 1 + 1 + Hash.digest_length;
-const MultihashFunction = enum(u16) {
-    identity = 0x00,
-    sha1 = 0x11,
-    @"sha2-256" = 0x12,
-    @"sha2-512" = 0x13,
-    @"sha3-512" = 0x14,
-    @"sha3-384" = 0x15,
-    @"sha3-256" = 0x16,
-    @"sha3-224" = 0x17,
-    @"sha2-384" = 0x20,
-    @"sha2-256-trunc254-padded" = 0x1012,
-    @"sha2-224" = 0x1013,
-    @"sha2-512-224" = 0x1014,
-    @"sha2-512-256" = 0x1015,
-    @"blake2b-256" = 0xb220,
-    _,
-};
-const HashedFile = struct {
-    fs_path: []const u8,
-    normalized_path: []const u8,
-    hash: [Hash.digest_length]u8,
-    failure: Error!void,
-
-    const Error = std.fs.File.OpenError || std.fs.File.ReadError || std.fs.File.StatError;
-
-    fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
-        _ = context;
-        return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
-    }
-};
-
-const multihash_function: MultihashFunction = switch (Hash) {
-    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
-    else => @compileError("unreachable"),
-};
-comptime {
-    // We avoid unnecessary uleb128 code in hexDigest by asserting here the
-    // values are small enough to be contained in the one-byte encoding.
-    std.debug.assert(@intFromEnum(multihash_function) < 127);
-    std.debug.assert(Hash.digest_length < 127);
-}
-
 const Package = @This();
 
@@ -112,7 +67,7 @@ pub fn fetchAndUnpack(
     // Check if the expected_hash is already present in the global package
     // cache, and thereby avoid both fetching and unpacking.
     if (dep.hash) |h| cached: {
-        const hex_digest = h[0..hex_multihash_len];
+        const hex_digest = h[0..Hasher.hex_multihash_len];
         const pkg_dir_sub_path = "p" ++ s ++ hex_digest;
 
         const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path});
@@ -131,7 +86,7 @@ pub fn fetchAndUnpack(
 
         ptr.* = .{
             .root_src_directory = .{
-                .path = build_root,
+                .path = build_root, // TODO: This leaks memory somehow (should be cleaned in deinit()
                 .handle = pkg_dir,
             },
             .root_src_directory_owned = true,
@@ -149,7 +104,7 @@ pub fn fetchAndUnpack(
     const uri = try std.Uri.parse(dep.url);
 
     const rand_int = std.crypto.random.int(u64);
-    const tmp_dir_sub_path = "tmp" ++ s ++ hex64(rand_int);
+    const tmp_dir_sub_path = "tmp" ++ s ++ Hasher.hex64(rand_int);
 
     const actual_hash = a: {
         var tmp_directory: std.Build.Cache.Directory = d: {
@@ -247,13 +202,13 @@ pub fn fetchAndUnpack(
         // Of course, if the ignore rules above omit the file from the package, then everything
         // is fine and no error should be raised.
 
-        break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
+        break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle }, &.{});
     };
 
-    const pkg_dir_sub_path = "p" ++ s ++ hexDigest(actual_hash);
+    const pkg_dir_sub_path = "p" ++ s ++ Hasher.hexDigest(actual_hash);
     try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path);
 
-    const actual_hex = hexDigest(actual_hash);
+    const actual_hex = Hasher.hexDigest(actual_hash);
     if (dep.hash) |h| {
         if (!std.mem.eql(u8, h, &actual_hex)) {
             std.log.err("hash mismatch: expected: {s}, found: {s}", .{
@@ -272,16 +227,6 @@ pub fn fetchAndUnpack(
     const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path);
     return mod;
 }
-fn hex64(x: u64) [16]u8 {
-    var result: [16]u8 = undefined;
-    var i: usize = 0;
-    while (i < 8) : (i += 1) {
-        const byte = @as(u8, @truncate(x >> @as(u6, @intCast(8 * i))));
-        result[i * 2 + 0] = hex_charset[byte >> 4];
-        result[i * 2 + 1] = hex_charset[byte & 15];
-    }
-    return result;
-}
 fn ProgressReader(comptime ReaderType: type) type {
     return struct {
         child_reader: ReaderType,
@@ -340,81 +285,6 @@ fn isTarAttachment(content_disposition: []const u8) bool {
     }
     return std.ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz");
 }
-fn computePackageHash(
-    thread_pool: *std.Thread.Pool,
-    pkg_dir: std.fs.IterableDir,
-) ![Hash.digest_length]u8 {
-    const gpa = thread_pool.allocator;
-
-    // We'll use an arena allocator for the path name strings since they all
-    // need to be in memory for sorting.
-    var arena_instance = std.heap.ArenaAllocator.init(gpa);
-    defer arena_instance.deinit();
-    const arena = arena_instance.allocator();
-
-    // Collect all files, recursively, then sort.
-    var all_files = std.ArrayList(*HashedFile).init(gpa);
-    defer all_files.deinit();
-
-    var walker = try pkg_dir.walk(gpa);
-    defer walker.deinit();
-
-    {
-        // The final hash will be a hash of each file hashed independently. This
-        // allows hashing in parallel.
-        var wait_group: std.Thread.WaitGroup = .{};
-        defer wait_group.wait();
-
-        while (try walker.next()) |entry| {
-            switch (entry.kind) {
-                .directory => continue,
-                .file => {},
-                else => return error.IllegalFileTypeInPackage,
-            }
-            const hashed_file = try arena.create(HashedFile);
-            const fs_path = try arena.dupe(u8, entry.path);
-            hashed_file.* = .{
-                .fs_path = fs_path,
-                .normalized_path = try normalizePath(arena, fs_path),
-                .hash = undefined, // to be populated by the worker
-                .failure = undefined, // to be populated by the worker
-            };
-            wait_group.start();
-            try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group });
-
-            try all_files.append(hashed_file);
-        }
-    }
-
-    std.mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);
-
-    var hasher = Hash.init(.{});
-    var any_failures = false;
-    for (all_files.items) |hashed_file| {
-        hashed_file.failure catch |err| {
-            any_failures = true;
-            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
-        };
-        hasher.update(&hashed_file.hash);
-    }
-    if (any_failures) return error.PackageHashUnavailable;
-    return hasher.finalResult();
-}
-fn hexDigest(digest: [Hash.digest_length]u8) [multihash_len * 2]u8 {
-    var result: [multihash_len * 2]u8 = undefined;
-
-    result[0] = hex_charset[@intFromEnum(multihash_function) >> 4];
-    result[1] = hex_charset[@intFromEnum(multihash_function) & 15];
-
-    result[2] = hex_charset[Hash.digest_length >> 4];
-    result[3] = hex_charset[Hash.digest_length & 15];
-
-    for (digest, 0..) |byte, i| {
-        result[4 + i * 2] = hex_charset[byte >> 4];
-        result[5 + i * 2] = hex_charset[byte & 15];
-    }
-    return result;
-}
 fn renameTmpIntoCache(
     cache_dir: std.fs.Dir,
     tmp_dir_sub_path: []const u8,
@@ -475,57 +345,6 @@ fn createWithDir(
     }
     return ptr;
 }
-/// Make a file system path identical independently of operating system path inconsistencies.
-/// This converts backslashes into forward slashes.
-fn normalizePath(arena: std.mem.Allocator, fs_path: []const u8) ![]const u8 {
-    const canonical_sep = '/';
-
-    if (std.fs.path.sep == canonical_sep)
-        return fs_path;
-
-    const normalized = try arena.dupe(u8, fs_path);
-    for (normalized) |*byte| {
-        switch (byte.*) {
-            std.fs.path.sep => byte.* = canonical_sep,
-            else => continue,
-        }
-    }
-    return normalized;
-}
-
-fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
-    defer wg.finish();
-    hashed_file.failure = hashFileFallible(dir, hashed_file);
-}
-
-fn hashFileFallible(dir: std.fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
-    var buf: [8000]u8 = undefined;
-    var file = try dir.openFile(hashed_file.fs_path, .{});
-    defer file.close();
-    var hasher = Hash.init(.{});
-    hasher.update(hashed_file.normalized_path);
-    hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) });
-    while (true) {
-        const bytes_read = try file.read(&buf);
-        if (bytes_read == 0) break;
-        hasher.update(buf[0..bytes_read]);
-    }
-    hasher.final(&hashed_file.hash);
-}
-
-fn isExecutable(file: std.fs.File) !bool {
-    if (builtin.os.tag == .windows) {
-        // TODO check the ACL on Windows.
-        // Until this is implemented, this could be a false negative on
-        // Windows, which is why we do not yet set executable_bit_only above
-        // when unpacking the tarball.
-        return false;
-    } else {
-        const stat = try file.stat();
-        return (stat.mode & std.os.S.IXUSR) != 0;
-    }
-}
-
 // Create/Write a file, close it, then grab its stat.mtime timestamp.
 fn testGetCurrentFileTimestamp(dir: std.fs.Dir) !i128 {
     const test_out_file = "test-filetimestamp.tmp";
```
```diff
@@ -7,19 +7,18 @@ as they do in other languages. We can combine all models from AWS into a single
 comptime constant even, however, we're keeping zig files 1:1 with json files
 for now.
 
-The main executable, run with a "-s" first argument, will simply parse the
-Smithy json files passed by the rest of the arguments and save each one as
-its own file.json.zig. We will rely on shell commands to do the rest of the
-renaming (and moving if necessary).
+Optimization plan will be done by the placing of a json file in the output
+directory. The json file will contain a mapping between input files and generated
+outputs, as well as a top level directory hash. We can skip the output generation
+entirely if the top level hash matches, otherwise, individual hashes will be
+compared and output files will only regenerate if the input or output has changed.
 
-To run this, we can use `codegen -s models/*.json`, which takes 20 seconds
-or so on my i5 chromebook and probably significantly faster on a real machine.
-No attempt has been made to optimize. Also, there are several bugs:
+Todo
+----
 
 * I do not think all the optional types have been sorted.
 * I think there is necessary metadata missing from EC2Query style services
-* The output will compile and is close to what `zig fmt` likes to see, but it
-  has not yet been functionally tested
 * It handles all the types in existing AWS services, but it does not handle
   all known Smithy types (e.g. blob and document are missing)
 * It would be awesome to bring over the documentation from the model into
@@ -29,13 +28,7 @@ No attempt has been made to optimize. Also, there are several bugs:
   realistically I'm not sure if that will matter long term, and it's a fair
   amount of work as everything now can be done in a single pass without post
   processing.
-* This doesn't seem to build on 0.7.1 - you need master branch. I hope that
-  0.8.0 will be out soon. If not, a few syntax changes need to be accommodated.
-
-Some of these will likely be addressed as I integrate the code generated files into
-the SDK engine.
 
 The models are Smithy json files, sourced from the AWS v2 go sdk
-for lack of a better place. I've just downloaded the main branch and copied
-the files from the tree in place.
+for lack of a better place. Details are in build.zig of the parent project
+that is now responsible for downloading/caching the project.
```
codegen/src/Hasher.zig (new file, 211 lines)

```zig
const builtin = @import("builtin");
const std = @import("std");
const Hash = std.crypto.hash.sha2.Sha256;

pub const HashedFile = struct {
    fs_path: []const u8,
    normalized_path: []const u8,
    hash: [Hash.digest_length]u8,
    failure: Error!void,

    const Error = std.fs.File.OpenError || std.fs.File.ReadError || std.fs.File.StatError;

    fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
        _ = context;
        return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
    }
};

const multihash_len = 1 + 1 + Hash.digest_length;
pub const hex_multihash_len = 2 * multihash_len;
pub const digest_len = Hash.digest_length;

const MultiHashHexDigest = [hex_multihash_len]u8;
const MultihashFunction = enum(u16) {
    identity = 0x00,
    sha1 = 0x11,
    @"sha2-256" = 0x12,
    @"sha2-512" = 0x13,
    @"sha3-512" = 0x14,
    @"sha3-384" = 0x15,
    @"sha3-256" = 0x16,
    @"sha3-224" = 0x17,
    @"sha2-384" = 0x20,
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,
    @"blake2b-256" = 0xb220,
    _,
};

const multihash_function: MultihashFunction = switch (Hash) {
    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
    else => @compileError("unreachable"),
};
comptime {
    // We avoid unnecessary uleb128 code in hexDigest by asserting here the
    // values are small enough to be contained in the one-byte encoding.
    std.debug.assert(@intFromEnum(multihash_function) < 127);
    std.debug.assert(Hash.digest_length < 127);
}
const hex_charset = "0123456789abcdef";

pub fn hexDigest(digest: [Hash.digest_length]u8) [multihash_len * 2]u8 {
    var result: [multihash_len * 2]u8 = undefined;

    result[0] = hex_charset[@intFromEnum(multihash_function) >> 4];
    result[1] = hex_charset[@intFromEnum(multihash_function) & 15];

    result[2] = hex_charset[Hash.digest_length >> 4];
    result[3] = hex_charset[Hash.digest_length & 15];

    for (digest, 0..) |byte, i| {
        result[4 + i * 2] = hex_charset[byte >> 4];
        result[5 + i * 2] = hex_charset[byte & 15];
    }
    return result;
}
pub fn hex64(x: u64) [16]u8 {
    var result: [16]u8 = undefined;
    var i: usize = 0;
    while (i < 8) : (i += 1) {
        const byte = @as(u8, @truncate(x >> @as(u6, @intCast(8 * i))));
        result[i * 2 + 0] = hex_charset[byte >> 4];
        result[i * 2 + 1] = hex_charset[byte & 15];
    }
    return result;
}

pub const walkerFn = *const fn (std.fs.IterableDir.Walker.WalkerEntry) bool;

fn included(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
    _ = entry;
    return true;
}
fn excluded(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
    _ = entry;
    return false;
}
pub const ComputeDirectoryOptions = struct {
    isIncluded: walkerFn = included,
    isExcluded: walkerFn = excluded,
    fileHashes: []*HashedFile = undefined,
    needFileHashes: bool = false,
};

pub fn computeDirectoryHash(
    thread_pool: *std.Thread.Pool,
    dir: std.fs.IterableDir,
    options: *ComputeDirectoryOptions,
) ![Hash.digest_length]u8 {
    const gpa = thread_pool.allocator;

    // We'll use an arena allocator for the path name strings since they all
    // need to be in memory for sorting.
    var arena_instance = std.heap.ArenaAllocator.init(gpa);
    defer arena_instance.deinit();
    const arena = arena_instance.allocator();

    // Collect all files, recursively, then sort.
    var all_files = std.ArrayList(*HashedFile).init(gpa);
    defer all_files.deinit();

    var walker = try dir.walk(gpa);
    defer walker.deinit();

    {
        // The final hash will be a hash of each file hashed independently. This
        // allows hashing in parallel.
        var wait_group: std.Thread.WaitGroup = .{};
        defer wait_group.wait();

        while (try walker.next()) |entry| {
            switch (entry.kind) {
                .directory => continue,
                .file => {},
                else => return error.IllegalFileTypeInPackage,
            }
            if (options.isExcluded(entry) or !options.isIncluded(entry))
                continue;
            const alloc = if (options.needFileHashes) gpa else arena;
            const hashed_file = try alloc.create(HashedFile);
            const fs_path = try alloc.dupe(u8, entry.path);
            hashed_file.* = .{
                .fs_path = fs_path,
                .normalized_path = try normalizePath(alloc, fs_path),
                .hash = undefined, // to be populated by the worker
                .failure = undefined, // to be populated by the worker
            };
            wait_group.start();
            try thread_pool.spawn(workerHashFile, .{ dir.dir, hashed_file, &wait_group });

            try all_files.append(hashed_file);
        }
    }

    std.mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);

    var hasher = Hash.init(.{});
    var any_failures = false;
    for (all_files.items) |hashed_file| {
        hashed_file.failure catch |err| {
            any_failures = true;
            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
        };
        hasher.update(&hashed_file.hash);
    }
    if (any_failures) return error.DirectoryHashUnavailable;
    if (options.needFileHashes) options.fileHashes = try all_files.toOwnedSlice();
    return hasher.finalResult();
}
fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
    defer wg.finish();
    hashed_file.failure = hashFileFallible(dir, hashed_file);
}

fn hashFileFallible(dir: std.fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
    var buf: [8000]u8 = undefined;
    var file = try dir.openFile(hashed_file.fs_path, .{});
    defer file.close();
    var hasher = Hash.init(.{});
    hasher.update(hashed_file.normalized_path);
    hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) });
    while (true) {
        const bytes_read = try file.read(&buf);
        if (bytes_read == 0) break;
        hasher.update(buf[0..bytes_read]);
    }
    hasher.final(&hashed_file.hash);
}

/// Make a file system path identical independently of operating system path inconsistencies.
/// This converts backslashes into forward slashes.
fn normalizePath(arena: std.mem.Allocator, fs_path: []const u8) ![]const u8 {
    const canonical_sep = '/';

    if (std.fs.path.sep == canonical_sep)
        return fs_path;

    const normalized = try arena.dupe(u8, fs_path);
    for (normalized) |*byte| {
        switch (byte.*) {
            std.fs.path.sep => byte.* = canonical_sep,
            else => continue,
        }
    }
    return normalized;
}

fn isExecutable(file: std.fs.File) !bool {
    if (builtin.os.tag == .windows) {
        // TODO check the ACL on Windows.
        // Until this is implemented, this could be a false negative on
        // Windows, which is why we do not yet set executable_bit_only above
        // when unpacking the tarball.
        return false;
    } else {
        const stat = try file.stat();
        return (stat.mode & std.os.S.IXUSR) != 0;
    }
}
```
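A minimal sketch of calling the new module directly, not part of the diff: it mirrors the filter shape used by calculateDigests in the generator changes below. The wrapper function, the "models" directory name, and the allocator parameter are illustrative assumptions; the Hasher API itself is exactly what the file above exports.

```zig
const std = @import("std");
const Hasher = @import("Hasher.zig");

// Sketch: hash every .json file under "models" and return the printable
// multihash digest. Directory name and wrapper are assumptions for the example.
pub fn hashModels(allocator: std.mem.Allocator) ![Hasher.hex_multihash_len]u8 {
    var thread_pool: std.Thread.Pool = undefined;
    try thread_pool.init(.{ .allocator = allocator });
    defer thread_pool.deinit();

    var dir = try std.fs.cwd().openIterableDir("models", .{});
    defer dir.close();

    const digest = try Hasher.computeDirectoryHash(&thread_pool, dir, @constCast(&Hasher.ComputeDirectoryOptions{
        // Only .json files participate in the hash; isExcluded and
        // needFileHashes keep their defaults.
        .isIncluded = struct {
            pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
                return std.mem.endsWith(u8, entry.basename, ".json");
            }
        }.include,
    }));
    return Hasher.hexDigest(digest);
}
```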
```diff
@@ -1,6 +1,7 @@
 const std = @import("std");
 const smithy = @import("smithy");
 const snake = @import("snake.zig");
+const Hasher = @import("Hasher.zig");
 const json_zig = @embedFile("json.zig");
 
 var verbose = false;
@@ -33,12 +34,13 @@ pub fn main() anyerror!void {
         models_dir = try std.fs.cwd().openIterableDir(args[i + 1], .{});
     }
     // TODO: Seems like we should remove this in favor of a package
-    const json_file = try output_dir.createFile("json.zig", .{});
-    defer json_file.close();
-    try json_file.writer().writeAll(json_zig);
-    const manifest_file = try output_dir.createFile("service_manifest.zig", .{});
-    defer manifest_file.close();
-    const manifest = manifest_file.writer();
+    try output_dir.writeFile("json.zig", json_zig);
+    // TODO: We need a different way to handle this file...
+    var manifest_file_started = false;
+    var manifest_file: std.fs.File = undefined;
+    defer if (manifest_file_started) manifest_file.close();
+    var manifest: std.fs.File.Writer = undefined;
     var files_processed: usize = 0;
     var skip_next = true;
     for (args) |arg| {
@@ -57,30 +59,115 @@ pub fn main() anyerror!void {
             skip_next = true;
             continue;
         }
+        if (!manifest_file_started) {
+            manifest_file = try output_dir.createFile("service_manifest.zig", .{});
+            manifest = manifest_file.writer();
+        }
         try processFile(arg, stdout, output_dir, manifest);
         files_processed += 1;
     }
     if (files_processed == 0) {
         // no files specified, look for json files in models directory or cwd
+        // this is our normal mode of operation and where initial optimizations
+        // can be made
         if (models_dir) |m| {
             var cwd = try std.fs.cwd().openDir(".", .{});
             defer cwd.close();
             defer cwd.setAsCwd() catch unreachable;
 
             try m.dir.setAsCwd();
-            var mi = m.iterate();
-            while (try mi.next()) |e| {
-                if ((e.kind == .file or e.kind == .sym_link) and
-                    std.mem.endsWith(u8, e.name, ".json"))
-                    try processFile(e.name, stdout, output_dir, manifest);
-            }
+            try processDirectories(m, output_dir, stdout);
         }
     }
 
     if (args.len == 0)
         _ = try generateServices(allocator, ";", std.io.getStdIn(), stdout);
 }
+const OutputManifest = struct {
+    model_dir_hash_digest: [Hasher.hex_multihash_len]u8,
+    output_dir_hash_digest: [Hasher.hex_multihash_len]u8,
+};
+fn processDirectories(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, stdout: anytype) !void {
+    // Let's get ready to hash!!
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+    var thread_pool: std.Thread.Pool = undefined;
+    try thread_pool.init(.{ .allocator = allocator });
+    defer thread_pool.deinit();
+    var calculated_manifest = try calculateDigests(models_dir, output_dir, &thread_pool);
+    const output_stored_manifest = output_dir.readFileAlloc(allocator, "output_manifest.json", std.math.maxInt(usize)) catch null;
+    if (output_stored_manifest) |o| {
+        // we have a stored manifest. Parse it and compare to our calculations
+        // we can leak as we're using an arena allocator
+        const stored_manifest = try std.json.parseFromSliceLeaky(OutputManifest, allocator, o, .{});
+        if (std.mem.eql(u8, &stored_manifest.model_dir_hash_digest, &calculated_manifest.model_dir_hash_digest) and
+            std.mem.eql(u8, &stored_manifest.output_dir_hash_digest, &calculated_manifest.output_dir_hash_digest))
+        {
+            // hashes all match, we can end now
+            if (verbose)
+                std.log.info("calculated hashes match output_manifest.json. Nothing to do", .{});
+            return;
+        }
+    }
+    // Do this in a brain dead fashion from here, no optimization
+    const manifest_file = try output_dir.createFile("service_manifest.zig", .{});
+    defer manifest_file.close();
+    const manifest = manifest_file.writer();
+    var mi = models_dir.iterate();
+    while (try mi.next()) |e| {
+        if ((e.kind == .file or e.kind == .sym_link) and
+            std.mem.endsWith(u8, e.name, ".json"))
+            try processFile(e.name, stdout, output_dir, manifest);
+    }
+    // re-calculate so we can store the manifest
+    model_digest = calculated_manifest.model_dir_hash_digest;
+    calculated_manifest = try calculateDigests(models_dir, output_dir, &thread_pool);
+    try output_dir.writeFile("output_manifest.json", try std.json.stringifyAlloc(
+        allocator,
+        calculated_manifest,
+        .{ .whitespace = .indent_2 },
+    ));
+}
+
+var model_digest: ?[Hasher.hex_multihash_len]u8 = null;
+fn calculateDigests(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, thread_pool: *std.Thread.Pool) !OutputManifest {
+    const model_hash = if (model_digest) |m| m[0..Hasher.digest_len].* else try Hasher.computeDirectoryHash(thread_pool, models_dir, @constCast(&Hasher.ComputeDirectoryOptions{
+        .isIncluded = struct {
+            pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                return std.mem.endsWith(u8, entry.basename, ".json");
+            }
+        }.include,
+        .isExcluded = struct {
+            pub fn exclude(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                _ = entry;
+                return false;
+            }
+        }.exclude,
+        .needFileHashes = false,
+    }));
+    if (verbose) std.log.info("Model directory hash: {s}", .{model_digest orelse Hasher.hexDigest(model_hash)});
+
+    const output_hash = try Hasher.computeDirectoryHash(thread_pool, try output_dir.openIterableDir(".", .{}), @constCast(&Hasher.ComputeDirectoryOptions{
+        .isIncluded = struct {
+            pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                return std.mem.endsWith(u8, entry.basename, ".zig");
+            }
+        }.include,
+        .isExcluded = struct {
+            pub fn exclude(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                _ = entry;
+                return false;
+            }
+        }.exclude,
+        .needFileHashes = false,
+    }));
+    if (verbose) std.log.info("Output directory hash: {s}", .{Hasher.hexDigest(output_hash)});
+    return .{
+        .model_dir_hash_digest = model_digest orelse Hasher.hexDigest(model_hash),
+        .output_dir_hash_digest = Hasher.hexDigest(output_hash),
+    };
+}
 fn processFile(file_name: []const u8, stdout: anytype, output_dir: std.fs.Dir, manifest: anytype) !void {
     // It's probably best to create our own allocator here so we can deint at the end and
     // toss all allocations related to the services in this file
```
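For reference, the output_manifest.json written by processDirectories above is just OutputManifest serialized with std.json using .whitespace = .indent_2: two 68-character hex multihash strings, where the leading "1220" encodes the sha2-256 multihash function byte (0x12) and the 32-byte digest length (0x20), as produced by Hasher.hexDigest. The digest bytes below are placeholders, not real output:

```json
{
  "model_dir_hash_digest": "1220<64 hex digest characters, placeholder>",
  "output_dir_hash_digest": "1220<64 hex digest characters, placeholder>"
}
```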