adjustments in prep for hash optimization

Emil Lerch 2023-08-25 15:34:52 -07:00
parent 26daedbd72
commit a58a2cba41
Signed by: lobo
GPG Key ID: A7B62D657EF764F8
3 changed files with 41 additions and 11 deletions

View File

@@ -202,7 +202,7 @@ pub fn fetchAndUnpack(
         // Of course, if the ignore rules above omit the file from the package, then everything
         // is fine and no error should be raised.
-        break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle });
+        break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle }, &.{});
     };
     const pkg_dir_sub_path = "p" ++ s ++ Hasher.hexDigest(actual_hash);
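
Note: the call site above passes an empty options value, keeping the default include-everything behavior. A caller that wants filtering would pass a populated ComputeDirectoryOptions (defined in the next file) instead. A minimal sketch, assuming the Zig 0.11-era std.fs API this commit builds against; skipHidden is a hypothetical helper, not part of the commit:

fn skipHidden(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
    // hypothetical filter: leave dotfiles out of the package hash
    return std.mem.startsWith(u8, entry.basename, ".");
}

var opts = Hasher.ComputeDirectoryOptions{ .isExcluded = skipHidden };
break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle }, &opts);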

View File

@@ -1,7 +1,8 @@
 const builtin = @import("builtin");
 const std = @import("std");
 const Hash = std.crypto.hash.sha2.Sha256;
-const HashedFile = struct {
+pub const HashedFile = struct {
     fs_path: []const u8,
     normalized_path: []const u8,
     hash: [Hash.digest_length]u8,
@@ -74,9 +75,28 @@ pub fn hex64(x: u64) [16]u8 {
     }
     return result;
 }
+pub const walkerFn = *const fn (std.fs.IterableDir.Walker.WalkerEntry) bool;
+fn included(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+    _ = entry;
+    return true;
+}
+fn excluded(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+    _ = entry;
+    return false;
+}
+pub const ComputeDirectoryOptions = struct {
+    isIncluded: walkerFn = included,
+    isExcluded: walkerFn = excluded,
+    fileHashes: []*HashedFile = undefined,
+    needFileHashes: bool = false,
+};
 pub fn computeDirectoryHash(
     thread_pool: *std.Thread.Pool,
     dir: std.fs.IterableDir,
+    options: *ComputeDirectoryOptions,
 ) ![Hash.digest_length]u8 {
     const gpa = thread_pool.allocator;
@@ -105,11 +125,14 @@ pub fn computeDirectoryHash(
             .file => {},
             else => return error.IllegalFileTypeInPackage,
         }
-        const hashed_file = try arena.create(HashedFile);
-        const fs_path = try arena.dupe(u8, entry.path);
+        if (options.isExcluded(entry) or !options.isIncluded(entry))
+            continue;
+        const alloc = if (options.needFileHashes) gpa else arena;
+        const hashed_file = try alloc.create(HashedFile);
+        const fs_path = try alloc.dupe(u8, entry.path);
         hashed_file.* = .{
             .fs_path = fs_path,
-            .normalized_path = try normalizePath(arena, fs_path),
+            .normalized_path = try normalizePath(alloc, fs_path),
             .hash = undefined, // to be populated by the worker
             .failure = undefined, // to be populated by the worker
         };
@@ -132,6 +155,7 @@ pub fn computeDirectoryHash(
         hasher.update(&hashed_file.hash);
     }
     if (any_failures) return error.DirectoryHashUnavailable;
+    if (options.needFileHashes) options.fileHashes = try all_files.toOwnedSlice();
     return hasher.finalResult();
 }
 fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
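
Note: when needFileHashes is set, per-file entries are created with the thread pool's general-purpose allocator rather than the arena, so they outlive the call and the caller owns the returned slice. A minimal usage sketch; thread_pool, dir, and gpa are assumed to be in scope and are not names from this commit, and freeing the individual gpa-allocated entries is omitted:

var opts = Hasher.ComputeDirectoryOptions{ .needFileHashes = true };
const digest = try Hasher.computeDirectoryHash(thread_pool, .{ .dir = dir }, &opts);
defer gpa.free(opts.fileHashes); // the slice itself; entries are also gpa-allocated
for (opts.fileHashes) |hashed| {
    // one entry per file that survived the include/exclude filters
    std.debug.print("{s}: {s}\n", .{ hashed.normalized_path, std.fmt.fmtSliceHexLower(&hashed.hash) });
}
_ = digest; // digest remains the whole-tree hash, as before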

View File

@@ -62,24 +62,30 @@ pub fn main() anyerror!void {
     }
     if (files_processed == 0) {
         // no files specified, look for json files in models directory or cwd
+        // this is our normal mode of operation and where initial optimizations
+        // can be made
         if (models_dir) |m| {
             var cwd = try std.fs.cwd().openDir(".", .{});
             defer cwd.close();
             defer cwd.setAsCwd() catch unreachable;
             try m.dir.setAsCwd();
-            var mi = m.iterate();
-            while (try mi.next()) |e| {
-                if ((e.kind == .file or e.kind == .sym_link) and
-                    std.mem.endsWith(u8, e.name, ".json"))
-                    try processFile(e.name, stdout, output_dir, manifest);
-            }
+            try processDirectories(m, output_dir, stdout, manifest);
         }
     }
     if (args.len == 0)
         _ = try generateServices(allocator, ";", std.io.getStdIn(), stdout);
 }
+fn processDirectories(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, stdout: anytype, manifest: anytype) !void {
+    // Do this in a brain dead fashion, no optimization
+    var mi = models_dir.iterate();
+    while (try mi.next()) |e| {
+        if ((e.kind == .file or e.kind == .sym_link) and
+            std.mem.endsWith(u8, e.name, ".json"))
+            try processFile(e.name, stdout, output_dir, manifest);
+    }
+}
 fn processFile(file_name: []const u8, stdout: anytype, output_dir: std.fs.Dir, manifest: anytype) !void {
     // It's probably best to create our own allocator here so we can deinit at the end and
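
Note: the truncated context comment above describes a per-file allocator that can be torn down in one shot. A minimal sketch of that pattern under the same Zig 0.11-era std; processFileSketch is an illustrative standalone function, not code from this commit:

const std = @import("std");

fn processFileSketch(file_name: []const u8) !void {
    // one arena per file: everything allocated while processing this file
    // is freed in a single deinit when the function returns
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const copy = try allocator.dupe(u8, file_name); // lives until arena.deinit()
    _ = copy;
}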