adjustments in prep for hash optimization

2023-08-25 15:34:52 -07:00 · 2023-08-25 15:34:52 -07:00 · a58a2cba41
commit a58a2cba41
parent 26daedbd72
3 changed files with 41 additions and 11 deletions
--- a/Package.zig
+++ b/Package.zig
@ -202,7 +202,7 @@ pub fn fetchAndUnpack(
        // Of course, if the ignore rules above omit the file from the package, then everything
        // is fine and no error should be raised.
-        break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle });
+        break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle }, &.{});
    };
    const pkg_dir_sub_path = "p" ++ s ++ Hasher.hexDigest(actual_hash);
--- a/codegen/src/Hasher.zig
+++ b/codegen/src/Hasher.zig
@ -1,7 +1,8 @@
 const builtin = @import("builtin");
 const std = @import("std");
 const Hash = std.crypto.hash.sha2.Sha256;
-const HashedFile = struct {
+
 pub const HashedFile = struct {
    fs_path: []const u8,
    normalized_path: []const u8,
    hash: [Hash.digest_length]u8,
@ -74,9 +75,28 @@ pub fn hex64(x: u64) [16]u8 {
    }
    return result;
 }
 /// Pointer to a predicate over a single directory-walker entry.
 /// This is the type of the `isIncluded` / `isExcluded` filter hooks in
 /// `ComputeDirectoryOptions`.
 pub const walkerFn = *const fn (std.fs.IterableDir.Walker.WalkerEntry) bool;
 /// Default inclusion filter: every entry is accepted, whatever it is.
 fn included(_: std.fs.IterableDir.Walker.WalkerEntry) bool {
    return true;
 }
 /// Default exclusion filter: no entry is ever rejected.
 fn excluded(_: std.fs.IterableDir.Walker.WalkerEntry) bool {
    return false;
 }
 /// Options accepted by `computeDirectoryHash`.
 /// Every field has a default, so `.{}` hashes all files and returns no
 /// per-file results.
 pub const ComputeDirectoryOptions = struct {
    /// An entry is skipped when this returns false. The default accepts everything.
    isIncluded: walkerFn = included,
    /// An entry is skipped when this returns true. The default rejects nothing.
    isExcluded: walkerFn = excluded,
    /// Output field: written by `computeDirectoryHash` only when
    /// `needFileHashes` is true. Defaults to an empty slice (rather than
    /// `undefined`) so reading it without requesting hashes is well-defined.
    /// NOTE(review): when requested, the entries are allocated with the
    /// thread pool's allocator instead of the arena, so the caller presumably
    /// owns and must free them — confirm against the call sites.
    fileHashes: []*HashedFile = &[_]*HashedFile{},
    /// Set to true to have the per-file results stored in `fileHashes`.
    needFileHashes: bool = false,
 };
 pub fn computeDirectoryHash(
    thread_pool: *std.Thread.Pool,
    dir: std.fs.IterableDir,
    options: *ComputeDirectoryOptions,
 ) ![Hash.digest_length]u8 {
    const gpa = thread_pool.allocator;
@ -105,11 +125,14 @@ pub fn computeDirectoryHash(
                .file => {},
                else => return error.IllegalFileTypeInPackage,
            }
-            const hashed_file = try arena.create(HashedFile);
+            if (options.isExcluded(entry) or !options.isIncluded(entry))
-            const fs_path = try arena.dupe(u8, entry.path);
+                continue;
            const alloc = if (options.needFileHashes) gpa else arena;
            const hashed_file = try alloc.create(HashedFile);
            const fs_path = try alloc.dupe(u8, entry.path);
            hashed_file.* = .{
                .fs_path = fs_path,
-                .normalized_path = try normalizePath(arena, fs_path),
+                .normalized_path = try normalizePath(alloc, fs_path),
                .hash = undefined, // to be populated by the worker
                .failure = undefined, // to be populated by the worker
            };
@ -132,6 +155,7 @@ pub fn computeDirectoryHash(
        hasher.update(&hashed_file.hash);
    }
    if (any_failures) return error.DirectoryHashUnavailable;
    if (options.needFileHashes) options.fileHashes = try all_files.toOwnedSlice();
    return hasher.finalResult();
 }
 fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
--- a/codegen/src/main.zig
+++ b/codegen/src/main.zig
@ -62,24 +62,30 @@ pub fn main() anyerror!void {
    }
    if (files_processed == 0) {
        // no files specified, look for json files in models directory or cwd
        // this is our normal mode of operation and where initial optimizations
        // can be made
        if (models_dir) |m| {
            var cwd = try std.fs.cwd().openDir(".", .{});
            defer cwd.close();
            defer cwd.setAsCwd() catch unreachable;
            try m.dir.setAsCwd();
-            var mi = m.iterate();
+            try processDirectories(m, output_dir, stdout, manifest);
            while (try mi.next()) |e| {
                if ((e.kind == .file or e.kind == .sym_link) and
                    std.mem.endsWith(u8, e.name, ".json"))
                    try processFile(e.name, stdout, output_dir, manifest);
            }
        }
    }
    if (args.len == 0)
        _ = try generateServices(allocator, ";", std.io.getStdIn(), stdout);
 }
 /// Runs `processFile` on every entry yielded by `models_dir.iterate()` that is
 /// a regular file or a symlink and whose name ends in ".json".
 /// Only the entry name is handed to `processFile`; `main` switches the
 /// working directory to the models directory before calling this.
 /// Any error from iteration or from `processFile` is propagated.
 fn processDirectories(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, stdout: anytype, manifest: anytype) !void {
    // Deliberately naive: one file after another, no optimization yet.
    var entries = models_dir.iterate();
    while (try entries.next()) |entry| {
        const is_file_like = entry.kind == .file or entry.kind == .sym_link;
        if (!is_file_like) continue;
        if (!std.mem.endsWith(u8, entry.name, ".json")) continue;
        try processFile(entry.name, stdout, output_dir, manifest);
    }
 }
 fn processFile(file_name: []const u8, stdout: anytype, output_dir: std.fs.Dir, manifest: anytype) !void {
    // It's probably best to create our own allocator here so we can deinit at the end and