skip processing if all hashes match

adjustments in prep for hash optimization
rename model readme as project does not have its own readme
2023-08-25 17:20:51 -07:00 · 2023-08-25 15:34:52 -07:00 · 2023-08-25 14:52:00 -07:00 · 2023-08-25 14:51:33 -07:00 · 2023-08-25 14:43:40 -07:00
4 changed files with 327 additions and 217 deletions
--- a/Package.zig
+++ b/Package.zig
@ -1,56 +1,11 @@
 const builtin = @import("builtin");
 const std = @import("std");
 const testing = std.testing;
+const Hasher = @import("codegen/src/Hasher.zig");

 /// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6
 const bin_digest_len = 16;
 const hex_digest_len = bin_digest_len * 2;
-const hex_multihash_len = 2 * multihash_len;
-const MultiHashHexDigest = [hex_multihash_len]u8;
-const hex_charset = "0123456789abcdef";
-const Hash = std.crypto.hash.sha2.Sha256;
-const multihash_len = 1 + 1 + Hash.digest_length;
-const MultihashFunction = enum(u16) {
-    identity = 0x00,
-    sha1 = 0x11,
-    @"sha2-256" = 0x12,
-    @"sha2-512" = 0x13,
-    @"sha3-512" = 0x14,
-    @"sha3-384" = 0x15,
-    @"sha3-256" = 0x16,
-    @"sha3-224" = 0x17,
-    @"sha2-384" = 0x20,
-    @"sha2-256-trunc254-padded" = 0x1012,
-    @"sha2-224" = 0x1013,
-    @"sha2-512-224" = 0x1014,
-    @"sha2-512-256" = 0x1015,
-    @"blake2b-256" = 0xb220,
-    _,
-};
-const HashedFile = struct {
-    fs_path: []const u8,
-    normalized_path: []const u8,
-    hash: [Hash.digest_length]u8,
-    failure: Error!void,
-
-    const Error = std.fs.File.OpenError || std.fs.File.ReadError || std.fs.File.StatError;
-
-    fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
-        _ = context;
-        return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
-    }
-};
-
-const multihash_function: MultihashFunction = switch (Hash) {
-    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
-    else => @compileError("unreachable"),
-};
-comptime {
-    // We avoid unnecessary uleb128 code in hexDigest by asserting here the
-    // values are small enough to be contained in the one-byte encoding.
-    std.debug.assert(@intFromEnum(multihash_function) < 127);
-    std.debug.assert(Hash.digest_length < 127);
-}

 const Package = @This();

@ -112,7 +67,7 @@ pub fn fetchAndUnpack(
    // Check if the expected_hash is already present in the global package
    // cache, and thereby avoid both fetching and unpacking.
    if (dep.hash) |h| cached: {
-        const hex_digest = h[0..hex_multihash_len];
+        const hex_digest = h[0..Hasher.hex_multihash_len];
        const pkg_dir_sub_path = "p" ++ s ++ hex_digest;

        const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path});
@ -131,7 +86,7 @@ pub fn fetchAndUnpack(

        ptr.* = .{
            .root_src_directory = .{
-                .path = build_root,
+                .path = build_root, // TODO: This leaks memory somehow (should be cleaned in deinit()
                .handle = pkg_dir,
            },
            .root_src_directory_owned = true,
@ -149,7 +104,7 @@ pub fn fetchAndUnpack(
    const uri = try std.Uri.parse(dep.url);

    const rand_int = std.crypto.random.int(u64);
-    const tmp_dir_sub_path = "tmp" ++ s ++ hex64(rand_int);
+    const tmp_dir_sub_path = "tmp" ++ s ++ Hasher.hex64(rand_int);

    const actual_hash = a: {
        var tmp_directory: std.Build.Cache.Directory = d: {
@ -247,13 +202,13 @@ pub fn fetchAndUnpack(
        // Of course, if the ignore rules above omit the file from the package, then everything
        // is fine and no error should be raised.

-        break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
+        break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle }, &.{});
    };

-    const pkg_dir_sub_path = "p" ++ s ++ hexDigest(actual_hash);
+    const pkg_dir_sub_path = "p" ++ s ++ Hasher.hexDigest(actual_hash);
    try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path);

-    const actual_hex = hexDigest(actual_hash);
+    const actual_hex = Hasher.hexDigest(actual_hash);
    if (dep.hash) |h| {
        if (!std.mem.eql(u8, h, &actual_hex)) {
            std.log.err("hash mismatch: expected: {s}, found: {s}", .{
@ -272,16 +227,6 @@ pub fn fetchAndUnpack(
    const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path);
    return mod;
 }
-fn hex64(x: u64) [16]u8 {
-    var result: [16]u8 = undefined;
-    var i: usize = 0;
-    while (i < 8) : (i += 1) {
-        const byte = @as(u8, @truncate(x >> @as(u6, @intCast(8 * i))));
-        result[i * 2 + 0] = hex_charset[byte >> 4];
-        result[i * 2 + 1] = hex_charset[byte & 15];
-    }
-    return result;
-}
 fn ProgressReader(comptime ReaderType: type) type {
    return struct {
        child_reader: ReaderType,
@ -340,81 +285,6 @@ fn isTarAttachment(content_disposition: []const u8) bool {
    }
    return std.ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz");
 }
-fn computePackageHash(
-    thread_pool: *std.Thread.Pool,
-    pkg_dir: std.fs.IterableDir,
-) ![Hash.digest_length]u8 {
-    const gpa = thread_pool.allocator;
-
-    // We'll use an arena allocator for the path name strings since they all
-    // need to be in memory for sorting.
-    var arena_instance = std.heap.ArenaAllocator.init(gpa);
-    defer arena_instance.deinit();
-    const arena = arena_instance.allocator();
-
-    // Collect all files, recursively, then sort.
-    var all_files = std.ArrayList(*HashedFile).init(gpa);
-    defer all_files.deinit();
-
-    var walker = try pkg_dir.walk(gpa);
-    defer walker.deinit();
-
-    {
-        // The final hash will be a hash of each file hashed independently. This
-        // allows hashing in parallel.
-        var wait_group: std.Thread.WaitGroup = .{};
-        defer wait_group.wait();
-
-        while (try walker.next()) |entry| {
-            switch (entry.kind) {
-                .directory => continue,
-                .file => {},
-                else => return error.IllegalFileTypeInPackage,
-            }
-            const hashed_file = try arena.create(HashedFile);
-            const fs_path = try arena.dupe(u8, entry.path);
-            hashed_file.* = .{
-                .fs_path = fs_path,
-                .normalized_path = try normalizePath(arena, fs_path),
-                .hash = undefined, // to be populated by the worker
-                .failure = undefined, // to be populated by the worker
-            };
-            wait_group.start();
-            try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group });
-
-            try all_files.append(hashed_file);
-        }
-    }
-
-    std.mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);
-
-    var hasher = Hash.init(.{});
-    var any_failures = false;
-    for (all_files.items) |hashed_file| {
-        hashed_file.failure catch |err| {
-            any_failures = true;
-            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
-        };
-        hasher.update(&hashed_file.hash);
-    }
-    if (any_failures) return error.PackageHashUnavailable;
-    return hasher.finalResult();
-}
-fn hexDigest(digest: [Hash.digest_length]u8) [multihash_len * 2]u8 {
-    var result: [multihash_len * 2]u8 = undefined;
-
-    result[0] = hex_charset[@intFromEnum(multihash_function) >> 4];
-    result[1] = hex_charset[@intFromEnum(multihash_function) & 15];
-
-    result[2] = hex_charset[Hash.digest_length >> 4];
-    result[3] = hex_charset[Hash.digest_length & 15];
-
-    for (digest, 0..) |byte, i| {
-        result[4 + i * 2] = hex_charset[byte >> 4];
-        result[5 + i * 2] = hex_charset[byte & 15];
-    }
-    return result;
-}
 fn renameTmpIntoCache(
    cache_dir: std.fs.Dir,
    tmp_dir_sub_path: []const u8,
@ -475,57 +345,6 @@ fn createWithDir(
    }
    return ptr;
 }
-/// Make a file system path identical independently of operating system path inconsistencies.
-/// This converts backslashes into forward slashes.
-fn normalizePath(arena: std.mem.Allocator, fs_path: []const u8) ![]const u8 {
-    const canonical_sep = '/';
-
-    if (std.fs.path.sep == canonical_sep)
-        return fs_path;
-
-    const normalized = try arena.dupe(u8, fs_path);
-    for (normalized) |*byte| {
-        switch (byte.*) {
-            std.fs.path.sep => byte.* = canonical_sep,
-            else => continue,
-        }
-    }
-    return normalized;
-}
-
-fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
-    defer wg.finish();
-    hashed_file.failure = hashFileFallible(dir, hashed_file);
-}
-
-fn hashFileFallible(dir: std.fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
-    var buf: [8000]u8 = undefined;
-    var file = try dir.openFile(hashed_file.fs_path, .{});
-    defer file.close();
-    var hasher = Hash.init(.{});
-    hasher.update(hashed_file.normalized_path);
-    hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) });
-    while (true) {
-        const bytes_read = try file.read(&buf);
-        if (bytes_read == 0) break;
-        hasher.update(buf[0..bytes_read]);
-    }
-    hasher.final(&hashed_file.hash);
-}
-
-fn isExecutable(file: std.fs.File) !bool {
-    if (builtin.os.tag == .windows) {
-        // TODO check the ACL on Windows.
-        // Until this is implemented, this could be a false negative on
-        // Windows, which is why we do not yet set executable_bit_only above
-        // when unpacking the tarball.
-        return false;
-    } else {
-        const stat = try file.stat();
-        return (stat.mode & std.os.S.IXUSR) != 0;
-    }
-}
-
 // Create/Write a file, close it, then grab its stat.mtime timestamp.
 fn testGetCurrentFileTimestamp(dir: std.fs.Dir) !i128 {
    const test_out_file = "test-filetimestamp.tmp";
--- a/codegen/README-models.md
+++ b/codegen/README-models.md
@ -7,19 +7,18 @@ as they do in other languages. We can combine all models from AWS into a single
 comptime constant even, however, we're keeping zig files 1:1 with json files
 for now.

-The main executable, run with a "-s" first argument, will simply parse the
-Smithy json files passed by the rest of the arguments and save each one as
-its own file.json.zig. We will rely on shell commands to do the rest of the
-renaming (and moving if necessary).
+The optimization plan is to place a json file in the output directory. The
+json file will contain a mapping between input files and generated outputs,
+as well as a top level directory hash. We can skip the output generation
+entirely if the top level hash matches; otherwise, individual hashes will be
+compared and output files will only regenerate if the input or output has changed.

-To run this, we can use `codegen -s models/*.json`, which takes 20 seconds
-or so on my i5 chromebook and probably significantly faster on a real machine.
-No attempt has been made to optimize. Also, there are several bugs:
+
+Todo
+----

 * I do not think all the optional types have been sorted.
 * I think there is necessary metadata missing from EC2Query style services
-* The output will compile and is close to what `zig fmt` likes to see, but it
-  has not yet been functionally tested
 * It handles all the types in existing AWS services, but it does not handle
  all known Smithy types (e.g. blob and document are missing)
 * It would be awesome to bring over the documentation from the model into
@ -29,13 +28,7 @@ No attempt has been made to optimize. Also, there are several bugs:
  realistically I'm not sure if that will matter long term, and it's a fair
  amount of work as everything now can be done in a single pass without post
  processing.
-* This doesn't seem to build on 0.7.1 - you need master branch. I hope that
-  0.8.0 will be out soon. If not, a few syntax changes need to be accommodated.
-
-Some of these will likely be addressed as I integrate the code generated files into
-the SDK engine.

 The models are Smithy json files, sourced from the AWS v2 go sdk
-for lack of a better place. I've just downloaded the main branch and copied
-the files from the tree in place.
-
+for lack of a better place. Details are in build.zig of the parent project
+that is now responsible for downloading/caching the project.
--- a/codegen/src/Hasher.zig
+++ b/codegen/src/Hasher.zig
@ -0,0 +1,211 @@
+const builtin = @import("builtin");
+const std = @import("std");
+const Hash = std.crypto.hash.sha2.Sha256;
+
+pub const HashedFile = struct {
+    fs_path: []const u8,
+    normalized_path: []const u8,
+    hash: [Hash.digest_length]u8,
+    failure: Error!void,
+
+    const Error = std.fs.File.OpenError || std.fs.File.ReadError || std.fs.File.StatError;
+
+    fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
+        _ = context;
+        return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
+    }
+};
+
+const multihash_len = 1 + 1 + Hash.digest_length;
+pub const hex_multihash_len = 2 * multihash_len;
+pub const digest_len = Hash.digest_length;
+
+const MultiHashHexDigest = [hex_multihash_len]u8;
+const MultihashFunction = enum(u16) {
+    identity = 0x00,
+    sha1 = 0x11,
+    @"sha2-256" = 0x12,
+    @"sha2-512" = 0x13,
+    @"sha3-512" = 0x14,
+    @"sha3-384" = 0x15,
+    @"sha3-256" = 0x16,
+    @"sha3-224" = 0x17,
+    @"sha2-384" = 0x20,
+    @"sha2-256-trunc254-padded" = 0x1012,
+    @"sha2-224" = 0x1013,
+    @"sha2-512-224" = 0x1014,
+    @"sha2-512-256" = 0x1015,
+    @"blake2b-256" = 0xb220,
+    _,
+};
+
+const multihash_function: MultihashFunction = switch (Hash) {
+    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
+    else => @compileError("unreachable"),
+};
+comptime {
+    // We avoid unnecessary uleb128 code in hexDigest by asserting here the
+    // values are small enough to be contained in the one-byte encoding.
+    std.debug.assert(@intFromEnum(multihash_function) < 127);
+    std.debug.assert(Hash.digest_length < 127);
+}
+const hex_charset = "0123456789abcdef";
+
+pub fn hexDigest(digest: [Hash.digest_length]u8) [multihash_len * 2]u8 {
+    var result: [multihash_len * 2]u8 = undefined;
+
+    result[0] = hex_charset[@intFromEnum(multihash_function) >> 4];
+    result[1] = hex_charset[@intFromEnum(multihash_function) & 15];
+
+    result[2] = hex_charset[Hash.digest_length >> 4];
+    result[3] = hex_charset[Hash.digest_length & 15];
+
+    for (digest, 0..) |byte, i| {
+        result[4 + i * 2] = hex_charset[byte >> 4];
+        result[5 + i * 2] = hex_charset[byte & 15];
+    }
+    return result;
+}
+pub fn hex64(x: u64) [16]u8 {
+    var result: [16]u8 = undefined;
+    var i: usize = 0;
+    while (i < 8) : (i += 1) {
+        const byte = @as(u8, @truncate(x >> @as(u6, @intCast(8 * i))));
+        result[i * 2 + 0] = hex_charset[byte >> 4];
+        result[i * 2 + 1] = hex_charset[byte & 15];
+    }
+    return result;
+}
+
+pub const walkerFn = *const fn (std.fs.IterableDir.Walker.WalkerEntry) bool;
+
+fn included(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+    _ = entry;
+    return true;
+}
+fn excluded(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+    _ = entry;
+    return false;
+}
+pub const ComputeDirectoryOptions = struct {
+    isIncluded: walkerFn = included,
+    isExcluded: walkerFn = excluded,
+    fileHashes: []*HashedFile = undefined,
+    needFileHashes: bool = false,
+};
+
+pub fn computeDirectoryHash(
+    thread_pool: *std.Thread.Pool,
+    dir: std.fs.IterableDir,
+    options: *ComputeDirectoryOptions,
+) ![Hash.digest_length]u8 {
+    const gpa = thread_pool.allocator;
+
+    // We'll use an arena allocator for the path name strings since they all
+    // need to be in memory for sorting.
+    var arena_instance = std.heap.ArenaAllocator.init(gpa);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
+
+    // Collect all files, recursively, then sort.
+    var all_files = std.ArrayList(*HashedFile).init(gpa);
+    defer all_files.deinit();
+
+    var walker = try dir.walk(gpa);
+    defer walker.deinit();
+
+    {
+        // The final hash will be a hash of each file hashed independently. This
+        // allows hashing in parallel.
+        var wait_group: std.Thread.WaitGroup = .{};
+        defer wait_group.wait();
+
+        while (try walker.next()) |entry| {
+            switch (entry.kind) {
+                .directory => continue,
+                .file => {},
+                else => return error.IllegalFileTypeInPackage,
+            }
+            if (options.isExcluded(entry) or !options.isIncluded(entry))
+                continue;
+            const alloc = if (options.needFileHashes) gpa else arena;
+            const hashed_file = try alloc.create(HashedFile);
+            const fs_path = try alloc.dupe(u8, entry.path);
+            hashed_file.* = .{
+                .fs_path = fs_path,
+                .normalized_path = try normalizePath(alloc, fs_path),
+                .hash = undefined, // to be populated by the worker
+                .failure = undefined, // to be populated by the worker
+            };
+            wait_group.start();
+            try thread_pool.spawn(workerHashFile, .{ dir.dir, hashed_file, &wait_group });
+
+            try all_files.append(hashed_file);
+        }
+    }
+
+    std.mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);
+
+    var hasher = Hash.init(.{});
+    var any_failures = false;
+    for (all_files.items) |hashed_file| {
+        hashed_file.failure catch |err| {
+            any_failures = true;
+            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
+        };
+        hasher.update(&hashed_file.hash);
+    }
+    if (any_failures) return error.DirectoryHashUnavailable;
+    if (options.needFileHashes) options.fileHashes = try all_files.toOwnedSlice();
+    return hasher.finalResult();
+}
+fn workerHashFile(dir: std.fs.Dir, hashed_file: *HashedFile, wg: *std.Thread.WaitGroup) void {
+    defer wg.finish();
+    hashed_file.failure = hashFileFallible(dir, hashed_file);
+}
+
+fn hashFileFallible(dir: std.fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
+    var buf: [8000]u8 = undefined;
+    var file = try dir.openFile(hashed_file.fs_path, .{});
+    defer file.close();
+    var hasher = Hash.init(.{});
+    hasher.update(hashed_file.normalized_path);
+    hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) });
+    while (true) {
+        const bytes_read = try file.read(&buf);
+        if (bytes_read == 0) break;
+        hasher.update(buf[0..bytes_read]);
+    }
+    hasher.final(&hashed_file.hash);
+}
+
+/// Make a file system path identical independently of operating system path inconsistencies.
+/// This converts backslashes into forward slashes.
+fn normalizePath(arena: std.mem.Allocator, fs_path: []const u8) ![]const u8 {
+    const canonical_sep = '/';
+
+    if (std.fs.path.sep == canonical_sep)
+        return fs_path;
+
+    const normalized = try arena.dupe(u8, fs_path);
+    for (normalized) |*byte| {
+        switch (byte.*) {
+            std.fs.path.sep => byte.* = canonical_sep,
+            else => continue,
+        }
+    }
+    return normalized;
+}
+
+fn isExecutable(file: std.fs.File) !bool {
+    if (builtin.os.tag == .windows) {
+        // TODO check the ACL on Windows.
+        // Until this is implemented, this could be a false negative on
+        // Windows, which is why we do not yet set executable_bit_only above
+        // when unpacking the tarball.
+        return false;
+    } else {
+        const stat = try file.stat();
+        return (stat.mode & std.os.S.IXUSR) != 0;
+    }
+}
--- a/codegen/src/main.zig
+++ b/codegen/src/main.zig
@ -1,6 +1,7 @@
 const std = @import("std");
 const smithy = @import("smithy");
 const snake = @import("snake.zig");
+const Hasher = @import("Hasher.zig");
 const json_zig = @embedFile("json.zig");

 var verbose = false;
@ -33,12 +34,13 @@ pub fn main() anyerror!void {
            models_dir = try std.fs.cwd().openIterableDir(args[i + 1], .{});
    }
    // TODO: Seems like we should remove this in favor of a package
-    const json_file = try output_dir.createFile("json.zig", .{});
-    defer json_file.close();
-    try json_file.writer().writeAll(json_zig);
-    const manifest_file = try output_dir.createFile("service_manifest.zig", .{});
-    defer manifest_file.close();
-    const manifest = manifest_file.writer();
+    try output_dir.writeFile("json.zig", json_zig);
+
+    // TODO: We need a different way to handle this file...
+    var manifest_file_started = false;
+    var manifest_file: std.fs.File = undefined;
+    defer if (manifest_file_started) manifest_file.close();
+    var manifest: std.fs.File.Writer = undefined;
    var files_processed: usize = 0;
    var skip_next = true;
    for (args) |arg| {
@ -57,30 +59,115 @@ pub fn main() anyerror!void {
            skip_next = true;
            continue;
        }
+        if (!manifest_file_started) {
+            manifest_file = try output_dir.createFile("service_manifest.zig", .{});
+            manifest = manifest_file.writer();
+        }
        try processFile(arg, stdout, output_dir, manifest);
        files_processed += 1;
    }
    if (files_processed == 0) {
        // no files specified, look for json files in models directory or cwd
+        // this is our normal mode of operation and where initial optimizations
+        // can be made
        if (models_dir) |m| {
            var cwd = try std.fs.cwd().openDir(".", .{});
            defer cwd.close();
            defer cwd.setAsCwd() catch unreachable;

            try m.dir.setAsCwd();
-            var mi = m.iterate();
-            while (try mi.next()) |e| {
-                if ((e.kind == .file or e.kind == .sym_link) and
-                    std.mem.endsWith(u8, e.name, ".json"))
-                    try processFile(e.name, stdout, output_dir, manifest);
-            }
+            try processDirectories(m, output_dir, stdout);
        }
    }

    if (args.len == 0)
        _ = try generateServices(allocator, ";", std.io.getStdIn(), stdout);
 }
+const OutputManifest = struct {
+    model_dir_hash_digest: [Hasher.hex_multihash_len]u8,
+    output_dir_hash_digest: [Hasher.hex_multihash_len]u8,
+};
+fn processDirectories(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, stdout: anytype) !void {
+    // Let's get ready to hash!!
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+    var thread_pool: std.Thread.Pool = undefined;
+    try thread_pool.init(.{ .allocator = allocator });
+    defer thread_pool.deinit();
+    var calculated_manifest = try calculateDigests(models_dir, output_dir, &thread_pool);
+    const output_stored_manifest = output_dir.readFileAlloc(allocator, "output_manifest.json", std.math.maxInt(usize)) catch null;
+    if (output_stored_manifest) |o| {
+        // we have a stored manifest. Parse it and compare to our calculations
+        // we can leak as we're using an arena allocator
+        const stored_manifest = try std.json.parseFromSliceLeaky(OutputManifest, allocator, o, .{});
+        if (std.mem.eql(u8, &stored_manifest.model_dir_hash_digest, &calculated_manifest.model_dir_hash_digest) and
+            std.mem.eql(u8, &stored_manifest.output_dir_hash_digest, &calculated_manifest.output_dir_hash_digest))
+        {
+            // hashes all match, we can end now
+            if (verbose)
+                std.log.info("calculated hashes match output_manifest.json. Nothing to do", .{});
+            return;
+        }
+    }
+    // Do this in a brain dead fashion from here, no optimization
+    const manifest_file = try output_dir.createFile("service_manifest.zig", .{});
+    defer manifest_file.close();
+    const manifest = manifest_file.writer();
+    var mi = models_dir.iterate();
+    while (try mi.next()) |e| {
+        if ((e.kind == .file or e.kind == .sym_link) and
+            std.mem.endsWith(u8, e.name, ".json"))
+            try processFile(e.name, stdout, output_dir, manifest);
+    }
+    // re-calculate so we can store the manifest
+    model_digest = calculated_manifest.model_dir_hash_digest;
+    calculated_manifest = try calculateDigests(models_dir, output_dir, &thread_pool);
+    try output_dir.writeFile("output_manifest.json", try std.json.stringifyAlloc(
+        allocator,
+        calculated_manifest,
+        .{ .whitespace = .indent_2 },
+    ));
+}

+var model_digest: ?[Hasher.hex_multihash_len]u8 = null;
+fn calculateDigests(models_dir: std.fs.IterableDir, output_dir: std.fs.Dir, thread_pool: *std.Thread.Pool) !OutputManifest {
+    const model_hash = if (model_digest) |m| m[0..Hasher.digest_len].* else try Hasher.computeDirectoryHash(thread_pool, models_dir, @constCast(&Hasher.ComputeDirectoryOptions{
+        .isIncluded = struct {
+            pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                return std.mem.endsWith(u8, entry.basename, ".json");
+            }
+        }.include,
+        .isExcluded = struct {
+            pub fn exclude(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                _ = entry;
+                return false;
+            }
+        }.exclude,
+        .needFileHashes = false,
+    }));
+    if (verbose) std.log.info("Model directory hash: {s}", .{model_digest orelse Hasher.hexDigest(model_hash)});
+
+    const output_hash = try Hasher.computeDirectoryHash(thread_pool, try output_dir.openIterableDir(".", .{}), @constCast(&Hasher.ComputeDirectoryOptions{
+        .isIncluded = struct {
+            pub fn include(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                return std.mem.endsWith(u8, entry.basename, ".zig");
+            }
+        }.include,
+        .isExcluded = struct {
+            pub fn exclude(entry: std.fs.IterableDir.Walker.WalkerEntry) bool {
+                _ = entry;
+                return false;
+            }
+        }.exclude,
+        .needFileHashes = false,
+    }));
+    if (verbose) std.log.info("Output directory hash: {s}", .{Hasher.hexDigest(output_hash)});
+    return .{
+        .model_dir_hash_digest = model_digest orelse Hasher.hexDigest(model_hash),
+        .output_dir_hash_digest = Hasher.hexDigest(output_hash),
+    };
+}
 fn processFile(file_name: []const u8, stdout: anytype, output_dir: std.fs.Dir, manifest: anytype) !void {
    // It's probably best to create our own allocator here so we can deinit at the end and
    // toss all allocations related to the services in this file
Author	SHA1	Message	Date
Emil Lerch	fb36dc83d5	skip processing if all hashes match All checks were successful AWS-Zig Build / build-zig-0.11.0-amd64-host (push) Successful in 7m8s Details	2023-08-25 17:20:51 -07:00
Emil Lerch	a58a2cba41	adjustments in prep for hash optimization	2023-08-25 15:34:52 -07:00
Emil Lerch	26daedbd72	rename model readme as project does not have its own readme	2023-08-25 14:52:00 -07:00
Emil Lerch	6a109f2a5a	update model readme	2023-08-25 14:51:33 -07:00
Emil Lerch	2f36f82363	factor out hashing so we can use it in codegen	2023-08-25 14:43:40 -07:00