522 lines
19 KiB
Zig
522 lines
19 KiB
Zig
const builtin = @import("builtin");
|
|
const std = @import("std");
|
|
const testing = std.testing;
|
|
const Hasher = @import("codegen/src/Hasher.zig");
|
|
|
|
/// This is 128 bits - Even with 2^54 cache entries, the probably of a collision would be under 10^-6
|
|
const bin_digest_len = 16;
|
|
const hex_digest_len = bin_digest_len * 2;
|
|
|
|
const Package = @This();
|
|
|
|
root_src_directory: std.Build.Cache.Directory,
|
|
|
|
/// Whether to free `root_src_directory` on `destroy`.
|
|
root_src_directory_owned: bool = false,
|
|
allocator: std.mem.Allocator,
|
|
|
|
pub const Dependency = struct {
|
|
url: []const u8,
|
|
hash: ?[]const u8,
|
|
};
|
|
|
|
pub fn deinit(self: *Package) void {
|
|
if (self.root_src_directory_owned)
|
|
self.root_src_directory.closeAndFree(self.allocator);
|
|
}
|
|
|
|
pub fn fetchOneAndUnpack(
|
|
allocator: std.mem.Allocator,
|
|
cache_directory: []const u8, // directory to store things
|
|
dep: Dependency, // thing to download
|
|
) !*Package {
|
|
var http_client: std.http.Client = .{ .allocator = allocator };
|
|
defer http_client.deinit();
|
|
|
|
var global_cache_directory: std.Build.Cache.Directory = .{
|
|
.handle = try std.fs.cwd().makeOpenPath(cache_directory, .{}),
|
|
.path = cache_directory,
|
|
};
|
|
var thread_pool: std.Thread.Pool = undefined;
|
|
try thread_pool.init(.{ .allocator = allocator });
|
|
defer thread_pool.deinit();
|
|
var progress: std.Progress = .{ .dont_print_on_dumb = true };
|
|
const root_prog_node = progress.start("Fetch Packages", 0);
|
|
defer root_prog_node.end();
|
|
return try fetchAndUnpack(
|
|
&thread_pool,
|
|
&http_client,
|
|
global_cache_directory,
|
|
dep,
|
|
dep.url,
|
|
root_prog_node,
|
|
);
|
|
}
|
|
|
|
pub fn fetchAndUnpack(
|
|
thread_pool: *std.Thread.Pool, // thread pool for hashing things in parallel
|
|
http_client: *std.http.Client, // client to download stuff
|
|
global_cache_directory: std.Build.Cache.Directory, // directory to store things
|
|
dep: Dependency, // thing to download
|
|
fqn: []const u8, // used as name for thing downloaded
|
|
root_prog_node: *std.Progress.Node, // used for outputting to terminal
|
|
) !*Package {
|
|
const gpa = http_client.allocator;
|
|
const s = std.fs.path.sep_str;
|
|
|
|
// Check if the expected_hash is already present in the global package
|
|
// cache, and thereby avoid both fetching and unpacking.
|
|
if (dep.hash) |h| cached: {
|
|
const hex_digest = h[0..Hasher.hex_multihash_len];
|
|
const pkg_dir_sub_path = "p" ++ s ++ hex_digest;
|
|
|
|
const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path});
|
|
errdefer gpa.free(build_root);
|
|
|
|
var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) {
|
|
error.FileNotFound => break :cached,
|
|
else => |e| return e,
|
|
};
|
|
errdefer pkg_dir.close();
|
|
|
|
root_prog_node.completeOne();
|
|
|
|
const ptr = try gpa.create(Package);
|
|
errdefer gpa.destroy(ptr);
|
|
|
|
ptr.* = .{
|
|
.root_src_directory = .{
|
|
.path = build_root, // TODO: This leaks memory somehow (should be cleaned in deinit()
|
|
.handle = pkg_dir,
|
|
},
|
|
.root_src_directory_owned = true,
|
|
.allocator = gpa,
|
|
};
|
|
|
|
return ptr;
|
|
}
|
|
|
|
var pkg_prog_node = root_prog_node.start(fqn, 0);
|
|
defer pkg_prog_node.end();
|
|
pkg_prog_node.activate();
|
|
pkg_prog_node.context.refresh();
|
|
|
|
const uri = try std.Uri.parse(dep.url);
|
|
|
|
const rand_int = std.crypto.random.int(u64);
|
|
const tmp_dir_sub_path = "tmp" ++ s ++ Hasher.hex64(rand_int);
|
|
|
|
const actual_hash = a: {
|
|
var tmp_directory: std.Build.Cache.Directory = d: {
|
|
const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path});
|
|
errdefer gpa.free(path);
|
|
|
|
const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{});
|
|
errdefer iterable_dir.close();
|
|
|
|
break :d .{
|
|
.path = path,
|
|
.handle = iterable_dir.dir,
|
|
};
|
|
};
|
|
defer tmp_directory.closeAndFree(gpa);
|
|
|
|
var h = std.http.Headers{ .allocator = gpa };
|
|
defer h.deinit();
|
|
|
|
var req = try http_client.request(.GET, uri, h, .{});
|
|
defer req.deinit();
|
|
|
|
try req.start();
|
|
try req.wait();
|
|
|
|
if (req.response.status != .ok) {
|
|
std.log.err("Expected response status '200 OK' got '{} {s}'", .{
|
|
@intFromEnum(req.response.status),
|
|
req.response.status.phrase() orelse "",
|
|
});
|
|
return error.UnexpectedResponseStatus;
|
|
}
|
|
|
|
const content_type = req.response.headers.getFirstValue("Content-Type") orelse
|
|
return error.MissingContentTypeHeader;
|
|
|
|
var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{
|
|
.child_reader = req.reader(),
|
|
.prog_node = &pkg_prog_node,
|
|
.unit = if (req.response.content_length) |content_length| unit: {
|
|
const kib = content_length / 1024;
|
|
const mib = kib / 1024;
|
|
if (mib > 0) {
|
|
pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
|
|
pkg_prog_node.setUnit("MiB");
|
|
break :unit .mib;
|
|
} else {
|
|
pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
|
|
pkg_prog_node.setUnit("KiB");
|
|
break :unit .kib;
|
|
}
|
|
} else .any,
|
|
};
|
|
pkg_prog_node.context.refresh();
|
|
|
|
if (std.ascii.eqlIgnoreCase(content_type, "application/gzip") or
|
|
std.ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
|
|
std.ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
|
|
{
|
|
// I observed the gzip stream to read 1 byte at a time, so I am using a
|
|
// buffered reader on the front of it.
|
|
try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
|
|
} else if (std.ascii.eqlIgnoreCase(content_type, "application/x-xz")) {
|
|
// I have not checked what buffer sizes the xz decompression implementation uses
|
|
// by default, so the same logic applies for buffering the reader as for gzip.
|
|
try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz);
|
|
} else if (std.ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
|
|
// support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
|
|
// whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
|
|
const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
|
|
return error.@"Missing 'Content-Disposition' header for Content-Type=application/octet-stream";
|
|
if (isTarAttachment(content_disposition)) {
|
|
try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
|
|
} else {
|
|
std.log.err("Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
|
|
return error.UnsupportedContentDispositionHeader;
|
|
}
|
|
} else {
|
|
std.log.err("Unsupported 'Content-Type' header value: '{s}'", .{content_type});
|
|
return error.UnsupportedContentTypeHeader;
|
|
}
|
|
|
|
// Download completed - stop showing downloaded amount as progress
|
|
pkg_prog_node.setEstimatedTotalItems(0);
|
|
pkg_prog_node.setCompletedItems(0);
|
|
pkg_prog_node.context.refresh();
|
|
|
|
// TODO: delete files not included in the package prior to computing the package hash.
|
|
// for example, if the ini file has directives to include/not include certain files,
|
|
// apply those rules directly to the filesystem right here. This ensures that files
|
|
// not protected by the hash are not present on the file system.
|
|
|
|
// TODO: raise an error for files that have illegal paths on some operating systems.
|
|
// For example, on Linux a path with a backslash should raise an error here.
|
|
// Of course, if the ignore rules above omit the file from the package, then everything
|
|
// is fine and no error should be raised.
|
|
|
|
break :a try Hasher.computeDirectoryHash(thread_pool, .{ .dir = tmp_directory.handle });
|
|
};
|
|
|
|
const pkg_dir_sub_path = "p" ++ s ++ Hasher.hexDigest(actual_hash);
|
|
try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path);
|
|
|
|
const actual_hex = Hasher.hexDigest(actual_hash);
|
|
if (dep.hash) |h| {
|
|
if (!std.mem.eql(u8, h, &actual_hex)) {
|
|
std.log.err("hash mismatch: expected: {s}, found: {s}", .{
|
|
h, actual_hex,
|
|
});
|
|
return error.HashMismatch;
|
|
}
|
|
} else {
|
|
std.log.err("No hash supplied. Expecting hash \"{s}\"", .{actual_hex});
|
|
return error.NoHashSupplied;
|
|
}
|
|
|
|
const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path});
|
|
defer gpa.free(build_root);
|
|
|
|
const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path);
|
|
return mod;
|
|
}
|
|
fn ProgressReader(comptime ReaderType: type) type {
|
|
return struct {
|
|
child_reader: ReaderType,
|
|
bytes_read: u64 = 0,
|
|
prog_node: *std.Progress.Node,
|
|
unit: enum {
|
|
kib,
|
|
mib,
|
|
any,
|
|
},
|
|
|
|
pub const Error = ReaderType.Error;
|
|
pub const Reader = std.io.Reader(*@This(), Error, read);
|
|
|
|
pub fn read(self: *@This(), buf: []u8) Error!usize {
|
|
const amt = try self.child_reader.read(buf);
|
|
self.bytes_read += amt;
|
|
const kib = self.bytes_read / 1024;
|
|
const mib = kib / 1024;
|
|
switch (self.unit) {
|
|
.kib => self.prog_node.setCompletedItems(@intCast(kib)),
|
|
.mib => self.prog_node.setCompletedItems(@intCast(mib)),
|
|
.any => {
|
|
if (mib > 0) {
|
|
self.prog_node.setUnit("MiB");
|
|
self.prog_node.setCompletedItems(@intCast(mib));
|
|
} else {
|
|
self.prog_node.setUnit("KiB");
|
|
self.prog_node.setCompletedItems(@intCast(kib));
|
|
}
|
|
},
|
|
}
|
|
self.prog_node.context.maybeRefresh();
|
|
return amt;
|
|
}
|
|
|
|
pub fn reader(self: *@This()) Reader {
|
|
return .{ .context = self };
|
|
}
|
|
};
|
|
}
|
|
fn isTarAttachment(content_disposition: []const u8) bool {
|
|
const disposition_type_end = std.ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return false;
|
|
|
|
var value_start = std.ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return false;
|
|
value_start += "filename".len;
|
|
if (content_disposition[value_start] == '*') {
|
|
value_start += 1;
|
|
}
|
|
if (content_disposition[value_start] != '=') return false;
|
|
value_start += 1;
|
|
|
|
var value_end = std.mem.indexOfPos(u8, content_disposition, value_start, ";") orelse content_disposition.len;
|
|
if (content_disposition[value_end - 1] == '\"') {
|
|
value_end -= 1;
|
|
}
|
|
return std.ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz");
|
|
}
|
|
fn renameTmpIntoCache(
|
|
cache_dir: std.fs.Dir,
|
|
tmp_dir_sub_path: []const u8,
|
|
dest_dir_sub_path: []const u8,
|
|
) !void {
|
|
std.debug.assert(dest_dir_sub_path[1] == std.fs.path.sep);
|
|
var handled_missing_dir = false;
|
|
while (true) {
|
|
cache_dir.rename(tmp_dir_sub_path, dest_dir_sub_path) catch |err| switch (err) {
|
|
error.FileNotFound => {
|
|
if (handled_missing_dir) return err;
|
|
cache_dir.makeDir(dest_dir_sub_path[0..1]) catch |mkd_err| switch (mkd_err) {
|
|
error.PathAlreadyExists => handled_missing_dir = true,
|
|
else => |e| return e,
|
|
};
|
|
continue;
|
|
},
|
|
error.PathAlreadyExists, error.AccessDenied => {
|
|
// Package has been already downloaded and may already be in use on the system.
|
|
cache_dir.deleteTree(tmp_dir_sub_path) catch |del_err| {
|
|
std.log.warn("unable to delete temp directory: {s}", .{@errorName(del_err)});
|
|
};
|
|
},
|
|
else => |e| return e,
|
|
};
|
|
break;
|
|
}
|
|
}
|
|
|
|
fn createWithDir(
|
|
gpa: std.mem.Allocator,
|
|
directory: std.Build.Cache.Directory,
|
|
/// Relative to `directory`. If null, means `directory` is the root src dir
|
|
/// and is owned externally.
|
|
root_src_dir_path: ?[]const u8,
|
|
) !*Package {
|
|
const ptr = try gpa.create(Package);
|
|
errdefer gpa.destroy(ptr);
|
|
|
|
if (root_src_dir_path) |p| {
|
|
const owned_dir_path = try directory.join(gpa, &[1][]const u8{p});
|
|
errdefer gpa.free(owned_dir_path);
|
|
|
|
ptr.* = .{
|
|
.root_src_directory = .{
|
|
.path = owned_dir_path,
|
|
.handle = try directory.handle.openDir(p, .{}),
|
|
},
|
|
.root_src_directory_owned = true,
|
|
.allocator = gpa,
|
|
};
|
|
} else {
|
|
ptr.* = .{
|
|
.root_src_directory = directory,
|
|
.root_src_directory_owned = false,
|
|
.allocator = gpa,
|
|
};
|
|
}
|
|
return ptr;
|
|
}
|
|
// Create/Write a file, close it, then grab its stat.mtime timestamp.
|
|
fn testGetCurrentFileTimestamp(dir: std.fs.Dir) !i128 {
|
|
const test_out_file = "test-filetimestamp.tmp";
|
|
|
|
var file = try dir.createFile(test_out_file, .{
|
|
.read = true,
|
|
.truncate = true,
|
|
});
|
|
defer {
|
|
file.close();
|
|
dir.deleteFile(test_out_file) catch {};
|
|
}
|
|
|
|
return (try file.stat()).mtime;
|
|
}
|
|
|
|
// These functions come from src/Package.zig, src/Manifest.zig in the compiler,
|
|
// not the standard library
|
|
fn unpackTarball(
|
|
gpa: std.mem.Allocator,
|
|
req_reader: anytype,
|
|
out_dir: std.fs.Dir,
|
|
comptime compression: type,
|
|
) !void {
|
|
var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req_reader);
|
|
|
|
var decompress = try compression.decompress(gpa, br.reader());
|
|
defer decompress.deinit();
|
|
|
|
try std.tar.pipeToFileSystem(out_dir, decompress.reader(), .{
|
|
.strip_components = 1,
|
|
// TODO: we would like to set this to executable_bit_only, but two
|
|
// things need to happen before that:
|
|
// 1. the tar implementation needs to support it
|
|
// 2. the hashing algorithm here needs to support detecting the is_executable
|
|
// bit on Windows from the ACLs (see the isExecutable function).
|
|
.mode_mode = .ignore,
|
|
});
|
|
}
|
|
|
|
test {
|
|
std.testing.refAllDecls(@This());
|
|
}
|
|
test "cache a file and recall it" {
|
|
if (builtin.os.tag == .wasi) {
|
|
// https://github.com/ziglang/zig/issues/5437
|
|
|
|
return error.SkipZigTest;
|
|
}
|
|
|
|
var tmp = testing.tmpDir(.{});
|
|
defer tmp.cleanup();
|
|
|
|
const temp_file = "test.txt";
|
|
const temp_file2 = "test2.txt";
|
|
const temp_manifest_dir = "temp_manifest_dir";
|
|
|
|
try tmp.dir.writeFile(temp_file, "Hello, world!\n");
|
|
try tmp.dir.writeFile(temp_file2, "yo mamma\n");
|
|
|
|
// Wait for file timestamps to tick
|
|
|
|
const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
|
|
while ((try testGetCurrentFileTimestamp(tmp.dir)) == initial_time) {
|
|
std.time.sleep(1);
|
|
}
|
|
|
|
var digest1: [hex_digest_len]u8 = undefined;
|
|
var digest2: [hex_digest_len]u8 = undefined;
|
|
|
|
{
|
|
var cache = std.build.Cache{
|
|
.gpa = testing.allocator,
|
|
.manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
|
|
};
|
|
cache.addPrefix(.{ .path = null, .handle = tmp.dir });
|
|
defer cache.manifest_dir.close();
|
|
|
|
{
|
|
var ch = cache.obtain();
|
|
defer ch.deinit();
|
|
|
|
ch.hash.add(true);
|
|
ch.hash.add(@as(u16, 1234));
|
|
ch.hash.addBytes("1234");
|
|
_ = try ch.addFile(temp_file, null);
|
|
|
|
// There should be nothing in the cache
|
|
|
|
try testing.expectEqual(false, try ch.hit());
|
|
|
|
digest1 = ch.final();
|
|
try ch.writeManifest();
|
|
}
|
|
{
|
|
var ch = cache.obtain();
|
|
defer ch.deinit();
|
|
|
|
ch.hash.add(true);
|
|
ch.hash.add(@as(u16, 1234));
|
|
ch.hash.addBytes("1234");
|
|
_ = try ch.addFile(temp_file, null);
|
|
|
|
// Cache hit! We just "built" the same file
|
|
|
|
try testing.expect(try ch.hit());
|
|
digest2 = ch.final();
|
|
|
|
try testing.expectEqual(false, ch.have_exclusive_lock);
|
|
}
|
|
|
|
try testing.expectEqual(digest1, digest2);
|
|
}
|
|
}
|
|
test "fetch and unpack" {
|
|
const alloc = std.testing.allocator;
|
|
var http_client: std.http.Client = .{ .allocator = alloc };
|
|
defer http_client.deinit();
|
|
|
|
var global_cache_directory: std.Build.Cache.Directory = .{
|
|
.handle = try std.fs.cwd().makeOpenPath("test-pkg", .{}),
|
|
.path = "test-pkg",
|
|
};
|
|
var thread_pool: std.Thread.Pool = undefined;
|
|
try thread_pool.init(.{ .allocator = alloc });
|
|
defer thread_pool.deinit();
|
|
var progress: std.Progress = .{ .dont_print_on_dumb = true };
|
|
const root_prog_node = progress.start("Fetch Packages", 0);
|
|
defer root_prog_node.end();
|
|
const pkg = try fetchAndUnpack(
|
|
&thread_pool,
|
|
&http_client,
|
|
global_cache_directory,
|
|
.{
|
|
.url = "https://github.com/aws/aws-sdk-go-v2/archive/7502ff360b1c3b79cbe117437327f6ff5fb89f65.tar.gz",
|
|
.hash = "1220a414719bff14c9362fb1c695e3346fa12ec2e728bae5757a57aae7738916ffd2",
|
|
},
|
|
"https://github.com/aws/aws-sdk-go-v2/archive/7502ff360b1c3b79cbe117437327f6ff5fb89f65.tar.gz",
|
|
root_prog_node,
|
|
);
|
|
defer alloc.destroy(pkg);
|
|
defer pkg.deinit();
|
|
}
|
|
test "fetch one and unpack" {
|
|
const pkg = try fetchOneAndUnpack(
|
|
std.testing.allocator,
|
|
"test-pkg",
|
|
.{
|
|
.url = "https://github.com/aws/aws-sdk-go-v2/archive/7502ff360b1c3b79cbe117437327f6ff5fb89f65.tar.gz",
|
|
.hash = "1220a414719bff14c9362fb1c695e3346fa12ec2e728bae5757a57aae7738916ffd2",
|
|
},
|
|
);
|
|
defer std.testing.allocator.destroy(pkg);
|
|
defer pkg.deinit();
|
|
try std.testing.expectEqualStrings(
|
|
"test-pkg/p/1220a414719bff14c9362fb1c695e3346fa12ec2e728bae5757a57aae7738916ffd2",
|
|
pkg.root_src_directory.path.?,
|
|
);
|
|
}
|
|
test "isTarAttachment" {
|
|
try std.testing.expect(isTarAttachment("attaChment; FILENAME=\"stuff.tar.gz\"; size=42"));
|
|
try std.testing.expect(isTarAttachment("attachment; filename*=\"stuff.tar.gz\""));
|
|
try std.testing.expect(isTarAttachment("ATTACHMENT; filename=\"stuff.tar.gz\""));
|
|
try std.testing.expect(isTarAttachment("attachment; FileName=\"stuff.tar.gz\""));
|
|
try std.testing.expect(isTarAttachment("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz"));
|
|
|
|
try std.testing.expect(!isTarAttachment("attachment FileName=\"stuff.tar.gz\""));
|
|
try std.testing.expect(!isTarAttachment("attachment; FileName=\"stuff.tar\""));
|
|
try std.testing.expect(!isTarAttachment("attachment; FileName\"stuff.gz\""));
|
|
try std.testing.expect(!isTarAttachment("attachment; size=42"));
|
|
try std.testing.expect(!isTarAttachment("inline; size=42"));
|
|
try std.testing.expect(!isTarAttachment("FileName=\"stuff.tar.gz\"; attachment;"));
|
|
try std.testing.expect(!isTarAttachment("FileName=\"stuff.tar.gz\";"));
|
|
}
|