pos/build.zig
Emil Lerch 1f6cb4eb0c
All checks were successful
Generic zig build / build (push) Successful in 28s
add help
2025-10-15 11:24:01 -07:00

472 lines
20 KiB
Zig

const std = @import("std");
// Although this function looks imperative, it does not perform the build
// directly and instead it mutates the build graph (`b`) that will be then
// executed by an external runner. The functions in `std.Build` implement a DSL
// for defining build steps and express dependencies between them, allowing the
// build runner to parallelize the build automatically (and the cache system to
// know when a step doesn't need to be re-run).
//
// Overview of the graph defined here:
//   * a custom step downloads and unpacks the link-4.1b sources,
//   * a `sed-lite` run step patches `src/utilities.c` in the unpacked tree,
//   * the patched C sources are compiled into the static library `link`,
//   * the `pos` module and the `pos` executable link against that library,
//   * the upstream `data` directory is installed to `share/link`,
//   * `run` and `test` top level steps are registered.
pub fn build(b: *std.Build) !void {
    // Standard target options allow the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting supported target set are available.
    const target = b.standardTargetOptions(.{});
    // Standard optimization options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
    // set a preferred release mode, allowing the user to decide how to optimize.
    const optimize = b.standardOptimizeOption(.{});
    // It's also possible to define more custom flags to toggle optional features
    // of this build script using `b.option()`. All defined flags (including
    // target and optimize options) will be listed when running `zig build --help`
    // in this directory.
    const long_tests = b.option(bool, "long-tests", "Run long-running tests") orelse false;

    // We need to use curl for this as the domain doesn't work with zig TLS
    const download_link_step = DownloadStep(
        "link-4.1b",
        "https://www.link.cs.cmu.edu/link/ftp-site/link-grammar/link-4.1b/unix/link-4.1b.tar.gz",
    ).create(b);
    const upstream = download_link_step.dependency(b, .{});

    // mod tests fail because the library has unconditional output to stdout,
    // which messes with the test runner. So...we need to patch utilities.c with search:
    // printf(" Opening
    //
    // replace:
    // if ( verbosity > 0 ) printf(" Opening
    //
    const Substitutions = struct { orig: []const u8, new: []const u8 };
    const patterns: [4]Substitutions = .{
        .{
            .orig = "\tprintf(\" Opening %s\\n\", filename); ",
            .new = "\tif ( verbosity > 0 ) printf(\" Opening %s\\n\", filename); ",
        },
        .{
            .orig = "\tprintf(\" Opening %s\\n\", filename);",
            .new = "\tif ( verbosity > 0 ) printf(\" Opening %s\\n\", filename);",
        },
        .{
            .orig = "\t printf(\" Opening %s\\n\", completename); ",
            .new = "\t if ( verbosity > 0 ) printf(\" Opening %s\\n\", completename); ",
        },
        .{
            .orig = " printf(\" Opening %s\\n\", completename); ",
            .new = " if ( verbosity > 0 ) printf(\" Opening %s\\n\", completename); ",
        },
    };
    // pat5 is same as pat3
    //const pat5 = \t printf(" Opening %s\n", completename); ";

    // Create sed-lite run step to patch utilities.c. The arguments are the
    // `-sL` flag, then every (orig, new) pair in order, then the file to patch.
    const sed_lite = b.dependency("sed_lite", .{});
    const sed_lite_exe = sed_lite.artifact("sed-lite");
    const patch_cmd = b.addRunArtifact(sed_lite_exe);
    patch_cmd.addArg("-sL");
    for (patterns) |s| {
        patch_cmd.addArg(s.orig);
        patch_cmd.addArg(s.new);
    }
    const util_src = try std.fs.path.join(b.allocator, &.{
        "src",
        "utilities.c",
    });
    patch_cmd.addFileArg(upstream.path(util_src));
    // `upstream.path` is a plain path into the cache, so the ordering with the
    // download has to be expressed explicitly.
    patch_cmd.step.dependOn(&download_link_step.step);

    const lib = b.addLibrary(.{
        .name = "link",
        .linkage = .static,
        .root_module = b.createModule(.{
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        }),
    });
    // Compile only after utilities.c has been patched (which in turn waits
    // for the download).
    lib.step.dependOn(&patch_cmd.step);
    lib.addIncludePath(upstream.path("include"));
    lib.addCSourceFiles(.{
        .root = upstream.path("src"),
        .files = &.{
            "analyze-linkage.c",
            "and.c",
            "api.c",
            "build-disjuncts.c",
            "command-line.c",
            "constituents.c",
            "count.c",
            "error.c",
            "extract-links.c",
            "fast-match.c",
            "idiom.c",
            "linkset.c",
            "massage.c",
            "post-process.c",
            "pp_knowledge.c",
            "pp_lexer.c",
            "pp_linkset.c",
            "preparation.c",
            "print-util.c",
            "print.c",
            "prune.c",
            "read-dict.c",
            "resources.c",
            "string-set.c",
            "tokenize.c",
            "utilities.c",
            "word-file.c",
        },
        .flags = &.{
            "-O2",
            "-fwrapv",
        },
    });

    // This creates a module, which represents a collection of source files alongside
    // some compilation options, such as optimization mode and linked system libraries.
    // Zig modules are the preferred way of making Zig code available to consumers.
    // addModule defines a module that we intend to make available for importing
    // to our consumers. We must give it a name because a Zig package can expose
    // multiple modules and consumers will need to be able to specify which
    // module they want to access.
    const mod = b.addModule("pos", .{
        // The root source file is the "entry point" of this module. Users of
        // this module will only be able to access public declarations contained
        // in this file, which means that if you have declarations that you
        // intend to expose to consumers that were defined in other files part
        // of this module, you will have to make sure to re-export them from
        // the root file.
        .root_source_file = b.path("src/root.zig"),
        // Later on we'll use this module as the root module of a test executable
        // which requires us to specify a target.
        .target = target,
    });

    const options = b.addOptions();
    options.addOption(bool, "long_tests", long_tests);
    // `b.run` terminates the whole build when the command cannot be spawned
    // or exits non-zero (git not installed, or sources unpacked from a
    // tarball without a `.git` directory). Fall back to a placeholder version
    // in that case; on success git's output is forwarded unchanged.
    var git_exit_code: u8 = undefined;
    const git_describe: []const u8 = b.runAllowFail(
        &.{ "git", "describe", "--always", "--dirty" },
        &git_exit_code,
        .Ignore,
    ) catch "unknown";
    options.addOption([]const u8, "version", git_describe);
    const options_module = options.createModule();
    mod.addImport("build_options", options_module);
    mod.linkLibrary(lib);
    mod.addIncludePath(upstream.path("include"));

    // Here we define an executable. An executable needs to have a root module
    // which needs to expose a `main` function. While we could add a main function
    // to the module defined above, it's sometimes preferable to split
    // business logic and the CLI into two separate modules.
    //
    // If your goal is to create a Zig library for others to use, consider if
    // it might benefit from also exposing a CLI tool. A parser library for a
    // data serialization format could also bundle a CLI syntax checker, for example.
    //
    // If instead your goal is to create an executable, consider if users might
    // be interested in also being able to embed the core functionality of your
    // program in their own executable in order to avoid the overhead involved in
    // subprocessing your CLI tool.
    //
    // If neither case applies to you, feel free to delete the declaration you
    // don't need and to put everything under a single module.
    const exe = b.addExecutable(.{
        .name = "pos",
        .root_module = b.createModule(.{
            // b.createModule defines a new module just like b.addModule but,
            // unlike b.addModule, it does not expose the module to consumers of
            // this package, which is why in this case we don't have to give it a name.
            .root_source_file = b.path("src/main.zig"),
            // Target and optimization levels must be explicitly wired in when
            // defining an executable or library (in the root module), and you
            // can also hardcode a specific target for an executable or library
            // definition if desirable (e.g. firmware for embedded devices).
            .target = target,
            .optimize = optimize,
            // List of modules available for import in source files part of the
            // root module.
            .imports = &.{
                // Here "pos" is the name you will use in your source code to
                // import this module (e.g. `@import("pos")`). The name is
                // repeated because you are allowed to rename your imports, which
                // can be extremely useful in case of collisions (which can happen
                // importing modules from different packages).
                .{ .name = "pos", .module = mod },
                .{ .name = "build_options", .module = options_module },
            },
        }),
    });

    // Copy data files to install directory
    const install_data = b.addInstallDirectory(.{
        .source_dir = upstream.path("data"),
        .install_dir = .{ .custom = "share" },
        .install_subdir = "link",
    });
    install_data.step.dependOn(&download_link_step.step);

    // This declares intent for the executable to be installed into the
    // install prefix when running `zig build` (i.e. when executing the default
    // step). By default the install prefix is `zig-out/` but can be overridden
    // by passing `--prefix` or `-p`.
    b.installArtifact(exe);
    b.getInstallStep().dependOn(&install_data.step);

    // This creates a top level step. Top level steps have a name and can be
    // invoked by name when running `zig build` (e.g. `zig build run`).
    // This will evaluate the `run` step rather than the default step.
    // For a top level step to actually do something, it must depend on other
    // steps (e.g. a Run step, as we will see in a moment).
    const run_step = b.step("run", "Run the app");
    // This creates a RunArtifact step in the build graph. A RunArtifact step
    // invokes an executable compiled by Zig. Steps will only be executed by the
    // runner if invoked directly by the user (in the case of top level steps)
    // or if another step depends on it, so it's up to you to define when and
    // how this Run step will be executed. In our case we want to run it when
    // the user runs `zig build run`, so we create a dependency link.
    const run_cmd = b.addRunArtifact(exe);
    run_step.dependOn(&run_cmd.step);
    // By making the run step depend on the default step, it will be run from the
    // installation directory rather than directly from within the cache directory.
    run_cmd.step.dependOn(b.getInstallStep());
    // This allows the user to pass arguments to the application in the build
    // command itself, like this: `zig build run -- arg1 arg2 etc`
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    // Creates an executable that will run `test` blocks from the provided module.
    // Here `mod` needs to define a target, which is why earlier we made sure to
    // set the relevant field.
    const mod_tests = b.addTest(.{
        .root_module = mod,
    });
    // A run step that will run the test executable.
    const run_mod_tests = b.addRunArtifact(mod_tests);
    run_mod_tests.setCwd(.{ .cwd_relative = b.getInstallPath(.bin, "") });
    run_mod_tests.step.dependOn(&install_data.step);
    // The working directory set above is the install `bin` directory, which
    // is populated by installing the executable. Depend on the install step
    // so that directory is present even when `zig build test` is the first
    // command run on a clean tree.
    run_mod_tests.step.dependOn(b.getInstallStep());

    // Creates an executable that will run `test` blocks from the executable's
    // root module. Note that test executables only test one module at a time,
    // hence why we have to create two separate ones.
    const exe_tests = b.addTest(.{
        .root_module = exe.root_module,
    });
    // A run step that will run the second test executable.
    const run_exe_tests = b.addRunArtifact(exe_tests);
    run_exe_tests.setCwd(.{ .cwd_relative = b.getInstallPath(.bin, "") });
    run_exe_tests.step.dependOn(&install_data.step);
    // Same reasoning as for the module tests: make sure the `bin` directory
    // used as the working directory has been created by the install step.
    run_exe_tests.step.dependOn(b.getInstallStep());

    // A top level step for running all tests. dependOn can be called multiple
    // times and since the two run steps do not depend on one another, this will
    // make the two of them run in parallel.
    const test_step = b.step("test", "Run tests");
    test_step.dependOn(&run_mod_tests.step);
    test_step.dependOn(&run_exe_tests.step);

    // Just like flags, top level steps are also listed in the `--help` menu.
    //
    // The Zig build system is entirely implemented in userland, which means
    // that it cannot hook into private compiler APIs. All compilation work
    // orchestrated by the build system will result in other Zig compiler
    // subcommands being invoked with the right flags defined. You can observe
    // these invocations when one fails (or you pass a flag to increase
    // verbosity) to validate assumptions and diagnose problems.
    //
    // Lastly, the Zig build system is relatively simple and self-contained,
    // and reading its source code will allow you to master it.
}
/// Returns a custom build-step type that downloads the archive at `link` with
/// `curl` and unpacks it below the build cache.
///
/// `name` is only used for the step's display name (`"download " ++ name`).
/// `link` must be a valid URI ending in `.tar.gz` or `.zip`; anything else is
/// rejected at compile time.
///
/// The archive is unpacked into `<cache_root>/o/<sha256 of link, hex>/`, and
/// the step treats `<that directory>/<archive name without extension>` as the
/// unpacked tree: if it already exists the step reports itself as cached and
/// does nothing.
///
/// Paths handed out by `Dependency.path` are plain source paths and carry no
/// step dependency, so users must add `dependOn(&download.step)` themselves.
fn DownloadStep(comptime name: []const u8, comptime link: []const u8) type {
    return struct {
        step: std.Build.Step,
        builder: *std.Build,

        const download_link = link;
        const download_uri = std.Uri.parse(link) catch @compileError("download link is not a valid Uri");
        /// Archive format, derived from the link's suffix. The zip check
        /// includes the dot so it agrees with `fileNameNoExtension`, which
        /// strips a literal ".zip".
        const file_type: enum {
            targz,
            zip,
        } = if (std.mem.endsWith(u8, link, ".tar.gz"))
            .targz
        else if (std.mem.endsWith(u8, link, ".zip"))
            .zip
        else
            @compileError("can only download tar.gz or zip files");

        const Self = @This();

        /// Handle returned by `dependency`, used to build paths into the
        /// unpacked archive.
        const Dependency = struct {
            /// Root of the unpacked tree, as computed by `getOutputPath`
            /// when the handle was created.
            build_root: []const u8,
            build: *std.Build,
            download: *Self,

            /// Returns a source path for `sub_path` inside the unpacked
            /// archive. Panics on allocation failure.
            pub fn path(self: Dependency, sub_path: []const u8) std.Build.LazyPath {
                const full_path = std.fs.path.join(
                    self.build.allocator,
                    &.{ self.build_root, sub_path },
                ) catch @panic("OOM");
                return .{
                    .src_path = .{
                        .owner = self.build,
                        .sub_path = full_path,
                    },
                };
            }
        };

        /// Returns the last segment of the URI's path (the archive file name).
        fn fileName(uri: std.Uri) []const u8 {
            const path = switch (uri.path) {
                .raw => |r| r,
                .percent_encoded => |p| p,
            };
            // URI path segments are always separated by '/', independent of
            // the host's file system separator.
            var it = std.mem.splitBackwardsScalar(u8, path, '/');
            return it.first();
        }

        /// Returns the archive file name with its `.tar.gz` / `.zip` suffix
        /// removed.
        fn fileNameNoExtension() []const u8 {
            const file_name = fileName(download_uri);
            return switch (file_type) {
                .targz => file_name[0..std.mem.lastIndexOf(u8, file_name, ".tar.gz").?],
                .zip => file_name[0..std.mem.lastIndexOf(u8, file_name, ".zip").?],
            };
        }

        /// Allocates the step with `builder.allocator` and registers `make`
        /// as its make function. Panics on allocation failure.
        pub fn create(builder: *std.Build) *Self {
            const self = builder.allocator.create(Self) catch @panic("OOM");
            self.* = .{
                .step = std.Build.Step.init(.{
                    .id = .custom,
                    .name = "download " ++ name,
                    .owner = builder,
                    .makeFn = make,
                }),
                .builder = builder,
            };
            return self;
        }

        const Algo = std.crypto.hash.sha2.Sha256;

        /// SHA-256 of the download link; identifies this download in the cache.
        fn urlDigest() [Algo.digest_length]u8 {
            var digest: [Algo.digest_length]u8 = undefined;
            Algo.hash(download_link, &digest, .{});
            return digest;
        }

        /// Returns `<cache_root>/o/<lower-case hex of digest>`, the directory
        /// the archive is downloaded to and unpacked in.
        fn hashDir(builder: *std.Build, digest: [Algo.digest_length]u8) ![]u8 {
            const hex = std.fmt.bytesToHex(digest, .lower);
            return std.fs.path.join(
                builder.allocator,
                &[_][]const u8{ builder.cache_root.path.?, "o", &hex },
            );
        }

        /// Returns the directory the archive unpacks to together with the
        /// digest of the link. Pure path computation: nothing is downloaded
        /// or created here.
        pub fn getOutputPath(self: *Self) !struct { path: []const u8, hash: [Algo.digest_length]u8 } {
            const digest = urlDigest();
            const cache_dir = try hashDir(self.builder, digest);
            return .{
                .path = try std.fs.path.join(
                    self.builder.allocator,
                    &[_][]const u8{ cache_dir, fileNameNoExtension() },
                ),
                .hash = digest,
            };
        }

        /// Step implementation: downloads and unpacks the archive unless the
        /// unpacked directory already exists.
        ///
        /// Fails with `error.DownloadFailed` when curl cannot be run, is
        /// terminated abnormally, or exits non-zero, and with
        /// `error.UnzipFailed` / `error.ExtractFailed` when unpacking fails.
        fn make(step: *std.Build.Step, options: std.Build.Step.MakeOptions) anyerror!void {
            _ = options;
            const self: *Self = @fieldParentPtr("step", step);
            const allocator = self.builder.allocator;

            const cache_dir = try hashDir(self.builder, urlDigest());
            const cached_model_dir = try std.fs.path.join(
                allocator,
                &[_][]const u8{ cache_dir, fileNameNoExtension() },
            );
            defer allocator.free(cached_model_dir);

            // Check if already cached
            if (std.fs.cwd().access(cached_model_dir, .{})) |_| {
                step.result_cached = true;
                return;
            } else |_| {}

            // From here on any failure must not leave a partially unpacked
            // tree behind, otherwise the check above would report a cache hit
            // on the next run. Removal is best effort.
            errdefer std.fs.cwd().deleteTree(cached_model_dir) catch {};

            // Not cached, need to download
            try std.fs.cwd().makePath(cache_dir);
            const archive = try std.fs.path.join(
                allocator,
                &[_][]const u8{ cache_dir, fileName(download_uri) },
            );
            defer allocator.free(archive);

            // Download. `-f` makes curl exit non-zero on HTTP errors instead
            // of saving the server's error page as the archive.
            const download_result = std.process.Child.run(.{
                .allocator = allocator,
                .argv = &.{ "curl", "-f", "-s", "-o", archive, link },
            }) catch return error.DownloadFailed;
            defer allocator.free(download_result.stdout);
            defer allocator.free(download_result.stderr);
            // `term` is a tagged union: curl may also have been killed by a
            // signal, in which case there is no exit code to inspect.
            switch (download_result.term) {
                .Exited => |code| if (code != 0) return error.DownloadFailed,
                else => return error.DownloadFailed,
            }

            switch (file_type) {
                .zip => {
                    // Extract to cache using stdlib
                    var zip_file = std.fs.cwd().openFile(archive, .{}) catch return error.UnzipFailed;
                    defer zip_file.close();
                    var cache_dir_handle = std.fs.cwd().openDir(cache_dir, .{}) catch return error.UnzipFailed;
                    defer cache_dir_handle.close();
                    var zip_file_buffer: [4096]u8 = undefined;
                    var zip_file_reader = zip_file.reader(&zip_file_buffer);
                    std.zip.extract(cache_dir_handle, &zip_file_reader, .{}) catch return error.UnzipFailed;
                },
                .targz => {
                    var archive_file = std.fs.cwd().openFile(archive, .{}) catch return error.ExtractFailed;
                    defer archive_file.close();
                    var buf: [4096]u8 = undefined;
                    var file_reader = archive_file.reader(&buf);
                    const reader = &file_reader.interface;
                    var cache_dir_handle = std.fs.cwd().openDir(cache_dir, .{}) catch return error.ExtractFailed;
                    defer cache_dir_handle.close();
                    // gunzip on the fly and feed the tar stream straight into
                    // the cache directory.
                    var gz_buf: [std.compress.flate.max_window_len]u8 = undefined;
                    var decompress = std.compress.flate.Decompress.init(reader, .gzip, &gz_buf);
                    std.tar.pipeToFileSystem(
                        cache_dir_handle,
                        &decompress.reader,
                        .{ .mode_mode = .ignore },
                    ) catch return error.ExtractFailed;
                },
            }
        }

        /// Returns a `Dependency` handle rooted at the unpacked archive.
        /// `args` is accepted and ignored. Panics if the output path cannot
        /// be computed or on allocation failure.
        pub fn dependency(
            self: *Self,
            b: *std.Build,
            args: anytype,
        ) *Dependency {
            _ = args;
            const output = self.getOutputPath() catch @panic("cannot get output path");
            const dep = b.allocator.create(Dependency) catch @panic("OOM");
            dep.* = .{
                .download = self,
                .build = b,
                .build_root = output.path,
            };
            return dep;
        }

        // NOTE(review): not referenced anywhere in the visible part of this
        // file; kept unchanged.
        const UserValue = union(enum) {
            flag: void,
            scalar: []const u8,
            list: std.array_list.Managed([]const u8),
            map: std.StringHashMap(*const UserValue),
            lazy_path: std.Build.LazyPath,
            lazy_path_list: std.array_list.Managed(std.Build.LazyPath),
        };
    };
}