const std = @import("std");

// Although this function looks imperative, it does not perform the build
// directly and instead it mutates the build graph (`b`) that will be then
// executed by an external runner. The functions in `std.Build` implement a DSL
// for defining build steps and express dependencies between them, allowing the
// build runner to parallelize the build automatically (and the cache system to
// know when a step doesn't need to be re-run).
pub fn build(b: *std.Build) void {
    // Standard target options allow the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting supported target set are available.
    const target = b.standardTargetOptions(.{});
    // Standard optimization options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
    // set a preferred release mode, allowing the user to decide how to optimize.
    const optimize = b.standardOptimizeOption(.{});
    // It's also possible to define more custom flags to toggle optional features
    // of this build script using `b.option()`. All defined flags (including
    // target and optimize options) will be listed when running `zig build --help`
    // in this directory.

    // We need to use curl for this as the domain doesn't work with zig TLS
    const download_link_step = DownloadStep("https://www.link.cs.cmu.edu/link/ftp-site/link-grammar/link-4.1b/unix/link-4.1b.tar.gz").create(b);
    const upstream = download_link_step.dependency(b, .{});

    const lib = b.addLibrary(.{
        .name = "link",
        .linkage = .static,
        .root_module = b.createModule(.{
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        }),
    });
    // `upstream.path(...)` yields plain source paths that carry no step
    // dependency, so the download has to be wired in explicitly.
    lib.step.dependOn(&download_link_step.step);
    lib.addIncludePath(upstream.path("include"));
    lib.addCSourceFiles(.{
        .root = upstream.path("src"),
        .files = &.{
            "analyze-linkage.c",
            "and.c",
            "api-example.c",
            "api.c",
            "build-disjuncts.c",
            "command-line.c",
            "constituents.c",
            "count.c",
            "error.c",
            "extract-links.c",
            "fast-match.c",
            "idiom.c",
            "linkset.c",
            "massage.c",
            "parse.c",
            "post-process.c",
            "pp_knowledge.c",
            "pp_lexer.c",
            "pp_linkset.c",
            "preparation.c",
            "print-util.c",
            "print.c",
            "prune.c",
            "read-dict.c",
            "resources.c",
            "string-set.c",
            "tokenize.c",
            "utilities.c",
            "word-file.c",
            "www-parse.c",
        },
    });

    // This creates a module, which represents a collection of source files alongside
    // some compilation options, such as optimization mode and linked system libraries.
    // Zig modules are the preferred way of making Zig code available to consumers.
    // addModule defines a module that we intend to make available for importing
    // to our consumers. We must give it a name because a Zig package can expose
    // multiple modules and consumers will need to be able to specify which
    // module they want to access.
    const mod = b.addModule("pos", .{
        // The root source file is the "entry point" of this module. Users of
        // this module will only be able to access public declarations contained
        // in this file, which means that if you have declarations that you
        // intend to expose to consumers that were defined in other files part
        // of this module, you will have to make sure to re-export them from
        // the root file.
        .root_source_file = b.path("src/root.zig"),
        // Later on we'll use this module as the root module of a test executable
        // which requires us to specify a target.
        .target = target,
    });
    mod.linkLibrary(lib);
    mod.addIncludePath(upstream.path("include"));

    // Here we define an executable. An executable needs to have a root module
    // which needs to expose a `main` function. While we could add a main function
    // to the module defined above, it's sometimes preferable to split the
    // business logic and the CLI into two separate modules.
    //
    // If your goal is to create a Zig library for others to use, consider if
    // it might benefit from also exposing a CLI tool. A parser library for a
    // data serialization format could also bundle a CLI syntax checker, for example.
    //
    // If instead your goal is to create an executable, consider if users might
    // be interested in also being able to embed the core functionality of your
    // program in their own executable in order to avoid the overhead involved in
    // subprocessing your CLI tool.
    //
    // If neither case applies to you, feel free to delete the declaration you
    // don't need and to put everything under a single module.
    const exe = b.addExecutable(.{
        .name = "pos",
        .root_module = b.createModule(.{
            // b.createModule defines a new module just like b.addModule but,
            // unlike b.addModule, it does not expose the module to consumers of
            // this package, which is why in this case we don't have to give it a name.
            .root_source_file = b.path("src/main.zig"),
            // Target and optimization levels must be explicitly wired in when
            // defining an executable or library (in the root module), and you
            // can also hardcode a specific target for an executable or library
            // definition if desirable (e.g. firmware for embedded devices).
            .target = target,
            .optimize = optimize,
            // List of modules available for import in source files part of the
            // root module.
            .imports = &.{
                // Here "pos" is the name you will use in your source code to
                // import this module (e.g. `@import("pos")`). The name is
                // repeated because you are allowed to rename your imports, which
                // can be extremely useful in case of collisions (which can happen
                // importing modules from different packages).
                .{ .name = "pos", .module = mod },
            },
        }),
    });

    // Copy data files to install directory
    const install_data = b.addInstallDirectory(.{
        .source_dir = upstream.path("data"),
        .install_dir = .bin,
        .install_subdir = "data",
    });
    install_data.step.dependOn(&download_link_step.step);

    // This declares intent for the executable to be installed into the
    // install prefix when running `zig build` (i.e. when executing the default
    // step). By default the install prefix is `zig-out/` but can be overridden
    // by passing `--prefix` or `-p`.
    b.installArtifact(exe);
    b.getInstallStep().dependOn(&install_data.step);

    // This creates a top level step. Top level steps have a name and can be
    // invoked by name when running `zig build` (e.g. `zig build run`).
    // This will evaluate the `run` step rather than the default step.
    // For a top level step to actually do something, it must depend on other
    // steps (e.g. a Run step, as we will see in a moment).
    const run_step = b.step("run", "Run the app");

    // This creates a RunArtifact step in the build graph. A RunArtifact step
    // invokes an executable compiled by Zig. Steps will only be executed by the
    // runner if invoked directly by the user (in the case of top level steps)
    // or if another step depends on it, so it's up to you to define when and
    // how this Run step will be executed. In our case we want to run it when
    // the user runs `zig build run`, so we create a dependency link.
    const run_cmd = b.addRunArtifact(exe);
    run_step.dependOn(&run_cmd.step);

    // By making the run step depend on the default step, it will be run from the
    // installation directory rather than directly from within the cache directory.
    run_cmd.step.dependOn(b.getInstallStep());

    // This allows the user to pass arguments to the application in the build
    // command itself, like this: `zig build run -- arg1 arg2 etc`
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    // Creates an executable that will run `test` blocks from the provided module.
    // Here `mod` needs to define a target, which is why earlier we made sure to
    // set the relative field.
    const mod_tests = b.addTest(.{
        .root_module = mod,
    });

    // A run step that will run the test executable.
    const run_mod_tests = b.addRunArtifact(mod_tests);
    run_mod_tests.setCwd(.{ .cwd_relative = b.getInstallPath(.bin, "") });
    run_mod_tests.step.dependOn(&install_data.step);

    // Creates an executable that will run `test` blocks from the executable's
    // root module. Note that test executables only test one module at a time,
    // hence why we have to create two separate ones.
    const exe_tests = b.addTest(.{
        .root_module = exe.root_module,
    });

    // A run step that will run the second test executable.
    const run_exe_tests = b.addRunArtifact(exe_tests);
    run_exe_tests.setCwd(.{ .cwd_relative = b.getInstallPath(.bin, "") });
    run_exe_tests.step.dependOn(&install_data.step);

    // A top level step for running all tests. dependOn can be called multiple
    // times and since the two run steps do not depend on one another, this will
    // make the two of them run in parallel.
    const test_step = b.step("test", "Run tests");
    // mod tests fail because we need to patch utilities.c with
    // search:
    //   printf(" Opening
    //
    // replace:
    //   if ( verbosity > 0 ) printf(" Opening
    //test_step.dependOn(&run_mod_tests.step);
    test_step.dependOn(&run_exe_tests.step);

    // Just like flags, top level steps are also listed in the `--help` menu.
    //
    // The Zig build system is entirely implemented in userland, which means
    // that it cannot hook into private compiler APIs. All compilation work
    // orchestrated by the build system will result in other Zig compiler
    // subcommands being invoked with the right flags defined. You can observe
    // these invocations when one fails (or you pass a flag to increase
    // verbosity) to validate assumptions and diagnose problems.
    //
    // Lastly, the Zig build system is relatively simple and self-contained,
    // and reading its source code will allow you to master it.
}

/// Returns a custom build step type that downloads the archive at `link` with
/// `curl` and unpacks it into a directory of the build cache keyed by a hash
/// of the URL.
///
/// `link` must be a valid URI ending in `.tar.gz` or `.zip`; anything else is
/// rejected at compile time.
fn DownloadStep(comptime link: []const u8) type {
    return struct {
        step: std.Build.Step,
        builder: *std.Build,

        const download_link = link;
        const download_uri = std.Uri.parse(link) catch @compileError("download link is not a valid Uri");
        /// Archive format, derived from the suffix of `link`.
        const file_type: enum {
            targz,
            zip,
        } = if (std.mem.endsWith(u8, link, ".tar.gz"))
            .targz
        else if (std.mem.endsWith(u8, link, ".zip"))
            .zip
        else
            @compileError("can only download tar.gz or zip files");

        const Self = @This();

        /// Handle returned by `dependency` for building paths into the
        /// extracted archive.
        const Dependency = struct {
            build_root: []const u8,
            build: *std.Build,
            download: *Self,

            /// Returns a `LazyPath` for `sub_path` inside the extracted archive
            /// directory. The path carries no step dependency, so consumers
            /// must depend on the download step themselves.
            pub fn path(self: Dependency, sub_path: []const u8) std.Build.LazyPath {
                const cache_path = (self.download.getOutputPath() catch @panic("OOM")).path;
                const full_path = std.fs.path.join(self.build.allocator, &.{ cache_path, sub_path }) catch @panic("OOM");
                return .{
                    .src_path = .{
                        .owner = self.build,
                        .sub_path = full_path,
                    },
                };
            }
        };

        /// Returns the last `/`-separated segment of the URI path.
        fn fileName(uri: std.Uri) []const u8 {
            const path = switch (uri.path) {
                .raw => |r| r,
                .percent_encoded => |p| p,
            };
            var it = std.mem.splitBackwardsScalar(u8, path, '/');
            return it.first();
        }

        /// Returns the archive file name with its `.tar.gz` / `.zip` suffix
        /// removed; this is the directory the archive is expected to unpack to.
        fn fileNameNoExtension() []const u8 {
            const file_name = fileName(download_uri);
            const extension = switch (file_type) {
                .targz => ".tar.gz",
                .zip => ".zip",
            };
            // Fall back to the whole name rather than unwrapping null when the
            // URI path itself does not carry the extension.
            const end = std.mem.lastIndexOf(u8, file_name, extension) orelse file_name.len;
            return file_name[0..end];
        }

        /// Hash of the download URL, used as the cache directory name.
        fn cacheHash() u64 {
            var hasher = std.hash.Wyhash.init(0);
            hasher.update(download_link);
            return hasher.final();
        }

        /// Returns the builder's cache root, using "." when the cache root has
        /// no path (i.e. it is the current working directory).
        fn cacheRoot(builder: *std.Build) []const u8 {
            return builder.cache_root.path orelse ".";
        }

        /// Allocates a new step on the builder's allocator. Panics on OOM.
        pub fn create(builder: *std.Build) *Self {
            const self = builder.allocator.create(Self) catch @panic("OOM");
            self.* = .{
                .step = std.Build.Step.init(.{
                    .id = .custom,
                    .name = "download-model",
                    .owner = builder,
                    .makeFn = make,
                }),
                .builder = builder,
            };
            return self;
        }

        /// Returns the directory the archive unpacks to
        /// (`<cache root>/o/<hash>/<archive name without extension>`) together
        /// with the URL hash. The path is allocated with the builder's allocator.
        pub fn getOutputPath(self: *Self) !struct { path: []const u8, hash: u64 } {
            const cache_hash = cacheHash();
            return .{
                .path = try std.fmt.allocPrint(
                    self.builder.allocator,
                    "{s}/o/{x}/{s}",
                    .{ cacheRoot(self.builder), cache_hash, fileNameNoExtension() },
                ),
                .hash = cache_hash,
            };
        }

        /// Step implementation: downloads and unpacks the archive unless the
        /// output directory already exists.
        ///
        /// Fails with `error.DownloadFailed` when curl cannot be run or does
        /// not exit with status 0, and with `error.UnzipFailed` /
        /// `error.ExtractFailed` when unpacking fails.
        fn make(step: *std.Build.Step, options: std.Build.Step.MakeOptions) anyerror!void {
            _ = options;
            const self: *Self = @fieldParentPtr("step", step);
            const allocator = self.builder.allocator;

            // The cache directory is keyed by a hash of the URL.
            const cache_dir = try std.fmt.allocPrint(
                allocator,
                "{s}/o/{x}",
                .{ cacheRoot(self.builder), cacheHash() },
            );
            defer allocator.free(cache_dir);

            const cached_model_dir = try std.fmt.allocPrint(
                allocator,
                "{s}/{s}",
                .{ cache_dir, fileNameNoExtension() },
            );
            defer allocator.free(cached_model_dir);

            // Check if already cached
            if (std.fs.cwd().access(cached_model_dir, .{})) |_| {
                step.result_cached = true;
                return;
            } else |_| {}

            // Not cached, need to download
            std.fs.cwd().makePath(cache_dir) catch @panic("Could not create cache directory");

            // Best-effort cleanup: a half-extracted directory would otherwise
            // be mistaken for a cache hit on the next build.
            errdefer std.fs.cwd().deleteTree(cached_model_dir) catch {};

            const archive = try std.fmt.allocPrint(
                allocator,
                "{s}/{s}",
                .{ cache_dir, fileName(download_uri) },
            );
            defer allocator.free(archive);

            // Download. `--fail` makes curl exit non-zero on HTTP errors
            // instead of saving the error page as the archive; `--location`
            // follows redirects.
            const download_result = std.process.Child.run(.{
                .allocator = allocator,
                .argv = &.{ "curl", "--silent", "--fail", "--location", "--output", archive, link },
            }) catch return error.DownloadFailed;
            defer allocator.free(download_result.stdout);
            defer allocator.free(download_result.stderr);

            // Switch on the termination kind: reading `.Exited` directly is
            // illegal when curl was killed by a signal.
            switch (download_result.term) {
                .Exited => |code| if (code != 0) return error.DownloadFailed,
                else => return error.DownloadFailed,
            }

            switch (file_type) {
                .zip => {
                    // Extract to cache using stdlib
                    var zip_file = std.fs.cwd().openFile(archive, .{}) catch return error.UnzipFailed;
                    defer zip_file.close();

                    var cache_dir_handle = std.fs.cwd().openDir(cache_dir, .{}) catch return error.UnzipFailed;
                    defer cache_dir_handle.close();

                    var zip_file_buffer: [4096]u8 = undefined;
                    var zip_file_reader = zip_file.reader(&zip_file_buffer);
                    std.zip.extract(cache_dir_handle, &zip_file_reader, .{}) catch return error.UnzipFailed;
                },
                .targz => {
                    var archive_file = std.fs.cwd().openFile(archive, .{}) catch return error.ExtractFailed;
                    defer archive_file.close();

                    var buf: [4096]u8 = undefined;
                    var file_reader = archive_file.reader(&buf);
                    const reader = &file_reader.interface;

                    var cache_dir_handle = std.fs.cwd().openDir(cache_dir, .{}) catch return error.ExtractFailed;
                    defer cache_dir_handle.close();

                    // gunzip on the fly and feed the tar stream to the extractor.
                    var gz_buf: [std.compress.flate.max_window_len]u8 = undefined;
                    var decompress = std.compress.flate.Decompress.init(reader, .gzip, &gz_buf);
                    std.tar.pipeToFileSystem(
                        cache_dir_handle,
                        &decompress.reader,
                        .{ .mode_mode = .ignore },
                    ) catch return error.ExtractFailed;
                },
            }
        }

        /// Returns a handle for building paths into the extracted archive.
        /// `args` is accepted for call-site symmetry with `b.dependency` and
        /// is ignored. Panics if the handle cannot be allocated.
        pub fn dependency(
            self: *Self,
            b: *std.Build,
            args: anytype,
        ) *Dependency {
            _ = args;
            const output = self.getOutputPath() catch @panic("cannot get output path");
            const dep = b.allocator.create(Dependency) catch @panic("OOM");
            dep.* = .{
                .download = self,
                .build = b,
                .build_root = output.path,
            };
            return dep;
        }
    };
}