472 lines
20 KiB
Zig
472 lines
20 KiB
Zig
const std = @import("std");
|
|
|
|
// Although this function looks imperative, it does not perform the build
// directly and instead it mutates the build graph (`b`) that will be then
// executed by an external runner. The functions in `std.Build` implement a DSL
// for defining build steps and express dependencies between them, allowing the
// build runner to parallelize the build automatically (and the cache system to
// know when a step doesn't need to be re-run).
//
// Overview of the graph defined here:
//   download link-4.1b -> patch utilities.c -> static C library "link"
//   -> Zig module "pos" -> executable "pos", plus `run` and `test` steps.
pub fn build(b: *std.Build) !void {
    // Standard target options allow the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting supported target set are available.
    const target = b.standardTargetOptions(.{});

    // Standard optimization options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
    // set a preferred release mode, allowing the user to decide how to optimize.
    const optimize = b.standardOptimizeOption(.{});

    // It's also possible to define more custom flags to toggle optional features
    // of this build script using `b.option()`. All defined flags (including
    // target and optimize options) will be listed when running `zig build --help`
    // in this directory.
    const long_tests = b.option(bool, "long-tests", "Run long-running tests") orelse false;

    // We need to use curl for this as the domain doesn't work with zig TLS
    const download_link_step = DownloadStep(
        "link-4.1b",
        "https://www.link.cs.cmu.edu/link/ftp-site/link-grammar/link-4.1b/unix/link-4.1b.tar.gz",
    ).create(b);

    const upstream = download_link_step.dependency(b, .{});

    // mod tests fail because the library has unconditional output to stdout,
    // which messes with the test runner. So...we need to patch utilities.c with search:
    //   printf(" Opening
    //
    // replace:
    //   if ( verbosity > 0 ) printf(" Opening
    //
    // Each entry is one exact search/replace pair handed to sed-lite. (Original
    // note: a fifth upstream occurrence, "pat5", is the same as the third
    // pattern, so it has no entry of its own.)
    const Substitutions = struct { orig: []const u8, new: []const u8 };
    const patterns: [4]Substitutions = .{
        .{
            .orig = "\tprintf(\" Opening %s\\n\", filename); ",
            .new = "\tif ( verbosity > 0 ) printf(\" Opening %s\\n\", filename); ",
        },
        .{
            .orig = "\tprintf(\" Opening %s\\n\", filename);",
            .new = "\tif ( verbosity > 0 ) printf(\" Opening %s\\n\", filename);",
        },
        .{
            .orig = "\t printf(\" Opening %s\\n\", completename); ",
            .new = "\t if ( verbosity > 0 ) printf(\" Opening %s\\n\", completename); ",
        },
        .{
            .orig = " printf(\" Opening %s\\n\", completename); ",
            .new = " if ( verbosity > 0 ) printf(\" Opening %s\\n\", completename); ",
        },
    };

    // Create sed-lite run step to patch utilities.c
    const sed_lite = b.dependency("sed_lite", .{});
    const sed_lite_exe = sed_lite.artifact("sed-lite");

    const patch_cmd = b.addRunArtifact(sed_lite_exe);
    patch_cmd.addArg("-sL");
    for (patterns) |s| {
        patch_cmd.addArg(s.orig);
        patch_cmd.addArg(s.new);
    }
    const util_src = try std.fs.path.join(b.allocator, &.{
        "src",
        "utilities.c",
    });
    patch_cmd.addFileArg(upstream.path(util_src));
    // The file to patch only exists once the archive has been unpacked.
    patch_cmd.step.dependOn(&download_link_step.step);

    const lib = b.addLibrary(.{
        .name = "link",
        .linkage = .static,
        .root_module = b.createModule(.{
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        }),
    });

    // Compile the C sources only after utilities.c has been patched.
    lib.step.dependOn(&patch_cmd.step);
    lib.addIncludePath(upstream.path("include"));

    lib.addCSourceFiles(.{
        .root = upstream.path("src"),
        .files = &.{
            "analyze-linkage.c",
            "and.c",
            "api.c",
            "build-disjuncts.c",
            "command-line.c",
            "constituents.c",
            "count.c",
            "error.c",
            "extract-links.c",
            "fast-match.c",
            "idiom.c",
            "linkset.c",
            "massage.c",
            "post-process.c",
            "pp_knowledge.c",
            "pp_lexer.c",
            "pp_linkset.c",
            "preparation.c",
            "print-util.c",
            "print.c",
            "prune.c",
            "read-dict.c",
            "resources.c",
            "string-set.c",
            "tokenize.c",
            "utilities.c",
            "word-file.c",
        },
        .flags = &.{
            "-O2",
            "-fwrapv",
        },
    });

    // This creates a module, which represents a collection of source files alongside
    // some compilation options, such as optimization mode and linked system libraries.
    // Zig modules are the preferred way of making Zig code available to consumers.
    // addModule defines a module that we intend to make available for importing
    // to our consumers. We must give it a name because a Zig package can expose
    // multiple modules and consumers will need to be able to specify which
    // module they want to access.
    const mod = b.addModule("pos", .{
        // The root source file is the "entry point" of this module. Users of
        // this module will only be able to access public declarations contained
        // in this file, which means that if you have declarations that you
        // intend to expose to consumers that were defined in other files part
        // of this module, you will have to make sure to re-export them from
        // the root file.
        .root_source_file = b.path("src/root.zig"),
        // Later on we'll use this module as the root module of a test executable
        // which requires us to specify a target.
        .target = target,
    });
    const options = b.addOptions();
    options.addOption(bool, "long_tests", long_tests);

    // Embed a version string taken from git. `b.run` would abort the whole
    // build when git is unavailable or the sources are not a git checkout
    // (for example when this package is fetched as a dependency), so failures
    // fall back to a placeholder instead.
    // NOTE(review): the command output is passed through untrimmed, so it
    // presumably carries git's trailing newline — confirm consumers expect that.
    var git_exit_code: u8 = undefined;
    const git_describe: []const u8 = b.runAllowFail(
        &.{ "git", "describe", "--always", "--dirty" },
        &git_exit_code,
        .Ignore,
    ) catch "unknown";
    options.addOption([]const u8, "version", git_describe);

    const options_module = options.createModule();
    mod.addImport("build_options", options_module);
    mod.linkLibrary(lib);
    mod.addIncludePath(upstream.path("include"));

    // Here we define an executable. An executable needs to have a root module
    // which needs to expose a `main` function. While we could add a main function
    // to the module defined above, it's sometimes preferable to split the
    // business logic and the CLI into two separate modules.
    //
    // If your goal is to create a Zig library for others to use, consider if
    // it might benefit from also exposing a CLI tool. A parser library for a
    // data serialization format could also bundle a CLI syntax checker, for example.
    //
    // If instead your goal is to create an executable, consider if users might
    // be interested in also being able to embed the core functionality of your
    // program in their own executable in order to avoid the overhead involved in
    // subprocessing your CLI tool.
    //
    // If neither case applies to you, feel free to delete the declaration you
    // don't need and to put everything under a single module.
    const exe = b.addExecutable(.{
        .name = "pos",
        .root_module = b.createModule(.{
            // b.createModule defines a new module just like b.addModule but,
            // unlike b.addModule, it does not expose the module to consumers of
            // this package, which is why in this case we don't have to give it a name.
            .root_source_file = b.path("src/main.zig"),
            // Target and optimization levels must be explicitly wired in when
            // defining an executable or library (in the root module), and you
            // can also hardcode a specific target for an executable or library
            // definition if desirable (e.g. firmware for embedded devices).
            .target = target,
            .optimize = optimize,
            // List of modules available for import in source files part of the
            // root module.
            .imports = &.{
                // Here "pos" is the name you will use in your source code to
                // import this module (e.g. `@import("pos")`). The name is
                // repeated because you are allowed to rename your imports, which
                // can be extremely useful in case of collisions (which can happen
                // importing modules from different packages).
                .{ .name = "pos", .module = mod },
                .{ .name = "build_options", .module = options_module },
            },
        }),
    });

    // Copy data files to install directory
    const install_data = b.addInstallDirectory(.{
        .source_dir = upstream.path("data"),
        .install_dir = .{ .custom = "share" },
        .install_subdir = "link",
    });
    // The data directory comes out of the downloaded archive.
    install_data.step.dependOn(&download_link_step.step);

    // This declares intent for the executable to be installed into the
    // install prefix when running `zig build` (i.e. when executing the default
    // step). By default the install prefix is `zig-out/` but can be overridden
    // by passing `--prefix` or `-p`.
    b.installArtifact(exe);
    b.getInstallStep().dependOn(&install_data.step);

    // This creates a top level step. Top level steps have a name and can be
    // invoked by name when running `zig build` (e.g. `zig build run`).
    // This will evaluate the `run` step rather than the default step.
    // For a top level step to actually do something, it must depend on other
    // steps (e.g. a Run step, as we will see in a moment).
    const run_step = b.step("run", "Run the app");

    // This creates a RunArtifact step in the build graph. A RunArtifact step
    // invokes an executable compiled by Zig. Steps will only be executed by the
    // runner if invoked directly by the user (in the case of top level steps)
    // or if another step depends on it, so it's up to you to define when and
    // how this Run step will be executed. In our case we want to run it when
    // the user runs `zig build run`, so we create a dependency link.
    const run_cmd = b.addRunArtifact(exe);
    run_step.dependOn(&run_cmd.step);

    // By making the run step depend on the default step, it will be run from the
    // installation directory rather than directly from within the cache directory.
    run_cmd.step.dependOn(b.getInstallStep());

    // This allows the user to pass arguments to the application in the build
    // command itself, like this: `zig build run -- arg1 arg2 etc`
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    // Creates an executable that will run `test` blocks from the provided module.
    // Here `mod` needs to define a target, which is why earlier we made sure to
    // set the relevant field.
    const mod_tests = b.addTest(.{
        .root_module = mod,
    });

    // A run step that will run the test executable. Tests run from the install
    // `bin` directory and need the installed data files.
    // NOTE(review): only the data install is a dependency here — confirm the
    // `bin` directory is guaranteed to exist on a clean `zig build test`.
    const run_mod_tests = b.addRunArtifact(mod_tests);
    run_mod_tests.setCwd(.{ .cwd_relative = b.getInstallPath(.bin, "") });
    run_mod_tests.step.dependOn(&install_data.step);

    // Creates an executable that will run `test` blocks from the executable's
    // root module. Note that test executables only test one module at a time,
    // hence why we have to create two separate ones.
    const exe_tests = b.addTest(.{
        .root_module = exe.root_module,
    });

    // A run step that will run the second test executable.
    const run_exe_tests = b.addRunArtifact(exe_tests);
    run_exe_tests.setCwd(.{ .cwd_relative = b.getInstallPath(.bin, "") });
    run_exe_tests.step.dependOn(&install_data.step);

    // A top level step for running all tests. dependOn can be called multiple
    // times and since the two run steps do not depend on one another, this will
    // make the two of them run in parallel.
    const test_step = b.step("test", "Run tests");

    test_step.dependOn(&run_mod_tests.step);
    test_step.dependOn(&run_exe_tests.step);

    // Just like flags, top level steps are also listed in the `--help` menu.
    //
    // The Zig build system is entirely implemented in userland, which means
    // that it cannot hook into private compiler APIs. All compilation work
    // orchestrated by the build system will result in other Zig compiler
    // subcommands being invoked with the right flags defined. You can observe
    // these invocations when one fails (or you pass a flag to increase
    // verbosity) to validate assumptions and diagnose problems.
    //
    // Lastly, the Zig build system is relatively simple and self-contained,
    // and reading its source code will allow you to master it.
}
|
|
|
|
/// Returns a custom build-step type that downloads the archive at `link` with
/// `curl` and unpacks it into the build cache.
///
/// `name` only labels the step ("download <name>"). `link` must be a valid URI
/// ending in `.tar.gz` or `.zip`; anything else is a compile error. The step
/// treats `<cache>/o/<sha256(link) hex>/<archive name without extension>` as
/// the unpacked tree, i.e. it relies on the archive containing a top-level
/// directory named after the archive file.
fn DownloadStep(comptime name: []const u8, comptime link: []const u8) type {
    return struct {
        step: std.Build.Step,
        builder: *std.Build,

        const download_link = link;
        const download_uri = std.Uri.parse(link) catch @compileError("download link is not a valid Uri");
        const file_type: enum {
            targz,
            zip,
        } = if (std.mem.endsWith(u8, link, ".tar.gz"))
            .targz
        else if (std.mem.endsWith(u8, link, ".zip"))
            // Match the full ".zip" suffix: `fileNameNoExtension` looks for
            // exactly that string and would fail on e.g. "foozip".
            .zip
        else
            @compileError("can only download tar.gz or zip files");

        const Self = @This();

        /// Handle for addressing files inside the unpacked archive.
        const Dependency = struct {
            /// Unpacked archive directory as computed when `dependency` was called.
            build_root: []const u8,
            build: *std.Build,
            download: *Self,

            /// Returns a `LazyPath` for `sub_path` inside the unpacked archive.
            /// The path carries no step dependency; callers must make their
            /// steps depend on the download step themselves.
            pub fn path(self: Dependency, sub_path: []const u8) std.Build.LazyPath {
                const cache_path = (self.download.getOutputPath() catch @panic("OOM")).path;
                const full_path = std.fs.path.join(self.build.allocator, &.{ cache_path, sub_path }) catch @panic("OOM");
                return .{
                    .src_path = .{
                        .owner = self.build,
                        .sub_path = full_path,
                    },
                };
            }
        };

        /// Last segment of the URI path, e.g. "link-4.1b.tar.gz".
        fn fileName(uri: std.Uri) []const u8 {
            const path = switch (uri.path) {
                .raw => |r| r,
                .percent_encoded => |p| p,
            };
            // URI paths are always separated by '/', independent of the host
            // platform's file-system separator.
            var it = std.mem.splitBackwardsScalar(u8, path, '/');
            return it.first();
        }

        /// Archive file name with the `.tar.gz` / `.zip` suffix removed.
        fn fileNameNoExtension() []const u8 {
            const file_name = fileName(download_uri);
            return switch (file_type) {
                .targz => file_name[0..std.mem.lastIndexOf(u8, file_name, ".tar.gz").?],
                .zip => file_name[0..std.mem.lastIndexOf(u8, file_name, ".zip").?],
            };
        }

        /// Allocates the step with the builder's allocator and registers
        /// `make` as its make function.
        pub fn create(builder: *std.Build) *Self {
            const self = builder.allocator.create(Self) catch @panic("OOM");
            self.* = .{
                .step = std.Build.Step.init(.{
                    .id = .custom,
                    .name = "download " ++ name,
                    .owner = builder,
                    .makeFn = make,
                }),
                .builder = builder,
            };
            return self;
        }

        const Algo = std.crypto.hash.sha2.Sha256;

        /// SHA-256 of the download URL; keys the cache directory.
        fn linkDigest() [Algo.digest_length]u8 {
            var digest: [Algo.digest_length]u8 = undefined;
            Algo.hash(download_link, &digest, .{});
            return digest;
        }

        /// `<cache root>/o/<hex digest>`: the directory the archive is
        /// downloaded to and extracted in. Shared by `getOutputPath` and
        /// `make` so both always agree on the location.
        fn cacheDir(self: *Self, digest: [Algo.digest_length]u8) ![]const u8 {
            const hex = std.fmt.bytesToHex(digest, .lower);
            // `cache_root.join` also copes with a cache root whose `path` is null.
            return try self.builder.cache_root.join(
                self.builder.allocator,
                &[_][]const u8{ "o", &hex },
            );
        }

        /// Computes where the unpacked archive lives (or will live) together
        /// with the URL digest. Performs no file-system access; the directory
        /// only exists after the step has run. The returned path is allocated
        /// with the builder's allocator.
        pub fn getOutputPath(self: *Self) !struct { path: []const u8, hash: [Algo.digest_length]u8 } {
            const digest = linkDigest();
            const cache_dir = try self.cacheDir(digest);
            const output_dir = try std.fs.path.join(
                self.builder.allocator,
                &[_][]const u8{ cache_dir, fileNameNoExtension() },
            );

            return .{
                .path = output_dir,
                .hash = digest,
            };
        }

        /// Make function of the step: downloads and unpacks the archive unless
        /// the unpacked directory already exists in the cache.
        ///
        /// Fails with `error.DownloadFailed` when curl cannot be run or does
        /// not exit successfully, and with `error.UnzipFailed` /
        /// `error.ExtractFailed` when unpacking fails.
        fn make(step: *std.Build.Step, options: std.Build.Step.MakeOptions) anyerror!void {
            _ = options;
            const self: *Self = @fieldParentPtr("step", step);
            const allocator = self.builder.allocator;

            const cache_dir = try self.cacheDir(linkDigest());
            const cached_model_dir = try std.fs.path.join(
                allocator,
                &[_][]const u8{ cache_dir, fileNameNoExtension() },
            );
            defer allocator.free(cached_model_dir);

            // Check if already cached. Any access error is treated as
            // "not cached" and simply triggers a fresh download.
            if (std.fs.cwd().access(cached_model_dir, .{})) |_| {
                step.result_cached = true;
                return;
            } else |_| {}

            // Not cached, need to download
            try std.fs.cwd().makePath(cache_dir);

            // The mere existence of the unpacked directory is the cache marker,
            // so a failed run must not leave a partial tree behind. Cleanup is
            // best effort; the original error is what gets reported.
            errdefer std.fs.cwd().deleteTree(cached_model_dir) catch {};

            const archive = try std.fs.path.join(
                allocator,
                &[_][]const u8{ cache_dir, fileName(download_uri) },
            );
            defer allocator.free(archive);

            // Download. `-f` makes curl fail on HTTP errors instead of saving
            // the error page as the archive, `-L` follows redirects, `-sS`
            // stays quiet except for error messages.
            const download_result = std.process.Child.run(.{
                .allocator = allocator,
                .argv = &.{ "curl", "-fsSL", "-o", archive, link },
            }) catch return error.DownloadFailed;
            defer allocator.free(download_result.stdout);
            defer allocator.free(download_result.stderr);

            // Only a normal exit with status 0 counts as success; curl being
            // killed by a signal must not be read through the `.Exited` field.
            switch (download_result.term) {
                .Exited => |code| if (code != 0) return error.DownloadFailed,
                else => return error.DownloadFailed,
            }

            switch (file_type) {
                .zip => {
                    // Extract to cache using stdlib
                    var zip_file = std.fs.cwd().openFile(archive, .{}) catch return error.UnzipFailed;
                    defer zip_file.close();

                    var cache_dir_handle = std.fs.cwd().openDir(cache_dir, .{}) catch return error.UnzipFailed;
                    defer cache_dir_handle.close();

                    var zip_file_buffer: [4096]u8 = undefined;
                    var zip_file_reader = zip_file.reader(&zip_file_buffer);

                    std.zip.extract(cache_dir_handle, &zip_file_reader, .{}) catch return error.UnzipFailed;
                },
                .targz => {
                    var archive_file = std.fs.cwd().openFile(archive, .{}) catch return error.ExtractFailed;
                    defer archive_file.close();

                    var buf: [4096]u8 = undefined;
                    var file_reader = archive_file.reader(&buf);
                    const reader = &file_reader.interface;

                    var cache_dir_handle = std.fs.cwd().openDir(cache_dir, .{}) catch return error.ExtractFailed;
                    defer cache_dir_handle.close();

                    // gunzip on the fly and feed the tar stream to the extractor.
                    var gz_buf: [std.compress.flate.max_window_len]u8 = undefined;
                    var decompress = std.compress.flate.Decompress.init(reader, .gzip, &gz_buf);
                    std.tar.pipeToFileSystem(
                        cache_dir_handle,
                        &decompress.reader,
                        .{ .mode_mode = .ignore },
                    ) catch return error.ExtractFailed;
                },
            }
        }

        /// Creates a `Dependency` handle for this download, allocated with
        /// `b`'s allocator. `args` is accepted but unused.
        pub fn dependency(
            self: *Self,
            b: *std.Build,
            args: anytype,
        ) *Dependency {
            _ = args;
            const output = self.getOutputPath() catch @panic("cannot get output path");
            const dep = b.allocator.create(Dependency) catch @panic("OOM");

            dep.* = .{
                .download = self,
                .build = b,
                .build_root = output.path,
            };
            return dep;
        }

        // NOTE(review): not referenced anywhere in the visible part of this
        // file — confirm whether it is still needed.
        const UserValue = union(enum) {
            flag: void,
            scalar: []const u8,
            list: std.array_list.Managed([]const u8),
            map: std.StringHashMap(*const UserValue),
            lazy_path: std.Build.LazyPath,
            lazy_path_list: std.array_list.Managed(std.Build.LazyPath),
        };
    };
}
|