diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7d9efed --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM debian:bullseye + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + libfontconfig-dev \ + ca-certificates \ + curl \ + xz-utils \ + && curl https://mirror.bazel.build/ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.3886+0c1bfe271.tar.xz | tar -C /usr/local/ -xJ \ + && apt-get -y remove curl xz-utils \ + && ln -s /usr/local/zig*/zig /usr/local/bin \ + && rm -rf /var/lib/apt/lists/* + +ENTRYPOINT ["/usr/local/bin/zig"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a339f72 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Emil Lerch + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..3588a9a
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,88 @@
+const std = @import("std");
+
+// Although this function looks imperative, note that its job is to
+// declaratively construct a build graph that will be executed by an external
+// runner.
+pub fn build(b: *std.Build) void {
+    // Standard target options allows the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting supported target set are available.
+    const target = b.standardTargetOptions(.{});
+
+    // Standard optimization options allow the person running `zig build` to select
+    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
+    // set a preferred release mode, allowing the user to decide how to optimize.
+    const optimize = b.standardOptimizeOption(.{});
+
+    const exe = b.addExecutable(.{
+        .name = "fontfinder",
+        // In this case the main source file is merely a path, however, in more
+        // complicated build scripts, this could be a generated file.
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+
+    configure(exe);
+
+    // This declares intent for the executable to be installed into the
+    // standard location when the user invokes the "install" step (the default
+    // step when running `zig build`).
+    b.installArtifact(exe);
+
+    // This *creates* a Run step in the build graph, to be executed when another
+    // step is evaluated that depends on it. The next line below will establish
+    // such a dependency.
+    const run_cmd = b.addRunArtifact(exe);
+
+    // By making the run step depend on the install step, it will be run from the
+    // installation directory rather than directly from within the cache directory.
+    // This is not necessary, however, if the application depends on other installed
+    // files, this ensures they will be present and in the expected location.
+    run_cmd.step.dependOn(b.getInstallStep());
+
+    // This allows the user to pass arguments to the application in the build
+    // command itself, like this: `zig build run -- arg1 arg2 etc`
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    // This creates a build step. It will be visible in the `zig build --help` menu,
+    // and can be selected like this: `zig build run`
+    // This will evaluate the `run` step rather than the default, which is "install".
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    // Creates a step for unit testing. This only builds the test executable
+    // but does not run it.
+    const unit_tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+
+    configure(unit_tests);
+
+    const run_unit_tests = b.addRunArtifact(unit_tests);
+
+    // Similar to creating the run step earlier, this exposes a `test` step to
+    // the `zig build --help` menu, providing a way for the user to request
+    // running the unit tests.
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_unit_tests.step);
+}
+
+/// Applies the settings shared by the executable and the unit tests: libc,
+/// the system fontconfig library, and the C shim in src/fontconfig.c.
+fn configure(object: anytype) void {
+    // object.linkage = .static;
+    object.linkLibC();
+
+    // Fontconfig must be installed. Docker can also be used (see Dockerfile)
+    object.addSystemIncludePath("/usr/include");
+    object.linkSystemLibrary("fontconfig");
+    // object.linkSystemLibrary("expat"); // fontconfig dependency - needed for static builds
+    object.addLibraryPath("/usr/lib");
+    object.addCSourceFile("src/fontconfig.c", &[_][]const u8{"-std=c99"});
+}
diff --git a/src/core b/src/core
new file mode 100644
index 0000000..b2ddecc
Binary files /dev/null and b/src/core differ
diff --git a/src/fontconfig.c b/src/fontconfig.c
new file mode 100644
index 0000000..c123852
--- /dev/null
+++ b/src/fontconfig.c
@@ -0,0 +1,53 @@
+#include <fontconfig/fontconfig.h>
+#include <stdlib.h>
+
+/* Highest code point probed, inclusive (U+10FFFE and U+10FFFF are Unicode
+ * noncharacters). */
+const FcChar32 MAX_UNICODE = 0x10FFFD;
+
+/* Releases an array handed out by allCharacters(). Passing NULL is a no-op. */
+void freeAllCharacters(unsigned int *chars) {
+  free(chars);
+}
+
+/*
+ * Lists every code point a font pattern supports, in ascending order.
+ *
+ * fontPattern: an FcPattern*, typed void* so callers need no fontconfig types.
+ * chars:       receives a malloc'd array to release with freeAllCharacters();
+ *              set to NULL whenever a negative value is returned.
+ *
+ * Returns the number of code points stored, or a negative value on failure:
+ * -1 the pattern has no charset, -2 the charset reports code points above
+ * MAX_UNICODE, -3 the array could not be allocated.
+ */
+int allCharacters(void* fontPattern, FcChar32 ** chars) {
+  FcPattern* pat = (FcPattern*) fontPattern;
+  FcCharSet* charset;
+  *chars = NULL;
+  if (FcPatternGetCharSet(pat, FC_CHARSET, 0, &charset) != FcResultMatch) {
+    return -1;
+  }
+  FcChar32 count = FcCharSetCount(charset);
+  /* Request at least one element: malloc(0) may legitimately return NULL,
+   * which would be indistinguishable from an allocation failure. */
+  FcChar32* char_array = (FcChar32*)malloc((count ? count : 1) * sizeof(FcChar32));
+  if (char_array == NULL) {
+    return -3;
+  }
+
+  FcChar32 found = 0;
+  for (FcChar32 inx = 0; found < count && inx <= MAX_UNICODE; inx++) {
+    if (FcCharSetHasChar(charset, inx) == FcTrue) {
+      char_array[found++] = inx;
+    }
+  }
+  /* No FcCharSetDestroy here: FcPatternGetCharSet returns the charset stored
+   * in the pattern, not a copy, so the pattern still owns it. */
+  if (found < count) {
+    free(char_array);
+    return -2;
+  }
+  *chars = char_array;
+  return (int)found;
+}
diff --git
a/src/fontconfig.zig b/src/fontconfig.zig
new file mode 100644
index 0000000..3cf5c62
--- /dev/null
+++ b/src/fontconfig.zig
@@ -0,0 +1,317 @@
+const std = @import("std");
+const unicode = @import("unicode.zig");
+const c = @cImport({
+    @cInclude("fontconfig/fontconfig.h");
+});
+const log = std.log.scoped(.fontconfig);
+
+// Implemented in fontconfig.c. allCharacters hands back a malloc'd array that
+// must be released with freeAllCharacters.
+extern fn allCharacters(p: ?*const c.FcPattern, chars: *[*]u32) c_int;
+extern fn freeAllCharacters(chars: [*]u32) void;
+
+/// A contiguous, inclusive run of code points covered by one font.
+pub const RangeFont = struct {
+    starting_codepoint: u21,
+    ending_codepoint: u21,
+    font: Font,
+};
+
+/// One font. For fonts produced by `FontQuery.fontList` the name slices point
+/// into fontconfig-owned data and are only valid while the FontList is alive.
+pub const Font = struct {
+    full_name: []const u8,
+    family: []const u8,
+    style: []const u8,
+    /// Supported code points in ascending order
+    supported_chars: []const u21,
+
+    const Self = @This();
+
+    /// Releases `supported_chars`. Only valid for fonts whose charset came
+    /// from `FontQuery.fontList`; `FontList.deinit` already does this for
+    /// every font it holds, so do not call both.
+    pub fn deinit(self: *Self) void {
+        freeAllCharacters(@ptrCast(@constCast(self.supported_chars.ptr)));
+    }
+};
+
+/// The fonts returned by one `FontQuery.fontList` call, together with the
+/// fontconfig objects that back their name strings.
+pub const FontList = struct {
+    list: std.ArrayList(Font),
+    allocator: std.mem.Allocator,
+    pattern: *c.FcPattern,
+    fontset: *c.FcFontSet,
+
+    const Self = @This();
+
+    /// Creates an empty list with room for `num` fonts. On success the list
+    /// owns `pattern` and `fontset`; on failure they stay with the caller.
+    pub fn initCapacity(allocator: std.mem.Allocator, num: usize, pattern: *c.FcPattern, fontset: *c.FcFontSet) std.mem.Allocator.Error!Self {
+        return Self{
+            .allocator = allocator,
+            .list = try std.ArrayList(Font).initCapacity(allocator, num),
+            .pattern = pattern,
+            .fontset = fontset,
+        };
+    }
+
+    /// Frees every font's charset, then the pattern, the font set and the
+    /// list itself. Fonts copied out of the list are invalid afterwards.
+    pub fn deinit(self: *Self) void {
+        for (self.list.items) |*font| font.deinit();
+        c.FcPatternDestroy(self.pattern);
+        c.FcFontSetDestroy(self.fontset);
+        self.list.deinit();
+    }
+
+    /// Appends a font without allocating; the capacity must have been
+    /// reserved by `initCapacity`. The list takes over `supported_chars`.
+    pub fn addFontAssumeCapacity(
+        self: *Self,
+        full_name: []const u8,
+        family: []const u8,
+        style: []const u8,
+        supported_chars: []const u21,
+    ) !void {
+        self.list.appendAssumeCapacity(.{
+            .full_name = full_name,
+            .family = family,
+            .style = style,
+            .supported_chars = supported_chars,
+        });
+    }
+};
+
+var fc_config: ?*c.FcConfig = null;
+var deinited = false;
+
+/// De-initializes the underlying c library. Should only be called
+/// after all processing has completed
+pub fn deinit() void {
+    // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r2370.html
+    // Says that "Note that calling this function with the return from FcConfigGetCurrent will place the library in an indeterminate state."
+    // However, it seems as though you can't do this either:
+    //
+    // 1. c.FcInitLoadConfigAndFonts();
+    // 2. c.FcConfigDestroy();
+    // 3. c.FcInitLoadConfigAndFonts();
+    // 4. c.FcConfigDestroy(); // Seg fault here
+    if (deinited) @panic("Cannot deinitialize this library more than once");
+    deinited = true;
+    if (fc_config) |conf| {
+        log.debug("destroying config: do not use library or call me again", .{});
+        c.FcConfigDestroy(conf);
+    }
+    fc_config = null;
+}
+
+/// Queries fontconfig for installed fonts and maps code point ranges to them.
+pub const FontQuery = struct {
+    allocator: std.mem.Allocator,
+
+    const Self = @This();
+
+    pub fn init(allocator: std.mem.Allocator) Self {
+        return Self{
+            .allocator = allocator,
+        };
+    }
+
+    /// Nothing to release; present so callers can use the usual init/deinit pairing.
+    pub fn deinit(self: *Self) void {
+        _ = self;
+    }
+
+    /// Lists the installed fonts matching `pattern`, a fontconfig name
+    /// pattern such as ":regular:normal:spacing=100:slant=0".
+    ///
+    /// The library is initialized on first use and stays loaded until the
+    /// module-level `deinit`. Caller owns the returned list and releases it
+    /// with `FontList.deinit`.
+    pub fn fontList(self: *Self, pattern: [:0]const u8) !FontList {
+        if (fc_config == null and deinited) @panic("fontconfig C library is in an inconsistent state - should not use");
+        if (fc_config == null) fc_config = c.FcInitLoadConfigAndFonts();
+        const config = if (fc_config) |conf| conf else return error.FontConfigInitLoadFailure;
+
+        // The pattern and font set must outlive this call because the font
+        // names handed back point into them; FontList takes them over below.
+        const parsed = c.FcNameParse(pattern);
+        if (parsed == null) return error.FontConfigCouldNotParsePattern;
+        const pat: *c.FcPattern = parsed.?;
+        var owned_by_list = false;
+        errdefer if (!owned_by_list) c.FcPatternDestroy(pat);
+
+        const os = c.FcObjectSetBuild(c.FC_FAMILY, c.FC_STYLE, c.FC_LANG, c.FC_FULLNAME, c.FC_CHARSET, @as(?*u8, null)); // *FcObjectSet
+        defer if (os != null) c.FcObjectSetDestroy(os);
+        const listed = c.FcFontList(config, pat, os); // FcFontSet
+        if (listed == null) return error.FontConfigCouldNotListFonts;
+        const fs: *c.FcFontSet = listed.?;
+        errdefer if (!owned_by_list) c.FcFontSetDestroy(fs);
+
+        const font_count = @as(usize, @intCast(fs.nfont));
+        log.debug("Total matching fonts: {d}", .{font_count});
+        var rc = try FontList.initCapacity(self.allocator, font_count, pat, fs);
+        owned_by_list = true;
+        errdefer rc.deinit();
+        for (0..font_count) |i| {
+            const font = fs.fonts[i].?; // *FcPattern
+            var fullname: [*:0]c.FcChar8 = undefined;
+            var style: [*:0]c.FcChar8 = undefined;
+            var family: [*:0]c.FcChar8 = undefined;
+
+            var charset: [*]u21 = undefined;
+            const len = allCharacters(font, @ptrCast(&charset));
+            if (len < 0) return error.FontConfigCouldNotGetCharSet;
+            // Not in the list yet, so rc.deinit() would miss it on the error returns below
+            errdefer freeAllCharacters(@ptrCast(charset));
+
+            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r600.html
+            // Note that these (like FcPatternGet) do not make a copy of any data structure referenced by the return value
+            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r570.html
+            // The value returned is not a copy, but rather refers to the data stored within the pattern directly. Applications must not free this value.
+            if (c.FcPatternGetString(font, c.FC_FULLNAME, 0, @as([*c][*c]c.FcChar8, @ptrCast(&fullname))) != c.FcResultMatch)
+                fullname = @constCast(@ptrCast("".ptr));
+
+            if (c.FcPatternGetString(font, c.FC_FAMILY, 0, @as([*c][*c]c.FcChar8, @ptrCast(&family))) != c.FcResultMatch)
+                return error.FontConfigHasNoFamily;
+            if (c.FcPatternGetString(font, c.FC_STYLE, 0, @as([*c][*c]c.FcChar8, @ptrCast(&style))) != c.FcResultMatch)
+                return error.FontConfigHasNoStyle;
+
+            log.debug(
+                "Chars: {d:5.0} Family '{s}' Style '{s}' Full Name: {s}",
+                .{ @as(usize, @intCast(len)), family, style, fullname },
+            );
+
+            try rc.addFontAssumeCapacity(
+                fullname[0..std.mem.len(fullname)],
+                family[0..std.mem.len(family)],
+                style[0..std.mem.len(style)],
+                charset[0..@as(usize, @intCast(len))],
+            );
+        }
+        return rc;
+    }
+
+    /// Splits the inclusive range `starting_codepoint`..`ending_codepoint`
+    /// into contiguous runs, one entry per run per font, visiting `fonts`
+    /// in order.
+    ///
+    /// With `exclude_previous`, a code point already claimed by an earlier
+    /// font is not reported again for later ones. Each font's
+    /// `supported_chars` is expected to be sorted ascending.
+    /// Caller owns the returned slice (allocated with the query's allocator).
+    pub fn fontsForRange(
+        self: *Self,
+        starting_codepoint: u21,
+        ending_codepoint: u21,
+        fonts: []const Font,
+        exclude_previous: bool,
+    ) ![]RangeFont {
+        var rc = std.ArrayList(RangeFont).init(self.allocator);
+        defer rc.deinit();
+
+        // An inverted range selects nothing (and would underflow the size below)
+        if (ending_codepoint < starting_codepoint) return rc.toOwnedSlice();
+
+        // One flag per code point of the range, indexed by offset from starting_codepoint
+        const previously_supported: ?[]bool = if (exclude_previous) blk: {
+            const flags = try self.allocator.alloc(bool, @as(usize, ending_codepoint - starting_codepoint) + 1);
+            for (flags) |*flag| flag.* = false;
+            break :blk flags;
+        } else null;
+        defer if (previously_supported) |p| self.allocator.free(p);
+
+        for (fonts) |font| {
+            const chars = font.supported_chars;
+            var inx: usize = 0;
+
+            // Advance to the start of the range
+            while (inx < chars.len and chars[inx] < starting_codepoint)
+                inx += 1;
+
+            while (inx < chars.len and chars[inx] <= ending_codepoint) {
+                if (previously_supported) |p| {
+                    if (p[chars[inx] - starting_codepoint]) {
+                        inx += 1;
+                        continue; // This was already supported - continue
+                    }
+                    p[chars[inx] - starting_codepoint] = true;
+                }
+                // We found the beginning of a range
+                const current_start = chars[inx];
+                var current_end = current_start;
+
+                // Advance to the next supported character, then start checking for continuous ranges
+                inx += 1;
+                while (inx < chars.len and
+                    chars[inx] <= ending_codepoint and
+                    chars[inx] == current_end + 1)
+                {
+                    if (previously_supported) |p| {
+                        if (p[chars[inx] - starting_codepoint]) break;
+                        p[chars[inx] - starting_codepoint] = true;
+                    }
+                    inx += 1;
+                    current_end += 1;
+                }
+
+                // We've found the end of the range (which could be the end of a group)
+                // If we have not hit the stops, inx at this point is at the beginning of
+                // a new range
+                try rc.append(.{
+                    .font = font,
+                    .starting_codepoint = current_start,
+                    .ending_codepoint = current_end,
+                });
+            }
+        }
+        return rc.toOwnedSlice();
+    }
+};
+
+test {
+    std.testing.refAllDecls(@This()); // Only catches public decls
+}
+test "Get fonts" {
+    // std.testing.log_level = .debug;
+    log.debug("get fonts", .{});
+    var fq = FontQuery.init(std.testing.allocator);
+    defer fq.deinit();
+    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
+    defer fl.deinit();
+    try std.testing.expect(fl.list.items.len > 0);
+    var matched = blk: {
+        for (fl.list.items) |item| {
+            log.debug("full_name: '{s}'", .{item.full_name});
+            if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name))
+                break :blk item;
+        }
+        break :blk null;
+    };
+    try std.testing.expect(matched != null);
+    try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
+}
+test "fontsForRange handles a range that does not start at zero" {
+    var fq = FontQuery.init(std.testing.allocator);
+    defer fq.deinit();
+    const first = Font{ .full_name = "First", .family = "First", .style = "Regular", .supported_chars = &[_]u21{ 0xE01, 0xE02, 0xE5B } };
+    const second = Font{ .full_name = "Second", .family = "Second", .style = "Regular", .supported_chars = &[_]u21{ 0xE02, 0xE03, 0xE5B, 0xE5C } };
+    const ranges = try fq.fontsForRange(0xE01, 0xE5B, &[_]Font{ first, second }, true);
+    defer std.testing.allocator.free(ranges);
+    try std.testing.expectEqual(@as(usize, 3), ranges.len);
+    // The first font claims U+E01-U+E02 and the inclusive end U+E5B
+    try std.testing.expectEqual(@as(u21, 0xE01), ranges[0].starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0xE02), ranges[0].ending_codepoint);
+    try std.testing.expectEqual(@as(u21, 0xE5B), ranges[1].starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0xE5B), ranges[1].ending_codepoint);
+    // The second font only gets what is left inside the range
+    try std.testing.expectEqualStrings("Second", ranges[2].font.family);
+    try std.testing.expectEqual(@as(u21, 0xE03), ranges[2].starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0xE03), ranges[2].ending_codepoint);
+}
+test {
+    // Last test in this file: shuts the C library down once for the whole test run
+    deinit();
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..92e528b
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,391 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const unicode = @import("unicode.zig");
+const fontconfig = @import("fontconfig.zig");
+
+const max_unicode: u21 = 0x10FFFD;
+/// Every code point from 0 through `max_unicode`; charset of the "Unsupported" catch-all font
+const all_chars = blk: {
+    var all: [max_unicode + 1]u21 = undefined;
+    @setEvalBranchQuota(2 * all.len);
+    for (0..all.len) |i|
+        all[i] = i;
+    break :blk all;
+};
+/// Lists groups, lists fonts, or prints the font for each code point range; returns the exit code.
+pub fn main() !u8 {
+    // TODO: Add back in
+    // defer fontconfig.deinit();
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const stdout_file = std.io.getStdOut().writer();
+    var bw = std.io.bufferedWriter(stdout_file);
+    defer bw.flush() catch @panic("could not flush stdout"); // don't forget to flush!
+    const stdout = bw.writer();
+
+    // std.os.argv is os specific
+    var arg_iterator = std.process.args();
+    const arg0 = arg_iterator.next().?;
+    const options = parseCommandLine(&arg_iterator) catch |err| {
+        if (err == error.UserRequestedHelp) {
+            try usage(stdout, arg0);
+            return 0;
+        }
+        try usage(std.io.getStdErr().writer(), arg0);
+        return 2;
+    };
+
+    var unicode_ranges = unicode.all_ranges();
+    if (options.list_groups) {
+        defer unicode_ranges.reset();
+        while (unicode_ranges.next()) |range| {
+            try stdout.print("{s}", .{range.name});
+            for (range.name.len..unicode_ranges.longest_name_len + 2) |_|
+                try stdout.writeByte(' ');
+            try stdout.print("U+{X} - U+{X}\n", .{ range.starting_codepoint, range.ending_codepoint });
+        }
+        return 0;
+    }
+    if (options.list_fonts) {
+        var fq = fontconfig.FontQuery.init(allocator);
+        defer fq.deinit();
+        var fl = try fq.fontList(options.pattern);
+        var longest_family_name = @as(usize, 0);
+        var longest_style_name = @as(usize, 0);
+        for (fl.list.items) |f| {
+            longest_family_name = @max(f.family.len, longest_family_name);
+            longest_style_name = @max(f.style.len, longest_style_name);
+        }
+
+        std.sort.insertion(fontconfig.Font, fl.list.items, {}, cmpFont);
+        for (fl.list.items) |f| {
+            try stdout.print("Family: {s}", .{f.family});
+            for (f.family.len..longest_family_name + 1) |_|
+                try stdout.writeByte(' ');
+            try stdout.print("Chars: {d:5}\tStyle: {s}", .{ f.supported_chars.len, f.style });
+            for (f.style.len..longest_style_name + 1) |_|
+                try stdout.writeByte(' ');
+            try stdout.print("\tName: {s}\n", .{f.full_name});
+        }
+        return 0;
+    }
+    const exclude_previous = options.fonts != null;
+    const fonts: []const fontconfig.Font = blk: {
+        var fq = fontconfig.FontQuery.init(allocator);
+        defer fq.deinit();
+        var fl = try fq.fontList(options.pattern);
+        // This messes with data after, and we don't need to deinit anyway
+        // defer fl.deinit();
+        // No preferred fonts given: consider every font matching the pattern
+        const fo = options.fonts orelse break :blk fl.list.items;
+        var si = std.mem.splitScalar(u8, fo, ',');
+        var al = try std.ArrayList(fontconfig.Font).initCapacity(allocator, std.mem.count(u8, fo, ",") + 2);
+        defer al.deinit();
+        while (si.next()) |font_str| {
+            const font = font_blk: {
+                for (fl.list.items) |f|
+                    if (std.ascii.eqlIgnoreCase(f.family, font_str))
+                        break :font_blk f;
+                try std.io.getStdErr().writer().print("Error: Font '{s}' not installed", .{font_str});
+                return 255;
+            };
+
+            al.appendAssumeCapacity(font);
+        }
+        al.appendAssumeCapacity(.{
+            .full_name = "Unsupported",
+            .family = "Unsupported by any preferred font",
+            .style = "Regular",
+            .supported_chars = &all_chars,
+        });
+        break :blk try al.toOwnedSlice();
+    };
+
+    const order_by_range = if (std.ascii.eqlIgnoreCase("font", options.order))
+        false
+    else if (std.ascii.eqlIgnoreCase("range", options.order))
+        true
+    else {
+        try std.io.getStdErr().writer().print("Error: Order type '{s}' invalid", .{options.order});
+        return 255;
+    };
+    // The last entry is the "Unsupported" catch-all only when preferred fonts were given
+    const preferred = if (exclude_previous) fonts[0 .. fonts.len - 1] else fonts;
+    std.log.debug("{0} prefered fonts:", .{preferred.len});
+    for (preferred) |f|
+        std.log.debug("\t{s}", .{f.family});
+    if (options.groups) |group| {
+        while (unicode_ranges.next()) |range| {
+            var it = std.mem.splitScalar(u8, group, ',');
+            while (it.next()) |desired_group| {
+                if (std.mem.eql(u8, range.name, desired_group)) {
+                    try outputRange(
+                        allocator,
+                        range.starting_codepoint,
+                        range.ending_codepoint,
+                        fonts,
+                        exclude_previous,
+                        order_by_range,
+                        stdout,
+                    );
+                }
+            }
+        }
+    } else {
+        try outputRange(
+            allocator,
+            0,
+            max_unicode,
+            fonts,
+            exclude_previous,
+            order_by_range,
+            stdout,
+        );
+    }
+
+    return 0;
+}
+/// Sort predicate: orders fonts by family name, bytewise ascending.
+fn cmpFont(context: void, a: fontconfig.Font, b: fontconfig.Font) bool {
+    _ = context;
+    return std.mem.order(u8, a.family, b.family) == .lt; // a.family < b.family;
+}
+/// Sort predicate: orders ranges by their first code point.
+fn cmpRangeList(context: void, a: fontconfig.RangeFont, b: fontconfig.RangeFont) bool {
+    _ = context;
+    return a.starting_codepoint < b.starting_codepoint;
+}
+/// Prints "-<ending code point>" using the caller's format specifier, or
+/// nothing when the range holds a single code point.
+fn formatRangeFontEndingCodepoint(
+    data: fontconfig.RangeFont,
+    comptime fmt: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    if (data.starting_codepoint == data.ending_codepoint) return;
+    try std.fmt.format(writer, "-{" ++ fmt ++ "}", .{data.ending_codepoint});
+}
+fn fmtRangeFontEndingCodepoint(range_font: fontconfig.RangeFont) std.fmt.Formatter(formatRangeFontEndingCodepoint) {
+    return .{ .data = range_font };
+}
+/// Writes one "U+start[-end]=family" line per contiguous run that `fonts`
+/// cover between `starting_codepoint` and `ending_codepoint`. Lines for the
+/// "Unsupported" catch-all font are prefixed with "#".
+fn outputRange(
+    allocator: std.mem.Allocator,
+    starting_codepoint: u21,
+    ending_codepoint: u21,
+    fonts: []const fontconfig.Font,
+    exclude_previous: bool,
+    order_by_range: bool,
+    writer: anytype,
+) !void {
+    var fq = fontconfig.FontQuery.init(allocator);
+    defer fq.deinit();
+    var range_fonts = try fq.fontsForRange(starting_codepoint, ending_codepoint, fonts, exclude_previous); // do we want hard limits around this?
+    defer allocator.free(range_fonts);
+
+    std.log.debug("Got {d} range fonts back from query", .{range_fonts.len});
+    if (order_by_range) std.sort.insertion(fontconfig.RangeFont, range_fonts, {}, cmpRangeList);
+
+    for (range_fonts) |range_font| {
+        try writer.print("{s}U+{x}{x}={s}\n", .{
+            if (std.mem.eql(u8, range_font.font.full_name, "Unsupported")) "#" else "",
+            range_font.starting_codepoint,
+            fmtRangeFontEndingCodepoint(range_font), //.ending_codepoint,
+            range_font.font.family,
+        });
+    }
+}
+
+/// Command line settings; the defaults apply when a flag is absent.
+const Options = struct {
+    end_of_options_signifier: ?usize = null,
+    groups: ?[]const u8 = null,
+    /// Comma-delimited preferred families; null means all matching fonts
+    fonts: ?[]const u8 = null,
+    list_groups: bool = false,
+    list_fonts: bool = false,
+    pattern: [:0]const u8 = ":regular:normal:spacing=100:slant=0",
+    order: [:0]const u8 = "font",
+};
+
+fn usage(writer: anytype, arg0: []const u8) !void {
+    try writer.print(
+        \\usage: {s} [OPTION]...
+        \\
+        \\Options:
+        \\ -p, --pattern font pattern to use (Default: :regular:normal:spacing=100:slant=0)
+        \\ -g, --groups group names to process, comma delimited (e.g. Thai,Lao - default is all groups)
+        \\ -f, --fonts prefered fonts in order, comma delimited (e.g. "DejaVu Sans Mono,Hack Nerd Font" - default is all fonts)
+        \\ note this will change the behavior such that ranges supported by the first font found will not
+        \\ be considered for use by subsequent fonts
+        \\ -o, --order order by (Default: font, can also order by range)
+        \\ -G, --list-groups list all groups and exit
+        \\ -F, --list-fonts list all fonts matching pattern and exit
+        \\ -h, --help display this help text and exit
+        \\
+    , .{arg0});
+}
+
+/// Builds Options from the remaining arguments. Stops at a bare "--";
+/// fails with UserRequestedHelp, InvalidOption or a missing-value error.
+fn parseCommandLine(arg_iterator: anytype) !Options {
+    var current_arg: usize = 0;
+    var rc = Options{};
+    while (arg_iterator.next()) |arg| {
+        if (std.mem.eql(u8, arg, "--")) {
+            rc.end_of_options_signifier = current_arg + 1;
+            return rc;
+        }
+        if (try getArgValue(arg_iterator, arg, "groups", "g", .{})) |val| {
+            rc.groups = val;
+        } else if (try getArgValue(arg_iterator, arg, "pattern", "p", .{})) |val| {
+            rc.pattern = val;
+        } else if (try getArgValue(arg_iterator, arg, "fonts", "f", .{})) |val| {
+            rc.fonts = val;
+        } else if (try getArgValue(arg_iterator, arg, "order", "o", .{})) |val| {
+            rc.order = val;
+        } else if (try getArgValue(arg_iterator, arg, "list-groups", "G", .{ .is_bool = true })) |_| {
+            rc.list_groups = true;
+        } else if (try getArgValue(arg_iterator, arg, "list-fonts", "F", .{ .is_bool = true })) |_| {
+            rc.list_fonts = true;
+        } else if (try getArgValue(arg_iterator, arg, "help", "h", .{ .is_bool = true })) |_| {
+            return error.UserRequestedHelp;
+        } else {
+            if (!builtin.is_test)
+                try std.io.getStdErr().writer().print("invalid option: {s}\n\n", .{arg});
+            return error.InvalidOption;
+        }
+        current_arg += 1;
+    }
+    return rc;
+}
+const ArgOptions = struct {
+    is_bool: bool = false,
+    is_required: bool = false,
+};
+/// Matches `arg` against "-short", "--name" or "--name=value". Returns null
+/// when it is a different option; boolean flags yield a non-null placeholder.
+fn getArgValue(
+    arg_iterator: anytype,
+    arg: [:0]const u8,
+    comptime name: ?[]const u8,
+    comptime short_name: ?[]const u8,
+    arg_options: ArgOptions,
+) !?[:0]const u8 {
+    if (short_name) |s| {
+        if (std.mem.eql(u8, "-" ++ s, arg)) {
+            if (arg_options.is_bool) return arg;
+            const val = arg_iterator.next() orelse return error.NoValueOnFlag;
+            return val;
+        }
+    }
+    if (name) |n| {
+        if (std.mem.eql(u8, "--" ++ n, arg)) {
+            if (arg_options.is_bool) return "";
+            const val = arg_iterator.next() orelse return error.NoValueOnName;
+            return val;
+        }
+        if (std.mem.startsWith(u8, arg, "--" ++ n ++ "=")) {
+            if (arg_options.is_bool) return error.EqualsInvalidForBooleanArgument;
+            return arg[("--" ++ n ++ "=").len.. :0];
+        }
+    }
+    return null;
+}
+
+// Tests run in this order:
+//
+// 1. Main file
+// - In order, from top to bottom
+// 2. Referenced file(s), if any
+// - In order, from top to bottom
+//
+// libfontconfig gets inconsistent in a hurry with a lot of init/deinit, so
+// we only want to deinit once. Because we have no way of saying "go do other
+// tests, then come back", we have no way of controlling deinitialization other
+// than something that's not super obvious. So, we're adding this comment.
+// We will allow fontconfig tests to do our deinit() call, and we shall ignore
+// deinitialization here
+test "startup" {}
+test "command line parses with short name" {
+    var it = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "-g Latin-1");
+    defer it.deinit();
+    const options = try parseCommandLine(&it);
+    try std.testing.expectEqualStrings("Latin-1", options.groups.?);
+}
+test "command line parses with long name no equals" {
+    var it = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups Latin-1");
+    defer it.deinit();
+    const options = try parseCommandLine(&it);
+    try std.testing.expectEqualStrings("Latin-1", options.groups.?);
+}
+test "command line parses with long name equals" {
+    var log_level = std.testing.log_level;
+    defer std.testing.log_level = log_level;
+    std.testing.log_level = .debug;
+    var it = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups=Latin-1");
+    defer it.deinit();
+    const options = try parseCommandLine(&it);
+    try std.testing.expectEqualStrings("Latin-1", options.groups.?);
+}
+test "Get ranges" {
+    std.log.debug("get ranges", .{});
+    var fq = fontconfig.FontQuery.init(std.testing.allocator);
+    defer fq.deinit();
+    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
+    defer fl.deinit();
+    try std.testing.expect(fl.list.items.len > 0);
+    var matched = blk: {
+        for (fl.list.items) |item| {
+            std.log.debug("full_name: '{s}'", .{item.full_name});
+            if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name))
+                break :blk item;
+        }
+        break :blk null;
+    };
+    try std.testing.expect(matched != null);
+    const arr: []const fontconfig.Font = &[_]fontconfig.Font{matched.?};
+    var al = std.ArrayList(u8).init(std.testing.allocator);
+    defer al.deinit();
+    const range_name = "Basic Latin";
+    var matched_range = try blk: {
+        var unicode_ranges = unicode.all_ranges();
+        while (unicode_ranges.next()) |range| {
+            var it = std.mem.splitScalar(u8, range_name, ',');
+            while (it.next()) |desired_range| {
+                if (std.mem.eql(u8, range.name, desired_range)) {
+                    break :blk range;
+                }
+            }
+        }
+        break :blk error.RangeNotFound;
+    };
+    var log_level = std.testing.log_level;
+    std.testing.log_level = .debug;
+    defer std.testing.log_level = log_level;
+    try outputRange(std.testing.allocator, matched_range.starting_codepoint, matched_range.ending_codepoint, arr, false, false, al.writer());
+    try std.testing.expectEqualStrings("U+20-7e=DejaVu Sans Mono\n", al.items);
+
+    std.log.debug("\nwhole unicode space:", .{});
+    try outputRange(std.testing.allocator, 0, max_unicode, arr, false, false, al.writer());
+    const expected =
+        \\U+20-7e=DejaVu Sans Mono
+        \\U+20-7e=DejaVu Sans Mono
+        \\U+a0-1c3=DejaVu Sans Mono
+        \\U+1cd-1e3=DejaVu Sans Mono
+        \\U+1e6-1f0=DejaVu Sans Mono
+        \\U+1f4-1f6=DejaVu Sans Mono
+    ;
+    try std.testing.expectStringStartsWith(al.items, expected);
+}
+
+test "teardown, followed by libraries" { + std.testing.refAllDecls(@This()); // Only catches public decls + _ = @import("unicode.zig"); +} diff --git a/src/ranges.txt b/src/ranges.txt new file mode 100644 index 0000000..61e828e --- /dev/null +++ b/src/ranges.txt @@ -0,0 +1,209 @@ +Basic Latin U+0 - U+7F +Latin-1 Supplement U+80 - U+FF +Latin Extended-A U+100 - U+17F +Latin Extended-B U+180 - U+24F +IPA Extensions U+250 - U+2AF +Spacing Modifier Letters U+2B0 - U+2FF +Combining Diacritical Marks U+300 - U+36F +Greek and Coptic U+370 - U+3FF +Cyrillic U+400 - U+4FF +Cyrillic Supplement U+500 - U+527 +Armenian U+531 - U+58A +Hebrew U+591 - U+5F4 +Arabic U+600 - U+6FF +Syriac U+700 - U+74F +Arabic Supplement U+750 - U+77F +Thaana U+780 - U+7B1 +NKo U+7C0 - U+7FA +Samaritan U+800 - U+83E +Mandaic U+840 - U+85E +Devanagari U+900 - U+97F +Bengali U+981 - U+9FB +Gurmukhi U+A01 - U+A75 +Gujarati U+A81 - U+AF1 +Oriya U+B01 - U+B77 +Tamil U+B82 - U+BFA +Telugu U+C01 - U+C7F +Kannada U+C82 - U+CF2 +Malayalam U+D02 - U+D7F +Sinhala U+D82 - U+DF4 +Thai U+E01 - U+E5B +Lao U+E81 - U+EDD +Tibetan U+F00 - U+FDA +Myanmar U+1000 - U+109F +Georgian U+10A0 - U+10FC +Hangul Jamo U+1100 - U+11FF +Ethiopic U+1200 - U+137C +Ethiopic Supplement U+1380 - U+1399 +Cherokee U+13A0 - U+13F4 +Unified Canadian Aboriginal Syllabics U+1400 - U+167F +Ogham U+1680 - U+169C +Runic U+16A0 - U+16F0 +Tagalog U+1700 - U+1714 +Hanunoo U+1720 - U+1736 +Buhid U+1740 - U+1753 +Tagbanwa U+1760 - U+1773 +Khmer U+1780 - U+17F9 +Mongolian U+1800 - U+18AA +Unified Canadian Aboriginal Syllabics Extended U+18B0 - U+18F5 +Limbu U+1900 - U+194F +Tai Le U+1950 - U+1974 +New Tai Lue U+1980 - U+19DF +Khmer Symbols U+19E0 - U+19FF +Buginese U+1A00 - U+1A1F +Tai Tham U+1A20 - U+1AAD +Balinese U+1B00 - U+1B7C +Sundanese U+1B80 - U+1BB9 +Batak U+1BC0 - U+1BFF +Lepcha U+1C00 - U+1C4F +Ol Chiki U+1C50 - U+1C7F +Vedic Extensions U+1CD0 - U+1CF2 +Phonetic Extensions U+1D00 - U+1D7F +Phonetic Extensions Supplement U+1D80 - U+1DBF 
+Combining Diacritical Marks Supplement U+1DC0 - U+1DFF +Latin Extended Additional U+1E00 - U+1EFF +Greek Extended U+1F00 - U+1FFE +General Punctuation U+2000 - U+206F +Superscripts and Subscripts U+2070 - U+209C +Currency Symbols U+20A0 - U+20B9 +Combining Diacritical Marks for Symbols U+20D0 - U+20F0 +Letterlike Symbols U+2100 - U+214F +Number Forms U+2150 - U+2189 +Arrows U+2190 - U+21FF +Mathematical Operators U+2200 - U+22FF +Miscellaneous Technical U+2300 - U+23F3 +Control Pictures U+2400 - U+2426 +Optical Character Recognition U+2440 - U+244A +Enclosed Alphanumerics U+2460 - U+24FF +Box Drawing U+2500 - U+257F +Block Elements U+2580 - U+259F +Geometric Shapes U+25A0 - U+25FF +Miscellaneous Symbols U+2600 - U+26FF +Dingbats U+2701 - U+27BF +Miscellaneous Mathematical Symbols-A U+27C0 - U+27EF +Supplemental Arrows-A U+27F0 - U+27FF +Braille Patterns U+2800 - U+28FF +Supplemental Arrows-B U+2900 - U+297F +Miscellaneous Mathematical Symbols-B U+2980 - U+29FF +Supplemental Mathematical Operators U+2A00 - U+2AFF +Miscellaneous Symbols and Arrows U+2B00 - U+2B59 +Glagolitic U+2C00 - U+2C5E +Latin Extended-C U+2C60 - U+2C7F +Coptic U+2C80 - U+2CFF +Georgian Supplement U+2D00 - U+2D25 +Tifinagh U+2D30 - U+2D7F +Ethiopic Extended U+2D80 - U+2DDE +Cyrillic Extended-A U+2DE0 - U+2DFF +Supplemental Punctuation U+2E00 - U+2E31 +CJK Radicals Supplement U+2E80 - U+2EF3 +Kangxi Radicals U+2F00 - U+2FD5 +Ideographic Description Characters U+2FF0 - U+2FFB +CJK Symbols and Punctuation U+3000 - U+303F +Hiragana U+3041 - U+309F +Katakana U+30A0 - U+30FF +Bopomofo U+3105 - U+312D +Hangul Compatibility Jamo U+3131 - U+318E +Kanbun U+3190 - U+319F +Bopomofo Extended U+31A0 - U+31BA +CJK Strokes U+31C0 - U+31E3 +Katakana Phonetic Extensions U+31F0 - U+31FF +Enclosed CJK Letters and Months U+3200 - U+32FE +CJK Compatibility U+3300 - U+33FF +CJK Unified Ideographs Extension A U+3400 - U+4DB5 +Yijing Hexagram Symbols U+4DC0 - U+4DFF +CJK Unified Ideographs U+4E00 - U+9FCB +Yi Syllables 
U+A000 - U+A48C +Yi Radicals U+A490 - U+A4C6 +Lisu U+A4D0 - U+A4FF +Vai U+A500 - U+A62B +Cyrillic Extended-B U+A640 - U+A697 +Bamum U+A6A0 - U+A6F7 +Modifier Tone Letters U+A700 - U+A71F +Latin Extended-D U+A720 - U+A7FF +Syloti Nagri U+A800 - U+A82B +Common Indic Number Forms U+A830 - U+A839 +Phags-pa U+A840 - U+A877 +Saurashtra U+A880 - U+A8D9 +Devanagari Extended U+A8E0 - U+A8FB +Kayah Li U+A900 - U+A92F +Rejang U+A930 - U+A95F +Hangul Jamo Extended-A U+A960 - U+A97C +Javanese U+A980 - U+A9DF +Cham U+AA00 - U+AA5F +Myanmar Extended-A U+AA60 - U+AA7B +Tai Viet U+AA80 - U+AADF +Ethiopic Extended-A U+AB01 - U+AB2E +Meetei Mayek U+ABC0 - U+ABF9 +Hangul Syllables U+AC00 - U+D7A3 +Hangul Jamo Extended-B U+D7B0 - U+D7FB +High Surrogates U+D800 - U+DB7F +High Private Use Surrogates U+DB80 - U+DBFF +Low Surrogates U+DC00 - U+DFFF +Private Use Area U+E000 - U+F8FF +CJK Compatibility Ideographs U+F900 - U+FAD9 +Alphabetic Presentation Forms U+FB00 - U+FB4F +Arabic Presentation Forms-A U+FB50 - U+FDFD +Variation Selectors U+FE00 - U+FE0F +Vertical Forms U+FE10 - U+FE19 +Combining Half Marks U+FE20 - U+FE26 +CJK Compatibility Forms U+FE30 - U+FE4F +Small Form Variants U+FE50 - U+FE6B +Arabic Presentation Forms-B U+FE70 - U+FEFF +Halfwidth and Fullwidth Forms U+FF01 - U+FFEE +Specials U+FFF9 - U+FFFD +Linear B Syllabary U+10000 - U+1005D +Linear B Ideograms U+10080 - U+100FA +Aegean Numbers U+10100 - U+1013F +Ancient Greek Numbers U+10140 - U+1018A +Ancient Symbols U+10190 - U+1019B +Phaistos Disc U+101D0 - U+101FD +Lycian U+10280 - U+1029C +Carian U+102A0 - U+102D0 +Old Italic U+10300 - U+10323 +Gothic U+10330 - U+1034A +Ugaritic U+10380 - U+1039F +Old Persian U+103A0 - U+103D5 +Deseret U+10400 - U+1044F +Shavian U+10450 - U+1047F +Osmanya U+10480 - U+104A9 +Cypriot Syllabary U+10800 - U+1083F +Imperial Aramaic U+10840 - U+1085F +Phoenician U+10900 - U+1091F +Lydian U+10920 - U+1093F +Kharoshthi U+10A00 - U+10A58 +Old South Arabian U+10A60 - U+10A7F +Avestan U+10B00 - 
U+10B3F +Inscriptional Parthian U+10B40 - U+10B5F +Inscriptional Pahlavi U+10B60 - U+10B7F +Old Turkic U+10C00 - U+10C48 +Rumi Numeral Symbols U+10E60 - U+10E7E +Brahmi U+11000 - U+1106F +Kaithi U+11080 - U+110C1 +Cuneiform U+12000 - U+1236E +Cuneiform Numbers and Punctuation U+12400 - U+12473 +Egyptian Hieroglyphs U+13000 - U+1342E +Bamum Supplement U+16800 - U+16A38 +Kana Supplement U+1B000 - U+1B001 +Byzantine Musical Symbols U+1D000 - U+1D0F5 +Musical Symbols U+1D100 - U+1D1DD +Ancient Greek Musical Notation U+1D200 - U+1D245 +Tai Xuan Jing Symbols U+1D300 - U+1D356 +Counting Rod Numerals U+1D360 - U+1D371 +Mathematical Alphanumeric Symbols U+1D400 - U+1D7FF +Mahjong Tiles U+1F000 - U+1F02B +Domino Tiles U+1F030 - U+1F093 +Playing Cards U+1F0A0 - U+1F0DF +Enclosed Alphanumeric Supplement U+1F100 - U+1F1FF +Enclosed Ideographic Supplement U+1F200 - U+1F251 +Miscellaneous Symbols And Pictographs U+1F300 - U+1F5FF +Emoticons U+1F601 - U+1F64F +Transport And Map Symbols U+1F680 - U+1F6C5 +Alchemical Symbols U+1F700 - U+1F773 +CJK Unified Ideographs Extension B U+20000 - U+2A6D6 +CJK Unified Ideographs Extension C U+2A700 - U+2B734 +CJK Unified Ideographs Extension D U+2B740 - U+2B81D +CJK Compatibility Ideographs Supplement U+2F800 - U+2FA1D +Tags U+E0001 - U+E007F +Variation Selectors Supplement U+E0100 - U+E01EF +Supplementary Private Use Area-A U+F0000 - U+FFFFD +Supplementary Private Use Area-B U+100000 - U+10FFFD
diff --git a/src/unicode.zig b/src/unicode.zig
new file mode 100644
index 0000000..98efa51
--- /dev/null
+++ b/src/unicode.zig
@@ -0,0 +1,155 @@
+const std = @import("std");
+
+// Pulled from: https://www.unicodepedia.com/groups/
+// One group per line: "<name>\tU+<hex start> - U+<hex end>" (see parseGroup).
+const ranges = @embedFile("ranges.txt");
+const eval_branch_quota_base = 18500;
+/// Number of groups, taken as the number of newlines in ranges.txt.
+const range_count = blk: {
+    // The quota has to grow with the size of the embedded file above, because
+    // counting newlines scans the whole text at comptime.
+    @setEvalBranchQuota(eval_branch_quota_base);
+    break :blk std.mem.count(u8, ranges, "\n");
+};
+
+/// Table of all Unicode groups, stored as three arrays that share one index,
+/// plus a cursor for walking the table with `first`/`next`.
+const Ranges = struct {
+    names: [range_count][]const u8 = undefined,
+    starting_codepoints: [range_count]u21 = undefined,
+    ending_codepoints: [range_count]u21 = undefined,
+    /// Index of the group that the next call to `next` returns.
+    current_inx: usize = 0,
+    /// Byte length of the longest group name.
+    longest_name_len: usize = 0,
+
+    const Self = @This();
+
+    /// Rewinds the cursor and returns the first group, or null if there is none.
+    pub fn first(self: *Self) ?UnicodeGroup {
+        self.reset();
+        return self.next();
+    }
+
+    /// Moves the cursor back to the first group.
+    pub fn reset(self: *Self) void {
+        self.current_inx = 0;
+    }
+
+    /// Returns the group under the cursor and advances it; null once exhausted.
+    pub fn next(self: *Self) ?UnicodeGroup {
+        if (self.current_inx >= range_count) return null;
+        const group = self.item(self.current_inx);
+        self.current_inx += 1;
+        return group;
+    }
+
+    /// Returns the group stored at `index`; `index` must be below the group count.
+    pub fn item(self: Self, index: usize) UnicodeGroup {
+        return .{
+            .name = self.names[index],
+            .starting_codepoint = self.starting_codepoints[index],
+            .ending_codepoint = self.ending_codepoints[index],
+        };
+    }
+};
+
+// Parsed once at comptime, so a malformed ranges.txt fails the build.
+const _all_ranges = blk: {
+    @setEvalBranchQuota(eval_branch_quota_base * 2);
+    break :blk parseRanges(ranges) catch @compileError("Could not parse ranges.txt");
+};
+
+/// Returns a copy of the comptime-parsed table with its cursor at the start.
+pub fn all_ranges() Ranges {
+    return .{
+        .names = _all_ranges.names,
+        .starting_codepoints = _all_ranges.starting_codepoints,
+        .ending_codepoints = _all_ranges.ending_codepoints,
+        .longest_name_len = _all_ranges.longest_name_len,
+    };
+}
+
+/// One named codepoint range, as listed on a single line of ranges.txt.
+pub const UnicodeGroup = struct {
+    name: []const u8,
+    starting_codepoint: u21,
+    ending_codepoint: u21,
+};
+
+/// Parses every non-empty line of `text` into a `Ranges` table.
+///
+/// Returns `error.RangeCountMismatch` when the number of non-empty lines is not
+/// `range_count` (e.g. a blank line, or a last line with no trailing newline).
+/// Previously that either indexed past the arrays or left `undefined` entries
+/// in the table. Any other error comes from `parseGroup`.
+fn parseRanges(text: []const u8) !Ranges {
+    var rc = Ranges{};
+    var iterator = std.mem.splitSequence(u8, text, "\n");
+    var inx: usize = 0;
+    while (iterator.next()) |group| {
+        if (group.len == 0) continue;
+        if (inx >= range_count) return error.RangeCountMismatch;
+        const uc = try parseGroup(group);
+        rc.names[inx] = uc.name;
+        rc.starting_codepoints[inx] = uc.starting_codepoint;
+        rc.ending_codepoints[inx] = uc.ending_codepoint;
+        rc.longest_name_len = @max(rc.longest_name_len, uc.name.len);
+        inx += 1;
+    }
+    if (inx != range_count) return error.RangeCountMismatch;
+    return rc;
+}
+
+/// Parses one line of the form "<name>\tU+<hex start> - U+<hex end>".
+///
+/// Trailing spaces are trimmed from the name. A codepoint token that does not
+/// begin with "U+" yields `error.InvalidCodepointFormat`; before, its first two
+/// bytes were skipped unchecked, which also sliced out of bounds on tokens
+/// shorter than two bytes.
+fn parseGroup(group_text: []const u8) !UnicodeGroup {
+    // Example line: "Basic Latin\tU+0 - U+7F"
+    var iterator = std.mem.splitSequence(u8, group_text, "\t");
+    const name = std.mem.trimRight(u8, iterator.first(), " ");
+    const range_text = iterator.next() orelse {
+        std.log.err("failed parsing on group '{s}'", .{group_text});
+        return error.NoRangeSpecifiedInGroup;
+    };
+    var range_iterator = std.mem.splitSequence(u8, range_text, " - ");
+    const start_text = range_iterator.first();
+    const end_text = range_iterator.next() orelse return error.NoEndingCodepointInGroup;
+    // Checked inline on purpose: a helper would add comptime calls, and the
+    // branch quota used for parsing was presumably tuned for the existing code.
+    if (start_text.len < 3 or start_text[0] != 'U' or start_text[1] != '+') return error.InvalidCodepointFormat;
+    if (end_text.len < 3 or end_text[0] != 'U' or end_text[1] != '+') return error.InvalidCodepointFormat;
+    return UnicodeGroup{
+        .name = name,
+        .starting_codepoint = try std.fmt.parseUnsigned(u21, start_text[2..], 16),
+        .ending_codepoint = try std.fmt.parseUnsigned(u21, end_text[2..], 16),
+    };
+}
+
+test "check ranges" {
+    var parsed_ranges = all_ranges();
+    // Entry 8 should be:
+    // Cyrillic U+400 - U+4FF
+    try std.testing.expectEqual(@as(u21, 0x400), parsed_ranges.starting_codepoints[8]);
+    try std.testing.expectEqual(@as(u21, 0x4ff), parsed_ranges.ending_codepoints[8]);
+    try std.testing.expectEqualStrings("Cyrillic", parsed_ranges.names[8]);
+
+    var range = parsed_ranges.first().?;
+    try std.testing.expectEqualStrings("Basic Latin", range.name);
+    try std.testing.expectEqual(@as(u21, 0x0), range.starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0x7f), range.ending_codepoint);
+
+    range = parsed_ranges.next().?;
+    try std.testing.expectEqualStrings("Latin-1 Supplement", range.name);
+    try std.testing.expectEqual(@as(u21, 0x80), range.starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0xff), range.ending_codepoint);
+}
+
+test "parseGroup rejects malformed ranges" {
+    try std.testing.expectError(error.InvalidCodepointFormat, parseGroup("Bad\t400 - U+4FF"));
+    try std.testing.expectError(error.InvalidCodepointFormat, parseGroup("Bad\tU+400 - 4FF"));
+    try std.testing.expectError(error.NoEndingCodepointInGroup, parseGroup("Bad\tU+400"));
+}
diff --git a/zig-via-docker b/zig-via-docker
new file mode 100755
index 0000000..4809fef
--- /dev/null
+++ b/zig-via-docker
@@ -0,0 +1,4 @@
+#!/bin/sh
+scriptpath="$( cd "$(dirname "$0")" ; pwd -P )"
+# podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder-alpine "$@"
+podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder "$@"