initial source code

This commit is contained in:
Emil Lerch 2023-07-29 20:10:04 -07:00
parent c731c3a015
commit 5feee8c0c3
Signed by: lobo
GPG Key ID: A7B62D657EF764F8
11 changed files with 1175 additions and 0 deletions

14
Dockerfile Normal file
View File

@ -0,0 +1,14 @@
# Image that wraps a pinned Zig 0.11.0-dev toolchain together with the
# fontconfig development files, based on Debian bullseye.
FROM debian:bullseye
# One layer that: installs the packages needed to fetch and unpack Zig plus
# libfontconfig-dev, unpacks the pinned Zig tarball under /usr/local, removes
# curl and xz-utils again, symlinks the zig binary into /usr/local/bin, and
# deletes the apt package lists.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libfontconfig-dev \
ca-certificates \
curl \
xz-utils \
&& curl https://mirror.bazel.build/ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.3886+0c1bfe271.tar.xz | tar -C /usr/local/ -xJ \
&& apt-get -y remove curl xz-utils \
&& ln -s /usr/local/zig*/zig /usr/local/bin \
&& rm -rf /var/lib/apt/lists/*
# The container runs zig directly; arguments given to `docker run` become
# zig's arguments.
ENTRYPOINT ["/usr/local/bin/zig"]

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Emil Lerch
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

0
README.md Normal file
View File

86
build.zig Normal file
View File

@ -0,0 +1,86 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
    // The person invoking `zig build` chooses the target; nothing is
    // restricted here, so every target is accepted and native is the default.
    const target = b.standardTargetOptions(.{});
    // The optimization mode (Debug, ReleaseSafe, ReleaseFast, ReleaseSmall) is
    // likewise left to the invoker; no preferred mode is set.
    const optimize = b.standardOptimizeOption(.{});

    // The fontfinder executable, rooted at src/main.zig.
    const fontfinder = b.addExecutable(.{
        .name = "fontfinder",
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    configure(fontfinder);
    // Part of the default "install" step: place the executable in the
    // standard install location.
    b.installArtifact(fontfinder);

    // `zig build run [-- arg1 arg2 ...]`: run the executable. Depending on the
    // install step makes it run from the installation directory rather than
    // from the cache directory.
    const run_fontfinder = b.addRunArtifact(fontfinder);
    run_fontfinder.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| run_fontfinder.addArgs(forwarded);
    b.step("run", "Run the app").dependOn(&run_fontfinder.step);

    // `zig build test`: build the unit test binary for src/main.zig and run it.
    const tests = b.addTest(.{
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    configure(tests);
    const run_tests = b.addRunArtifact(tests);
    b.step("test", "Run unit tests").dependOn(&run_tests.step);
}
/// Applies the settings shared by every compile step that `build` creates
/// (the executable and the unit test binary): links libc and the system
/// fontconfig library, adds /usr/include and /usr/lib as search paths, and
/// compiles the C helper src/fontconfig.c.
fn configure(object: anytype) void {
// object.linkage = .static;
object.linkLibC();
// Fontconfig must be installed. Docker can also be used (see Dockerfile)
object.addSystemIncludePath("/usr/include");
object.linkSystemLibrary("fontconfig");
// object.linkSystemLibrary("expat"); // fontconfig dependency - needed for static builds
object.addLibraryPath("/usr/lib");
// The helper is compiled with -std=c99.
object.addCSourceFile("src/fontconfig.c", &[_][]const u8{"-std=c99"});
}

BIN
src/core Normal file

Binary file not shown.

54
src/fontconfig.c Normal file
View File

@ -0,0 +1,54 @@
#include <stdlib.h>
#include <fontconfig/fontconfig.h>
/* #<{(| FcChar32 FcCharSetCount (const FcCharSet *a); |)}># */
/* void printCharacters(FcPattern* fontPattern) { */
/* FcCharSet* charset; */
/* if (FcPatternGetCharSet(fontPattern, FC_CHARSET, 0, &charset) == FcResultMatch) { */
/* FcChar32 ucs4; */
/* FcCharSetIter iter; */
/* FcCharSetIterInit(charset, &iter); */
/* printf("Supported characters:\n"); */
/* while (FcCharSetIterNext(&iter, &ucs4)) { */
/* printf("%lc ", (wint_t)ucs4); */
/* } */
/* printf("\n"); */
/* FcCharSetDestroy(charset); */
/* } */
/* } */
/* Scan limit used by allCharacters: code points are probed while they are
 * below this value. */
const FcChar32 MAX_UNICODE = 0x10FFFD;
/* Releases a code point array with free(); allCharacters allocates such
 * arrays with malloc(). */
void freeAllCharacters(unsigned int *chars) {
free(chars);
}
/*
 * Collects every code point covered by the charset of a fontconfig pattern.
 *
 * fontPattern: an FcPattern*, passed as void*.
 * chars:       out parameter. On success it receives a malloc()ed array with
 *              the code points in ascending order (never NULL, even when the
 *              charset is empty); release it with freeAllCharacters(). On
 *              failure it is set to NULL.
 *
 * Returns the number of code points stored in *chars, or a negative value:
 *   -1  the pattern has no FC_CHARSET element
 *   -2  the charset reports more code points than were found below
 *       MAX_UNICODE
 *   -3  the result array could not be allocated
 */
int allCharacters(void* fontPattern, FcChar32 ** chars) {
    FcPattern* pat = (FcPattern*) fontPattern;
    FcCharSet* charset;

    *chars = NULL;
    if (FcPatternGetCharSet(pat, FC_CHARSET, 0, &charset) != FcResultMatch) {
        return -1;
    }
    /* The charset is borrowed: FcPatternGet* hands back the object stored in
     * the pattern rather than a copy, so it must not be destroyed here. */
    FcChar32 count = FcCharSetCount(charset);

    /* Allocate at least one element so a successful call always yields a
     * non-NULL pointer. */
    FcChar32* char_array = (FcChar32*) malloc((count > 0 ? count : 1) * sizeof(FcChar32));
    if (char_array == NULL) {
        return -3;
    }

    /* Probe code points in ascending order; stop as soon as every member of
     * the charset has been seen. Code points at or above MAX_UNICODE are not
     * probed. */
    FcChar32 found = 0;
    for (FcChar32 code_point = 0; found < count && code_point < MAX_UNICODE; code_point++) {
        if (FcCharSetHasChar(charset, code_point) == FcTrue) {
            char_array[found++] = code_point;
        }
    }

    if (found < count) {
        /* Some members lie outside the probed range: report failure and do
         * not hand a freed pointer back to the caller. */
        free(char_array);
        return -2;
    }
    *chars = char_array;
    return (int) found;
}

284
src/fontconfig.zig Normal file
View File

@ -0,0 +1,284 @@
const std = @import("std");
const unicode = @import("unicode.zig");
const c = @cImport({
@cInclude("fontconfig/fontconfig.h");
});
const log = std.log.scoped(.fontconfig);
/// Implemented in src/fontconfig.c. Stores a malloc'ed array of the code
/// points supported by pattern `p` in `chars` and returns its length, or a
/// negative value on failure.
extern fn allCharacters(p: ?*const c.FcPattern, chars: *[*]u32) c_int;
/// Implemented in src/fontconfig.c as `void freeAllCharacters(unsigned int *)`.
/// Frees an array produced by `allCharacters`.
extern fn freeAllCharacters(chars: ?[*]u32) void;

/// A contiguous run of code points (both ends inclusive) covered by `font`.
pub const RangeFont = struct {
    starting_codepoint: u21,
    ending_codepoint: u21,
    font: Font,
};

/// Description of one font. When produced by `FontQuery.fontList`, the string
/// fields point into the fontconfig pattern data owned by the `FontList`, and
/// `supported_chars` is the array allocated by `allCharacters`.
pub const Font = struct {
    full_name: []const u8,
    family: []const u8,
    style: []const u8,
    supported_chars: []const u21,

    const Self = @This();

    /// Frees `supported_chars`. Only call this on a `Font` whose
    /// `supported_chars` came from `allCharacters`, and only once per
    /// underlying array: copies of a `Font` share the same array.
    pub fn deinit(self: *Self) void {
        // The C side stores 32-bit unsigned values; u21 occupies the same
        // four bytes, so only the pointer type needs converting.
        freeAllCharacters(@ptrCast(@constCast(self.supported_chars.ptr)));
        self.supported_chars = &[_]u21{};
    }
};

/// The fonts returned by one `FontQuery.fontList` call, together with the
/// fontconfig pattern and font set that back their string data.
pub const FontList = struct {
    list: std.ArrayList(Font),
    allocator: std.mem.Allocator,
    pattern: *c.FcPattern,
    fontset: *c.FcFontSet,

    const Self = @This();

    /// Creates an empty list with room for `num` fonts and takes ownership of
    /// `pattern` and `fontset`. On error, ownership stays with the caller.
    pub fn initCapacity(allocator: std.mem.Allocator, num: usize, pattern: *c.FcPattern, fontset: *c.FcFontSet) std.mem.Allocator.Error!Self {
        return Self{
            .allocator = allocator,
            .list = try std.ArrayList(Font).initCapacity(allocator, num),
            .pattern = pattern,
            .fontset = fontset,
        };
    }

    /// Releases everything the list owns: each font's code point array, the
    /// pattern, the font set and the list storage. Every `Font` obtained from
    /// this list (including copies) is invalid afterwards.
    pub fn deinit(self: *Self) void {
        for (self.list.items) |*font| font.deinit();
        c.FcPatternDestroy(self.pattern);
        c.FcFontSetDestroy(self.fontset);
        self.list.deinit();
    }

    /// Appends a font without allocating; the capacity passed to
    /// `initCapacity` must not be exceeded. The list takes ownership of
    /// `supported_chars`, which must have been allocated by `allCharacters`.
    pub fn addFontAssumeCapacity(
        self: *Self,
        full_name: []const u8,
        family: []const u8,
        style: []const u8,
        supported_chars: []const u21,
    ) !void {
        self.list.appendAssumeCapacity(.{
            .full_name = full_name,
            .family = family,
            .style = style,
            .supported_chars = supported_chars,
        });
    }
};
/// Shared fontconfig configuration; null until `FontQuery.fontList` loads it.
var fc_config: ?*c.FcConfig = null;
/// Set by `deinit`; afterwards the library must not be used again.
var deinited = false;
// pub var test_should_deinit = true;
/// Tears down the underlying C library. Call it at most once, and only
/// after all processing has completed.
pub fn deinit() void {
    // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r2370.html
    // states: "Note that calling this function with the return from
    // FcConfigGetCurrent will place the library in an indeterminate state."
    // Re-initialising after a destroy does not seem to work either:
    //
    // 1. c.FcInitLoadConfigAndFonts();
    // 2. c.FcConfigDestroy();
    // 3. c.FcInitLoadConfigAndFonts();
    // 4. c.FcConfigDestroy(); // Seg fault here
    //
    // Hence the one-shot guard below.
    if (deinited) @panic("Cannot deinitialize this library more than once");
    deinited = true;

    // Nothing was ever loaded: there is no configuration to destroy.
    const conf = fc_config orelse return;
    fc_config = null;
    log.debug("destroying config: do not use library or call me again", .{});
    c.FcConfigDestroy(conf);
}
/// Entry point for font queries: lists the fonts matching a fontconfig
/// pattern and maps code point ranges to the fonts that cover them.
pub const FontQuery = struct {
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Creates a query object that allocates with `allocator`.
    pub fn init(allocator: std.mem.Allocator) Self {
        return Self{
            .allocator = allocator,
        };
    }

    /// Currently holds no resources of its own; kept for API symmetry.
    pub fn deinit(self: *Self) void {
        _ = self;
    }

    /// Lists the installed fonts matching the fontconfig name `pattern`
    /// (for example ":regular:normal:spacing=100:slant=0").
    ///
    /// The returned `FontList` owns the parsed pattern and the font set; the
    /// strings inside each `Font` point into that data, so the fonts are
    /// valid only until the list is deinitialised.
    ///
    /// Errors: `FontConfigInitLoadFailure`, `FontConfigPatternParseFailure`,
    /// `FontConfigFontListFailure`, `FontConfigHasNoFamily`,
    /// `FontConfigHasNoStyle`, `FontConfigCouldNotGetCharSet`, or
    /// `OutOfMemory`. Panics when called after the module-level `deinit`.
    pub fn fontList(self: *Self, pattern: [:0]const u8) !FontList {
        if (fc_config == null and deinited) @panic("fontconfig C library is in an inconsistent state - should not use");
        if (fc_config == null) fc_config = c.FcInitLoadConfigAndFonts();
        const config = fc_config orelse return error.FontConfigInitLoadFailure;

        // The pattern must outlive this call (the FontList takes ownership of
        // it), so it is not destroyed here on success.
        // The same filter could be built field by field with FcPatternCreate
        // and FcPatternAddInteger (FC_WEIGHT_NORMAL, FC_WIDTH_NORMAL, FC_MONO,
        // FC_SLANT_ROMAN); parsing the name form keeps it configurable.
        const pat = c.FcNameParse(pattern);
        if (pat == null) return error.FontConfigPatternParseFailure;

        const os = c.FcObjectSetBuild(c.FC_FAMILY, c.FC_STYLE, c.FC_LANG, c.FC_FULLNAME, c.FC_CHARSET, @as(?*u8, null)); // *FcObjectSet
        defer if (os != null) c.FcObjectSetDestroy(os);

        const fs = c.FcFontList(config, pat, os); // FcFontSet
        if (fs == null) {
            c.FcPatternDestroy(pat);
            return error.FontConfigFontListFailure;
        }

        // Use the following only when needed. NameUnparse allocates memory
        // log.debug("Total matching fonts: {d}. Pattern: {s}\n", .{ fs.*.nfont, c.FcNameUnparse(pat) });
        log.debug("Total matching fonts: {d}", .{fs.*.nfont});

        const font_count = @as(usize, @intCast(fs.*.nfont));
        // Until the FontList exists, the pattern and font set are still ours
        // to release; afterwards `rc.deinit()` takes care of both.
        var rc = FontList.initCapacity(self.allocator, font_count, pat.?, fs.?) catch |err| {
            c.FcFontSetDestroy(fs);
            c.FcPatternDestroy(pat);
            return err;
        };
        errdefer rc.deinit();

        for (0..font_count) |i| {
            const font = fs.*.fonts[i].?; // *FcPattern
            var fullname: [*:0]c.FcChar8 = undefined;
            var style: [*:0]c.FcChar8 = undefined;
            var family: [*:0]c.FcChar8 = undefined;

            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r600.html
            // Note that these (like FcPatternGet) do not make a copy of any data structure referenced by the return value
            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r570.html
            // The value returned is not a copy, but rather refers to the data stored within the pattern directly. Applications must not free this value.
            if (c.FcPatternGetString(font, c.FC_FULLNAME, 0, @as([*c][*c]c.FcChar8, @ptrCast(&fullname))) != c.FcResultMatch)
                fullname = @constCast(@ptrCast("".ptr)); // a missing full name is tolerated
            if (c.FcPatternGetString(font, c.FC_FAMILY, 0, @as([*c][*c]c.FcChar8, @ptrCast(&family))) != c.FcResultMatch)
                return error.FontConfigHasNoFamily;
            if (c.FcPatternGetString(font, c.FC_STYLE, 0, @as([*c][*c]c.FcChar8, @ptrCast(&style))) != c.FcResultMatch)
                return error.FontConfigHasNoStyle;

            // Fetch the code points last: the array is malloc'ed by the C
            // helper, and nothing after this point can fail before the list
            // takes ownership of it.
            var charset: [*]u21 = undefined;
            const len = allCharacters(font, @ptrCast(&charset));
            if (len < 0) return error.FontConfigCouldNotGetCharSet;
            const char_count = @as(usize, @intCast(len));

            log.debug(
                "Chars: {d:5.0} Family '{s}' Style '{s}' Full Name: {s}",
                .{ char_count, family, style, fullname },
            );
            try rc.addFontAssumeCapacity(
                fullname[0..std.mem.len(fullname)],
                family[0..std.mem.len(family)],
                style[0..std.mem.len(style)],
                charset[0..char_count],
            );
        }
        return rc;
    }

    /// Splits the code points `starting_codepoint..=ending_codepoint` (both
    /// ends inclusive) into contiguous runs per font.
    ///
    /// `fonts` are visited in order and each font's `supported_chars` must be
    /// sorted ascending. With `exclude_previous` set, a code point already
    /// claimed by an earlier font is not reported again for a later one.
    ///
    /// Caller owns the returned slice and frees it with the allocator given
    /// to `init`. An empty slice is returned when the range is empty
    /// (`ending_codepoint < starting_codepoint`).
    pub fn fontsForRange(
        self: *Self,
        starting_codepoint: u21,
        ending_codepoint: u21,
        fonts: []const Font,
        exclude_previous: bool,
    ) ![]RangeFont {
        var rc = std.ArrayList(RangeFont).init(self.allocator);
        defer rc.deinit();
        if (ending_codepoint < starting_codepoint) return rc.toOwnedSlice();

        // One flag per code point of the requested range, indexed by the
        // offset from `starting_codepoint`.
        const claimed: ?[]bool = if (exclude_previous) blk: {
            const flags = try self.allocator.alloc(bool, @as(usize, ending_codepoint - starting_codepoint) + 1);
            for (flags) |*flag| flag.* = false;
            break :blk flags;
        } else null;
        defer if (claimed) |flags| self.allocator.free(flags);

        for (fonts) |font| {
            const chars = font.supported_chars;
            var inx: usize = 0;

            // Advance to the start of the range
            while (inx < chars.len and chars[inx] < starting_codepoint) inx += 1;

            while (inx < chars.len and chars[inx] <= ending_codepoint) {
                const first = chars[inx];
                inx += 1;
                if (claimed) |flags| {
                    // This was already supported by an earlier font - skip it
                    if (flags[first - starting_codepoint]) continue;
                    flags[first - starting_codepoint] = true;
                }

                // `first` begins a run; extend it while the code points stay
                // consecutive, inside the range, and unclaimed.
                var last = first;
                while (inx < chars.len and
                    chars[inx] == last + 1 and
                    chars[inx] <= ending_codepoint)
                {
                    if (claimed) |flags| {
                        if (flags[chars[inx] - starting_codepoint]) break;
                        flags[chars[inx] - starting_codepoint] = true;
                    }
                    last = chars[inx];
                    inx += 1;
                }

                // The run has ended (possibly at the end of the range); `inx`
                // now points at the candidate start of the next run.
                try rc.append(.{
                    .font = font,
                    .starting_codepoint = first,
                    .ending_codepoint = last,
                });
            }
        }
        return rc.toOwnedSlice();
    }
};
// References this file's declarations through std.testing.refAllDecls.
test {
std.testing.refAllDecls(@This()); // Only catches public decls
}
// Queries the default monospace pattern and checks that "DejaVu Sans Mono"
// is present with the expected number of supported code points.
test "Get fonts" {
    // std.testing.log_level = .debug;
    log.debug("get fonts", .{});
    var query = FontQuery.init(std.testing.allocator);
    defer query.deinit();
    var font_list = try query.fontList(":regular:normal:spacing=100:slant=0");
    defer font_list.deinit();
    try std.testing.expect(font_list.list.items.len > 0);

    const dejavu: ?Font = for (font_list.list.items) |candidate| {
        log.debug("full_name: '{s}'", .{candidate.full_name});
        if (std.mem.eql(u8, "DejaVu Sans Mono", candidate.full_name)) break candidate;
    } else null;

    try std.testing.expect(dejavu != null);
    try std.testing.expectEqual(@as(usize, 3322), dejavu.?.supported_chars.len);
}
// Final test of this file: tears down the fontconfig state through deinit(),
// which panics if it is called a second time.
test {
// if (test_should_deinit) deinit();
deinit();
}

391
src/main.zig Normal file
View File

@ -0,0 +1,391 @@
const std = @import("std");
const builtin = @import("builtin");
const unicode = @import("unicode.zig");
const fontconfig = @import("fontconfig.zig");
/// Highest code point this tool considers.
const max_unicode: u21 = 0x10FFFD;
/// Comptime table of every code point from 0 through `max_unicode`
/// inclusive, so that `all_chars[i] == i`. It serves as the "supports
/// everything" character list of the catch-all font entry.
const all_chars = blk: {
    var all: [max_unicode + 1]u21 = undefined;
    // One loop iteration per element; leave generous headroom in the quota.
    @setEvalBranchQuota(@as(u32, max_unicode) * 2);
    // Iterate over the array itself so that every element, including the
    // last one, is initialised.
    for (&all, 0..) |*slot, code_point|
        slot.* = @intCast(code_point);
    break :blk all;
};
/// Program entry point. Parses the command line, then either lists the
/// unicode groups, lists the fonts matching the pattern, or prints which font
/// covers which code point ranges.
///
/// Exit codes: 0 on success (including `--help`), 2 for an invalid command
/// line, 255 for an unknown font family or an invalid `--order` value.
pub fn main() !u8 {
    // TODO: Add back in
    // defer fontconfig.deinit();
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // stdout carries only the actual program output; diagnostics go to stderr.
    const stdout_file = std.io.getStdOut().writer();
    var bw = std.io.bufferedWriter(stdout_file);
    defer bw.flush() catch @panic("could not flush stdout"); // don't forget to flush!
    const stdout = bw.writer();
    const stderr = std.io.getStdErr().writer();

    // std.os.argv is os specific
    var arg_iterator = std.process.args();
    const arg0 = arg_iterator.next().?;
    const options = parseCommandLine(&arg_iterator) catch |err| {
        if (err == error.UserRequestedHelp) {
            try usage(stdout, arg0);
            return 0;
        }
        try usage(stderr, arg0);
        return 2;
    };

    var unicode_ranges = unicode.all_ranges();
    if (options.list_groups) {
        defer unicode_ranges.reset();
        while (unicode_ranges.next()) |range| {
            try stdout.print("{s}", .{range.name});
            // Pad the names so that the code point columns line up.
            for (range.name.len..unicode_ranges.longest_name_len + 2) |_|
                try stdout.writeByte(' ');
            try stdout.print("U+{X} - U+{X}\n", .{ range.starting_codepoint, range.ending_codepoint });
        }
        return 0;
    }

    if (options.list_fonts) {
        var fq = fontconfig.FontQuery.init(allocator);
        defer fq.deinit();
        var fl = try fq.fontList(options.pattern);
        var longest_family_name = @as(usize, 0);
        var longest_style_name = @as(usize, 0);
        for (fl.list.items) |f| {
            longest_family_name = @max(f.family.len, longest_family_name);
            longest_style_name = @max(f.style.len, longest_style_name);
        }
        std.sort.insertion(fontconfig.Font, fl.list.items, {}, cmpFont);
        for (fl.list.items) |f| {
            try stdout.print("Family: {s}", .{f.family});
            for (f.family.len..longest_family_name + 1) |_|
                try stdout.writeByte(' ');
            try stdout.print("Chars: {d:5}\tStyle: {s}", .{ f.supported_chars.len, f.style });
            for (f.style.len..longest_style_name + 1) |_|
                try stdout.writeByte(' ');
            try stdout.print("\tName: {s}\n", .{
                f.full_name,
            });
        }
        return 0;
    }

    // An absent or empty --fonts value means "no preferred fonts". The empty
    // case matters because the option may default to an empty string, which
    // would otherwise be looked up as a font family named "".
    const preferred_spec: ?[]const u8 = if (options.fonts) |spec|
        (if (spec.len > 0) spec else null)
    else
        null;
    // Only a preference list makes earlier fonts shadow later ones.
    const exclude_previous = preferred_spec != null;

    const fonts: []const fontconfig.Font = blk: {
        var fq = fontconfig.FontQuery.init(allocator);
        defer fq.deinit();
        var fl = try fq.fontList(options.pattern);
        // `fl` is deliberately not deinitialised: the fonts used below borrow
        // its data, and the process exits when main returns anyway.

        // Without a preference list every matching font is reported, as the
        // usage text documents ("default is all fonts").
        const spec = preferred_spec orelse break :blk fl.list.items;

        // One entry per comma separated name plus the catch-all entry.
        var al = try std.ArrayList(fontconfig.Font).initCapacity(allocator, std.mem.count(u8, spec, ",") + 2);
        defer al.deinit();
        var si = std.mem.splitScalar(u8, spec, ',');
        while (si.next()) |font_str| {
            const font = font_blk: {
                for (fl.list.items) |f|
                    if (std.ascii.eqlIgnoreCase(f.family, font_str))
                        break :font_blk f;
                try stderr.print("Error: Font '{s}' not installed\n", .{font_str});
                return 255;
            };
            al.appendAssumeCapacity(font);
        }
        // Catch-all entry: claims every code point none of the preferred
        // fonts covers, so those ranges still show up in the output.
        al.appendAssumeCapacity(.{
            .full_name = "Unsupported",
            .family = "Unsupported by any preferred font",
            .style = "Regular",
            .supported_chars = &all_chars,
        });
        break :blk try al.toOwnedSlice();
    };

    const order_by_range: bool = if (std.ascii.eqlIgnoreCase("font", options.order))
        false
    else if (std.ascii.eqlIgnoreCase("range", options.order))
        true
    else {
        try stderr.print("Error: Order type '{s}' invalid\n", .{options.order});
        return 255;
    };

    if (exclude_previous) {
        // The last entry is the catch-all added above, not a preferred font.
        const preferred = fonts[0 .. fonts.len - 1];
        std.log.debug("{0} prefered fonts:", .{preferred.len});
        for (preferred) |f|
            std.log.debug("\t{s}", .{f.family});
    }

    if (options.groups) |group| {
        // Process every unicode group whose name appears in the comma
        // delimited --groups value.
        while (unicode_ranges.next()) |range| {
            var it = std.mem.splitScalar(u8, group, ',');
            while (it.next()) |desired_group| {
                if (std.mem.eql(u8, range.name, desired_group)) {
                    try outputRange(
                        allocator,
                        range.starting_codepoint,
                        range.ending_codepoint,
                        fonts,
                        exclude_previous,
                        order_by_range,
                        stdout,
                    );
                }
            }
        }
    } else {
        try outputRange(
            allocator,
            0,
            max_unicode,
            fonts,
            exclude_previous,
            order_by_range,
            stdout,
        );
    }
    return 0;
}
/// Sort predicate: true when `a`'s family name orders strictly before `b`'s
/// in byte-wise comparison.
fn cmpFont(context: void, a: fontconfig.Font, b: fontconfig.Font) bool {
    _ = context;
    return std.mem.lessThan(u8, a.family, b.family);
}
/// Sort predicate: orders range entries by ascending starting code point.
fn cmpRangeList(context: void, a: fontconfig.RangeFont, b: fontconfig.RangeFont) bool {
    _ = context;
    return b.starting_codepoint > a.starting_codepoint;
}
/// Formatter callback: writes "-<ending code point>" using the caller's
/// format specifier, or nothing at all when the range is a single code point.
fn formatRangeFontEndingCodepoint(
    data: fontconfig.RangeFont,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = options;
    const spans_several = data.starting_codepoint != data.ending_codepoint;
    if (spans_several) {
        try std.fmt.format(writer, "-{" ++ fmt ++ "}", .{data.ending_codepoint});
    }
}
/// Wraps `range_font` in a std.fmt.Formatter that prints through
/// formatRangeFontEndingCodepoint.
fn fmtRangeFontEndingCodepoint(range_font: fontconfig.RangeFont) std.fmt.Formatter(formatRangeFontEndingCodepoint) {
return .{
.data = range_font,
};
}
/// Prints one "U+start[-end]=family" line per font range found between
/// `starting_codepoint` and `ending_codepoint`. Lines belonging to the font
/// whose full name is "Unsupported" are prefixed with "#". With
/// `order_by_range` set, the lines are sorted by starting code point;
/// otherwise they keep the order returned by the query.
fn outputRange(
    allocator: std.mem.Allocator,
    starting_codepoint: u21,
    ending_codepoint: u21,
    fonts: []const fontconfig.Font,
    exclude_previous: bool,
    order_by_range: bool,
    writer: anytype,
) !void {
    var query = fontconfig.FontQuery.init(allocator);
    defer query.deinit();

    const matches = try query.fontsForRange(starting_codepoint, ending_codepoint, fonts, exclude_previous); // do we want hard limits around this?
    defer allocator.free(matches);
    std.log.debug("Got {d} range fonts back from query", .{matches.len});

    if (order_by_range) {
        std.sort.insertion(fontconfig.RangeFont, matches, {}, cmpRangeList);
    }

    for (matches) |match| {
        const prefix: []const u8 = if (std.mem.eql(u8, match.font.full_name, "Unsupported")) "#" else "";
        try writer.print("{s}U+{x}{x}={s}\n", .{
            prefix,
            match.starting_codepoint,
            fmtRangeFontEndingCodepoint(match),
            match.font.family,
        });
    }
}
/// Parsed command line options, filled in by parseCommandLine.
const Options = struct {
// Set by parseCommandLine when a bare "--" argument is seen.
end_of_options_signifier: ?usize = null,
// Comma delimited unicode group names (-g/--groups); null means no group filter.
groups: ?[]const u8 = null,
// Comma delimited preferred font families (-f/--fonts).
// NOTE(review): the default is an empty but non-null slice, so a
// `fonts != null` check is true even when -f was not given - confirm this is intended.
fonts: ?[]const u8 = &[_]u8{},
// -G/--list-groups
list_groups: bool = false,
// -F/--list-fonts
list_fonts: bool = false,
// Fontconfig pattern (-p/--pattern).
pattern: [:0]const u8 = ":regular:normal:spacing=100:slant=0",
// Output ordering (-o/--order).
order: [:0]const u8 = "font",
};
/// Writes the help text to `writer`, with `arg0` substituted as the program
/// name in the first line.
fn usage(writer: anytype, arg0: []const u8) !void {
try writer.print(
\\usage: {s} [OPTION]...
\\
\\Options:
\\ -p, --pattern font pattern to use (Default: :regular:normal:spacing=100:slant=0)
\\ -g, --groups group names to process, comma delimited (e.g. Thai,Lao - default is all groups)
\\ -f, --fonts prefered fonts in order, comma delimited (e.g. "DejaVu Sans Mono,Hack Nerd Font" - default is all fonts)
\\ note this will change the behavior such that ranges supported by the first font found will not
\\ be considered for use by subsequent fonts
\\ -o, --order order by (Default: font, can also order by range)
\\ -G, --list-groups list all groups and exit
\\ -F, --list-fonts list all fonts matching pattern and exit
\\ -h, --help display this help text and exit
\\
, .{arg0});
}
/// Consumes the remaining arguments from `arg_iterator` and returns the
/// resulting Options.
///
/// Returns error.UserRequestedHelp for -h/--help and error.InvalidOption for
/// an unrecognised argument (also printing a message to stderr outside of
/// tests). Errors from getArgValue are passed through.
fn parseCommandLine(arg_iterator: anytype) !Options {
// Counts the option arguments handled so far; a value consumed inside
// getArgValue is not counted separately.
var current_arg: usize = 0;
var rc = Options{};
while (arg_iterator.next()) |arg| {
// A bare "--" stops option parsing; whatever follows is left in the iterator.
if (std.mem.eql(u8, arg, "--")) {
rc.end_of_options_signifier = current_arg + 1;
return rc;
}
// Try each known option in turn; getArgValue yields null when `arg` is
// not the option being asked about.
if (try getArgValue(arg_iterator, arg, "groups", "g", .{})) |val| {
rc.groups = val;
} else if (try getArgValue(arg_iterator, arg, "pattern", "p", .{})) |val| {
rc.pattern = val;
} else if (try getArgValue(arg_iterator, arg, "fonts", "f", .{})) |val| {
rc.fonts = val;
} else if (try getArgValue(arg_iterator, arg, "order", "o", .{})) |val| {
rc.order = val;
} else if (try getArgValue(arg_iterator, arg, "list-groups", "G", .{ .is_bool = true })) |_| {
rc.list_groups = true;
} else if (try getArgValue(arg_iterator, arg, "list-fonts", "F", .{ .is_bool = true })) |_| {
rc.list_fonts = true;
} else if (try getArgValue(arg_iterator, arg, "help", "h", .{ .is_bool = true })) |_| {
return error.UserRequestedHelp;
} else {
// Stay quiet under test so expected failures do not clutter the output.
if (!builtin.is_test)
try std.io.getStdErr().writer().print("invalid option: {s}\n\n", .{arg});
return error.InvalidOption;
}
current_arg += 1;
}
return rc;
}
/// Per-option settings for `getArgValue`.
const ArgOptions = struct {
    /// The option is a flag and takes no value.
    is_bool: bool = false,
    is_required: bool = false,
};

/// Checks whether `arg` is the option called `name` (long form, "--name") or
/// `short_name` (short form, "-s") and, if so, returns its value.
///
/// Accepted spellings: "-s value", "--name value" and "--name=value". For a
/// flag (`is_bool`), the short form returns `arg` itself and the long form
/// returns an empty string. A value given as a separate argument is taken
/// from `arg_iterator`.
///
/// Returns null when `arg` is a different option. Errors: NoValueOnFlag /
/// NoValueOnName when the value argument is missing, and
/// EqualsInvalidForBooleanArgument for "--name=..." on a flag.
fn getArgValue(
    arg_iterator: anytype,
    arg: [:0]const u8,
    comptime name: ?[]const u8,
    comptime short_name: ?[]const u8,
    arg_options: ArgOptions,
) !?[:0]const u8 {
    if (short_name) |s| {
        const short_flag = "-" ++ s;
        if (std.mem.eql(u8, short_flag, arg)) {
            if (arg_options.is_bool) return arg;
            return arg_iterator.next() orelse return error.NoValueOnFlag;
        }
    }

    const n = name orelse return null;
    const long_flag = "--" ++ n;
    if (std.mem.eql(u8, long_flag, arg)) {
        if (arg_options.is_bool) return "";
        return arg_iterator.next() orelse return error.NoValueOnName;
    }

    const assignment_prefix = long_flag ++ "=";
    if (!std.mem.startsWith(u8, arg, assignment_prefix)) return null;
    if (arg_options.is_bool) return error.EqualsInvalidForBooleanArgument;
    return arg[assignment_prefix.len.. :0];
}
// Tests run in this order:
//
// 1. Main file
// - In order, from top to bottom
// 2. Referenced file(s), if any
// - In order, from top to bottom
//
// libfontconfig gets inconsistent in a hurry with a lot of init/deinit, so
// we only want to deinit once. Because we have no way of saying "go do other
// tests, then come back", we have no way of controlling deinitialization other
// than something that's not super obvious. So, we're adding this comment.
// We will allow fontconfig tests to do our deinit() call, and we shall ignore
// deinitialization here
// Intentionally empty apart from the commented-out log level switch.
test "startup" {
// std.testing.log_level = .debug;
}
// "-g <value>": short option with the value as a separate argument.
test "command line parses with short name" {
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "-g Latin-1");
    defer args.deinit();
    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
// "--groups <value>": long option with the value as a separate argument.
test "command line parses with long name no equals" {
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups Latin-1");
    defer args.deinit();
    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
// "--groups=<value>": long option with an inline value. Debug logging is
// switched on for the duration of the test and restored afterwards.
test "command line parses with long name equals" {
    const saved_log_level = std.testing.log_level;
    defer std.testing.log_level = saved_log_level;
    std.testing.log_level = .debug;

    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups=Latin-1");
    defer args.deinit();
    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
// Looks up "DejaVu Sans Mono" and checks the ranges printed for the
// "Basic Latin" group and for the start of the whole code point space.
test "Get ranges" {
    std.log.debug("get ranges", .{});
    // defer fontconfig.deinit();
    var fq = fontconfig.FontQuery.init(std.testing.allocator);
    defer fq.deinit();
    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
    defer fl.deinit();
    try std.testing.expect(fl.list.items.len > 0);

    const matched: ?fontconfig.Font = for (fl.list.items) |item| {
        std.log.debug("full_name: '{s}'", .{item.full_name});
        if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name)) break item;
    } else null;
    try std.testing.expect(matched != null);
    const arr: []const fontconfig.Font = &[_]fontconfig.Font{matched.?};

    var al = std.ArrayList(u8).init(std.testing.allocator);
    defer al.deinit();

    const range_name = "Basic Latin";
    const matched_range = try blk: {
        var unicode_ranges = unicode.all_ranges();
        while (unicode_ranges.next()) |range| {
            var it = std.mem.splitScalar(u8, range_name, ',');
            while (it.next()) |desired_range| {
                if (std.mem.eql(u8, range.name, desired_range)) {
                    break :blk range;
                }
            }
        }
        break :blk error.RangeNotFound;
    };

    const log_level = std.testing.log_level;
    std.testing.log_level = .debug;
    defer std.testing.log_level = log_level;

    // outputRange takes (allocator, start, end, fonts, exclude_previous,
    // order_by_range, writer); keep the default "font" ordering here.
    try outputRange(std.testing.allocator, matched_range.starting_codepoint, matched_range.ending_codepoint, arr, false, false, al.writer());
    try std.testing.expectEqualStrings("U+20-7e=DejaVu Sans Mono\n", al.items);

    std.log.debug("\nwhole unicode space:", .{});
    // `al` still holds the line written above, so it appears first in `expected`.
    try outputRange(std.testing.allocator, 0, max_unicode, arr, false, false, al.writer());
    const expected =
        \\U+20-7e=DejaVu Sans Mono
        \\U+20-7e=DejaVu Sans Mono
        \\U+a0-1c3=DejaVu Sans Mono
        \\U+1cd-1e3=DejaVu Sans Mono
        \\U+1e6-1f0=DejaVu Sans Mono
        \\U+1f4-1f6=DejaVu Sans Mono
    ;
    try std.testing.expectStringStartsWith(al.items, expected);
    // try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
}
// Last test of this file: references this file's declarations and imports
// unicode.zig.
test "teardown, followed by libraries" {
std.testing.refAllDecls(@This()); // Only catches public decls
_ = @import("unicode.zig");
}

209
src/ranges.txt Normal file
View File

@ -0,0 +1,209 @@
Basic Latin U+0 - U+7F
Latin-1 Supplement U+80 - U+FF
Latin Extended-A U+100 - U+17F
Latin Extended-B U+180 - U+24F
IPA Extensions U+250 - U+2AF
Spacing Modifier Letters U+2B0 - U+2FF
Combining Diacritical Marks U+300 - U+36F
Greek and Coptic U+370 - U+3FF
Cyrillic U+400 - U+4FF
Cyrillic Supplement U+500 - U+527
Armenian U+531 - U+58A
Hebrew U+591 - U+5F4
Arabic U+600 - U+6FF
Syriac U+700 - U+74F
Arabic Supplement U+750 - U+77F
Thaana U+780 - U+7B1
NKo U+7C0 - U+7FA
Samaritan U+800 - U+83E
Mandaic U+840 - U+85E
Devanagari U+900 - U+97F
Bengali U+981 - U+9FB
Gurmukhi U+A01 - U+A75
Gujarati U+A81 - U+AF1
Oriya U+B01 - U+B77
Tamil U+B82 - U+BFA
Telugu U+C01 - U+C7F
Kannada U+C82 - U+CF2
Malayalam U+D02 - U+D7F
Sinhala U+D82 - U+DF4
Thai U+E01 - U+E5B
Lao U+E81 - U+EDD
Tibetan U+F00 - U+FDA
Myanmar U+1000 - U+109F
Georgian U+10A0 - U+10FC
Hangul Jamo U+1100 - U+11FF
Ethiopic U+1200 - U+137C
Ethiopic Supplement U+1380 - U+1399
Cherokee U+13A0 - U+13F4
Unified Canadian Aboriginal Syllabics U+1400 - U+167F
Ogham U+1680 - U+169C
Runic U+16A0 - U+16F0
Tagalog U+1700 - U+1714
Hanunoo U+1720 - U+1736
Buhid U+1740 - U+1753
Tagbanwa U+1760 - U+1773
Khmer U+1780 - U+17F9
Mongolian U+1800 - U+18AA
Unified Canadian Aboriginal Syllabics Extended U+18B0 - U+18F5
Limbu U+1900 - U+194F
Tai Le U+1950 - U+1974
New Tai Lue U+1980 - U+19DF
Khmer Symbols U+19E0 - U+19FF
Buginese U+1A00 - U+1A1F
Tai Tham U+1A20 - U+1AAD
Balinese U+1B00 - U+1B7C
Sundanese U+1B80 - U+1BB9
Batak U+1BC0 - U+1BFF
Lepcha U+1C00 - U+1C4F
Ol Chiki U+1C50 - U+1C7F
Vedic Extensions U+1CD0 - U+1CF2
Phonetic Extensions U+1D00 - U+1D7F
Phonetic Extensions Supplement U+1D80 - U+1DBF
Combining Diacritical Marks Supplement U+1DC0 - U+1DFF
Latin Extended Additional U+1E00 - U+1EFF
Greek Extended U+1F00 - U+1FFE
General Punctuation U+2000 - U+206F
Superscripts and Subscripts U+2070 - U+209C
Currency Symbols U+20A0 - U+20B9
Combining Diacritical Marks for Symbols U+20D0 - U+20F0
Letterlike Symbols U+2100 - U+214F
Number Forms U+2150 - U+2189
Arrows U+2190 - U+21FF
Mathematical Operators U+2200 - U+22FF
Miscellaneous Technical U+2300 - U+23F3
Control Pictures U+2400 - U+2426
Optical Character Recognition U+2440 - U+244A
Enclosed Alphanumerics U+2460 - U+24FF
Box Drawing U+2500 - U+257F
Block Elements U+2580 - U+259F
Geometric Shapes U+25A0 - U+25FF
Miscellaneous Symbols U+2600 - U+26FF
Dingbats U+2701 - U+27BF
Miscellaneous Mathematical Symbols-A U+27C0 - U+27EF
Supplemental Arrows-A U+27F0 - U+27FF
Braille Patterns U+2800 - U+28FF
Supplemental Arrows-B U+2900 - U+297F
Miscellaneous Mathematical Symbols-B U+2980 - U+29FF
Supplemental Mathematical Operators U+2A00 - U+2AFF
Miscellaneous Symbols and Arrows U+2B00 - U+2B59
Glagolitic U+2C00 - U+2C5E
Latin Extended-C U+2C60 - U+2C7F
Coptic U+2C80 - U+2CFF
Georgian Supplement U+2D00 - U+2D25
Tifinagh U+2D30 - U+2D7F
Ethiopic Extended U+2D80 - U+2DDE
Cyrillic Extended-A U+2DE0 - U+2DFF
Supplemental Punctuation U+2E00 - U+2E31
CJK Radicals Supplement U+2E80 - U+2EF3
Kangxi Radicals U+2F00 - U+2FD5
Ideographic Description Characters U+2FF0 - U+2FFB
CJK Symbols and Punctuation U+3000 - U+303F
Hiragana U+3041 - U+309F
Katakana U+30A0 - U+30FF
Bopomofo U+3105 - U+312D
Hangul Compatibility Jamo U+3131 - U+318E
Kanbun U+3190 - U+319F
Bopomofo Extended U+31A0 - U+31BA
CJK Strokes U+31C0 - U+31E3
Katakana Phonetic Extensions U+31F0 - U+31FF
Enclosed CJK Letters and Months U+3200 - U+32FE
CJK Compatibility U+3300 - U+33FF
CJK Unified Ideographs Extension A U+3400 - U+4DB5
Yijing Hexagram Symbols U+4DC0 - U+4DFF
CJK Unified Ideographs U+4E00 - U+9FCB
Yi Syllables U+A000 - U+A48C
Yi Radicals U+A490 - U+A4C6
Lisu U+A4D0 - U+A4FF
Vai U+A500 - U+A62B
Cyrillic Extended-B U+A640 - U+A697
Bamum U+A6A0 - U+A6F7
Modifier Tone Letters U+A700 - U+A71F
Latin Extended-D U+A720 - U+A7FF
Syloti Nagri U+A800 - U+A82B
Common Indic Number Forms U+A830 - U+A839
Phags-pa U+A840 - U+A877
Saurashtra U+A880 - U+A8D9
Devanagari Extended U+A8E0 - U+A8FB
Kayah Li U+A900 - U+A92F
Rejang U+A930 - U+A95F
Hangul Jamo Extended-A U+A960 - U+A97C
Javanese U+A980 - U+A9DF
Cham U+AA00 - U+AA5F
Myanmar Extended-A U+AA60 - U+AA7B
Tai Viet U+AA80 - U+AADF
Ethiopic Extended-A U+AB01 - U+AB2E
Meetei Mayek U+ABC0 - U+ABF9
Hangul Syllables U+AC00 - U+D7A3
Hangul Jamo Extended-B U+D7B0 - U+D7FB
High Surrogates U+D800 - U+DB7F
High Private Use Surrogates U+DB80 - U+DBFF
Low Surrogates U+DC00 - U+DFFF
Private Use Area U+E000 - U+F8FF
CJK Compatibility Ideographs U+F900 - U+FAD9
Alphabetic Presentation Forms U+FB00 - U+FB4F
Arabic Presentation Forms-A U+FB50 - U+FDFD
Variation Selectors U+FE00 - U+FE0F
Vertical Forms U+FE10 - U+FE19
Combining Half Marks U+FE20 - U+FE26
CJK Compatibility Forms U+FE30 - U+FE4F
Small Form Variants U+FE50 - U+FE6B
Arabic Presentation Forms-B U+FE70 - U+FEFF
Halfwidth and Fullwidth Forms U+FF01 - U+FFEE
Specials U+FFF9 - U+FFFD
Linear B Syllabary U+10000 - U+1005D
Linear B Ideograms U+10080 - U+100FA
Aegean Numbers U+10100 - U+1013F
Ancient Greek Numbers U+10140 - U+1018A
Ancient Symbols U+10190 - U+1019B
Phaistos Disc U+101D0 - U+101FD
Lycian U+10280 - U+1029C
Carian U+102A0 - U+102D0
Old Italic U+10300 - U+10323
Gothic U+10330 - U+1034A
Ugaritic U+10380 - U+1039F
Old Persian U+103A0 - U+103D5
Deseret U+10400 - U+1044F
Shavian U+10450 - U+1047F
Osmanya U+10480 - U+104A9
Cypriot Syllabary U+10800 - U+1083F
Imperial Aramaic U+10840 - U+1085F
Phoenician U+10900 - U+1091F
Lydian U+10920 - U+1093F
Kharoshthi U+10A00 - U+10A58
Old South Arabian U+10A60 - U+10A7F
Avestan U+10B00 - U+10B3F
Inscriptional Parthian U+10B40 - U+10B5F
Inscriptional Pahlavi U+10B60 - U+10B7F
Old Turkic U+10C00 - U+10C48
Rumi Numeral Symbols U+10E60 - U+10E7E
Brahmi U+11000 - U+1106F
Kaithi U+11080 - U+110C1
Cuneiform U+12000 - U+1236E
Cuneiform Numbers and Punctuation U+12400 - U+12473
Egyptian Hieroglyphs U+13000 - U+1342E
Bamum Supplement U+16800 - U+16A38
Kana Supplement U+1B000 - U+1B001
Byzantine Musical Symbols U+1D000 - U+1D0F5
Musical Symbols U+1D100 - U+1D1DD
Ancient Greek Musical Notation U+1D200 - U+1D245
Tai Xuan Jing Symbols U+1D300 - U+1D356
Counting Rod Numerals U+1D360 - U+1D371
Mathematical Alphanumeric Symbols U+1D400 - U+1D7FF
Mahjong Tiles U+1F000 - U+1F02B
Domino Tiles U+1F030 - U+1F093
Playing Cards U+1F0A0 - U+1F0DF
Enclosed Alphanumeric Supplement U+1F100 - U+1F1FF
Enclosed Ideographic Supplement U+1F200 - U+1F251
Miscellaneous Symbols And Pictographs U+1F300 - U+1F5FF
Emoticons U+1F601 - U+1F64F
Transport And Map Symbols U+1F680 - U+1F6C5
Alchemical Symbols U+1F700 - U+1F773
CJK Unified Ideographs Extension B U+20000 - U+2A6D6
CJK Unified Ideographs Extension C U+2A700 - U+2B734
CJK Unified Ideographs Extension D U+2B740 - U+2B81D
CJK Compatibility Ideographs Supplement U+2F800 - U+2FA1D
Tags U+E0001 - U+E007F
Variation Selectors Supplement U+E0100 - U+E01EF
Supplementary Private Use Area-A U+F0000 - U+FFFFD
Supplementary Private Use Area-B U+100000 - U+10FFFD

112
src/unicode.zig Normal file
View File

@ -0,0 +1,112 @@
const std = @import("std");
// Table of Unicode groups, embedded into the binary as raw text. The parsing
// code in this file splits it on "\n" into one group per line.
// Pulled from: https://www.unicodepedia.com/groups/
const ranges = @embedFile("ranges.txt");
// Base comptime branch quota used when scanning `ranges` at compile time.
// It has to grow with the size of ranges.txt; raise it if comptime evaluation
// starts failing with a branch-quota error after the file is extended.
const eval_branch_quota_base = 18500;
// Number of "\n" bytes in the embedded file, used as the fixed length of the
// arrays inside `Ranges`.
// NOTE(review): this equals the number of groups only when every line is
// non-empty and the file ends with a newline - confirm against ranges.txt.
const range_count = blk: {
// This should be related to the number of characters in our embedded file above
@setEvalBranchQuota(eval_branch_quota_base);
break :blk std.mem.count(u8, ranges, "\n");
};
/// Table of Unicode groups stored as parallel arrays, combined with a cursor
/// (`current_inx`) so the table can be walked with `first()` / `next()`.
///
/// Entry `i` of `names`, `starting_codepoints` and `ending_codepoints`
/// together describe one group. The arrays default to `undefined` and are
/// expected to be filled in by the parser before use.
const Ranges = struct {
    names: [range_count][]const u8 = undefined,
    starting_codepoints: [range_count]u21 = undefined,
    ending_codepoints: [range_count]u21 = undefined,
    /// Index of the entry the next call to `next()` will return.
    current_inx: usize = 0,
    /// Length in bytes of the longest entry in `names`.
    longest_name_len: usize = 0,

    const Self = @This();

    /// Moves the cursor back to the first entry.
    pub fn reset(self: *Self) void {
        self.current_inx = 0;
    }

    /// Rewinds the cursor and returns the first entry, or null when the
    /// table has no entries.
    pub fn first(self: *Self) ?UnicodeGroup {
        self.current_inx = 0;
        return self.next();
    }

    /// Returns the entry under the cursor and advances it, or null once
    /// every entry has been returned.
    pub fn next(self: *Self) ?UnicodeGroup {
        const inx = self.current_inx;
        if (inx == range_count) return null;
        self.current_inx = inx + 1;
        return self.item(inx);
    }

    /// Builds the group stored at `index` without touching the cursor.
    pub fn item(self: Self, index: usize) UnicodeGroup {
        const group = UnicodeGroup{
            .name = self.names[index],
            .starting_codepoint = self.starting_codepoints[index],
            .ending_codepoint = self.ending_codepoints[index],
        };
        return group;
    }
};
// The embedded table, parsed once at compile time. Parsing gets twice the
// base branch quota; any parse error aborts compilation.
const _all_ranges = blk: {
    @setEvalBranchQuota(eval_branch_quota_base * 2);
    const parsed = parseRanges(ranges) catch @compileError("Could not parse ranges.txt");
    break :blk parsed;
};
/// Returns a fresh, independently iterable copy of the compile-time parsed
/// table, with its cursor positioned at the first entry.
pub fn all_ranges() Ranges {
    var table: Ranges = _all_ranges;
    // Every caller starts iterating from the beginning.
    table.current_inx = 0;
    return table;
}
/// One named span of Unicode codepoints, as described by a single line of
/// ranges.txt.
pub const UnicodeGroup = struct {
/// Group name; the parser strips trailing spaces from it.
name: []const u8,
/// Codepoint parsed from the first `U+<hex>` value of the line.
starting_codepoint: u21,
/// Codepoint parsed from the second `U+<hex>` value of the line
/// (presumably inclusive - confirm with callers).
ending_codepoint: u21,
};
/// Parses the newline-separated group table in `text` into a `Ranges`.
///
/// Empty lines are skipped, which also covers the empty segment that follows
/// a trailing newline. Every other line is handed to `parseGroup`.
///
/// Errors:
/// - anything `parseGroup` returns for a malformed line;
/// - `error.TooManyRanges` when `text` holds more non-empty lines than the
///   fixed capacity `range_count`;
/// - `error.RangeCountMismatch` when it holds fewer, which would otherwise
///   leave trailing entries `undefined` while iteration still yields them.
fn parseRanges(text: []const u8) !Ranges {
    var rc = Ranges{};
    var lines = std.mem.splitSequence(u8, text, "\n");
    var inx: usize = 0;
    while (lines.next()) |group| {
        if (group.len == 0) continue;
        // Fail with a named error instead of indexing past the arrays.
        if (inx >= range_count) return error.TooManyRanges;
        const uc = try parseGroup(group);
        rc.names[inx] = uc.name;
        rc.starting_codepoints[inx] = uc.starting_codepoint;
        rc.ending_codepoints[inx] = uc.ending_codepoint;
        rc.longest_name_len = @max(rc.longest_name_len, uc.name.len);
        inx += 1;
    }
    // The arrays are sized by a newline count; make sure every slot was
    // actually filled before handing the table out.
    if (inx != range_count) return error.RangeCountMismatch;
    return rc;
}
/// Parses one table line of the form `<name>\tU+<hex> - U+<hex>` into a
/// `UnicodeGroup`.
///
/// The name is everything before the first tab, with trailing spaces removed.
/// The returned `name` slice points into `group_text`; nothing is copied.
///
/// Errors:
/// - `error.NoRangeSpecifiedInGroup` when the line has no tab-separated range;
/// - `error.NoEndingCodepointInGroup` when the range has no " - " separator;
/// - `error.InvalidCodepointPrefix` when either bound does not start with "U+";
/// - the `std.fmt.parseUnsigned` errors when the hex digits are missing,
///   invalid, or do not fit in a `u21`.
fn parseGroup(group_text: []const u8) !UnicodeGroup {
    // Basic Latin U+0 - U+7F
    var fields = std.mem.splitSequence(u8, group_text, "\t");
    const name = std.mem.trimRight(u8, fields.first(), " ");
    const range_text = fields.next() orelse {
        // NOTE(review): this function is also evaluated at compile time via
        // parseRanges; confirm std.log.err is usable on that path.
        std.log.err("failed parsing on group '{s}'", .{group_text});
        return error.NoRangeSpecifiedInGroup;
    };
    var bounds = std.mem.splitSequence(u8, range_text, " - ");
    const start_text = bounds.first();
    const end_text = bounds.next() orelse return error.NoEndingCodepointInGroup;

    // Validate the "U+" prefix before slicing it off. Without this, a bound
    // shorter than two bytes indexes out of range and any two leading
    // characters are silently accepted. Plain comparisons are used on purpose
    // so the check adds no calls or loops to the comptime branch budget.
    if (start_text.len < 2 or start_text[0] != 'U' or start_text[1] != '+')
        return error.InvalidCodepointPrefix;
    if (end_text.len < 2 or end_text[0] != 'U' or end_text[1] != '+')
        return error.InvalidCodepointPrefix;

    return UnicodeGroup{
        .name = name,
        .starting_codepoint = try std.fmt.parseUnsigned(u21, start_text[2..], 16),
        .ending_codepoint = try std.fmt.parseUnsigned(u21, end_text[2..], 16),
    };
}
// Spot-checks the compile-time parsed table: one entry by direct index and
// the first two entries through the cursor API.
test "check ranges" {
    const testing = std.testing;
    var table = all_ranges();

    // Entry 8 should be:
    // Cyrillic U+400 - U+4FF
    try testing.expectEqualStrings("Cyrillic", table.names[8]);
    try testing.expectEqual(@as(u21, 0x400), table.starting_codepoints[8]);
    try testing.expectEqual(@as(u21, 0x4ff), table.ending_codepoints[8]);

    // first() rewinds and yields entry 0.
    const basic_latin = table.first().?;
    try testing.expectEqualStrings("Basic Latin", basic_latin.name);
    try testing.expectEqual(@as(u21, 0x0), basic_latin.starting_codepoint);
    try testing.expectEqual(@as(u21, 0x7f), basic_latin.ending_codepoint);

    // next() continues with entry 1.
    const latin_1 = table.next().?;
    try testing.expectEqualStrings("Latin-1 Supplement", latin_1.name);
    try testing.expectEqual(@as(u21, 0x80), latin_1.starting_codepoint);
    try testing.expectEqual(@as(u21, 0xff), latin_1.ending_codepoint);
}

4
zig-via-docker Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Runs the "fontfinder" container image with podman, forwarding all arguments
# to the container. The directory containing this script is mounted at /app
# (and used as the working directory); the host's ~/.cache is mounted at
# /root/.cache.

# Resolve the physical directory of this script. CDPATH is cleared so `cd`
# cannot print a directory name into the captured value, and `&&` keeps a
# failed `cd` from silently yielding the caller's working directory instead.
scriptpath="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P)" || {
    echo "$0: cannot determine script directory" >&2
    exit 1
}
# Alternative image:
# podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder-alpine "$@"
# exec replaces the shell so podman's exit status and signals pass straight through.
exec podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder "$@"