initial source code
This commit is contained in:
parent
c731c3a015
commit
5feee8c0c3
14
Dockerfile
Normal file
14
Dockerfile
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
FROM debian:bullseye

# Build image for fontfinder: Debian plus the fontconfig development files and
# a pinned Zig toolchain unpacked under /usr/local.
#
# curl runs with -fsSL so that an HTTP error fails the step (instead of piping
# an error page into tar), redirects are followed, and progress noise is
# suppressed while real errors are still reported.
# curl and xz-utils are only needed to fetch and unpack the toolchain, so they
# are removed again in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libfontconfig-dev \
        ca-certificates \
        curl \
        xz-utils \
    && curl -fsSL https://mirror.bazel.build/ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.3886+0c1bfe271.tar.xz | tar -C /usr/local/ -xJ \
    && apt-get -y remove curl xz-utils \
    && ln -s /usr/local/zig*/zig /usr/local/bin \
    && rm -rf /var/lib/apt/lists/*

ENTRYPOINT ["/usr/local/bin/zig"]
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 Emil Lerch
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
86
build.zig
Normal file
86
build.zig
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Describes the build graph for the `fontfinder` executable.
///
/// Nothing is compiled here directly: the function only registers steps
/// (install, `run`, `test`) that the build runner executes on demand.
pub fn build(b: *std.Build) void {
    // Target and optimization mode are left entirely to the person invoking
    // `zig build` (native target unless overridden).
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // --- the application itself -------------------------------------------
    const app = b.addExecutable(.{
        .name = "fontfinder",
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    configure(app);

    // Installed by the default `install` step.
    b.installArtifact(app);

    // --- `zig build run -- args...` ----------------------------------------
    const run_app = b.addRunArtifact(app);
    // Run from the install directory rather than from the cache directory, so
    // any other installed files are present where the program expects them.
    run_app.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| {
        run_app.addArgs(forwarded);
    }
    b.step("run", "Run the app").dependOn(&run_app.step);

    // --- `zig build test` --------------------------------------------------
    // addTest only builds the test executable; the run artifact executes it.
    const tests = b.addTest(.{
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    configure(tests);

    const run_tests = b.addRunArtifact(tests);
    b.step("test", "Run unit tests").dependOn(&run_tests.step);
}
|
||||||
|
|
||||||
|
/// Applies the C-related settings shared by the executable and the test
/// binary: libc, the system fontconfig library, and the C shim source.
///
/// `object` is any compile step exposing the link/include methods used below.
/// Fontconfig must be installed on the build host; the Dockerfile provides an
/// environment where it is.
fn configure(object: anytype) void {
    const shim_flags = [_][]const u8{"-std=c99"};

    // Static linkage is not enabled; it would additionally require linking
    // fontconfig's own dependency, expat.
    object.linkLibC();

    object.addSystemIncludePath("/usr/include");
    object.linkSystemLibrary("fontconfig");
    object.addLibraryPath("/usr/lib");

    object.addCSourceFile("src/fontconfig.c", &shim_flags);
}
|
54
src/fontconfig.c
Normal file
54
src/fontconfig.c
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <fontconfig/fontconfig.h>
|
||||||
|
|
||||||
|
/* #<{(| FcChar32 FcCharSetCount (const FcCharSet *a); |)}># */
|
||||||
|
/* void printCharacters(FcPattern* fontPattern) { */
|
||||||
|
/* FcCharSet* charset; */
|
||||||
|
/* if (FcPatternGetCharSet(fontPattern, FC_CHARSET, 0, &charset) == FcResultMatch) { */
|
||||||
|
/* FcChar32 ucs4; */
|
||||||
|
/* FcCharSetIter iter; */
|
||||||
|
/* FcCharSetIterInit(charset, &iter); */
|
||||||
|
/* printf("Supported characters:\n"); */
|
||||||
|
/* while (FcCharSetIterNext(&iter, &ucs4)) { */
|
||||||
|
/* printf("%lc ", (wint_t)ucs4); */
|
||||||
|
/* } */
|
||||||
|
/* printf("\n"); */
|
||||||
|
/* FcCharSetDestroy(charset); */
|
||||||
|
/* } */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
const FcChar32 MAX_UNICODE = 0x10FFFD;
|
||||||
|
|
||||||
|
/*
 * Releases a code point array with free(). Passing NULL is harmless.
 * Exposed as a function so callers on the Zig side release the memory with
 * the same C allocator that produced it.
 */
void freeAllCharacters(unsigned int *chars) {
    if (chars == NULL) {
        return;
    }
    free(chars);
}
|
||||||
|
|
||||||
|
/*
 * Collects every code point covered by a font pattern into a freshly
 * malloc'ed array, in ascending order.
 *
 *   fontPattern  an FcPattern* (passed as void* for the FFI boundary)
 *   chars        out: receives the array on success, NULL on any failure;
 *                release it with freeAllCharacters()
 *
 * Returns the number of code points written (>= 0), or
 *   -1  the pattern carries no FC_CHARSET entry
 *   -2  the charset reports more members than were found in the Unicode
 *       code space
 *   -3  out of memory
 */
int allCharacters(void* fontPattern, FcChar32 ** chars) {
    /* Scan the whole Unicode code space so fonts covering the final
     * code points are not rejected with -2. */
    const FcChar32 last_code_point = 0x10FFFF;
    FcPattern* pat = (FcPattern*) fontPattern;
    FcCharSet* charset;

    *chars = NULL;

    /* The charset returned here is owned by the pattern (it is not a copy),
     * so it must NOT be destroyed by this function. */
    if (FcPatternGetCharSet(pat, FC_CHARSET, 0, &charset) != FcResultMatch) {
        return -1;
    }

    FcChar32 count = FcCharSetCount(charset);

    /* Always allocate at least one element so a successful call hands back a
     * non-NULL pointer even for an empty charset. */
    size_t capacity = count > 0 ? (size_t)count : 1;
    FcChar32* out = (FcChar32*)malloc(capacity * sizeof(FcChar32));
    if (out == NULL) {
        return -3;
    }

    size_t found = 0;
    for (FcChar32 cp = 0; found < count && cp <= last_code_point; cp++) {
        if (FcCharSetHasChar(charset, cp) == FcTrue) {
            out[found++] = cp;
        }
    }

    if (found < count) {
        free(out);
        return -2;
    }

    *chars = out;
    return (int)found;
}
|
284
src/fontconfig.zig
Normal file
284
src/fontconfig.zig
Normal file
|
@ -0,0 +1,284 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const unicode = @import("unicode.zig");
|
||||||
|
const c = @cImport({
|
||||||
|
@cInclude("fontconfig/fontconfig.h");
|
||||||
|
});
|
||||||
|
const log = std.log.scoped(.fontconfig);
|
||||||
|
|
||||||
|
extern fn allCharacters(p: ?*const c.FcPattern, chars: *[*]u32) c_int;
|
||||||
|
extern fn freeAllCharacters(chars: *[*]usize) void;
|
||||||
|
|
||||||
|
/// One contiguous run of code points attributed to a single font, as
/// produced by `FontQuery.fontsForRange`.
pub const RangeFont = struct {
    /// First code point of the run.
    starting_codepoint: u21,
    /// Last code point of the run (it is part of the run).
    ending_codepoint: u21,
    /// The font the run was found in.
    font: Font,
};
|
||||||
|
|
||||||
|
/// Description of a single font as reported by fontconfig.
///
/// The three name slices are views into fontconfig-owned data and are never
/// freed here. `supported_chars` is the code point array; `deinit` releases
/// it with the C allocator.
pub const Font = struct {
    full_name: []const u8,
    family: []const u8,
    style: []const u8,
    supported_chars: []const u21,

    const Self = @This();

    /// Frees `supported_chars` through `freeAllCharacters`.
    ///
    /// Only valid when the slice was produced by `allCharacters` (i.e. it is
    /// malloc'ed memory); do not call this on fonts whose `supported_chars`
    /// points at static or Zig-allocated storage. Afterwards the slice is
    /// reset to empty so a stale pointer cannot be used or freed twice.
    pub fn deinit(self: *Self) void {
        // The extern declaration types its parameter as `*[*]usize` although
        // the C function simply takes the array pointer, hence the casts.
        freeAllCharacters(@ptrCast(@alignCast(@constCast(self.supported_chars.ptr))));
        self.supported_chars = &.{};
    }
};
|
||||||
|
|
||||||
|
/// Result of a fontconfig query.
///
/// Besides the list of `Font` entries it takes ownership of the fontconfig
/// pattern and font set the entries were read from: the name slices inside
/// each `Font` point into that data, so it must stay alive until `deinit`.
pub const FontList = struct {
    list: std.ArrayList(Font),
    allocator: std.mem.Allocator,
    pattern: *c.FcPattern,
    fontset: *c.FcFontSet,

    const Self = @This();

    /// Creates an empty list with room for `num` fonts and takes ownership of
    /// `pattern` and `fontset`. On allocation failure nothing is taken over;
    /// the caller still owns both.
    pub fn initCapacity(allocator: std.mem.Allocator, num: usize, pattern: *c.FcPattern, fontset: *c.FcFontSet) std.mem.Allocator.Error!Self {
        return Self{
            .allocator = allocator,
            .list = try std.ArrayList(Font).initCapacity(allocator, num),
            .pattern = pattern,
            .fontset = fontset,
        };
    }

    /// Releases the per-font code point arrays, the list storage, and the
    /// fontconfig pattern and font set. Every `Font` copied out of the list
    /// is invalid afterwards.
    pub fn deinit(self: *Self) void {
        // Each entry's code point array was malloc'ed on the C side and is
        // not tracked by `allocator`, so it has to be freed explicitly.
        // (The extern declaration types its parameter as `*[*]usize`, hence
        // the casts.)
        for (self.list.items) |font|
            freeAllCharacters(@ptrCast(@alignCast(@constCast(font.supported_chars.ptr))));
        self.list.deinit();
        c.FcFontSetDestroy(self.fontset);
        c.FcPatternDestroy(self.pattern);
    }

    /// Appends a font without growing the list; capacity must have been
    /// reserved through `initCapacity`.
    ///
    /// The name slices are borrowed. Ownership of `supported_chars` moves to
    /// the list: it must be an array obtained from `allCharacters`, because
    /// `deinit` frees it with the C allocator.
    pub fn addFontAssumeCapacity(
        self: *Self,
        full_name: []const u8,
        family: []const u8,
        style: []const u8,
        supported_chars: []const u21,
    ) !void {
        self.list.appendAssumeCapacity(.{
            .full_name = full_name,
            .family = family,
            .style = style,
            .supported_chars = supported_chars,
        });
    }
};
|
||||||
|
|
||||||
|
var fc_config: ?*c.FcConfig = null;
|
||||||
|
var deinited = false;
|
||||||
|
// pub var test_should_deinit = true;
|
||||||
|
/// Shuts down the underlying fontconfig C library.
///
/// Call this at most once, and only after all font processing is finished:
/// a second call panics, and `FontQuery.fontList` panics if used afterwards.
pub fn deinit() void {
    // Background:
    // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r2370.html warns
    // that destroying the configuration returned by FcConfigGetCurrent leaves
    // the library in an indeterminate state. In practice a load/destroy cycle
    // cannot be repeated either:
    //
    //   1. c.FcInitLoadConfigAndFonts();
    //   2. c.FcConfigDestroy();
    //   3. c.FcInitLoadConfigAndFonts();
    //   4. c.FcConfigDestroy(); // Seg fault here
    //
    // hence the one-shot guard below.
    if (deinited) @panic("Cannot deinitialize this library more than once");
    deinited = true;

    // Nothing was ever loaded: there is nothing to destroy.
    const conf = fc_config orelse return;
    fc_config = null;

    log.debug("destroying config: do not use library or call me again", .{});
    c.FcConfigDestroy(conf);
}
|
||||||
|
|
||||||
|
/// Entry point for font queries.
///
/// The fontconfig configuration itself lives in module-level state and is
/// loaded lazily by `fontList`; a `FontQuery` only carries the allocator used
/// for the Zig-side results.
pub const FontQuery = struct {
    allocator: std.mem.Allocator,

    const Self = @This();

    pub fn init(allocator: std.mem.Allocator) Self {
        return Self{ .allocator = allocator };
    }

    /// Currently a no-op; present so callers can pair it with `init`.
    pub fn deinit(self: *Self) void {
        _ = self;
    }

    /// Lists the installed fonts matching a fontconfig name pattern
    /// (for example ":regular:normal:spacing=100:slant=0").
    ///
    /// The returned `FontList` owns the underlying fontconfig pattern and
    /// font set; release it with `FontList.deinit`.
    ///
    /// Errors:
    /// - `FontConfigInitLoadFailure`: the fontconfig configuration could not
    ///   be loaded.
    /// - `FontConfigCouldNotParsePattern`: `pattern` was rejected.
    /// - `FontConfigCouldNotListFonts`: fontconfig returned no font set.
    /// - `FontConfigCouldNotGetCharSet`, `FontConfigHasNoFamily`,
    ///   `FontConfigHasNoStyle`: a matched font lacks the respective data.
    /// - `OutOfMemory`.
    pub fn fontList(self: *Self, pattern: [:0]const u8) !FontList {
        if (fc_config == null and deinited) @panic("fontconfig C library is in an inconsistent state - should not use");
        if (fc_config == null) fc_config = c.FcInitLoadConfigAndFonts();
        const config = fc_config orelse return error.FontConfigInitLoadFailure;

        // The pattern and the font set must outlive this call because the
        // returned fonts reference strings stored inside them. Until the
        // FontList below has taken ownership they are cleaned up by hand on
        // every failure path.
        const pat = c.FcNameParse(pattern);
        if (pat == null) return error.FontConfigCouldNotParsePattern;

        const os = c.FcObjectSetBuild(c.FC_FAMILY, c.FC_STYLE, c.FC_LANG, c.FC_FULLNAME, c.FC_CHARSET, @as(?*u8, null)); // *FcObjectSet
        defer if (os != null) c.FcObjectSetDestroy(os);

        const fs = c.FcFontList(config, pat, os); // FcFontSet
        if (fs == null) {
            c.FcPatternDestroy(pat);
            return error.FontConfigCouldNotListFonts;
        }

        // Use the following only when needed. NameUnparse allocates memory
        // log.debug("Total matching fonts: {d}. Pattern: {s}\n", .{ fs.*.nfont, c.FcNameUnparse(pat) });
        log.debug("Total matching fonts: {d}", .{fs.*.nfont});

        const font_count: usize = @intCast(fs.*.nfont);
        var rc = FontList.initCapacity(self.allocator, font_count, pat.?, fs.?) catch |err| {
            c.FcFontSetDestroy(fs);
            c.FcPatternDestroy(pat);
            return err;
        };
        // From here on `rc` owns the pattern and the font set.
        errdefer rc.deinit();

        for (0..font_count) |i| {
            const font = fs.*.fonts[i].?; // *FcPattern
            var fullname: [*:0]c.FcChar8 = undefined;
            var style: [*:0]c.FcChar8 = undefined;
            var family: [*:0]c.FcChar8 = undefined;

            var charset: [*]u21 = undefined;
            const len = allCharacters(font, @ptrCast(&charset));
            if (len < 0) return error.FontConfigCouldNotGetCharSet;
            // Until the array is stored in `rc` it belongs to this iteration;
            // free it if one of the lookups below fails. (The extern
            // declaration types its parameter as `*[*]usize`, hence the
            // casts.)
            errdefer freeAllCharacters(@ptrCast(@alignCast(charset)));
            const char_count: usize = @intCast(len);

            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r600.html
            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r570.html
            // The strings returned are not copies: they refer to data stored
            // within the pattern and must not be freed by the application.
            if (c.FcPatternGetString(font, c.FC_FULLNAME, 0, @as([*c][*c]c.FcChar8, @ptrCast(&fullname))) != c.FcResultMatch)
                fullname = @constCast(@ptrCast("".ptr)); // a missing full name is tolerated

            if (c.FcPatternGetString(font, c.FC_FAMILY, 0, @as([*c][*c]c.FcChar8, @ptrCast(&family))) != c.FcResultMatch)
                return error.FontConfigHasNoFamily;
            if (c.FcPatternGetString(font, c.FC_STYLE, 0, @as([*c][*c]c.FcChar8, @ptrCast(&style))) != c.FcResultMatch)
                return error.FontConfigHasNoStyle;

            log.debug(
                "Chars: {d:5.0} Family '{s}' Style '{s}' Full Name: {s}",
                .{ char_count, family, style, fullname },
            );

            // Must stay the last fallible step of the iteration: once the
            // array is in `rc`, `rc` is responsible for it.
            try rc.addFontAssumeCapacity(
                fullname[0..std.mem.len(fullname)],
                family[0..std.mem.len(family)],
                style[0..std.mem.len(style)],
                charset[0..char_count],
            );
        }
        return rc;
    }

    /// Splits the code points of each font that fall inside
    /// `starting_codepoint ..= ending_codepoint` (both bounds inclusive) into
    /// contiguous runs.
    ///
    /// `fonts` is processed in order and each font's `supported_chars` is
    /// expected to be sorted ascending. With `exclude_previous` set, a code
    /// point already attributed to an earlier font is skipped for later
    /// fonts, so every code point is reported at most once.
    ///
    /// Returns a slice allocated with `self.allocator`; the caller frees it.
    /// An inverted range yields an empty slice.
    pub fn fontsForRange(
        self: *Self,
        starting_codepoint: u21,
        ending_codepoint: u21,
        fonts: []const Font,
        exclude_previous: bool,
    ) ![]RangeFont {
        var rc = std.ArrayList(RangeFont).init(self.allocator);
        defer rc.deinit();

        if (ending_codepoint < starting_codepoint) return rc.toOwnedSlice();

        // One flag per code point in the requested range. The bitmap is
        // indexed relative to `starting_codepoint`, never by the absolute
        // code point value.
        const previously_supported: ?[]bool = if (exclude_previous) blk: {
            const span = @as(usize, ending_codepoint - starting_codepoint) + 1;
            const seen = try self.allocator.alloc(bool, span);
            @memset(seen, false);
            break :blk seen;
        } else null;
        defer if (previously_supported) |p| self.allocator.free(p);

        for (fonts) |font| {
            const chars = font.supported_chars;
            var inx: usize = 0;

            // Advance to the start of the range
            while (inx < chars.len and chars[inx] < starting_codepoint)
                inx += 1;

            while (inx < chars.len and chars[inx] <= ending_codepoint) {
                if (previously_supported) |p| {
                    if (p[chars[inx] - starting_codepoint]) {
                        inx += 1;
                        continue; // This was already supported - continue
                    }
                }

                // We found the beginning of a run
                const current_start = chars[inx];
                var current_end = current_start;
                if (previously_supported) |p|
                    p[current_start - starting_codepoint] = true;
                inx += 1;

                // Extend the run while the next supported code point is
                // adjacent, still inside the range, and not already claimed.
                while (inx < chars.len and
                    chars[inx] == current_end + 1 and
                    chars[inx] <= ending_codepoint)
                {
                    if (previously_supported) |p| {
                        const slot = chars[inx] - starting_codepoint;
                        if (p[slot]) break;
                        p[slot] = true;
                    }
                    inx += 1;
                    current_end += 1;
                }

                // `inx` now points at the first code point after the run
                // (or past the end of the font's data).
                try rc.append(.{
                    .font = font,
                    .starting_codepoint = current_start,
                    .ending_codepoint = current_end,
                });
            }
        }
        return rc.toOwnedSlice();
    }
};
|
||||||
|
|
||||||
|
// Reference the declarations of this file through std.testing.refAllDecls.
// As the inline note says, only public declarations are covered.
test {
|
||||||
|
std.testing.refAllDecls(@This()); // Only catches public decls
|
||||||
|
}
|
||||||
|
// Queries the monospace/regular pattern and checks that "DejaVu Sans Mono"
// is among the results with the expected number of code points. Depends on
// the fonts installed on the machine running the tests.
test "Get fonts" {
    // std.testing.log_level = .debug;
    log.debug("get fonts", .{});

    var query = FontQuery.init(std.testing.allocator);
    defer query.deinit();
    var font_list = try query.fontList(":regular:normal:spacing=100:slant=0");
    defer font_list.deinit();

    try std.testing.expect(font_list.list.items.len > 0);

    var dejavu: ?Font = null;
    for (font_list.list.items) |candidate| {
        log.debug("full_name: '{s}'", .{candidate.full_name});
        if (std.mem.eql(u8, "DejaVu Sans Mono", candidate.full_name)) {
            dejavu = candidate;
            break;
        }
    }

    try std.testing.expect(dejavu != null);
    try std.testing.expectEqual(@as(usize, 3322), dejavu.?.supported_chars.len);
}
|
||||||
|
// Tears the fontconfig library down by calling the module-level deinit().
// deinit() panics when called twice, so no other test may call it.
test {
|
||||||
|
// if (test_should_deinit) deinit();
|
||||||
|
deinit();
|
||||||
|
}
|
391
src/main.zig
Normal file
391
src/main.zig
Normal file
|
@ -0,0 +1,391 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const builtin = @import("builtin");
|
||||||
|
const unicode = @import("unicode.zig");
|
||||||
|
const fontconfig = @import("fontconfig.zig");
|
||||||
|
|
||||||
|
const max_unicode: u21 = 0x10FFFD;
|
||||||
|
/// Every code point from 0 up to, but not including, `max_unicode`, in
/// ascending order; built at compile time. Used as the "supported" set of the
/// synthetic catch-all font.
///
/// The array holds exactly the elements that are initialised, so no slot is
/// left undefined.
const all_chars = blk: {
    var all: [max_unicode]u21 = undefined;
    // One backwards branch per element, plus generous headroom.
    @setEvalBranchQuota(2 * all.len + 1000);
    for (&all, 0..) |*code_point, i|
        code_point.* = i;
    break :blk all;
};
|
||||||
|
/// Program entry point.
///
/// Parses the command line, then does exactly one of:
/// - `--list-groups`: print the known Unicode groups and exit;
/// - `--list-fonts`: print the fonts matching the pattern and exit;
/// - otherwise: print, for the selected groups (or all code points), which
///   font covers which code point ranges.
///
/// Returns the process exit code: 0 on success, 2 for a command line error,
/// 255 for an unknown font or an invalid order type.
pub fn main() !u8 {
    // TODO: Add back in
    // defer fontconfig.deinit();
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // stdout carries the program's actual output and is buffered; error
    // messages go to stderr unbuffered.
    const stdout_file = std.io.getStdOut().writer();
    var bw = std.io.bufferedWriter(stdout_file);
    defer bw.flush() catch @panic("could not flush stdout"); // don't forget to flush!
    const stdout = bw.writer();
    const stderr = std.io.getStdErr().writer();

    // std.os.argv is os specific
    var arg_iterator = std.process.args();
    // argv[0] may legitimately be missing; fall back to the program name.
    const arg0 = arg_iterator.next() orelse "fontfinder";
    const options = parseCommandLine(&arg_iterator) catch |err| {
        if (err == error.UserRequestedHelp) {
            try usage(stdout, arg0);
            return 0;
        }
        try usage(stderr, arg0);
        return 2;
    };

    var unicode_ranges = unicode.all_ranges();
    if (options.list_groups) {
        defer unicode_ranges.reset();
        while (unicode_ranges.next()) |range| {
            try stdout.print("{s}", .{range.name});
            // Pad so the code point columns line up.
            for (range.name.len..unicode_ranges.longest_name_len + 2) |_|
                try stdout.writeByte(' ');
            try stdout.print("U+{X} - U+{X}\n", .{ range.starting_codepoint, range.ending_codepoint });
        }
        return 0;
    }

    if (options.list_fonts) {
        var fq = fontconfig.FontQuery.init(allocator);
        defer fq.deinit();
        const fl = try fq.fontList(options.pattern);
        var longest_family_name = @as(usize, 0);
        var longest_style_name = @as(usize, 0);
        for (fl.list.items) |f| {
            longest_family_name = @max(f.family.len, longest_family_name);
            longest_style_name = @max(f.style.len, longest_style_name);
        }

        std.sort.insertion(fontconfig.Font, fl.list.items, {}, cmpFont);
        for (fl.list.items) |f| {
            try stdout.print("Family: {s}", .{f.family});
            for (f.family.len..longest_family_name + 1) |_|
                try stdout.writeByte(' ');
            try stdout.print("Chars: {d:5}\tStyle: {s}", .{ f.supported_chars.len, f.style });
            for (f.style.len..longest_style_name + 1) |_|
                try stdout.writeByte(' ');
            try stdout.print("\tName: {s}\n", .{
                f.full_name,
            });
        }
        return 0;
    }

    // An absent or empty --fonts value means "no preference": every font
    // matching the pattern is reported, and no code point is withheld from
    // later fonts.
    const preferred_names: ?[]const u8 = if (options.fonts) |names|
        (if (names.len > 0) names else null)
    else
        null;
    const exclude_previous = preferred_names != null;

    const fonts: []const fontconfig.Font = blk: {
        var fq = fontconfig.FontQuery.init(allocator);
        defer fq.deinit();
        // Deliberately never deinit'ed: the Font values handed on below
        // borrow their strings from this list, and the process exits soon
        // after anyway.
        const fl = try fq.fontList(options.pattern);

        const names = preferred_names orelse break :blk fl.list.items;

        // One slot per requested family plus one for the catch-all entry.
        var al = try std.ArrayList(fontconfig.Font).initCapacity(allocator, std.mem.count(u8, names, ",") + 2);
        defer al.deinit();
        var si = std.mem.splitScalar(u8, names, ',');
        while (si.next()) |font_str| {
            const font = font_blk: {
                for (fl.list.items) |f|
                    if (std.ascii.eqlIgnoreCase(f.family, font_str))
                        break :font_blk f;
                try stderr.print("Error: Font '{s}' not installed", .{font_str});
                return 255;
            };

            al.appendAssumeCapacity(font);
        }
        // Catch-all pseudo font: claims whatever no preferred font covered.
        // Only the first `max_unicode` entries of the table are used.
        al.appendAssumeCapacity(.{
            .full_name = "Unsupported",
            .family = "Unsupported by any preferred font",
            .style = "Regular",
            .supported_chars = all_chars[0..max_unicode],
        });
        break :blk try al.toOwnedSlice();
    };

    const order_by_range = if (std.ascii.eqlIgnoreCase("font", options.order))
        false
    else if (std.ascii.eqlIgnoreCase("range", options.order))
        true
    else {
        try stderr.print("Error: Order type '{s}' invalid", .{options.order});
        return 255;
    };

    // The catch-all entry (present only with an explicit preference list) is
    // not a preferred font; the subtraction cannot underflow because that
    // entry is always appended in this case.
    const preferred_count = if (exclude_previous) fonts.len - 1 else fonts.len;
    std.log.debug("{0} prefered fonts:", .{preferred_count});
    for (fonts[0..preferred_count]) |f|
        std.log.debug("\t{s}", .{f.family});

    if (options.groups) |group| {
        while (unicode_ranges.next()) |range| {
            var it = std.mem.splitScalar(u8, group, ',');
            while (it.next()) |desired_group| {
                if (std.mem.eql(u8, range.name, desired_group)) {
                    try outputRange(
                        allocator,
                        range.starting_codepoint,
                        range.ending_codepoint,
                        fonts,
                        exclude_previous,
                        order_by_range,
                        stdout,
                    );
                }
            }
        }
    } else {
        try outputRange(
            allocator,
            0,
            max_unicode,
            fonts,
            exclude_previous,
            order_by_range,
            stdout,
        );
    }

    return 0;
}
|
||||||
|
/// Sort predicate: orders fonts by family name, byte-wise ascending.
fn cmpFont(context: void, a: fontconfig.Font, b: fontconfig.Font) bool {
    _ = context;
    return std.mem.lessThan(u8, a.family, b.family);
}
|
||||||
|
/// Sort predicate: orders range entries by their first code point, ascending.
fn cmpRangeList(context: void, a: fontconfig.RangeFont, b: fontconfig.RangeFont) bool {
    _ = context;
    return b.starting_codepoint > a.starting_codepoint;
}
|
||||||
|
/// Format callback that prints the tail of a range: "-<end>" using the
/// caller's format specifier, or nothing at all when the range consists of a
/// single code point.
fn formatRangeFontEndingCodepoint(
    data: fontconfig.RangeFont,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = options;
    if (data.starting_codepoint != data.ending_codepoint) {
        const tail_spec = "-{" ++ fmt ++ "}";
        try std.fmt.format(writer, tail_spec, .{data.ending_codepoint});
    }
}
|
||||||
|
/// Wraps a range entry so that printing it emits only the optional
/// "-<end>" suffix (see `formatRangeFontEndingCodepoint`).
fn fmtRangeFontEndingCodepoint(range_font: fontconfig.RangeFont) std.fmt.Formatter(formatRangeFontEndingCodepoint) {
    return std.fmt.Formatter(formatRangeFontEndingCodepoint){ .data = range_font };
}
|
||||||
|
/// Computes the font coverage for one code point range and writes one line
/// per contiguous run to `writer`, in the form `U+<start>[-<end>]=<family>`.
/// Runs attributed to the synthetic "Unsupported" font are prefixed with `#`.
///
/// - `fonts`, `exclude_previous`: forwarded to `FontQuery.fontsForRange`.
/// - `order_by_range`: sort the runs by starting code point instead of
///   keeping them grouped per font.
fn outputRange(
    allocator: std.mem.Allocator,
    starting_codepoint: u21,
    ending_codepoint: u21,
    fonts: []const fontconfig.Font,
    exclude_previous: bool,
    order_by_range: bool,
    writer: anytype,
) !void {
    var fq = fontconfig.FontQuery.init(allocator);
    defer fq.deinit();
    const range_fonts = try fq.fontsForRange(starting_codepoint, ending_codepoint, fonts, exclude_previous); // do we want hard limits around this?
    defer allocator.free(range_fonts);

    std.log.debug("Got {d} range fonts back from query", .{range_fonts.len});
    // The list can hold a very large number of runs when the whole code
    // space is queried, so use the O(n log n) block sort. Like the insertion
    // sort it replaces it is stable, so runs with the same starting code
    // point keep their relative (per-font) order.
    if (order_by_range)
        std.sort.block(fontconfig.RangeFont, range_fonts, {}, cmpRangeList);

    for (range_fonts) |range_font| {
        const prefix = if (std.mem.eql(u8, range_font.font.full_name, "Unsupported")) "#" else "";
        try writer.print("{s}U+{x}{x}={s}\n", .{
            prefix,
            range_font.starting_codepoint,
            fmtRangeFontEndingCodepoint(range_font), // prints "-<end>" or nothing
            range_font.font.family,
        });
    }
}
|
||||||
|
|
||||||
|
/// Settings collected from the command line by `parseCommandLine`.
const Options = struct {
    /// Set when a bare "--" argument was seen; parsing stops there.
    end_of_options_signifier: ?usize = null,
    /// Comma-delimited Unicode group names (-g/--groups).
    groups: ?[]const u8 = null,
    /// Comma-delimited preferred font families (-f/--fonts).
    fonts: ?[]const u8 = "",
    /// -G/--list-groups was given.
    list_groups: bool = false,
    /// -F/--list-fonts was given.
    list_fonts: bool = false,
    /// fontconfig pattern used to select fonts (-p/--pattern).
    pattern: [:0]const u8 = ":regular:normal:spacing=100:slant=0",
    /// Output ordering (-o/--order).
    order: [:0]const u8 = "font",
};
|
||||||
|
|
||||||
|
/// Writes the help text to `writer`; `arg0` is shown as the program name.
fn usage(writer: anytype, arg0: []const u8) !void {
    const help_template =
        \\usage: {s} [OPTION]...
        \\
        \\Options:
        \\ -p, --pattern font pattern to use (Default: :regular:normal:spacing=100:slant=0)
        \\ -g, --groups group names to process, comma delimited (e.g. Thai,Lao - default is all groups)
        \\ -f, --fonts prefered fonts in order, comma delimited (e.g. "DejaVu Sans Mono,Hack Nerd Font" - default is all fonts)
        \\ note this will change the behavior such that ranges supported by the first font found will not
        \\ be considered for use by subsequent fonts
        \\ -o, --order order by (Default: font, can also order by range)
        \\ -G, --list-groups list all groups and exit
        \\ -F, --list-fonts list all fonts matching pattern and exit
        \\ -h, --help display this help text and exit
        \\
    ;
    try writer.print(help_template, .{arg0});
}
|
||||||
|
|
||||||
|
/// Consumes the remaining arguments of `arg_iterator` and turns them into an
/// `Options` value.
///
/// Parsing stops at a bare "--". Returns `error.UserRequestedHelp` for
/// -h/--help and `error.InvalidOption` for anything unrecognised (the latter
/// is also reported on stderr, except when running under `zig test`). Errors
/// from `getArgValue` are passed through.
fn parseCommandLine(arg_iterator: anytype) !Options {
    var parsed = Options{};
    var handled: usize = 0;
    while (arg_iterator.next()) |arg| : (handled += 1) {
        if (std.mem.eql(u8, arg, "--")) {
            parsed.end_of_options_signifier = handled + 1;
            return parsed;
        }

        // Options that carry a value.
        if (try getArgValue(arg_iterator, arg, "groups", "g", .{})) |value| {
            parsed.groups = value;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "pattern", "p", .{})) |value| {
            parsed.pattern = value;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "fonts", "f", .{})) |value| {
            parsed.fonts = value;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "order", "o", .{})) |value| {
            parsed.order = value;
            continue;
        }

        // Boolean switches.
        if ((try getArgValue(arg_iterator, arg, "list-groups", "G", .{ .is_bool = true })) != null) {
            parsed.list_groups = true;
            continue;
        }
        if ((try getArgValue(arg_iterator, arg, "list-fonts", "F", .{ .is_bool = true })) != null) {
            parsed.list_fonts = true;
            continue;
        }
        if ((try getArgValue(arg_iterator, arg, "help", "h", .{ .is_bool = true })) != null)
            return error.UserRequestedHelp;

        // Nothing matched.
        if (!builtin.is_test)
            try std.io.getStdErr().writer().print("invalid option: {s}\n\n", .{arg});
        return error.InvalidOption;
    }
    return parsed;
}
|
||||||
|
/// Per-option switches understood by `getArgValue`.
const ArgOptions = struct {
    /// The option is a flag and takes no value.
    is_bool: bool = false,
    /// Not consulted by `getArgValue`.
    is_required: bool = false,
};
|
||||||
|
/// Tries to match `arg` against a single option, identified by its long
/// `name` ("--name") and/or its `short_name` ("-s").
///
/// Returns:
/// - `null` when `arg` is not this option.
/// - for value options: the next item pulled from `arg_iterator`
///   ("-s value", "--name value") or the text following the equals sign
///   ("--name=value").
/// - for boolean options (`arg_options.is_bool`): a non-null marker, which
///   is `arg` itself for the short form and "" for the long form.
///
/// Errors:
/// - `error.NoValueOnFlag` / `error.NoValueOnName` when the iterator has no
///   further argument to use as the value.
/// - `error.EqualsInvalidForBooleanArgument` for "--name=..." on a boolean
///   option.
fn getArgValue(
    arg_iterator: anytype,
    arg: [:0]const u8,
    comptime name: ?[]const u8,
    comptime short_name: ?[]const u8,
    arg_options: ArgOptions,
) !?[:0]const u8 {
    if (short_name) |short| {
        const short_flag = "-" ++ short;
        if (std.mem.eql(u8, short_flag, arg)) {
            if (arg_options.is_bool) return arg;
            return arg_iterator.next() orelse return error.NoValueOnFlag;
        }
    }

    if (name) |long| {
        const long_flag = "--" ++ long;
        const assignment_prefix = long_flag ++ "=";

        // "--name value" (or just "--name" for a switch)
        if (std.mem.eql(u8, long_flag, arg)) {
            if (arg_options.is_bool) return "";
            return arg_iterator.next() orelse return error.NoValueOnName;
        }

        // "--name=value": the value is the remainder of this same argument
        if (std.mem.startsWith(u8, arg, assignment_prefix)) {
            if (arg_options.is_bool) return error.EqualsInvalidForBooleanArgument;
            return arg[assignment_prefix.len.. :0];
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
// Tests run in this order:
|
||||||
|
//
|
||||||
|
// 1. Main file
|
||||||
|
// - In order, from top to bottom
|
||||||
|
// 2. Referenced file(s), if any
|
||||||
|
// - In order, from top to bottom
|
||||||
|
//
|
||||||
|
// libfontconfig gets inconsistent in a hurry with a lot of init/deinit, so
|
||||||
|
// we only want to deinit once. Because we have no way of saying "go do other
|
||||||
|
// tests, then come back", we have no way of controlling deinitialization other
|
||||||
|
// than something that's not super obvious. So, we're adding this comment.
|
||||||
|
// We will allow fontconfig tests to do our deinit() call, and we shall ignore
|
||||||
|
// deinitialization here
|
||||||
|
test "startup" {
    // Intentionally empty. Enable the line below for debug-level logging:
    // std.testing.log_level = .debug;
}
|
||||||
|
test "command line parses with short name" {
    // Short form: the value is the argument following the flag.
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "-g Latin-1");
    defer args.deinit();

    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
|
||||||
|
test "command line parses with long name no equals" {
    // Long form with the value supplied as a separate argument.
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups Latin-1");
    defer args.deinit();

    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
|
||||||
|
test "command line parses with long name equals" {
    // Debug logging for this test only; the previous level is restored on exit.
    const saved_log_level = std.testing.log_level;
    std.testing.log_level = .debug;
    defer std.testing.log_level = saved_log_level;

    // Long form with the value attached via '='.
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups=Latin-1");
    defer args.deinit();

    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
|
||||||
|
test "Get ranges" {
    std.log.debug("get ranges", .{});
    // fontconfig.deinit() is deliberately not called from this test:
    // defer fontconfig.deinit();

    // Query fontconfig and make sure at least one font comes back.
    var fq = fontconfig.FontQuery.init(std.testing.allocator);
    defer fq.deinit();
    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
    defer fl.deinit();
    try std.testing.expect(fl.list.items.len > 0);

    // The assertions below are written against DejaVu Sans Mono, so the test
    // fails if that font is not among the query results.
    const matched = blk: {
        for (fl.list.items) |item| {
            std.log.debug("full_name: '{s}'", .{item.full_name});
            if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name))
                break :blk item;
        }
        break :blk null;
    };
    try std.testing.expect(matched != null);
    const arr: []const fontconfig.Font = &[_]fontconfig.Font{matched.?};

    var al = std.ArrayList(u8).init(std.testing.allocator);
    defer al.deinit();

    // Look up the unicode range to report on by name.
    const range_name = "Basic Latin";
    const matched_range = try blk: {
        var unicode_ranges = unicode.all_ranges();
        while (unicode_ranges.next()) |range| {
            var it = std.mem.splitScalar(u8, range_name, ',');
            while (it.next()) |desired_range| {
                if (std.mem.eql(u8, range.name, desired_range)) {
                    break :blk range;
                }
            }
        }
        break :blk error.RangeNotFound;
    };

    // Debug logging for the remainder of the test; restored on exit.
    const log_level = std.testing.log_level;
    std.testing.log_level = .debug;
    defer std.testing.log_level = log_level;

    try outputRange(std.testing.allocator, matched_range.starting_codepoint, matched_range.ending_codepoint, arr, false, al.writer());
    // expectEqualStrings takes (expected, actual)
    try std.testing.expectEqualStrings("U+20-7e=DejaVu Sans Mono\n", al.items);

    std.log.debug("\nwhole unicode space:", .{});
    // Writes into the same buffer as the call above, which is why the first
    // line appears twice in the expected text below.
    try outputRange(std.testing.allocator, 0, max_unicode, arr, false, al.writer());
    const expected =
        \\U+20-7e=DejaVu Sans Mono
        \\U+20-7e=DejaVu Sans Mono
        \\U+a0-1c3=DejaVu Sans Mono
        \\U+1cd-1e3=DejaVu Sans Mono
        \\U+1e6-1f0=DejaVu Sans Mono
        \\U+1f4-1f6=DejaVu Sans Mono
    ;
    try std.testing.expectStringStartsWith(al.items, expected);

    // try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
}
|
||||||
|
|
||||||
|
test "teardown, followed by libraries" {
    // refAllDecls only catches public decls of this file...
    std.testing.refAllDecls(@This());

    // ...so unicode.zig is imported explicitly as well.
    _ = @import("unicode.zig");
}
|
209
src/ranges.txt
Normal file
209
src/ranges.txt
Normal file
|
@ -0,0 +1,209 @@
|
||||||
|
Basic Latin U+0 - U+7F
|
||||||
|
Latin-1 Supplement U+80 - U+FF
|
||||||
|
Latin Extended-A U+100 - U+17F
|
||||||
|
Latin Extended-B U+180 - U+24F
|
||||||
|
IPA Extensions U+250 - U+2AF
|
||||||
|
Spacing Modifier Letters U+2B0 - U+2FF
|
||||||
|
Combining Diacritical Marks U+300 - U+36F
|
||||||
|
Greek and Coptic U+370 - U+3FF
|
||||||
|
Cyrillic U+400 - U+4FF
|
||||||
|
Cyrillic Supplement U+500 - U+527
|
||||||
|
Armenian U+531 - U+58A
|
||||||
|
Hebrew U+591 - U+5F4
|
||||||
|
Arabic U+600 - U+6FF
|
||||||
|
Syriac U+700 - U+74F
|
||||||
|
Arabic Supplement U+750 - U+77F
|
||||||
|
Thaana U+780 - U+7B1
|
||||||
|
NKo U+7C0 - U+7FA
|
||||||
|
Samaritan U+800 - U+83E
|
||||||
|
Mandaic U+840 - U+85E
|
||||||
|
Devanagari U+900 - U+97F
|
||||||
|
Bengali U+981 - U+9FB
|
||||||
|
Gurmukhi U+A01 - U+A75
|
||||||
|
Gujarati U+A81 - U+AF1
|
||||||
|
Oriya U+B01 - U+B77
|
||||||
|
Tamil U+B82 - U+BFA
|
||||||
|
Telugu U+C01 - U+C7F
|
||||||
|
Kannada U+C82 - U+CF2
|
||||||
|
Malayalam U+D02 - U+D7F
|
||||||
|
Sinhala U+D82 - U+DF4
|
||||||
|
Thai U+E01 - U+E5B
|
||||||
|
Lao U+E81 - U+EDD
|
||||||
|
Tibetan U+F00 - U+FDA
|
||||||
|
Myanmar U+1000 - U+109F
|
||||||
|
Georgian U+10A0 - U+10FC
|
||||||
|
Hangul Jamo U+1100 - U+11FF
|
||||||
|
Ethiopic U+1200 - U+137C
|
||||||
|
Ethiopic Supplement U+1380 - U+1399
|
||||||
|
Cherokee U+13A0 - U+13F4
|
||||||
|
Unified Canadian Aboriginal Syllabics U+1400 - U+167F
|
||||||
|
Ogham U+1680 - U+169C
|
||||||
|
Runic U+16A0 - U+16F0
|
||||||
|
Tagalog U+1700 - U+1714
|
||||||
|
Hanunoo U+1720 - U+1736
|
||||||
|
Buhid U+1740 - U+1753
|
||||||
|
Tagbanwa U+1760 - U+1773
|
||||||
|
Khmer U+1780 - U+17F9
|
||||||
|
Mongolian U+1800 - U+18AA
|
||||||
|
Unified Canadian Aboriginal Syllabics Extended U+18B0 - U+18F5
|
||||||
|
Limbu U+1900 - U+194F
|
||||||
|
Tai Le U+1950 - U+1974
|
||||||
|
New Tai Lue U+1980 - U+19DF
|
||||||
|
Khmer Symbols U+19E0 - U+19FF
|
||||||
|
Buginese U+1A00 - U+1A1F
|
||||||
|
Tai Tham U+1A20 - U+1AAD
|
||||||
|
Balinese U+1B00 - U+1B7C
|
||||||
|
Sundanese U+1B80 - U+1BB9
|
||||||
|
Batak U+1BC0 - U+1BFF
|
||||||
|
Lepcha U+1C00 - U+1C4F
|
||||||
|
Ol Chiki U+1C50 - U+1C7F
|
||||||
|
Vedic Extensions U+1CD0 - U+1CF2
|
||||||
|
Phonetic Extensions U+1D00 - U+1D7F
|
||||||
|
Phonetic Extensions Supplement U+1D80 - U+1DBF
|
||||||
|
Combining Diacritical Marks Supplement U+1DC0 - U+1DFF
|
||||||
|
Latin Extended Additional U+1E00 - U+1EFF
|
||||||
|
Greek Extended U+1F00 - U+1FFE
|
||||||
|
General Punctuation U+2000 - U+206F
|
||||||
|
Superscripts and Subscripts U+2070 - U+209C
|
||||||
|
Currency Symbols U+20A0 - U+20B9
|
||||||
|
Combining Diacritical Marks for Symbols U+20D0 - U+20F0
|
||||||
|
Letterlike Symbols U+2100 - U+214F
|
||||||
|
Number Forms U+2150 - U+2189
|
||||||
|
Arrows U+2190 - U+21FF
|
||||||
|
Mathematical Operators U+2200 - U+22FF
|
||||||
|
Miscellaneous Technical U+2300 - U+23F3
|
||||||
|
Control Pictures U+2400 - U+2426
|
||||||
|
Optical Character Recognition U+2440 - U+244A
|
||||||
|
Enclosed Alphanumerics U+2460 - U+24FF
|
||||||
|
Box Drawing U+2500 - U+257F
|
||||||
|
Block Elements U+2580 - U+259F
|
||||||
|
Geometric Shapes U+25A0 - U+25FF
|
||||||
|
Miscellaneous Symbols U+2600 - U+26FF
|
||||||
|
Dingbats U+2701 - U+27BF
|
||||||
|
Miscellaneous Mathematical Symbols-A U+27C0 - U+27EF
|
||||||
|
Supplemental Arrows-A U+27F0 - U+27FF
|
||||||
|
Braille Patterns U+2800 - U+28FF
|
||||||
|
Supplemental Arrows-B U+2900 - U+297F
|
||||||
|
Miscellaneous Mathematical Symbols-B U+2980 - U+29FF
|
||||||
|
Supplemental Mathematical Operators U+2A00 - U+2AFF
|
||||||
|
Miscellaneous Symbols and Arrows U+2B00 - U+2B59
|
||||||
|
Glagolitic U+2C00 - U+2C5E
|
||||||
|
Latin Extended-C U+2C60 - U+2C7F
|
||||||
|
Coptic U+2C80 - U+2CFF
|
||||||
|
Georgian Supplement U+2D00 - U+2D25
|
||||||
|
Tifinagh U+2D30 - U+2D7F
|
||||||
|
Ethiopic Extended U+2D80 - U+2DDE
|
||||||
|
Cyrillic Extended-A U+2DE0 - U+2DFF
|
||||||
|
Supplemental Punctuation U+2E00 - U+2E31
|
||||||
|
CJK Radicals Supplement U+2E80 - U+2EF3
|
||||||
|
Kangxi Radicals U+2F00 - U+2FD5
|
||||||
|
Ideographic Description Characters U+2FF0 - U+2FFB
|
||||||
|
CJK Symbols and Punctuation U+3000 - U+303F
|
||||||
|
Hiragana U+3041 - U+309F
|
||||||
|
Katakana U+30A0 - U+30FF
|
||||||
|
Bopomofo U+3105 - U+312D
|
||||||
|
Hangul Compatibility Jamo U+3131 - U+318E
|
||||||
|
Kanbun U+3190 - U+319F
|
||||||
|
Bopomofo Extended U+31A0 - U+31BA
|
||||||
|
CJK Strokes U+31C0 - U+31E3
|
||||||
|
Katakana Phonetic Extensions U+31F0 - U+31FF
|
||||||
|
Enclosed CJK Letters and Months U+3200 - U+32FE
|
||||||
|
CJK Compatibility U+3300 - U+33FF
|
||||||
|
CJK Unified Ideographs Extension A U+3400 - U+4DB5
|
||||||
|
Yijing Hexagram Symbols U+4DC0 - U+4DFF
|
||||||
|
CJK Unified Ideographs U+4E00 - U+9FCB
|
||||||
|
Yi Syllables U+A000 - U+A48C
|
||||||
|
Yi Radicals U+A490 - U+A4C6
|
||||||
|
Lisu U+A4D0 - U+A4FF
|
||||||
|
Vai U+A500 - U+A62B
|
||||||
|
Cyrillic Extended-B U+A640 - U+A697
|
||||||
|
Bamum U+A6A0 - U+A6F7
|
||||||
|
Modifier Tone Letters U+A700 - U+A71F
|
||||||
|
Latin Extended-D U+A720 - U+A7FF
|
||||||
|
Syloti Nagri U+A800 - U+A82B
|
||||||
|
Common Indic Number Forms U+A830 - U+A839
|
||||||
|
Phags-pa U+A840 - U+A877
|
||||||
|
Saurashtra U+A880 - U+A8D9
|
||||||
|
Devanagari Extended U+A8E0 - U+A8FB
|
||||||
|
Kayah Li U+A900 - U+A92F
|
||||||
|
Rejang U+A930 - U+A95F
|
||||||
|
Hangul Jamo Extended-A U+A960 - U+A97C
|
||||||
|
Javanese U+A980 - U+A9DF
|
||||||
|
Cham U+AA00 - U+AA5F
|
||||||
|
Myanmar Extended-A U+AA60 - U+AA7B
|
||||||
|
Tai Viet U+AA80 - U+AADF
|
||||||
|
Ethiopic Extended-A U+AB01 - U+AB2E
|
||||||
|
Meetei Mayek U+ABC0 - U+ABF9
|
||||||
|
Hangul Syllables U+AC00 - U+D7A3
|
||||||
|
Hangul Jamo Extended-B U+D7B0 - U+D7FB
|
||||||
|
High Surrogates U+D800 - U+DB7F
|
||||||
|
High Private Use Surrogates U+DB80 - U+DBFF
|
||||||
|
Low Surrogates U+DC00 - U+DFFF
|
||||||
|
Private Use Area U+E000 - U+F8FF
|
||||||
|
CJK Compatibility Ideographs U+F900 - U+FAD9
|
||||||
|
Alphabetic Presentation Forms U+FB00 - U+FB4F
|
||||||
|
Arabic Presentation Forms-A U+FB50 - U+FDFD
|
||||||
|
Variation Selectors U+FE00 - U+FE0F
|
||||||
|
Vertical Forms U+FE10 - U+FE19
|
||||||
|
Combining Half Marks U+FE20 - U+FE26
|
||||||
|
CJK Compatibility Forms U+FE30 - U+FE4F
|
||||||
|
Small Form Variants U+FE50 - U+FE6B
|
||||||
|
Arabic Presentation Forms-B U+FE70 - U+FEFF
|
||||||
|
Halfwidth and Fullwidth Forms U+FF01 - U+FFEE
|
||||||
|
Specials U+FFF9 - U+FFFD
|
||||||
|
Linear B Syllabary U+10000 - U+1005D
|
||||||
|
Linear B Ideograms U+10080 - U+100FA
|
||||||
|
Aegean Numbers U+10100 - U+1013F
|
||||||
|
Ancient Greek Numbers U+10140 - U+1018A
|
||||||
|
Ancient Symbols U+10190 - U+1019B
|
||||||
|
Phaistos Disc U+101D0 - U+101FD
|
||||||
|
Lycian U+10280 - U+1029C
|
||||||
|
Carian U+102A0 - U+102D0
|
||||||
|
Old Italic U+10300 - U+10323
|
||||||
|
Gothic U+10330 - U+1034A
|
||||||
|
Ugaritic U+10380 - U+1039F
|
||||||
|
Old Persian U+103A0 - U+103D5
|
||||||
|
Deseret U+10400 - U+1044F
|
||||||
|
Shavian U+10450 - U+1047F
|
||||||
|
Osmanya U+10480 - U+104A9
|
||||||
|
Cypriot Syllabary U+10800 - U+1083F
|
||||||
|
Imperial Aramaic U+10840 - U+1085F
|
||||||
|
Phoenician U+10900 - U+1091F
|
||||||
|
Lydian U+10920 - U+1093F
|
||||||
|
Kharoshthi U+10A00 - U+10A58
|
||||||
|
Old South Arabian U+10A60 - U+10A7F
|
||||||
|
Avestan U+10B00 - U+10B3F
|
||||||
|
Inscriptional Parthian U+10B40 - U+10B5F
|
||||||
|
Inscriptional Pahlavi U+10B60 - U+10B7F
|
||||||
|
Old Turkic U+10C00 - U+10C48
|
||||||
|
Rumi Numeral Symbols U+10E60 - U+10E7E
|
||||||
|
Brahmi U+11000 - U+1106F
|
||||||
|
Kaithi U+11080 - U+110C1
|
||||||
|
Cuneiform U+12000 - U+1236E
|
||||||
|
Cuneiform Numbers and Punctuation U+12400 - U+12473
|
||||||
|
Egyptian Hieroglyphs U+13000 - U+1342E
|
||||||
|
Bamum Supplement U+16800 - U+16A38
|
||||||
|
Kana Supplement U+1B000 - U+1B001
|
||||||
|
Byzantine Musical Symbols U+1D000 - U+1D0F5
|
||||||
|
Musical Symbols U+1D100 - U+1D1DD
|
||||||
|
Ancient Greek Musical Notation U+1D200 - U+1D245
|
||||||
|
Tai Xuan Jing Symbols U+1D300 - U+1D356
|
||||||
|
Counting Rod Numerals U+1D360 - U+1D371
|
||||||
|
Mathematical Alphanumeric Symbols U+1D400 - U+1D7FF
|
||||||
|
Mahjong Tiles U+1F000 - U+1F02B
|
||||||
|
Domino Tiles U+1F030 - U+1F093
|
||||||
|
Playing Cards U+1F0A0 - U+1F0DF
|
||||||
|
Enclosed Alphanumeric Supplement U+1F100 - U+1F1FF
|
||||||
|
Enclosed Ideographic Supplement U+1F200 - U+1F251
|
||||||
|
Miscellaneous Symbols And Pictographs U+1F300 - U+1F5FF
|
||||||
|
Emoticons U+1F601 - U+1F64F
|
||||||
|
Transport And Map Symbols U+1F680 - U+1F6C5
|
||||||
|
Alchemical Symbols U+1F700 - U+1F773
|
||||||
|
CJK Unified Ideographs Extension B U+20000 - U+2A6D6
|
||||||
|
CJK Unified Ideographs Extension C U+2A700 - U+2B734
|
||||||
|
CJK Unified Ideographs Extension D U+2B740 - U+2B81D
|
||||||
|
CJK Compatibility Ideographs Supplement U+2F800 - U+2FA1D
|
||||||
|
Tags U+E0001 - U+E007F
|
||||||
|
Variation Selectors Supplement U+E0100 - U+E01EF
|
||||||
|
Supplementary Private Use Area-A U+F0000 - U+FFFFD
|
||||||
|
Supplementary Private Use Area-B U+100000 - U+10FFFD
|
112
src/unicode.zig
Normal file
112
src/unicode.zig
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
// Pulled from: https://www.unicodepedia.com/groups/
|
||||||
|
const ranges = @embedFile("ranges.txt");
|
||||||
|
// Base branch quota for the comptime work in this file: handed as-is to
// @setEvalBranchQuota while counting the newlines of ranges.txt, and doubled
// while parsing it. It should be related to the size of the embedded file.
const eval_branch_quota_base = 18500;
|
||||||
|
/// Number of newline characters in the embedded ranges.txt; used as the
/// length of the arrays inside `Ranges`.
const range_count = newline_total: {
    // The quota needed here grows with the number of characters in the
    // embedded file, so it may need raising if ranges.txt gets bigger.
    @setEvalBranchQuota(eval_branch_quota_base);
    const newlines = std.mem.count(u8, ranges, "\n");
    break :newline_total newlines;
};
|
||||||
|
/// Table of unicode ranges stored as parallel arrays (entry `i` of each array
/// describes the same range), together with a cursor for iterating over it.
const Ranges = struct {
    names: [range_count][]const u8 = undefined,
    starting_codepoints: [range_count]u21 = undefined,
    ending_codepoints: [range_count]u21 = undefined,
    /// Index of the entry the next call to `next()` will return
    current_inx: usize = 0,
    /// Length of the longest entry in `names`
    longest_name_len: usize = 0,

    const Self = @This();

    /// Rewinds the cursor and returns the first entry.
    pub fn first(self: *Self) ?UnicodeGroup {
        self.current_inx = 0;
        return self.next();
    }

    /// Rewinds the cursor to the first entry.
    pub fn reset(self: *Self) void {
        self.current_inx = 0;
    }

    /// Returns the entry under the cursor and advances it, or null once every
    /// entry has been returned.
    pub fn next(self: *Self) ?UnicodeGroup {
        const index = self.current_inx;
        if (index == range_count) return null;
        self.current_inx = index + 1;
        return self.item(index);
    }

    /// Assembles the entry at `index` from the parallel arrays. `index` is
    /// not range checked here beyond normal array indexing.
    pub fn item(self: Self, index: usize) UnicodeGroup {
        return UnicodeGroup{
            .name = self.names[index],
            .starting_codepoint = self.starting_codepoints[index],
            .ending_codepoint = self.ending_codepoints[index],
        };
    }
};
|
||||||
|
|
||||||
|
/// The embedded ranges.txt, parsed once. A parse failure is reported as a
/// compile error rather than at run time.
const _all_ranges = parsed: {
    // Twice the base quota that is used for counting the file's newlines.
    @setEvalBranchQuota(eval_branch_quota_base * 2);
    break :parsed parseRanges(ranges) catch @compileError("Could not parse ranges.txt");
};
|
||||||
|
|
||||||
|
/// Returns a copy of the parsed range table with its cursor at the first
/// entry. Each caller gets its own copy, so iterating one does not affect
/// another.
pub fn all_ranges() Ranges {
    var fresh = _all_ranges;
    fresh.current_inx = 0;
    return fresh;
}
|
||||||
|
|
||||||
|
/// One named range of unicode codepoints, as listed on a line of ranges.txt.
pub const UnicodeGroup = struct {
    /// Range name, e.g. "Basic Latin"
    name: []const u8,

    /// First codepoint listed for the range (0x0 for "Basic Latin")
    starting_codepoint: u21,

    /// Last codepoint listed for the range (0x7f for "Basic Latin")
    ending_codepoint: u21,
};
|
||||||
|
|
||||||
|
/// Parses `text` (one range per line, empty lines skipped) into a `Ranges`
/// table, also recording the length of the longest range name.
///
/// The arrays in `Ranges` hold exactly `range_count` entries, so `text` must
/// contain exactly that many non-empty lines:
/// - `error.TooManyRanges` if there are more (for example when the final
///   line has no trailing newline), instead of indexing out of bounds.
/// - `error.TooFewRanges` if there are fewer (for example when the text
///   contains blank lines), instead of returning a table whose tail is
///   still `undefined`.
///
/// Errors from `parseGroup` are passed through.
fn parseRanges(text: []const u8) !Ranges {
    var rc = Ranges{};
    var iterator = std.mem.splitSequence(u8, text, "\n");
    var inx: usize = 0;
    while (iterator.next()) |group|
        if (group.len > 0) {
            // No room left in the fixed-size arrays
            if (inx >= range_count) return error.TooManyRanges;

            const uc = try parseGroup(group);
            rc.names[inx] = uc.name;
            rc.starting_codepoints[inx] = uc.starting_codepoint;
            rc.ending_codepoints[inx] = uc.ending_codepoint;
            rc.longest_name_len = @max(rc.longest_name_len, uc.name.len);
            inx += 1;
        };

    // Every slot must have been filled, or iteration would hand out
    // undefined entries.
    if (inx != range_count) return error.TooFewRanges;
    return rc;
}
|
||||||
|
|
||||||
|
/// Parses a single line of ranges.txt into a `UnicodeGroup`.
///
/// Expected layout: the range name, a tab, then "U+<hex> - U+<hex>", e.g.
///   Basic Latin<TAB>U+0 - U+7F
/// Spaces at the end of the name are trimmed. The returned name is a slice
/// of `group_text`, not a copy.
///
/// Errors:
/// - `error.NoRangeSpecifiedInGroup` when there is no tab (also logged).
/// - `error.NoEndingCodepointInGroup` when " - " is missing from the range.
/// - `error.InvalidCodepointPrefix` when either codepoint does not start
///   with "U+".
/// - the `std.fmt.parseUnsigned` errors for bad or out-of-range hex digits.
fn parseGroup(group_text: []const u8) !UnicodeGroup {
    var iterator = std.mem.splitSequence(u8, group_text, "\t");
    const name = std.mem.trimRight(u8, iterator.first(), " ");
    const range_text = iterator.next() orelse {
        std.log.err("failed parsing on group '{s}'", .{group_text});
        return error.NoRangeSpecifiedInGroup;
    };

    var range_iterator = std.mem.splitSequence(u8, range_text, " - ");
    const start_text = range_iterator.first();
    const end_text = range_iterator.next() orelse return error.NoEndingCodepointInGroup;

    // Both codepoints are sliced with [2..] below to drop the "U+" prefix;
    // make sure the prefix is really there so that short text cannot index
    // out of bounds and other leading characters are not silently discarded.
    // Plain comparisons (no helper calls) keep the cost of evaluating this at
    // compile time unchanged.
    if (start_text.len < 2 or start_text[0] != 'U' or start_text[1] != '+')
        return error.InvalidCodepointPrefix;
    if (end_text.len < 2 or end_text[0] != 'U' or end_text[1] != '+')
        return error.InvalidCodepointPrefix;

    return UnicodeGroup{
        .name = name,
        .starting_codepoint = try std.fmt.parseUnsigned(u21, start_text[2..], 16),
        .ending_codepoint = try std.fmt.parseUnsigned(u21, end_text[2..], 16),
    };
}
|
||||||
|
|
||||||
|
test "check ranges" {
    var table = all_ranges();

    // Direct array access. Index 8 should be:
    //   Cyrillic U+400 - U+4FF
    try std.testing.expectEqualStrings("Cyrillic", table.names[8]);
    try std.testing.expectEqual(@as(u21, 0x400), table.starting_codepoints[8]);
    try std.testing.expectEqual(@as(u21, 0x4ff), table.ending_codepoints[8]);

    // Iteration: first() yields entry 0...
    const basic_latin = table.first().?;
    try std.testing.expectEqualStrings("Basic Latin", basic_latin.name);
    try std.testing.expectEqual(@as(u21, 0x0), basic_latin.starting_codepoint);
    try std.testing.expectEqual(@as(u21, 0x7f), basic_latin.ending_codepoint);

    // ...and next() carries on with entry 1.
    const latin_1_supplement = table.next().?;
    try std.testing.expectEqualStrings("Latin-1 Supplement", latin_1_supplement.name);
    try std.testing.expectEqual(@as(u21, 0x80), latin_1_supplement.starting_codepoint);
    try std.testing.expectEqual(@as(u21, 0xff), latin_1_supplement.ending_codepoint);
}
|
4
zig-via-docker
Executable file
4
zig-via-docker
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/bin/sh
# Runs the "fontfinder" container image with podman, forwarding all of this
# script's arguments to it. The directory holding this script is mounted at
# /app (also the working directory) and $HOME/.cache at /root/.cache.

# Resolve the physical directory containing this script. `&&` (rather than
# `;`) keeps a failed `cd` from silently yielding the caller's current
# directory, which would then be mounted as /app.
scriptpath="$( cd "$(dirname "$0")" && pwd -P )" || exit 1

# Variant using the fontfinder-alpine image:
# podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder-alpine "$@"
podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder "$@"
|
Loading…
Reference in New Issue
Block a user