initial source code
This commit is contained in:
parent
c731c3a015
commit
5feee8c0c3
14
Dockerfile
Normal file
14
Dockerfile
Normal file
|
@ -0,0 +1,14 @@
|
|||
# Image that provides the pinned Zig toolchain together with the fontconfig
# development files this project links against. The container's entrypoint
# is the zig binary itself.
FROM debian:bullseye

# Install the fontconfig headers plus the tools needed to fetch Zig, unpack
# the pinned Zig build under /usr/local, then remove the download-only tools
# (and the libraries pulled in solely for them) and the apt package lists.
# curl runs with --fail so an HTTP error aborts the build instead of piping
# an error page into tar.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libfontconfig-dev \
        ca-certificates \
        curl \
        xz-utils \
    && curl -fsSL https://mirror.bazel.build/ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.3886+0c1bfe271.tar.xz | tar -C /usr/local/ -xJ \
    && apt-get -y purge --auto-remove curl xz-utils \
    && ln -s /usr/local/zig*/zig /usr/local/bin \
    && rm -rf /var/lib/apt/lists/*

ENTRYPOINT ["/usr/local/bin/zig"]
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023 Emil Lerch
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
86
build.zig
Normal file
86
build.zig
Normal file
|
@ -0,0 +1,86 @@
|
|||
const std = @import("std");
|
||||
|
||||
// Although this function looks imperative, note that its job is to
|
||||
// declaratively construct a build graph that will be executed by an external
|
||||
// runner.
|
||||
pub fn build(b: *std.Build) void {
    // Target and optimization mode are taken from the standard `zig build`
    // command-line options; no defaults are overridden here.
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // The fontfinder executable, rooted at src/main.zig and installed by the
    // default "install" step.
    const exe = b.addExecutable(.{
        .name = "fontfinder",
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    configure(exe);
    b.installArtifact(exe);

    // `zig build run -- arg1 arg2 ...`: runs the executable after the
    // install step has completed and forwards any extra arguments to it.
    const run_cmd = b.addRunArtifact(exe);
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| run_cmd.addArgs(args);
    b.step("run", "Run the app").dependOn(&run_cmd.step);

    // `zig build test`: builds the unit tests from the same root file with
    // the same native configuration, then runs them.
    const unit_tests = b.addTest(.{
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    configure(unit_tests);
    const run_unit_tests = b.addRunArtifact(unit_tests);
    b.step("test", "Run unit tests").dependOn(&run_unit_tests.step);
}
|
||||
|
||||
/// Applies the native settings shared by the executable and the unit tests:
/// links libc and the system fontconfig library, adds the system include and
/// library directories, and compiles the C helper in src/fontconfig.c.
fn configure(object: anytype) void {
    const c_flags = [_][]const u8{"-std=c99"};

    // object.linkage = .static;
    object.linkLibC();

    // Fontconfig must be installed. Docker can also be used (see Dockerfile)
    object.addSystemIncludePath("/usr/include");
    object.linkSystemLibrary("fontconfig");
    // object.linkSystemLibrary("expat"); // fontconfig dependency - needed for static builds
    object.addLibraryPath("/usr/lib");
    object.addCSourceFile("src/fontconfig.c", &c_flags);
}
|
54
src/fontconfig.c
Normal file
54
src/fontconfig.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
#include <stdlib.h>
|
||||
#include <fontconfig/fontconfig.h>
|
||||
|
||||
/* #<{(| FcChar32 FcCharSetCount (const FcCharSet *a); |)}># */
|
||||
/* void printCharacters(FcPattern* fontPattern) { */
|
||||
/* FcCharSet* charset; */
|
||||
/* if (FcPatternGetCharSet(fontPattern, FC_CHARSET, 0, &charset) == FcResultMatch) { */
|
||||
/* FcChar32 ucs4; */
|
||||
/* FcCharSetIter iter; */
|
||||
/* FcCharSetIterInit(charset, &iter); */
|
||||
/* printf("Supported characters:\n"); */
|
||||
/* while (FcCharSetIterNext(&iter, &ucs4)) { */
|
||||
/* printf("%lc ", (wint_t)ucs4); */
|
||||
/* } */
|
||||
/* printf("\n"); */
|
||||
/* FcCharSetDestroy(charset); */
|
||||
/* } */
|
||||
/* } */
|
||||
|
||||
/* Highest code point probed (inclusive) when enumerating a character set. */
const FcChar32 MAX_UNICODE = 0x10FFFD;

/*
 * Releases an array previously handed out through allCharacters().
 * Passing NULL is harmless.
 */
void freeAllCharacters(unsigned int *chars) {
    free(chars);
}

/*
 * Collects every code point supported by a font pattern into a newly
 * malloc'ed array, in ascending order.
 *
 *   fontPattern  an FcPattern* (passed as void* so callers need no cast)
 *   chars        out: receives the array on success, NULL on failure.
 *                The caller releases it with freeAllCharacters().
 *
 * Returns the number of code points stored (>= 0), or a negative value:
 *   -1  the pattern carries no FC_CHARSET value
 *   -2  the charset reports more characters than were found at or below
 *       MAX_UNICODE
 *   -3  memory allocation failed
 */
int allCharacters(void* fontPattern, FcChar32 ** chars) {
    FcPattern* pat = (FcPattern*) fontPattern;
    FcCharSet* charset;

    *chars = NULL;
    if (FcPatternGetCharSet(pat, FC_CHARSET, 0, &charset) != FcResultMatch) {
        return -1;
    }

    FcChar32 count = FcCharSetCount(charset);

    /* Always request at least one element so that a font with an empty
     * charset still yields a pointer that can be passed to free(). */
    FcChar32* char_array = malloc((count > 0 ? count : 1) * sizeof(FcChar32));
    if (char_array == NULL) {
        return -3;
    }

    /* Probe code points in ascending order, stopping as soon as every
     * character the charset reports has been found. */
    size_t found = 0;
    for (FcChar32 cp = 0; found < count && cp <= MAX_UNICODE; cp++) {
        if (FcCharSetHasChar(charset, cp) == FcTrue) {
            char_array[found++] = cp;
        }
    }

    /* The charset returned by FcPatternGetCharSet is not a copy: it is still
     * owned by the pattern, so it must NOT be passed to FcCharSetDestroy
     * here. It is released when the pattern itself is destroyed. */

    if (found < count) {
        free(char_array);
        return -2;
    }

    *chars = char_array;
    return (int) found;
}
|
284
src/fontconfig.zig
Normal file
284
src/fontconfig.zig
Normal file
|
@ -0,0 +1,284 @@
|
|||
const std = @import("std");
|
||||
const unicode = @import("unicode.zig");
|
||||
const c = @cImport({
|
||||
@cInclude("fontconfig/fontconfig.h");
|
||||
});
|
||||
const log = std.log.scoped(.fontconfig);
|
||||
|
||||
/// C helper (src/fontconfig.c). Stores a malloc'ed, ascending array of the
/// code points supported by `p` in `chars` and returns its length, or a
/// negative value on failure.
extern fn allCharacters(p: ?*const c.FcPattern, chars: *[*]u32) c_int;
/// C helper (src/fontconfig.c). Releases an array produced by allCharacters.
/// The C parameter is `unsigned int *`, i.e. the array itself.
extern fn freeAllCharacters(chars: [*]u32) void;

/// A contiguous run of code points covered by one font.
pub const RangeFont = struct {
    /// First code point of the run.
    starting_codepoint: u21,
    /// Last code point of the run (equal to starting_codepoint for a
    /// single-character run).
    ending_codepoint: u21,
    /// The font covering the run.
    font: Font,
};

/// One font as reported by fontconfig.
pub const Font = struct {
    full_name: []const u8,
    family: []const u8,
    style: []const u8,
    /// Code points supported by the font, in ascending order.
    supported_chars: []const u21,

    const Self = @This();

    /// Releases `supported_chars`.
    ///
    /// Only valid when `supported_chars` was produced by the C helper
    /// `allCharacters` (as is the case for fonts created by
    /// `FontQuery.fontList`); it must not be called on a font whose
    /// character slice lives in static or Zig-allocated memory. The name
    /// strings are not touched here.
    pub fn deinit(self: *Self) void {
        const chars: [*]u21 = @constCast(self.supported_chars.ptr);
        freeAllCharacters(@ptrCast(chars));
        // Leave an empty slice behind so stale data cannot be read
        self.supported_chars = &[_]u21{};
    }
};

/// The fonts returned by a query, together with the fontconfig objects that
/// back their strings.
pub const FontList = struct {
    list: std.ArrayList(Font),
    allocator: std.mem.Allocator,
    pattern: *c.FcPattern,
    fontset: *c.FcFontSet,

    const Self = @This();

    /// Creates an empty list with room for `num` fonts. The list takes
    /// ownership of `pattern` and `fontset`; both are destroyed by `deinit`.
    pub fn initCapacity(allocator: std.mem.Allocator, num: usize, pattern: *c.FcPattern, fontset: *c.FcFontSet) std.mem.Allocator.Error!Self {
        return Self{
            .allocator = allocator,
            .list = try std.ArrayList(Font).initCapacity(allocator, num),
            .pattern = pattern,
            .fontset = fontset,
        };
    }

    /// Releases every font's character array, the fontconfig pattern and
    /// font set, and the list storage. Font values copied out of the list
    /// must not be used afterwards.
    pub fn deinit(self: *Self) void {
        // The character arrays are allocated by the C helper, one per font,
        // and are not owned by the pattern or the font set.
        for (self.list.items) |*font| font.deinit();
        c.FcPatternDestroy(self.pattern);
        c.FcFontSetDestroy(self.fontset);
        self.list.deinit();
    }

    /// Appends a font without growing the list (capacity must have been
    /// reserved by `initCapacity`). `supported_chars` must come from
    /// `allCharacters`: the list takes ownership of it and frees it in
    /// `deinit`. The strings are borrowed, not copied.
    pub fn addFontAssumeCapacity(
        self: *Self,
        full_name: []const u8,
        family: []const u8,
        style: []const u8,
        supported_chars: []const u21,
    ) !void {
        self.list.appendAssumeCapacity(.{
            .full_name = full_name,
            .family = family,
            .style = style,
            .supported_chars = supported_chars,
        });
    }
};
|
||||
|
||||
var fc_config: ?*c.FcConfig = null;
|
||||
var deinited = false;
|
||||
// pub var test_should_deinit = true;
|
||||
/// Tears down the fontconfig configuration held by this module. Call it at
/// most once, and only after all font processing has finished; a second
/// call panics.
pub fn deinit() void {
    // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r2370.html
    // warns that destroying the configuration returned by
    // FcConfigGetCurrent leaves the library in an indeterminate state.
    // In practice an init/destroy/init/destroy sequence also seg faults on
    // the second FcConfigDestroy, hence the one-shot guard below.
    if (deinited) @panic("Cannot deinitialize this library more than once");
    deinited = true;

    // Nothing was ever loaded: there is nothing to destroy
    const conf = fc_config orelse return;
    fc_config = null;
    log.debug("destroying config: do not use library or call me again", .{});
    c.FcConfigDestroy(conf);
}
|
||||
|
||||
pub const FontQuery = struct {
|
||||
allocator: std.mem.Allocator,
|
||||
// fc_config: ?*c.FcConfig = null,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Creates a query object whose results are allocated with `allocator`.
pub fn init(allocator: std.mem.Allocator) Self {
    return .{ .allocator = allocator };
}

/// Currently releases nothing; it exists so callers can pair it with `init`.
pub fn deinit(self: *Self) void {
    // if (self.all_fonts) |a| a.deinit();
    _ = self;
}
|
||||
|
||||
/// Lists the installed fonts matching the fontconfig `pattern`
/// (e.g. ":regular:normal:spacing=100:slant=0").
///
/// The fontconfig library is initialized on first use. The returned
/// FontList owns the parsed pattern and the font set; the name strings of
/// each Font point into those objects, so they stay valid only until the
/// list is deinitialized.
///
/// Errors: FontConfigInitLoadFailure, FontConfigCouldNotParsePattern,
/// FontConfigCouldNotListFonts, FontConfigHasNoFamily,
/// FontConfigHasNoStyle, FontConfigCouldNotGetCharSet, OutOfMemory.
/// Panics if the library has already been torn down with `deinit`.
pub fn fontList(self: *Self, pattern: [:0]const u8) !FontList {
    if (fc_config == null and deinited) @panic("fontconfig C library is in an inconsistent state - should not use");
    if (fc_config == null) fc_config = c.FcInitLoadConfigAndFonts();
    const config = if (fc_config) |conf| conf else return error.FontConfigInitLoadFailure;

    // The pattern must outlive this call: once the FontList exists it owns
    // the pattern and destroys it in its deinit.
    const pat = c.FcNameParse(pattern);
    if (pat == null) return error.FontConfigCouldNotParsePattern;

    const os = c.FcObjectSetBuild(c.FC_FAMILY, c.FC_STYLE, c.FC_LANG, c.FC_FULLNAME, c.FC_CHARSET, @as(?*u8, null)); // *FcObjectSet
    defer if (os != null) c.FcObjectSetDestroy(os);

    const fs = c.FcFontList(config, pat, os); // FcFontSet
    if (fs == null) {
        c.FcPatternDestroy(pat);
        return error.FontConfigCouldNotListFonts;
    }

    // Use the following only when needed. NameUnparse allocates memory
    // log.debug("Total matching fonts: {d}. Pattern: {s}\n", .{ fs.*.nfont, c.FcNameUnparse(pat) });
    const font_count = @as(usize, @intCast(fs.*.nfont));
    log.debug("Total matching fonts: {d}", .{font_count});

    // Until the list exists nothing owns pat/fs, so release them by hand if
    // its allocation fails. Afterwards rc.deinit() covers every error path.
    var rc = FontList.initCapacity(self.allocator, font_count, pat.?, fs.?) catch |err| {
        c.FcFontSetDestroy(fs);
        c.FcPatternDestroy(pat);
        return err;
    };
    errdefer rc.deinit();

    for (0..font_count) |i| {
        const font = fs.*.fonts[i].?; // *FcPattern
        var fullname: [*:0]c.FcChar8 = undefined;
        var style: [*:0]c.FcChar8 = undefined;
        var family: [*:0]c.FcChar8 = undefined;

        // The strings are looked up before the character array is
        // allocated so that a missing family/style cannot leak that array.
        //
        // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r600.html
        // Note that these (like FcPatternGet) do not make a copy of any data structure referenced by the return value
        // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r570.html
        // The value returned is not a copy, but rather refers to the data stored within the pattern directly. Applications must not free this value.
        if (c.FcPatternGetString(font, c.FC_FULLNAME, 0, @as([*c][*c]c.FcChar8, @ptrCast(&fullname))) != c.FcResultMatch)
            fullname = @constCast(@ptrCast("".ptr)); // a missing full name is tolerated

        if (c.FcPatternGetString(font, c.FC_FAMILY, 0, @as([*c][*c]c.FcChar8, @ptrCast(&family))) != c.FcResultMatch)
            return error.FontConfigHasNoFamily;
        if (c.FcPatternGetString(font, c.FC_STYLE, 0, @as([*c][*c]c.FcChar8, @ptrCast(&style))) != c.FcResultMatch)
            return error.FontConfigHasNoStyle;

        var charset: [*]u21 = undefined;
        const len = allCharacters(font, @ptrCast(&charset));
        if (len < 0) return error.FontConfigCouldNotGetCharSet;
        const char_count = @as(usize, @intCast(len));

        log.debug(
            "Chars: {d:5.0} Family '{s}' Style '{s}' Full Name: {s}",
            .{ char_count, family, style, fullname },
        );

        try rc.addFontAssumeCapacity(
            fullname[0..std.mem.len(fullname)],
            family[0..std.mem.len(family)],
            style[0..std.mem.len(style)],
            charset[0..char_count],
        );
    }
    return rc;
}
|
||||
|
||||
/// Splits the code points in [starting_codepoint, ending_codepoint]
/// (both inclusive) into contiguous runs, one RangeFont per run and font.
///
/// Fonts are processed in the order given. When `exclude_previous` is true
/// a code point already claimed by an earlier font is skipped for later
/// fonts, so each code point is reported at most once. Each font's
/// `supported_chars` is expected to be sorted ascending.
///
/// Returns a slice allocated with this query's allocator; the caller frees
/// it. An inverted range (ending < starting) yields an empty slice.
pub fn fontsForRange(
    self: *Self,
    starting_codepoint: u21,
    ending_codepoint: u21,
    fonts: []const Font,
    exclude_previous: bool,
) ![]RangeFont {
    var rc = std.ArrayList(RangeFont).init(self.allocator);
    defer rc.deinit();

    // An inverted range contains nothing (and would overflow the length
    // computation below).
    if (ending_codepoint < starting_codepoint) return rc.toOwnedSlice();

    // One flag per code point of the range, indexed by the code point's
    // offset from starting_codepoint. Only needed when excluding.
    const previously_supported: ?[]bool = if (exclude_previous) blk: {
        const flags = try self.allocator.alloc(bool, @as(usize, ending_codepoint - starting_codepoint) + 1);
        for (flags) |*flag| flag.* = false;
        break :blk flags;
    } else null;
    defer if (previously_supported) |p| self.allocator.free(p);

    for (fonts) |font| {
        const chars = font.supported_chars;
        var inx: usize = 0;

        // Advance to the start of the range
        while (inx < chars.len and chars[inx] < starting_codepoint)
            inx += 1;

        while (inx < chars.len and chars[inx] <= ending_codepoint) {
            const current_start = chars[inx];
            if (previously_supported) |seen| {
                const slot = current_start - starting_codepoint;
                if (seen[slot]) {
                    inx += 1;
                    continue; // This was already supported - continue
                }
                seen[slot] = true;
            }

            // We found the beginning of a run. Extend it while the next
            // supported character is adjacent, inside the range, and not
            // already claimed by an earlier font.
            var current_end = current_start;
            inx += 1;
            while (inx < chars.len and
                chars[inx] == current_end + 1 and
                chars[inx] <= ending_codepoint)
            {
                if (previously_supported) |seen| {
                    const slot = chars[inx] - starting_codepoint;
                    if (seen[slot]) break;
                    seen[slot] = true;
                }
                inx += 1;
                current_end += 1;
            }

            // End of the run (possibly the end of the range). If the range
            // is not exhausted, inx now sits at the start of the next
            // candidate run.
            try rc.append(.{
                .font = font,
                .starting_codepoint = current_start,
                .ending_codepoint = current_end,
            });
        }
    }
    return rc.toOwnedSlice();
}
|
||||
};
|
||||
|
||||
test {
    // Only catches public decls
    std.testing.refAllDecls(@This());
}

test "Get fonts" {
    // std.testing.log_level = .debug;
    log.debug("get fonts", .{});
    var query = FontQuery.init(std.testing.allocator);
    defer query.deinit();
    var font_list = try query.fontList(":regular:normal:spacing=100:slant=0");
    defer font_list.deinit();
    try std.testing.expect(font_list.list.items.len > 0);

    // Look for the first font whose full name is exactly "DejaVu Sans Mono"
    var matched: ?Font = null;
    for (font_list.list.items) |item| {
        log.debug("full_name: '{s}'", .{item.full_name});
        if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name)) {
            matched = item;
            break;
        }
    }
    try std.testing.expect(matched != null);
    try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
}

// Kept last in the file: tears the C library down once the other tests ran.
test {
    // if (test_should_deinit) deinit();
    deinit();
}
|
391
src/main.zig
Normal file
391
src/main.zig
Normal file
|
@ -0,0 +1,391 @@
|
|||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const unicode = @import("unicode.zig");
|
||||
const fontconfig = @import("fontconfig.zig");
|
||||
|
||||
/// Highest code point considered by this program (inclusive).
const max_unicode: u21 = 0x10FFFD;

/// Every code point from 0 through max_unicode inclusive, in ascending
/// order. Used as the character set of the synthetic "Unsupported" font.
const all_chars = blk: {
    var all: [max_unicode + 1]u21 = undefined;
    // The loop below runs once per element at compile time; leave headroom
    // in the branch quota for it.
    @setEvalBranchQuota(2 * all.len);
    // Fill the whole array, including the final slot for max_unicode itself
    for (0..all.len) |i|
        all[i] = i;
    break :blk all;
};
|
||||
/// Program entry point. Parses the command line and then either lists the
/// Unicode groups, lists the fonts matching the pattern, or prints the
/// code point ranges covered by the selected fonts.
///
/// Exit codes: 0 on success (including --help), 2 for an invalid command
/// line, 255 for an unknown font or an invalid order type.
pub fn main() !u8 {
    // TODO: Add back in
    // defer fontconfig.deinit();
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // stdout carries the program's actual output and is buffered; errors
    // and diagnostics go to stderr.
    const stdout_file = std.io.getStdOut().writer();
    var bw = std.io.bufferedWriter(stdout_file);
    defer bw.flush() catch @panic("could not flush stdout"); // don't forget to flush!
    const stdout = bw.writer();
    const stderr = std.io.getStdErr().writer();

    // std.os.argv is os specific
    var arg_iterator = std.process.args();
    const arg0 = arg_iterator.next().?;
    const options = parseCommandLine(&arg_iterator) catch |err| {
        if (err == error.UserRequestedHelp) {
            try usage(stdout, arg0);
            return 0;
        }
        try usage(stderr, arg0);
        return 2;
    };

    var unicode_ranges = unicode.all_ranges();
    if (options.list_groups) {
        defer unicode_ranges.reset();
        while (unicode_ranges.next()) |range| {
            try stdout.print("{s}", .{range.name});
            // Pad so the code point column lines up
            for (range.name.len..unicode_ranges.longest_name_len + 2) |_|
                try stdout.writeByte(' ');
            try stdout.print("U+{X} - U+{X}\n", .{ range.starting_codepoint, range.ending_codepoint });
        }
        return 0;
    }

    if (options.list_fonts) {
        var fq = fontconfig.FontQuery.init(allocator);
        defer fq.deinit();
        const fl = try fq.fontList(options.pattern);
        var longest_family_name = @as(usize, 0);
        var longest_style_name = @as(usize, 0);
        for (fl.list.items) |f| {
            longest_family_name = @max(f.family.len, longest_family_name);
            longest_style_name = @max(f.style.len, longest_style_name);
        }

        std.sort.insertion(fontconfig.Font, fl.list.items, {}, cmpFont);
        for (fl.list.items) |f| {
            try stdout.print("Family: {s}", .{f.family});
            for (f.family.len..longest_family_name + 1) |_|
                try stdout.writeByte(' ');
            try stdout.print("Chars: {d:5}\tStyle: {s}", .{ f.supported_chars.len, f.style });
            for (f.style.len..longest_style_name + 1) |_|
                try stdout.writeByte(' ');
            try stdout.print("\tName: {s}\n", .{
                f.full_name,
            });
        }
        return 0;
    }

    // An absent or empty --fonts value means "no preference": every font
    // matching the pattern is reported and nothing is excluded.
    const preferred: ?[]const u8 = if (options.fonts) |f| (if (f.len > 0) f else null) else null;
    const exclude_previous = preferred != null;

    var fq = fontconfig.FontQuery.init(allocator);
    defer fq.deinit();
    // The list is intentionally never deinitialized: the Font values used
    // below borrow data owned by it, and the process exits right after.
    const fl = try fq.fontList(options.pattern);

    const fonts: []const fontconfig.Font = if (preferred) |names| blk: {
        // One slot per comma-separated name plus the trailing catch-all
        var al = try std.ArrayList(fontconfig.Font).initCapacity(allocator, std.mem.count(u8, names, ",") + 2);
        defer al.deinit();
        var si = std.mem.splitScalar(u8, names, ',');
        while (si.next()) |font_str| {
            const font = font_blk: {
                for (fl.list.items) |f|
                    if (std.ascii.eqlIgnoreCase(f.family, font_str))
                        break :font_blk f;
                try stderr.print("Error: Font '{s}' not installed\n", .{font_str});
                return 255;
            };

            al.appendAssumeCapacity(font);
        }
        // Synthetic last entry that claims every code point no preferred
        // font covers; outputRange prints its ranges commented out.
        al.appendAssumeCapacity(.{
            .full_name = "Unsupported",
            .family = "Unsupported by any preferred font",
            .style = "Regular",
            .supported_chars = &all_chars,
        });
        break :blk try al.toOwnedSlice();
    } else fl.list.items;

    const order_by_range = if (std.ascii.eqlIgnoreCase("font", options.order))
        false
    else if (std.ascii.eqlIgnoreCase("range", options.order))
        true
    else {
        try stderr.print("Error: Order type '{s}' invalid\n", .{options.order});
        return 255;
    };

    if (exclude_previous) {
        // The final entry is the synthetic "Unsupported" font, not a
        // user preference, so it is left out of the debug listing.
        std.log.debug("{0} preferred fonts:", .{fonts.len - 1});
        for (fonts[0 .. fonts.len - 1]) |f|
            std.log.debug("\t{s}", .{f.family});
    }

    if (options.groups) |group| {
        // Report each Unicode group whose name appears in the
        // comma-delimited list, in Unicode group order.
        while (unicode_ranges.next()) |range| {
            var it = std.mem.splitScalar(u8, group, ',');
            while (it.next()) |desired_group| {
                if (std.mem.eql(u8, range.name, desired_group)) {
                    try outputRange(
                        allocator,
                        range.starting_codepoint,
                        range.ending_codepoint,
                        fonts,
                        exclude_previous,
                        order_by_range,
                        stdout,
                    );
                }
            }
        }
    } else {
        try outputRange(
            allocator,
            0,
            max_unicode,
            fonts,
            exclude_previous,
            order_by_range,
            stdout,
        );
    }

    return 0;
}
|
||||
/// Sort predicate: true when `a`'s family name orders strictly before
/// `b`'s (bytewise comparison).
fn cmpFont(context: void, a: fontconfig.Font, b: fontconfig.Font) bool {
    _ = context;
    return std.mem.lessThan(u8, a.family, b.family);
}

/// Sort predicate: true when `a` starts at a lower code point than `b`.
fn cmpRangeList(context: void, a: fontconfig.RangeFont, b: fontconfig.RangeFont) bool {
    _ = context;
    return b.starting_codepoint > a.starting_codepoint;
}
|
||||
/// Format function behind fmtRangeFontEndingCodepoint: writes "-" followed
/// by the ending code point (rendered with the caller's format specifier),
/// or nothing at all when the range consists of a single code point.
fn formatRangeFontEndingCodepoint(
    data: fontconfig.RangeFont,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = options;
    if (data.starting_codepoint != data.ending_codepoint)
        try std.fmt.format(writer, "-{" ++ fmt ++ "}", .{data.ending_codepoint});
}

/// Wraps a RangeFont so that printing it emits only the optional
/// "-<ending code point>" suffix.
fn fmtRangeFontEndingCodepoint(range_font: fontconfig.RangeFont) std.fmt.Formatter(formatRangeFontEndingCodepoint) {
    return std.fmt.Formatter(formatRangeFontEndingCodepoint){ .data = range_font };
}
|
||||
/// Writes one "U+<start>[-<end>]=<family>" line per range returned by
/// FontQuery.fontsForRange for the given code point span and fonts.
/// Ranges belonging to the font whose full name is "Unsupported" are
/// prefixed with "#". When `order_by_range` is set the lines are sorted by
/// starting code point first.
fn outputRange(
    allocator: std.mem.Allocator,
    starting_codepoint: u21,
    ending_codepoint: u21,
    fonts: []const fontconfig.Font,
    exclude_previous: bool,
    order_by_range: bool,
    writer: anytype,
) !void {
    var query = fontconfig.FontQuery.init(allocator);
    defer query.deinit();
    const matches = try query.fontsForRange(starting_codepoint, ending_codepoint, fonts, exclude_previous); // do we want hard limits around this?
    defer allocator.free(matches);

    std.log.debug("Got {d} range fonts back from query", .{matches.len});
    if (order_by_range)
        std.sort.insertion(fontconfig.RangeFont, matches, {}, cmpRangeList);

    for (matches) |entry| {
        const unsupported = std.mem.eql(u8, entry.font.full_name, "Unsupported");
        const prefix: []const u8 = if (unsupported) "#" else "";
        try writer.print("{s}U+{x}{x}={s}\n", .{
            prefix,
            entry.starting_codepoint,
            fmtRangeFontEndingCodepoint(entry), //.ending_codepoint,
            entry.font.family,
        });
    }
}
|
||||
|
||||
/// Command-line options with their defaults.
const Options = struct {
    /// Set by parseCommandLine when a bare "--" is encountered.
    end_of_options_signifier: ?usize = null,
    /// Comma-delimited Unicode group names (-g/--groups).
    groups: ?[]const u8 = null,
    /// Comma-delimited preferred font families (-f/--fonts).
    /// Defaults to an empty, non-null string.
    fonts: ?[]const u8 = "",
    /// -G/--list-groups
    list_groups: bool = false,
    /// -F/--list-fonts
    list_fonts: bool = false,
    /// Fontconfig pattern used to select fonts (-p/--pattern).
    pattern: [:0]const u8 = ":regular:normal:spacing=100:slant=0",
    /// Output ordering, "font" or "range" (-o/--order).
    order: [:0]const u8 = "font",
};
|
||||
|
||||
/// Writes the help text to `writer`. `arg0` is the program name shown on
/// the "usage:" line.
fn usage(writer: anytype, arg0: []const u8) !void {
    try writer.print(
        \\usage: {s} [OPTION]...
        \\
        \\Options:
        \\ -p, --pattern font pattern to use (Default: :regular:normal:spacing=100:slant=0)
        \\ -g, --groups group names to process, comma delimited (e.g. Thai,Lao - default is all groups)
        \\ -f, --fonts preferred fonts in order, comma delimited (e.g. "DejaVu Sans Mono,Hack Nerd Font" - default is all fonts)
        \\ note this will change the behavior such that ranges supported by the first font found will not
        \\ be considered for use by subsequent fonts
        \\ -o, --order order by (Default: font, can also order by range)
        \\ -G, --list-groups list all groups and exit
        \\ -F, --list-fonts list all fonts matching pattern and exit
        \\ -h, --help display this help text and exit
        \\
    , .{arg0});
}
|
||||
|
||||
/// Consumes the remaining arguments from `arg_iterator` and returns the
/// resulting Options. Parsing stops at a bare "--".
///
/// Returns error.UserRequestedHelp for -h/--help, error.InvalidOption for
/// an unrecognized argument (also reported on stderr outside of tests), and
/// whatever getArgValue reports for a malformed option.
fn parseCommandLine(arg_iterator: anytype) !Options {
    var parsed = Options{};
    var options_seen: usize = 0;
    while (arg_iterator.next()) |arg| : (options_seen += 1) {
        if (std.mem.eql(u8, arg, "--")) {
            parsed.end_of_options_signifier = options_seen + 1;
            return parsed;
        }

        // Options that take a value
        if (try getArgValue(arg_iterator, arg, "groups", "g", .{})) |val| {
            parsed.groups = val;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "pattern", "p", .{})) |val| {
            parsed.pattern = val;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "fonts", "f", .{})) |val| {
            parsed.fonts = val;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "order", "o", .{})) |val| {
            parsed.order = val;
            continue;
        }

        // Boolean flags
        if (try getArgValue(arg_iterator, arg, "list-groups", "G", .{ .is_bool = true })) |_| {
            parsed.list_groups = true;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "list-fonts", "F", .{ .is_bool = true })) |_| {
            parsed.list_fonts = true;
            continue;
        }
        if (try getArgValue(arg_iterator, arg, "help", "h", .{ .is_bool = true })) |_|
            return error.UserRequestedHelp;

        // Nothing matched
        if (!builtin.is_test)
            try std.io.getStdErr().writer().print("invalid option: {s}\n\n", .{arg});
        return error.InvalidOption;
    }
    return parsed;
}
|
||||
/// Per-option settings for getArgValue.
const ArgOptions = struct {
    /// The option is a flag and takes no value.
    is_bool: bool = false,
    /// Not consulted by getArgValue.
    is_required: bool = false,
};

/// Checks whether `arg` is the option identified by `name` (matched as
/// "--name" or "--name=value") or `short_name` (matched as "-s").
///
/// Returns null when `arg` is a different option. For a value option the
/// value is returned, taken from after "=" or from the next argument of
/// `arg_iterator`. For a boolean option a non-null placeholder is returned
/// (the argument itself for the short form, "" for the long form).
///
/// Errors: NoValueOnFlag / NoValueOnName when the value is missing,
/// EqualsInvalidForBooleanArgument when a boolean option is given "=".
fn getArgValue(
    arg_iterator: anytype,
    arg: [:0]const u8,
    comptime name: ?[]const u8,
    comptime short_name: ?[]const u8,
    arg_options: ArgOptions,
) !?[:0]const u8 {
    if (short_name) |s| {
        const short_flag = "-" ++ s;
        if (std.mem.eql(u8, short_flag, arg)) {
            if (arg_options.is_bool) return arg;
            return arg_iterator.next() orelse return error.NoValueOnFlag;
        }
    }
    if (name) |n| {
        const long_flag = "--" ++ n;
        const long_prefix = long_flag ++ "=";
        if (std.mem.eql(u8, long_flag, arg)) {
            if (arg_options.is_bool) return "";
            return arg_iterator.next() orelse return error.NoValueOnName;
        }
        if (std.mem.startsWith(u8, arg, long_prefix)) {
            if (arg_options.is_bool) return error.EqualsInvalidForBooleanArgument;
            // Everything after "=" is the value; arg's terminator is reused
            return arg[long_prefix.len.. :0];
        }
    }
    return null;
}
|
||||
|
||||
// Tests run in this order:
|
||||
//
|
||||
// 1. Main file
|
||||
// - In order, from top to bottom
|
||||
// 2. Referenced file(s), if any
|
||||
// - In order, from top to bottom
|
||||
//
|
||||
// libfontconfig gets inconsistent in a hurry with a lot of init/deinit, so
|
||||
// we only want to deinit once. Because we have no way of saying "go do other
|
||||
// tests, then come back", we have no way of controlling deinitialization other
|
||||
// than something that's not super obvious. So, we're adding this comment.
|
||||
// We will allow fontconfig tests to do our deinit() call, and we shall ignore
|
||||
// deinitialization here
|
||||
test "startup" {
|
||||
// std.testing.log_level = .debug;
|
||||
}
|
||||
test "command line parses with short name" {
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "-g Latin-1");
    defer args.deinit();
    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}

test "command line parses with long name no equals" {
    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups Latin-1");
    defer args.deinit();
    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}

test "command line parses with long name equals" {
    // Run with debug logging, restoring the previous level afterwards
    const saved_log_level = std.testing.log_level;
    defer std.testing.log_level = saved_log_level;
    std.testing.log_level = .debug;

    var args = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups=Latin-1");
    defer args.deinit();
    const parsed = try parseCommandLine(&args);
    try std.testing.expectEqualStrings("Latin-1", parsed.groups.?);
}
|
||||
test "Get ranges" {
    std.log.debug("get ranges", .{});
    // defer fontconfig.deinit();
    var fq = fontconfig.FontQuery.init(std.testing.allocator);
    defer fq.deinit();
    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
    defer fl.deinit();
    try std.testing.expect(fl.list.items.len > 0);

    // The expectations below are written against DejaVu Sans Mono
    const matched = blk: {
        for (fl.list.items) |item| {
            std.log.debug("full_name: '{s}'", .{item.full_name});
            if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name))
                break :blk item;
        }
        break :blk null;
    };
    try std.testing.expect(matched != null);
    const arr: []const fontconfig.Font = &[_]fontconfig.Font{matched.?};

    // Both outputRange calls append to the same buffer
    var al = std.ArrayList(u8).init(std.testing.allocator);
    defer al.deinit();

    const range_name = "Basic Latin";
    const matched_range = try blk: {
        var unicode_ranges = unicode.all_ranges();
        while (unicode_ranges.next()) |range| {
            var it = std.mem.splitScalar(u8, range_name, ',');
            while (it.next()) |desired_range| {
                if (std.mem.eql(u8, range.name, desired_range)) {
                    break :blk range;
                }
            }
        }
        break :blk error.RangeNotFound;
    };

    const log_level = std.testing.log_level;
    std.testing.log_level = .debug;
    defer std.testing.log_level = log_level;

    // exclude_previous = false, order_by_range = false (keep font order)
    try outputRange(std.testing.allocator, matched_range.starting_codepoint, matched_range.ending_codepoint, arr, false, false, al.writer());
    try std.testing.expectEqualStrings("U+20-7e=DejaVu Sans Mono\n", al.items);

    std.log.debug("\nwhole unicode space:", .{});
    try outputRange(std.testing.allocator, 0, max_unicode, arr, false, false, al.writer());
    // First line is left over from the Basic Latin call above
    const expected =
        \\U+20-7e=DejaVu Sans Mono
        \\U+20-7e=DejaVu Sans Mono
        \\U+a0-1c3=DejaVu Sans Mono
        \\U+1cd-1e3=DejaVu Sans Mono
        \\U+1e6-1f0=DejaVu Sans Mono
        \\U+1f4-1f6=DejaVu Sans Mono
    ;
    try std.testing.expectStringStartsWith(al.items, expected);

    // try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
}
|
||||
|
||||
// Last test of this file. It references this file's declarations and imports
// unicode.zig, presumably so that file's tests become part of the test run.
test "teardown, followed by libraries" {
    const this_file = @This();
    std.testing.refAllDecls(this_file); // Only catches public decls
    _ = @import("unicode.zig");
}
|
209
src/ranges.txt
Normal file
209
src/ranges.txt
Normal file
|
@ -0,0 +1,209 @@
|
|||
Basic Latin U+0 - U+7F
|
||||
Latin-1 Supplement U+80 - U+FF
|
||||
Latin Extended-A U+100 - U+17F
|
||||
Latin Extended-B U+180 - U+24F
|
||||
IPA Extensions U+250 - U+2AF
|
||||
Spacing Modifier Letters U+2B0 - U+2FF
|
||||
Combining Diacritical Marks U+300 - U+36F
|
||||
Greek and Coptic U+370 - U+3FF
|
||||
Cyrillic U+400 - U+4FF
|
||||
Cyrillic Supplement U+500 - U+527
|
||||
Armenian U+531 - U+58A
|
||||
Hebrew U+591 - U+5F4
|
||||
Arabic U+600 - U+6FF
|
||||
Syriac U+700 - U+74F
|
||||
Arabic Supplement U+750 - U+77F
|
||||
Thaana U+780 - U+7B1
|
||||
NKo U+7C0 - U+7FA
|
||||
Samaritan U+800 - U+83E
|
||||
Mandaic U+840 - U+85E
|
||||
Devanagari U+900 - U+97F
|
||||
Bengali U+981 - U+9FB
|
||||
Gurmukhi U+A01 - U+A75
|
||||
Gujarati U+A81 - U+AF1
|
||||
Oriya U+B01 - U+B77
|
||||
Tamil U+B82 - U+BFA
|
||||
Telugu U+C01 - U+C7F
|
||||
Kannada U+C82 - U+CF2
|
||||
Malayalam U+D02 - U+D7F
|
||||
Sinhala U+D82 - U+DF4
|
||||
Thai U+E01 - U+E5B
|
||||
Lao U+E81 - U+EDD
|
||||
Tibetan U+F00 - U+FDA
|
||||
Myanmar U+1000 - U+109F
|
||||
Georgian U+10A0 - U+10FC
|
||||
Hangul Jamo U+1100 - U+11FF
|
||||
Ethiopic U+1200 - U+137C
|
||||
Ethiopic Supplement U+1380 - U+1399
|
||||
Cherokee U+13A0 - U+13F4
|
||||
Unified Canadian Aboriginal Syllabics U+1400 - U+167F
|
||||
Ogham U+1680 - U+169C
|
||||
Runic U+16A0 - U+16F0
|
||||
Tagalog U+1700 - U+1714
|
||||
Hanunoo U+1720 - U+1736
|
||||
Buhid U+1740 - U+1753
|
||||
Tagbanwa U+1760 - U+1773
|
||||
Khmer U+1780 - U+17F9
|
||||
Mongolian U+1800 - U+18AA
|
||||
Unified Canadian Aboriginal Syllabics Extended U+18B0 - U+18F5
|
||||
Limbu U+1900 - U+194F
|
||||
Tai Le U+1950 - U+1974
|
||||
New Tai Lue U+1980 - U+19DF
|
||||
Khmer Symbols U+19E0 - U+19FF
|
||||
Buginese U+1A00 - U+1A1F
|
||||
Tai Tham U+1A20 - U+1AAD
|
||||
Balinese U+1B00 - U+1B7C
|
||||
Sundanese U+1B80 - U+1BB9
|
||||
Batak U+1BC0 - U+1BFF
|
||||
Lepcha U+1C00 - U+1C4F
|
||||
Ol Chiki U+1C50 - U+1C7F
|
||||
Vedic Extensions U+1CD0 - U+1CF2
|
||||
Phonetic Extensions U+1D00 - U+1D7F
|
||||
Phonetic Extensions Supplement U+1D80 - U+1DBF
|
||||
Combining Diacritical Marks Supplement U+1DC0 - U+1DFF
|
||||
Latin Extended Additional U+1E00 - U+1EFF
|
||||
Greek Extended U+1F00 - U+1FFE
|
||||
General Punctuation U+2000 - U+206F
|
||||
Superscripts and Subscripts U+2070 - U+209C
|
||||
Currency Symbols U+20A0 - U+20B9
|
||||
Combining Diacritical Marks for Symbols U+20D0 - U+20F0
|
||||
Letterlike Symbols U+2100 - U+214F
|
||||
Number Forms U+2150 - U+2189
|
||||
Arrows U+2190 - U+21FF
|
||||
Mathematical Operators U+2200 - U+22FF
|
||||
Miscellaneous Technical U+2300 - U+23F3
|
||||
Control Pictures U+2400 - U+2426
|
||||
Optical Character Recognition U+2440 - U+244A
|
||||
Enclosed Alphanumerics U+2460 - U+24FF
|
||||
Box Drawing U+2500 - U+257F
|
||||
Block Elements U+2580 - U+259F
|
||||
Geometric Shapes U+25A0 - U+25FF
|
||||
Miscellaneous Symbols U+2600 - U+26FF
|
||||
Dingbats U+2701 - U+27BF
|
||||
Miscellaneous Mathematical Symbols-A U+27C0 - U+27EF
|
||||
Supplemental Arrows-A U+27F0 - U+27FF
|
||||
Braille Patterns U+2800 - U+28FF
|
||||
Supplemental Arrows-B U+2900 - U+297F
|
||||
Miscellaneous Mathematical Symbols-B U+2980 - U+29FF
|
||||
Supplemental Mathematical Operators U+2A00 - U+2AFF
|
||||
Miscellaneous Symbols and Arrows U+2B00 - U+2B59
|
||||
Glagolitic U+2C00 - U+2C5E
|
||||
Latin Extended-C U+2C60 - U+2C7F
|
||||
Coptic U+2C80 - U+2CFF
|
||||
Georgian Supplement U+2D00 - U+2D25
|
||||
Tifinagh U+2D30 - U+2D7F
|
||||
Ethiopic Extended U+2D80 - U+2DDE
|
||||
Cyrillic Extended-A U+2DE0 - U+2DFF
|
||||
Supplemental Punctuation U+2E00 - U+2E31
|
||||
CJK Radicals Supplement U+2E80 - U+2EF3
|
||||
Kangxi Radicals U+2F00 - U+2FD5
|
||||
Ideographic Description Characters U+2FF0 - U+2FFB
|
||||
CJK Symbols and Punctuation U+3000 - U+303F
|
||||
Hiragana U+3041 - U+309F
|
||||
Katakana U+30A0 - U+30FF
|
||||
Bopomofo U+3105 - U+312D
|
||||
Hangul Compatibility Jamo U+3131 - U+318E
|
||||
Kanbun U+3190 - U+319F
|
||||
Bopomofo Extended U+31A0 - U+31BA
|
||||
CJK Strokes U+31C0 - U+31E3
|
||||
Katakana Phonetic Extensions U+31F0 - U+31FF
|
||||
Enclosed CJK Letters and Months U+3200 - U+32FE
|
||||
CJK Compatibility U+3300 - U+33FF
|
||||
CJK Unified Ideographs Extension A U+3400 - U+4DB5
|
||||
Yijing Hexagram Symbols U+4DC0 - U+4DFF
|
||||
CJK Unified Ideographs U+4E00 - U+9FCB
|
||||
Yi Syllables U+A000 - U+A48C
|
||||
Yi Radicals U+A490 - U+A4C6
|
||||
Lisu U+A4D0 - U+A4FF
|
||||
Vai U+A500 - U+A62B
|
||||
Cyrillic Extended-B U+A640 - U+A697
|
||||
Bamum U+A6A0 - U+A6F7
|
||||
Modifier Tone Letters U+A700 - U+A71F
|
||||
Latin Extended-D U+A720 - U+A7FF
|
||||
Syloti Nagri U+A800 - U+A82B
|
||||
Common Indic Number Forms U+A830 - U+A839
|
||||
Phags-pa U+A840 - U+A877
|
||||
Saurashtra U+A880 - U+A8D9
|
||||
Devanagari Extended U+A8E0 - U+A8FB
|
||||
Kayah Li U+A900 - U+A92F
|
||||
Rejang U+A930 - U+A95F
|
||||
Hangul Jamo Extended-A U+A960 - U+A97C
|
||||
Javanese U+A980 - U+A9DF
|
||||
Cham U+AA00 - U+AA5F
|
||||
Myanmar Extended-A U+AA60 - U+AA7B
|
||||
Tai Viet U+AA80 - U+AADF
|
||||
Ethiopic Extended-A U+AB01 - U+AB2E
|
||||
Meetei Mayek U+ABC0 - U+ABF9
|
||||
Hangul Syllables U+AC00 - U+D7A3
|
||||
Hangul Jamo Extended-B U+D7B0 - U+D7FB
|
||||
High Surrogates U+D800 - U+DB7F
|
||||
High Private Use Surrogates U+DB80 - U+DBFF
|
||||
Low Surrogates U+DC00 - U+DFFF
|
||||
Private Use Area U+E000 - U+F8FF
|
||||
CJK Compatibility Ideographs U+F900 - U+FAD9
|
||||
Alphabetic Presentation Forms U+FB00 - U+FB4F
|
||||
Arabic Presentation Forms-A U+FB50 - U+FDFD
|
||||
Variation Selectors U+FE00 - U+FE0F
|
||||
Vertical Forms U+FE10 - U+FE19
|
||||
Combining Half Marks U+FE20 - U+FE26
|
||||
CJK Compatibility Forms U+FE30 - U+FE4F
|
||||
Small Form Variants U+FE50 - U+FE6B
|
||||
Arabic Presentation Forms-B U+FE70 - U+FEFF
|
||||
Halfwidth and Fullwidth Forms U+FF01 - U+FFEE
|
||||
Specials U+FFF9 - U+FFFD
|
||||
Linear B Syllabary U+10000 - U+1005D
|
||||
Linear B Ideograms U+10080 - U+100FA
|
||||
Aegean Numbers U+10100 - U+1013F
|
||||
Ancient Greek Numbers U+10140 - U+1018A
|
||||
Ancient Symbols U+10190 - U+1019B
|
||||
Phaistos Disc U+101D0 - U+101FD
|
||||
Lycian U+10280 - U+1029C
|
||||
Carian U+102A0 - U+102D0
|
||||
Old Italic U+10300 - U+10323
|
||||
Gothic U+10330 - U+1034A
|
||||
Ugaritic U+10380 - U+1039F
|
||||
Old Persian U+103A0 - U+103D5
|
||||
Deseret U+10400 - U+1044F
|
||||
Shavian U+10450 - U+1047F
|
||||
Osmanya U+10480 - U+104A9
|
||||
Cypriot Syllabary U+10800 - U+1083F
|
||||
Imperial Aramaic U+10840 - U+1085F
|
||||
Phoenician U+10900 - U+1091F
|
||||
Lydian U+10920 - U+1093F
|
||||
Kharoshthi U+10A00 - U+10A58
|
||||
Old South Arabian U+10A60 - U+10A7F
|
||||
Avestan U+10B00 - U+10B3F
|
||||
Inscriptional Parthian U+10B40 - U+10B5F
|
||||
Inscriptional Pahlavi U+10B60 - U+10B7F
|
||||
Old Turkic U+10C00 - U+10C48
|
||||
Rumi Numeral Symbols U+10E60 - U+10E7E
|
||||
Brahmi U+11000 - U+1106F
|
||||
Kaithi U+11080 - U+110C1
|
||||
Cuneiform U+12000 - U+1236E
|
||||
Cuneiform Numbers and Punctuation U+12400 - U+12473
|
||||
Egyptian Hieroglyphs U+13000 - U+1342E
|
||||
Bamum Supplement U+16800 - U+16A38
|
||||
Kana Supplement U+1B000 - U+1B001
|
||||
Byzantine Musical Symbols U+1D000 - U+1D0F5
|
||||
Musical Symbols U+1D100 - U+1D1DD
|
||||
Ancient Greek Musical Notation U+1D200 - U+1D245
|
||||
Tai Xuan Jing Symbols U+1D300 - U+1D356
|
||||
Counting Rod Numerals U+1D360 - U+1D371
|
||||
Mathematical Alphanumeric Symbols U+1D400 - U+1D7FF
|
||||
Mahjong Tiles U+1F000 - U+1F02B
|
||||
Domino Tiles U+1F030 - U+1F093
|
||||
Playing Cards U+1F0A0 - U+1F0DF
|
||||
Enclosed Alphanumeric Supplement U+1F100 - U+1F1FF
|
||||
Enclosed Ideographic Supplement U+1F200 - U+1F251
|
||||
Miscellaneous Symbols And Pictographs U+1F300 - U+1F5FF
|
||||
Emoticons U+1F601 - U+1F64F
|
||||
Transport And Map Symbols U+1F680 - U+1F6C5
|
||||
Alchemical Symbols U+1F700 - U+1F773
|
||||
CJK Unified Ideographs Extension B U+20000 - U+2A6D6
|
||||
CJK Unified Ideographs Extension C U+2A700 - U+2B734
|
||||
CJK Unified Ideographs Extension D U+2B740 - U+2B81D
|
||||
CJK Compatibility Ideographs Supplement U+2F800 - U+2FA1D
|
||||
Tags U+E0001 - U+E007F
|
||||
Variation Selectors Supplement U+E0100 - U+E01EF
|
||||
Supplementary Private Use Area-A U+F0000 - U+FFFFD
|
||||
Supplementary Private Use Area-B U+100000 - U+10FFFD
|
112
src/unicode.zig
Normal file
112
src/unicode.zig
Normal file
|
@ -0,0 +1,112 @@
|
|||
const std = @import("std");
|
||||
|
||||
// Pulled from: https://www.unicodepedia.com/groups/
|
||||
const ranges = @embedFile("ranges.txt");
|
||||
const eval_branch_quota_base = 18500;
|
||||
// Number of entries in the embedded ranges file, computed at compile time as
// the number of newline characters it contains. This equals the number of
// entries only if every entry, including the last, ends with '\n'.
const range_count = newline_count: {
    // Scanning the embedded file at comptime needs a raised branch quota;
    // the quota should be related to the number of characters in that file.
    @setEvalBranchQuota(eval_branch_quota_base);
    const newlines = std.mem.count(u8, ranges, "\n");
    break :newline_count newlines;
};
|
||||
/// Table of unicode groups stored as parallel arrays, plus a cursor for
/// walking the table one `UnicodeGroup` at a time.
const Ranges = struct {
    names: [range_count][]const u8 = undefined,
    starting_codepoints: [range_count]u21 = undefined,
    ending_codepoints: [range_count]u21 = undefined,
    /// Index of the entry the next call to `next` returns.
    current_inx: usize = 0,
    /// Length of the longest entry in `names`.
    longest_name_len: usize = 0,

    const Self = @This();

    /// Rewinds the cursor and returns the first group, if any.
    pub fn first(self: *Self) ?UnicodeGroup {
        self.current_inx = 0;
        return self.next();
    }

    /// Rewinds the cursor so the next call to `next` starts from the top.
    pub fn reset(self: *Self) void {
        self.current_inx = 0;
    }

    /// Returns the group under the cursor and advances, or null once every
    /// entry has been returned.
    pub fn next(self: *Self) ?UnicodeGroup {
        const index = self.current_inx;
        if (index == range_count) return null;
        self.current_inx = index + 1;
        return self.item(index);
    }

    /// Assembles the group stored at `index` from the parallel arrays.
    pub fn item(self: Self, index: usize) UnicodeGroup {
        const group = UnicodeGroup{
            .name = self.names[index],
            .starting_codepoint = self.starting_codepoints[index],
            .ending_codepoint = self.ending_codepoints[index],
        };
        return group;
    }
};
|
||||
|
||||
// The table parsed from the embedded ranges file, built once at compile
// time. A parse failure is reported as a compile error.
const _all_ranges = parsed: {
    // Parsing is given twice the branch quota used for counting lines.
    @setEvalBranchQuota(2 * eval_branch_quota_base);
    const table = parseRanges(ranges) catch @compileError("Could not parse ranges.txt");
    break :parsed table;
};
|
||||
|
||||
/// Returns a fresh copy of the compile-time table of unicode groups, with its
/// cursor positioned at the first entry.
pub fn all_ranges() Ranges {
    // Start from the field defaults (cursor at 0), then copy the table data.
    var copy = Ranges{};
    copy.names = _all_ranges.names;
    copy.starting_codepoints = _all_ranges.starting_codepoints;
    copy.ending_codepoints = _all_ranges.ending_codepoints;
    copy.longest_name_len = _all_ranges.longest_name_len;
    return copy;
}
|
||||
|
||||
/// One named unicode group, for example "Basic Latin" spanning 0x0 to 0x7F.
pub const UnicodeGroup = struct {
|
||||
/// Human-readable group name, e.g. "Basic Latin".
name: []const u8,
|
||||
/// First code point listed for the group.
starting_codepoint: u21,
|
||||
/// Last code point listed for the group.
ending_codepoint: u21,
|
||||
};
|
||||
|
||||
/// Parses newline-separated group definitions into a `Ranges` table.
///
/// Empty lines are skipped. Every other line is handed to `parseGroup`, and
/// any error it returns is propagated.
///
/// The table has exactly `range_count` slots, so the number of non-empty
/// lines must match it:
///   - error.TooManyRanges: more non-empty lines than slots (previously an
///     out-of-bounds array write).
///   - error.TooFewRanges: fewer non-empty lines than slots (previously the
///     remaining slots were silently left `undefined`).
fn parseRanges(text: []const u8) !Ranges {
    var rc = Ranges{};
    var lines = std.mem.splitSequence(u8, text, "\n");
    var inx: usize = 0;
    while (lines.next()) |line| {
        if (line.len == 0) continue;
        if (inx == range_count) return error.TooManyRanges;

        const group = try parseGroup(line);
        rc.names[inx] = group.name;
        rc.starting_codepoints[inx] = group.starting_codepoint;
        rc.ending_codepoints[inx] = group.ending_codepoint;
        rc.longest_name_len = @max(rc.longest_name_len, group.name.len);
        inx += 1;
    }
    if (inx != range_count) return error.TooFewRanges;
    return rc;
}
|
||||
|
||||
/// Parses one group definition of the form
/// `<name><TAB>U+<hex> - U+<hex>`, e.g. `Basic Latin<TAB>U+0 - U+7F`.
///
/// The returned `name` is a slice of `group_text` with trailing spaces
/// removed; no memory is allocated.
///
/// Errors:
///   - error.NoRangeSpecifiedInGroup: no tab-separated range column.
///   - error.NoEndingCodepointInGroup: the range has no " - " separator.
///   - error.InvalidCodepointPrefix: a bound does not start with "U+".
///   - any error from `std.fmt.parseUnsigned` for the hex digits.
fn parseGroup(group_text: []const u8) !UnicodeGroup {
    var columns = std.mem.splitSequence(u8, group_text, "\t");
    const name = std.mem.trimRight(u8, columns.first(), " ");
    const range_text = columns.next() orelse {
        std.log.err("failed parsing on group '{s}'", .{group_text});
        return error.NoRangeSpecifiedInGroup;
    };

    var bounds = std.mem.splitSequence(u8, range_text, " - ");
    const start_text = bounds.first();
    const end_text = bounds.next() orelse return error.NoEndingCodepointInGroup;

    // Validate the "U+" prefix before slicing it off, so a short or malformed
    // bound is reported as an error instead of an out-of-bounds slice. The
    // checks are plain comparisons (no helper calls) because this function
    // is evaluated at compile time under an explicit branch quota.
    if (start_text.len < 2 or start_text[0] != 'U' or start_text[1] != '+')
        return error.InvalidCodepointPrefix;
    if (end_text.len < 2 or end_text[0] != 'U' or end_text[1] != '+')
        return error.InvalidCodepointPrefix;

    return UnicodeGroup{
        .name = name,
        .starting_codepoint = try std.fmt.parseUnsigned(u21, start_text[2..], 16),
        .ending_codepoint = try std.fmt.parseUnsigned(u21, end_text[2..], 16),
    };
}
|
||||
|
||||
// Spot-checks the parsed table: one entry by index, then the first two
// entries through the cursor API.
test "check ranges" {
    var table = all_ranges();

    // Entry 8 should be:
    // Cyrillic U+400 - U+4FF
    const cyrillic = table.item(8);
    try std.testing.expectEqual(@as(u21, 0x400), cyrillic.starting_codepoint);
    try std.testing.expectEqual(@as(u21, 0x4ff), cyrillic.ending_codepoint);
    try std.testing.expectEqualStrings("Cyrillic", cyrillic.name);

    const basic_latin = table.first().?;
    try std.testing.expectEqualStrings("Basic Latin", basic_latin.name);
    try std.testing.expectEqual(@as(u21, 0x0), basic_latin.starting_codepoint);
    try std.testing.expectEqual(@as(u21, 0x7f), basic_latin.ending_codepoint);

    const latin_1 = table.next().?;
    try std.testing.expectEqualStrings("Latin-1 Supplement", latin_1.name);
    try std.testing.expectEqual(@as(u21, 0x80), latin_1.starting_codepoint);
    try std.testing.expectEqual(@as(u21, 0xff), latin_1.ending_codepoint);
}
|
4
zig-via-docker
Executable file
4
zig-via-docker
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
# Runs the "fontfinder" container image with podman, forwarding all arguments
# to it. The caller's ~/.cache is mounted at /root/.cache and the directory
# containing this script is mounted at /app, which is also the working dir.

# Resolve the directory containing this script.
#   - `&&` (not `;`) so a failed cd cannot silently yield the caller's cwd.
#   - CDPATH is cleared so cd prints nothing into the substitution.
#   - `--` guards against a path that starts with a dash.
scriptpath="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P)" || exit 1
# podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder-alpine "$@"
podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder "$@"
|
Loading…
Reference in New Issue
Block a user