initial source code

2023-07-29 20:10:04 -07:00 · 2023-07-29 20:10:04 -07:00 · 5feee8c0c3
commit 5feee8c0c3
parent c731c3a015
11 changed files with 1175 additions and 0 deletions
--- a/14
+++ b/14
@ -0,0 +1,14 @@
+# Container image wrapping a specific Zig 0.11.0-dev build on Debian bullseye,
+# with the fontconfig development package installed.
+FROM debian:bullseye
+
+# One layer: install packages, unpack the Zig tarball into /usr/local, remove
+# the tools that were only needed for the download (curl, xz-utils), symlink
+# the zig binary into /usr/local/bin and delete the apt package lists.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+         libfontconfig-dev \
+         ca-certificates \
+         curl \
+         xz-utils \
+    && curl https://mirror.bazel.build/ziglang.org/builds/zig-linux-x86_64-0.11.0-dev.3886+0c1bfe271.tar.xz | tar -C /usr/local/ -xJ \
+    && apt-get -y remove curl xz-utils  \
+    && ln -s /usr/local/zig*/zig /usr/local/bin \
+    && rm -rf /var/lib/apt/lists/*
+
+# Arguments given to `docker run` are passed straight to zig.
+ENTRYPOINT ["/usr/local/bin/zig"]
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Emil Lerch
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
--- a/build.zig
+++ b/build.zig
@ -0,0 +1,86 @@
+const std = @import("std");
+
+/// Describes the build graph for the `fontfinder` executable and its unit
+/// tests. Nothing is compiled by this function itself: it only registers
+/// steps on `b`, which the external build runner executes afterwards.
+pub fn build(b: *std.Build) void {
+    // Target and optimization mode are left entirely to the `zig build`
+    // command line (no restrictions, no preferred release mode).
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // The application binary, installed by the default "install" step.
+    const fontfinder = b.addExecutable(.{
+        .name = "fontfinder",
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    configure(fontfinder);
+    b.installArtifact(fontfinder);
+
+    // `zig build run -- arg1 arg2 ...` runs the binary after the install step
+    // has completed and forwards any trailing arguments to it.
+    const run_fontfinder = b.addRunArtifact(fontfinder);
+    run_fontfinder.step.dependOn(b.getInstallStep());
+    if (b.args) |forwarded| {
+        run_fontfinder.addArgs(forwarded);
+    }
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_fontfinder.step);
+
+    // `zig build test` builds the test binary from the same root file and
+    // then runs it.
+    const tests = b.addTest(.{
+        .root_source_file = .{ .path = "src/main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    configure(tests);
+    const run_tests = b.addRunArtifact(tests);
+
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_tests.step);
+}
+
+/// Applies the C interop settings shared by the executable and the test
+/// binary: libc, the system fontconfig library and the C helper in
+/// `src/fontconfig.c`.
+///
+/// `object` is any compile step that provides the methods called below.
+fn configure(object: anytype) void {
+    // Fontconfig must be installed. Docker can also be used (see Dockerfile)
+    const c_flags = [_][]const u8{"-std=c99"};
+
+    // object.linkage = .static;
+    object.linkLibC();
+    object.addSystemIncludePath("/usr/include");
+    object.linkSystemLibrary("fontconfig");
+    // object.linkSystemLibrary("expat"); // fontconfig dependency - needed for static builds
+    object.addLibraryPath("/usr/lib");
+    object.addCSourceFile("src/fontconfig.c", &c_flags);
+}
--- a/src/core
+++ b/src/core
--- a/src/fontconfig.c
+++ b/src/fontconfig.c
@ -0,0 +1,54 @@
+#include <stdlib.h>
+#include <fontconfig/fontconfig.h>
+
+/* #<{(| FcChar32 FcCharSetCount (const FcCharSet *a); |)}># */
+/* void printCharacters(FcPattern* fontPattern) { */
+/*     FcCharSet* charset; */
+/*     if (FcPatternGetCharSet(fontPattern, FC_CHARSET, 0, &charset) == FcResultMatch) { */
+/*         FcChar32 ucs4; */
+/*         FcCharSetIter iter; */
+/*         FcCharSetIterInit(charset, &iter); */
+/*         printf("Supported characters:\n"); */
+/*         while (FcCharSetIterNext(&iter, &ucs4)) { */
+/*             printf("%lc ", (wint_t)ucs4); */
+/*         } */
+/*         printf("\n"); */
+/*         FcCharSetDestroy(charset); */
+/*     } */
+/* } */
+
+const FcChar32 MAX_UNICODE = 0x10FFFD;
+
+/* Releases an array that allCharacters handed out through its `chars`
+ * out-parameter (that array is allocated with malloc). */
+void freeAllCharacters(unsigned int *chars) {
+  free(chars);
+}
+
+/*
+ * Collects, in ascending order, every code point contained in the charset of
+ * a font pattern.
+ *
+ * fontPattern: an FcPattern* (typed void* for the benefit of the Zig caller).
+ * chars:       out-parameter. On success it receives a malloc'ed array that
+ *              the caller must release with freeAllCharacters. On failure it
+ *              is set to NULL.
+ *
+ * Returns the number of code points stored in *chars, or a negative value:
+ *   -1  the pattern has no charset
+ *   -2  the charset reports more code points than exist up to MAX_UNICODE
+ *   -3  out of memory
+ */
+int allCharacters(void* fontPattern, FcChar32 ** chars) {
+  FcPattern* pat = (FcPattern*) fontPattern;
+  FcCharSet* charset;
+
+  *chars = NULL;
+  if (FcPatternGetCharSet(pat, FC_CHARSET, 0, &charset) != FcResultMatch) {
+    return -1;
+  }
+
+  FcChar32 count = FcCharSetCount(charset);
+  /* Allocate at least one element so that an empty charset still produces a
+   * valid, freeable pointer rather than whatever malloc(0) returns. */
+  FcChar32* char_array = (FcChar32*)malloc((count ? count : 1) * sizeof(FcChar32));
+  if (char_array == NULL) {
+    return -3;
+  }
+
+  /* Probe code points in order and stop as soon as all have been found. */
+  FcChar32 found = 0;
+  for (FcChar32 cp = 0; found < count && cp <= MAX_UNICODE; cp++) {
+    if (FcCharSetHasChar(charset, cp) == FcTrue) {
+      char_array[found++] = cp;
+    }
+  }
+
+  /* The charset returned by FcPatternGetCharSet is not a copy: it belongs to
+   * the pattern, so it must not be destroyed here. */
+
+  if (found < count) {
+    free(char_array);
+    return -2;
+  }
+  *chars = char_array;
+  return (int)found;
+}
--- a/src/fontconfig.zig
+++ b/src/fontconfig.zig
@ -0,0 +1,284 @@
+const std = @import("std");
+const unicode = @import("unicode.zig");
+const c = @cImport({
+    @cInclude("fontconfig/fontconfig.h");
+});
+const log = std.log.scoped(.fontconfig);
+
+// Helpers implemented in src/fontconfig.c.
+// allCharacters stores a malloc'ed array of 32-bit code points in `chars` and
+// returns its length (negative on failure); freeAllCharacters releases it.
+extern fn allCharacters(p: ?*const c.FcPattern, chars: *[*]u32) c_int;
+extern fn freeAllCharacters(chars: [*]u32) void;
+
+/// A contiguous run of code points together with the font assigned to it.
+pub const RangeFont = struct {
+    starting_codepoint: u21,
+    ending_codepoint: u21,
+    font: Font,
+};
+
+/// Description of one font. The string fields are plain slices; when a Font
+/// is produced by `FontQuery.fontList` they point into fontconfig-owned data
+/// that stays valid until the owning `FontList` is deinitialized.
+pub const Font = struct {
+    full_name: []const u8,
+    family: []const u8,
+    style: []const u8,
+    supported_chars: []const u21,
+
+    const Self = @This();
+
+    /// Releases `supported_chars`. Only valid for fonts whose code point
+    /// array was allocated by the C helper `allCharacters`, and only once.
+    /// `FontList.deinit` calls this for every font it holds, so callers of
+    /// `FontList` must not call it themselves.
+    pub fn deinit(self: *Self) void {
+        // The array was allocated on the C side as 32-bit values; hand the
+        // same pointer back to C for freeing.
+        freeAllCharacters(@constCast(@ptrCast(self.supported_chars.ptr)));
+    }
+};
+
+/// Result of a font query. Owns the fontconfig pattern and font set backing
+/// the strings of every `Font` in `list`, as well as each font's code point
+/// array.
+pub const FontList = struct {
+    list: std.ArrayList(Font),
+    allocator: std.mem.Allocator,
+    pattern: *c.FcPattern,
+    fontset: *c.FcFontSet,
+
+    const Self = @This();
+
+    /// Creates an empty list with room for `num` fonts and takes ownership of
+    /// `pattern` and `fontset`. Fails only if the allocation fails, in which
+    /// case ownership of `pattern` and `fontset` stays with the caller.
+    pub fn initCapacity(allocator: std.mem.Allocator, num: usize, pattern: *c.FcPattern, fontset: *c.FcFontSet) std.mem.Allocator.Error!Self {
+        var al = try std.ArrayList(Font).initCapacity(allocator, num);
+        return Self{
+            .allocator = allocator,
+            .list = al,
+            .pattern = pattern,
+            .fontset = fontset,
+        };
+    }
+
+    /// Frees every font's code point array, the list itself and the
+    /// fontconfig objects. All `Font` values copied out of `list` are invalid
+    /// afterwards.
+    pub fn deinit(self: *Self) void {
+        for (self.list.items) |*font| font.deinit();
+        self.list.deinit();
+        c.FcFontSetDestroy(self.fontset);
+        c.FcPatternDestroy(self.pattern);
+    }
+
+    /// Appends a font without allocating; the capacity passed to
+    /// `initCapacity` must not be exceeded. The slices are stored as given,
+    /// not copied.
+    pub fn addFontAssumeCapacity(
+        self: *Self,
+        full_name: []const u8,
+        family: []const u8,
+        style: []const u8,
+        supported_chars: []const u21,
+    ) !void {
+        self.list.appendAssumeCapacity(.{
+            .full_name = full_name,
+            .family = family,
+            .style = style,
+            .supported_chars = supported_chars,
+        });
+    }
+};
+
+// Process-wide fontconfig handle, created lazily on first use.
+var fc_config: ?*c.FcConfig = null;
+// Latched by deinit(); once set, the C library must not be touched again.
+var deinited = false;
+// pub var test_should_deinit = true;
+/// Tears down the underlying C library. Call at most once, and only after
+/// all font processing has finished; a second call panics.
+pub fn deinit() void {
+    // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r2370.html
+    // Says that "Note that calling this function with the return from FcConfigGetCurrent will place the library in an indeterminate state."
+    // However, it seems as though you can't do this either:
+    //
+    // 1. c.FcInitLoadConfigAndFonts();
+    // 2. c.FcConfigDestroy();
+    // 3. c.FcInitLoadConfigAndFonts();
+    // 4. c.FcConfigDestroy(); // Seg fault here
+    if (deinited) @panic("Cannot deinitialize this library more than once");
+    deinited = true;
+
+    defer fc_config = null;
+    const conf = fc_config orelse return;
+    log.debug("destroying config: do not use library or call me again", .{});
+    c.FcConfigDestroy(conf);
+}
+
+/// Entry point for font queries: lists installed fonts through fontconfig and
+/// maps code point ranges to the fonts that cover them.
+pub const FontQuery = struct {
+    allocator: std.mem.Allocator,
+    // fc_config: ?*c.FcConfig = null,
+
+    const Self = @This();
+
+    pub fn init(allocator: std.mem.Allocator) Self {
+        return Self{
+            .allocator = allocator,
+        };
+    }
+
+    /// Currently a no-op; present so callers can follow the usual
+    /// init/deinit pairing.
+    pub fn deinit(self: *Self) void {
+        _ = self;
+        // if (self.all_fonts) |a| a.deinit();
+    }
+
+    /// Returns every installed font matching the fontconfig `pattern`
+    /// (FcNameParse syntax). The caller owns the result and releases it with
+    /// `FontList.deinit`.
+    ///
+    /// Initializes the fontconfig library on first use and panics if the
+    /// library has already been torn down with the module-level `deinit`.
+    /// Returns an error if fontconfig cannot be initialized, the pattern
+    /// cannot be parsed, the fonts cannot be listed, or a font lacks a
+    /// family, style or charset.
+    pub fn fontList(self: *Self, pattern: [:0]const u8) !FontList {
+        if (fc_config == null and deinited) @panic("fontconfig C library is in an inconsistent state - should not use");
+        if (fc_config == null) fc_config = c.FcInitLoadConfigAndFonts();
+        const config = fc_config orelse return error.FontConfigInitLoadFailure;
+
+        // The pattern and the font set must outlive this call because the
+        // returned strings point into them. FontList takes ownership of both;
+        // until it does, they are released here on failure.
+        var rc = setup: {
+            const pat = c.FcNameParse(pattern);
+            if (pat == null) return error.FontConfigCouldNotParsePattern;
+            errdefer c.FcPatternDestroy(pat);
+
+            const os = c.FcObjectSetBuild(c.FC_FAMILY, c.FC_STYLE, c.FC_LANG, c.FC_FULLNAME, c.FC_CHARSET, @as(?*u8, null)); // *FcObjectSet
+            if (os == null) return error.FontConfigCouldNotBuildObjectSet;
+            defer c.FcObjectSetDestroy(os);
+
+            const fs = c.FcFontList(config, pat, os); // FcFontSet
+            if (fs == null) return error.FontConfigCouldNotListFonts;
+            errdefer c.FcFontSetDestroy(fs);
+
+            // Use the following only when needed. NameUnparse allocates memory
+            // log.debug("Total matching fonts: {d}. Pattern: {s}\n", .{ fs.*.nfont, c.FcNameUnparse(pat) });
+            log.debug("Total matching fonts: {d}", .{fs.*.nfont});
+            break :setup try FontList.initCapacity(self.allocator, @as(usize, @intCast(fs.*.nfont)), pat.?, fs.?);
+        };
+        errdefer rc.deinit();
+
+        const font_count = @as(usize, @intCast(rc.fontset.nfont));
+        for (0..font_count) |i| {
+            const font = rc.fontset.fonts[i].?; // *FcPattern
+            var fullname: [*:0]c.FcChar8 = undefined;
+            var style: [*:0]c.FcChar8 = undefined;
+            var family: [*:0]c.FcChar8 = undefined;
+
+            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r600.html
+            // Note that these (like FcPatternGet) do not make a copy of any data structure referenced by the return value
+            // https://refspecs.linuxfoundation.org/fontconfig-2.6.0/r570.html
+            // The value returned is not a copy, but rather refers to the data stored within the pattern directly. Applications must not free this value.
+            if (c.FcPatternGetString(font, c.FC_FAMILY, 0, @as([*c][*c]c.FcChar8, @ptrCast(&family))) != c.FcResultMatch)
+                return error.FontConfigHasNoFamily;
+            if (c.FcPatternGetString(font, c.FC_STYLE, 0, @as([*c][*c]c.FcChar8, @ptrCast(&style))) != c.FcResultMatch)
+                return error.FontConfigHasNoStyle;
+            // A missing full name is tolerated and reported as an empty string
+            if (c.FcPatternGetString(font, c.FC_FULLNAME, 0, @as([*c][*c]c.FcChar8, @ptrCast(&fullname))) != c.FcResultMatch)
+                fullname = @constCast(@ptrCast("".ptr));
+
+            // Fetch the code points last: the array is malloc'ed by the C
+            // helper, and nothing after this point can fail, so it cannot
+            // leak on an error return.
+            var charset: [*]u21 = undefined;
+            const len = allCharacters(font, @ptrCast(&charset));
+            if (len < 0) return error.FontConfigCouldNotGetCharSet;
+
+            log.debug(
+                "Chars: {d:5.0} Family '{s}' Style '{s}' Full Name: {s}",
+                .{ @as(usize, @intCast(len)), family, style, fullname },
+            );
+
+            try rc.addFontAssumeCapacity(
+                fullname[0..std.mem.len(fullname)],
+                family[0..std.mem.len(family)],
+                style[0..std.mem.len(style)],
+                charset[0..@as(usize, @intCast(len))],
+            );
+        }
+        return rc;
+    }
+
+    /// Splits the code points between `starting_codepoint` and
+    /// `ending_codepoint` (both inclusive) into contiguous runs supported by
+    /// each of `fonts`, visiting the fonts in the order given.
+    ///
+    /// Each font's `supported_chars` is expected to be in ascending order.
+    /// When `exclude_previous` is set, a code point is assigned to the first
+    /// font that supports it and is not reported again for later fonts.
+    ///
+    /// The caller owns the returned slice and frees it with the allocator
+    /// this FontQuery was initialized with.
+    pub fn fontsForRange(
+        self: *Self,
+        starting_codepoint: u21,
+        ending_codepoint: u21,
+        fonts: []const Font,
+        exclude_previous: bool,
+    ) ![]RangeFont {
+        var rc = std.ArrayList(RangeFont).init(self.allocator);
+        defer rc.deinit();
+
+        // An inverted range contains nothing
+        if (ending_codepoint < starting_codepoint) return rc.toOwnedSlice();
+
+        // One flag per code point in the range, indexed by the offset from
+        // starting_codepoint (not by the absolute code point).
+        const claimed: ?[]bool = if (exclude_previous) blk: {
+            const flags = try self.allocator.alloc(bool, @as(usize, ending_codepoint - starting_codepoint) + 1);
+            for (flags) |*flag| flag.* = false;
+            break :blk flags;
+        } else null;
+        defer if (claimed) |flags| self.allocator.free(flags);
+
+        for (fonts) |font| {
+            const chars = font.supported_chars;
+            var inx: usize = 0;
+
+            // Advance to the start of the range
+            while (inx < chars.len and chars[inx] < starting_codepoint)
+                inx += 1;
+
+            while (inx < chars.len and chars[inx] <= ending_codepoint) {
+                const first = chars[inx];
+                inx += 1;
+                // Defensive: only reachable if the input is not ascending
+                if (first < starting_codepoint) continue;
+                if (claimed) |flags| {
+                    const flag = &flags[first - starting_codepoint];
+                    if (flag.*) continue; // This was already supported - continue
+                    flag.* = true;
+                }
+
+                // We found the beginning of a run; extend it while the next
+                // supported code point is adjacent, inside the range and not
+                // already claimed by an earlier font.
+                var last = first;
+                while (inx < chars.len and last < ending_codepoint and chars[inx] == last + 1) {
+                    if (claimed) |flags| {
+                        const flag = &flags[chars[inx] - starting_codepoint];
+                        if (flag.*) break;
+                        flag.* = true;
+                    }
+                    last = chars[inx];
+                    inx += 1;
+                }
+
+                // If we have not hit the stops, inx is now at the beginning
+                // of the next candidate run
+                try rc.append(.{
+                    .font = font,
+                    .starting_codepoint = first,
+                    .ending_codepoint = last,
+                });
+            }
+        }
+        return rc.toOwnedSlice();
+    }
+};
+
+// References the public declarations of this file so they get analyzed.
+test {
+    std.testing.refAllDecls(@This()); // Only catches public decls
+}
+// Queries the fonts installed on the test host and looks for the entry whose
+// full name is "DejaVu Sans Mono".
+// NOTE(review): the expected count of 3322 code points presumably matches one
+// specific DejaVu Sans Mono release - confirm against the test environment.
+test "Get fonts" {
+    // std.testing.log_level = .debug;
+    log.debug("get fonts", .{});
+    var fq = FontQuery.init(std.testing.allocator);
+    defer fq.deinit();
+    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
+    defer fl.deinit();
+    try std.testing.expect(fl.list.items.len > 0);
+    var matched = blk: {
+        for (fl.list.items) |item| {
+            log.debug("full_name: '{s}'", .{item.full_name});
+            if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name))
+                break :blk item;
+        }
+        break :blk null;
+    };
+    try std.testing.expect(matched != null);
+    try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
+}
+// Tears the C library down; deinit() panics if it is ever called a second
+// time, so no other test may call it.
+test {
+    // if (test_should_deinit) deinit();
+    deinit();
+}
--- a/src/main.zig
+++ b/src/main.zig
@ -0,0 +1,391 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const unicode = @import("unicode.zig");
+const fontconfig = @import("fontconfig.zig");
+
+/// Highest code point considered by this program.
+const max_unicode: u21 = 0x10FFFD;
+/// Every code point from 0 through `max_unicode`, in ascending order. Used as
+/// the `supported_chars` of the synthetic "Unsupported" catch-all font.
+const all_chars = blk: {
+    var all: [max_unicode + 1]u21 = undefined;
+    // The loop below runs once per element; leave generous headroom.
+    @setEvalBranchQuota(@as(u32, max_unicode) * 2);
+    // Iterate over the whole array so the final element (max_unicode itself)
+    // is initialized too.
+    for (0..all.len) |i|
+        all[i] = i;
+    break :blk all;
+};
+/// Program entry point. Parses the command line, then either lists the known
+/// Unicode groups, lists the installed fonts matching the pattern, or prints
+/// one `U+start[-end]=family` line per code point run.
+///
+/// Exit codes: 0 on success (including --help), 2 for an invalid command
+/// line, 255 for an unknown font or an invalid order type.
+pub fn main() !u8 {
+    // TODO: Add back in
+    // defer fontconfig.deinit();
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    // stdout is for the actual output of your application, for example if you
+    // are implementing gzip, then only the compressed bytes should be sent to
+    // stdout, not any debugging messages.
+    const stdout_file = std.io.getStdOut().writer();
+    var bw = std.io.bufferedWriter(stdout_file);
+    defer bw.flush() catch @panic("could not flush stdout"); // don't forget to flush!
+    const stdout = bw.writer();
+
+    // std.os.argv is os specific
+    var arg_iterator = std.process.args();
+    const arg0 = arg_iterator.next().?;
+    const options = parseCommandLine(&arg_iterator) catch |err| {
+        // Help goes to stdout and is a success; any other parse error prints
+        // the usage text to stderr.
+        if (err == error.UserRequestedHelp) {
+            try usage(stdout, arg0);
+            return 0;
+        }
+        try usage(std.io.getStdErr().writer(), arg0);
+        return 2;
+    };
+
+    var unicode_ranges = unicode.all_ranges();
+    if (options.list_groups) {
+        defer unicode_ranges.reset();
+        while (unicode_ranges.next()) |range| {
+            // Pad each name so the code point column lines up
+            try stdout.print("{s}", .{range.name});
+            for (range.name.len..unicode_ranges.longest_name_len + 2) |_|
+                try stdout.writeByte(' ');
+            try stdout.print("U+{X} - U+{X}\n", .{ range.starting_codepoint, range.ending_codepoint });
+        }
+        return 0;
+    }
+    if (options.list_fonts) {
+        var fq = fontconfig.FontQuery.init(allocator);
+        defer fq.deinit();
+        var fl = try fq.fontList(options.pattern);
+        // Column widths for the aligned listing below
+        var longest_family_name = @as(usize, 0);
+        var longest_style_name = @as(usize, 0);
+        for (fl.list.items) |f| {
+            longest_family_name = @max(f.family.len, longest_family_name);
+            longest_style_name = @max(f.style.len, longest_style_name);
+        }
+
+        std.sort.insertion(fontconfig.Font, fl.list.items, {}, cmpFont);
+        for (fl.list.items) |f| {
+            try stdout.print("Family: {s}", .{f.family});
+            for (f.family.len..longest_family_name + 1) |_|
+                try stdout.writeByte(' ');
+            try stdout.print("Chars: {d:5}\tStyle: {s}", .{ f.supported_chars.len, f.style });
+            for (f.style.len..longest_style_name + 1) |_|
+                try stdout.writeByte(' ');
+            try stdout.print("\tName: {s}\n", .{
+                f.full_name,
+            });
+        }
+        return 0;
+    }
+    // NOTE(review): Options.fonts defaults to an empty string rather than
+    // null, so without -f this is still true and the lookup below reports
+    // that font '' is not installed - confirm the intended default.
+    const exclude_previous = options.fonts != null;
+    // Preferred fonts in the order given, followed by a synthetic
+    // "Unsupported" font that claims every remaining code point.
+    const fonts: []fontconfig.Font = blk: {
+        if (options.fonts == null) break :blk &[_]fontconfig.Font{};
+        const fo = options.fonts.?;
+        var si = std.mem.splitScalar(u8, fo, ',');
+        var fq = fontconfig.FontQuery.init(allocator);
+        defer fq.deinit();
+        var fl = try fq.fontList(options.pattern);
+        // This messes with data after, and we don't need to deinit anyway
+        // defer fl.deinit();
+        // One slot per comma-separated name plus one for the catch-all font
+        var al = try std.ArrayList(fontconfig.Font).initCapacity(allocator, std.mem.count(u8, fo, ",") + 2);
+        defer al.deinit();
+        while (si.next()) |font_str| {
+            // Match by family name, ignoring ASCII case
+            const font = font_blk: {
+                for (fl.list.items) |f|
+                    if (std.ascii.eqlIgnoreCase(f.family, font_str))
+                        break :font_blk f;
+                try std.io.getStdErr().writer().print("Error: Font '{s}' not installed\n", .{font_str});
+                return 255;
+            };
+
+            al.appendAssumeCapacity(font);
+        }
+        al.appendAssumeCapacity(.{
+            .full_name = "Unsupported",
+            .family = "Unsupported by any preferred font",
+            .style = "Regular",
+            .supported_chars = &all_chars,
+        });
+        break :blk try al.toOwnedSlice();
+    };
+
+    const order_by_range = if (std.ascii.eqlIgnoreCase("font", options.order))
+        false
+    else if (std.ascii.eqlIgnoreCase("range", options.order))
+        true
+    else
+        null;
+    if (order_by_range == null) {
+        try std.io.getStdErr().writer().print("Error: Order type '{s}' invalid\n", .{options.order});
+        return 255;
+    }
+    // The last entry is the synthetic catch-all and is not logged; skip the
+    // log entirely when there are no fonts so `len - 1` cannot underflow.
+    if (fonts.len > 0) {
+        std.log.debug("{0} prefered fonts:", .{fonts.len - 1});
+        for (fonts[0 .. fonts.len - 1]) |f|
+            std.log.debug("\t{s}", .{f.family});
+    }
+    if (options.groups) |group| {
+        // Emit each known range whose name matches one of the requested,
+        // comma-delimited group names exactly
+        while (unicode_ranges.next()) |range| {
+            var it = std.mem.splitScalar(u8, group, ',');
+            while (it.next()) |desired_group| {
+                if (std.mem.eql(u8, range.name, desired_group)) {
+                    try outputRange(
+                        allocator,
+                        range.starting_codepoint,
+                        range.ending_codepoint,
+                        fonts,
+                        exclude_previous,
+                        order_by_range.?,
+                        stdout,
+                    );
+                }
+            }
+        }
+    } else {
+        // No group filter: cover the whole code point space
+        try outputRange(
+            allocator,
+            0,
+            max_unicode,
+            fonts,
+            exclude_previous,
+            order_by_range.?,
+            stdout,
+        );
+    }
+
+    return 0;
+}
+/// Sort predicate: true when `a`'s family name orders strictly before `b`'s
+/// in byte-wise comparison.
+fn cmpFont(context: void, a: fontconfig.Font, b: fontconfig.Font) bool {
+    _ = context;
+    return std.mem.lessThan(u8, a.family, b.family);
+}
+/// Sort predicate: true when `a` starts at a lower code point than `b`.
+fn cmpRangeList(context: void, a: fontconfig.RangeFont, b: fontconfig.RangeFont) bool {
+    _ = context;
+    return b.starting_codepoint > a.starting_codepoint;
+}
+/// Format function behind `fmtRangeFontEndingCodepoint`: writes "-" followed
+/// by the ending code point (rendered with the caller's format specifier),
+/// or nothing at all when the range covers a single code point.
+fn formatRangeFontEndingCodepoint(
+    data: fontconfig.RangeFont,
+    comptime fmt: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    const is_single = data.ending_codepoint == data.starting_codepoint;
+    if (!is_single) {
+        try std.fmt.format(writer, "-{" ++ fmt ++ "}", .{data.ending_codepoint});
+    }
+}
+/// Wraps a range so that it can be passed as a print argument and rendered
+/// by `formatRangeFontEndingCodepoint`.
+fn fmtRangeFontEndingCodepoint(range_font: fontconfig.RangeFont) std.fmt.Formatter(formatRangeFontEndingCodepoint) {
+    return .{ .data = range_font };
+}
+/// Computes the font assignments for one code point range and writes one
+/// line per run to `writer`, in the form `U+start[-end]=family` (hex, lower
+/// case). Lines belonging to the font whose full name is "Unsupported" are
+/// prefixed with `#`.
+///
+/// With `order_by_range` the lines are sorted by starting code point;
+/// otherwise they stay in the order returned by the query.
+fn outputRange(
+    allocator: std.mem.Allocator,
+    starting_codepoint: u21,
+    ending_codepoint: u21,
+    fonts: []const fontconfig.Font,
+    exclude_previous: bool,
+    order_by_range: bool,
+    writer: anytype,
+) !void {
+    var query = fontconfig.FontQuery.init(allocator);
+    defer query.deinit();
+
+    // do we want hard limits around this?
+    const matches = try query.fontsForRange(starting_codepoint, ending_codepoint, fonts, exclude_previous);
+    defer allocator.free(matches);
+    std.log.debug("Got {d} range fonts back from query", .{matches.len});
+
+    if (order_by_range) {
+        std.sort.insertion(fontconfig.RangeFont, matches, {}, cmpRangeList);
+    }
+
+    for (matches) |match| {
+        const is_fallback = std.mem.eql(u8, match.font.full_name, "Unsupported");
+        const prefix: []const u8 = if (is_fallback) "#" else "";
+        try writer.print("{s}U+{x}{x}={s}\n", .{
+            prefix,
+            match.starting_codepoint,
+            fmtRangeFontEndingCodepoint(match),
+            match.font.family,
+        });
+    }
+}
+
+/// Settings gathered from the command line by parseCommandLine. Each field's
+/// default applies when the corresponding flag is absent.
+const Options = struct {
+    // Set when a bare `--` argument is encountered; parsing stops there.
+    end_of_options_signifier: ?usize = null,
+    // Comma-delimited group names (-g/--groups).
+    groups: ?[]const u8 = null,
+    // Comma-delimited font family names (-f/--fonts).
+    // NOTE(review): the default is an empty string, not null, so a
+    // `fonts != null` check is true even without -f - confirm this is intended.
+    fonts: ?[]const u8 = &[_]u8{},
+    // -G/--list-groups
+    list_groups: bool = false,
+    // -F/--list-fonts
+    list_fonts: bool = false,
+    // Font pattern handed to the font query (-p/--pattern).
+    pattern: [:0]const u8 = ":regular:normal:spacing=100:slant=0",
+    // Output ordering (-o/--order).
+    order: [:0]const u8 = "font",
+};
+
+/// Writes the help text to `writer`; `arg0` is shown as the program name.
+fn usage(writer: anytype, arg0: []const u8) !void {
+    try writer.print(
+        \\usage: {s} [OPTION]...
+        \\
+        \\Options:
+        \\  -p, --pattern     font pattern to use (Default: :regular:normal:spacing=100:slant=0)
+        \\  -g, --groups      group names to process, comma delimited (e.g. Thai,Lao - default is all groups)
+        \\  -f, --fonts       preferred fonts in order, comma delimited (e.g. "DejaVu Sans Mono,Hack Nerd Font" - default is all fonts)
+        \\                    note this will change the behavior such that ranges supported by the first font found will not
+        \\                    be considered for use by subsequent fonts
+        \\  -o, --order       order by (Default: font, can also order by range)
+        \\  -G, --list-groups list all groups and exit
+        \\  -F, --list-fonts  list all fonts matching pattern and exit
+        \\  -h, --help        display this help text and exit
+        \\
+    , .{arg0});
+}
+
+/// Turns the remaining command line arguments into an `Options` value.
+///
+/// `arg_iterator` must provide `next()` yielding the next argument or null.
+/// Parsing stops at a bare `--`. Returns `error.UserRequestedHelp` for
+/// -h/--help, `error.InvalidOption` for an unrecognised argument (after
+/// reporting it on stderr, except in test builds), and passes on the errors
+/// of `getArgValue`.
+fn parseCommandLine(arg_iterator: anytype) !Options {
+    var parsed = Options{};
+    var consumed: usize = 0;
+    while (arg_iterator.next()) |arg| : (consumed += 1) {
+        const is_terminator = std.mem.eql(u8, arg, "--");
+        if (is_terminator) {
+            parsed.end_of_options_signifier = consumed + 1;
+            return parsed;
+        }
+
+        // Value-taking options first, then the boolean switches
+        if (try getArgValue(arg_iterator, arg, "groups", "g", .{})) |value| {
+            parsed.groups = value;
+        } else if (try getArgValue(arg_iterator, arg, "pattern", "p", .{})) |value| {
+            parsed.pattern = value;
+        } else if (try getArgValue(arg_iterator, arg, "fonts", "f", .{})) |value| {
+            parsed.fonts = value;
+        } else if (try getArgValue(arg_iterator, arg, "order", "o", .{})) |value| {
+            parsed.order = value;
+        } else if (try getArgValue(arg_iterator, arg, "list-groups", "G", .{ .is_bool = true })) |_| {
+            parsed.list_groups = true;
+        } else if (try getArgValue(arg_iterator, arg, "list-fonts", "F", .{ .is_bool = true })) |_| {
+            parsed.list_fonts = true;
+        } else if (try getArgValue(arg_iterator, arg, "help", "h", .{ .is_bool = true })) |_| {
+            return error.UserRequestedHelp;
+        } else {
+            if (!builtin.is_test)
+                try std.io.getStdErr().writer().print("invalid option: {s}\n\n", .{arg});
+            return error.InvalidOption;
+        }
+    }
+    return parsed;
+}
+/// Per-option behaviour flags for getArgValue.
+const ArgOptions = struct {
+    // The option is a switch and takes no value.
+    is_bool: bool = false,
+    // NOTE(review): not read by getArgValue in this file - confirm whether it is still needed.
+    is_required: bool = false,
+};
+/// Checks whether `arg` is the option identified by `name` (long form,
+/// `--name value` or `--name=value`) or `short_name` (`-x value`).
+///
+/// Returns null when `arg` is a different option. For a match it returns the
+/// option's value, taking the next argument from `arg_iterator` when the
+/// value is not attached with `=`. Boolean options take no value and return
+/// a non-null placeholder instead.
+///
+/// Errors: `NoValueOnFlag` / `NoValueOnName` when the value is missing, and
+/// `EqualsInvalidForBooleanArgument` for `--name=...` on a boolean option.
+fn getArgValue(
+    arg_iterator: anytype,
+    arg: [:0]const u8,
+    comptime name: ?[]const u8,
+    comptime short_name: ?[]const u8,
+    arg_options: ArgOptions,
+) !?[:0]const u8 {
+    if (short_name) |s| {
+        const short_form = "-" ++ s;
+        if (std.mem.eql(u8, short_form, arg)) {
+            if (arg_options.is_bool) return arg;
+            return arg_iterator.next() orelse return error.NoValueOnFlag;
+        }
+    }
+    if (name) |n| {
+        const long_form = "--" ++ n;
+        const long_form_eq = long_form ++ "=";
+        if (std.mem.eql(u8, long_form, arg)) {
+            if (arg_options.is_bool) return "";
+            return arg_iterator.next() orelse return error.NoValueOnName;
+        }
+        if (std.mem.startsWith(u8, arg, long_form_eq)) {
+            if (arg_options.is_bool) return error.EqualsInvalidForBooleanArgument;
+            // Everything after the '=' is the value; it keeps arg's sentinel
+            return arg[long_form_eq.len.. :0];
+        }
+    }
+    return null;
+}
+
+// Tests run in this order:
+//
+// 1. Main file
+//    - In order, from top to bottom
+// 2. Referenced file(s), if any
+//    - In order, from top to bottom
+//
+// libfontconfig gets inconsistent in a hurry with a lot of init/deinit, so
+// we only want to deinit once. Because we have no way of saying "go do other
+// tests, then come back", we have no way of controlling deinitialization other
+// than something that's not super obvious. So, we're adding this comment.
+// We will allow fontconfig tests to do our deinit() call, and we shall ignore
+// deinitialization here
+// Placeholder first test; only holds an optional log level override.
+test "startup" {
+    // std.testing.log_level = .debug;
+}
+// Short flag followed by a separate value: `-g Latin-1`
+test "command line parses with short name" {
+    var it = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "-g Latin-1");
+    defer it.deinit();
+    const options = try parseCommandLine(&it);
+    try std.testing.expectEqualStrings("Latin-1", options.groups.?);
+}
+// Long flag followed by a separate value: `--groups Latin-1`
+test "command line parses with long name no equals" {
+    var it = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups Latin-1");
+    defer it.deinit();
+    const options = try parseCommandLine(&it);
+    try std.testing.expectEqualStrings("Latin-1", options.groups.?);
+}
+// Long flag with the value attached: `--groups=Latin-1`
+test "command line parses with long name equals" {
+    // Raise the log level for this test only; the previous level is restored on exit
+    var log_level = std.testing.log_level;
+    defer std.testing.log_level = log_level;
+    std.testing.log_level = .debug;
+    var it = try std.process.ArgIteratorGeneral(.{}).init(std.testing.allocator, "--groups=Latin-1");
+    defer it.deinit();
+    const options = try parseCommandLine(&it);
+    try std.testing.expectEqualStrings("Latin-1", options.groups.?);
+}
+// Checks the range output for DejaVu Sans Mono, first for the "Basic Latin"
+// group and then for the whole code point space. Requires that font to be
+// installed on the test host.
+test "Get ranges" {
+    std.log.debug("get ranges", .{});
+    // defer fontconfig.deinit();
+    var fq = fontconfig.FontQuery.init(std.testing.allocator);
+    defer fq.deinit();
+    var fl = try fq.fontList(":regular:normal:spacing=100:slant=0");
+    defer fl.deinit();
+    try std.testing.expect(fl.list.items.len > 0);
+    var matched = blk: {
+        for (fl.list.items) |item| {
+            std.log.debug("full_name: '{s}'", .{item.full_name});
+            if (std.mem.eql(u8, "DejaVu Sans Mono", item.full_name))
+                break :blk item;
+        }
+        break :blk null;
+    };
+    try std.testing.expect(matched != null);
+    const arr: []const fontconfig.Font = &[_]fontconfig.Font{matched.?};
+    // Collects everything outputRange writes
+    var al = std.ArrayList(u8).init(std.testing.allocator);
+    defer al.deinit();
+    const range_name = "Basic Latin";
+    var matched_range = try blk: {
+        var unicode_ranges = unicode.all_ranges();
+        while (unicode_ranges.next()) |range| {
+            var it = std.mem.splitScalar(u8, range_name, ',');
+            while (it.next()) |desired_range| {
+                if (std.mem.eql(u8, range.name, desired_range)) {
+                    break :blk range;
+                }
+            }
+        }
+        break :blk error.RangeNotFound;
+    };
+    var log_level = std.testing.log_level;
+    std.testing.log_level = .debug;
+    defer std.testing.log_level = log_level;
+    // outputRange takes an order_by_range flag before the writer; with a
+    // single font the ordering is irrelevant, so use the default (by font).
+    try outputRange(std.testing.allocator, matched_range.starting_codepoint, matched_range.ending_codepoint, arr, false, false, al.writer());
+    try std.testing.expectEqualStrings("U+20-7e=DejaVu Sans Mono\n", al.items);
+
+    std.log.debug("\nwhole unicode space:", .{});
+    // `al` is not cleared, so the output of the first call is still at the
+    // front - hence the duplicated first line in `expected`.
+    try outputRange(std.testing.allocator, 0, max_unicode, arr, false, false, al.writer());
+    const expected =
+        \\U+20-7e=DejaVu Sans Mono
+        \\U+20-7e=DejaVu Sans Mono
+        \\U+a0-1c3=DejaVu Sans Mono
+        \\U+1cd-1e3=DejaVu Sans Mono
+        \\U+1e6-1f0=DejaVu Sans Mono
+        \\U+1f4-1f6=DejaVu Sans Mono
+    ;
+    try std.testing.expectStringStartsWith(al.items, expected);
+
+    // try std.testing.expectEqual(@as(usize, 3322), matched.?.supported_chars.len);
+}
+
+// Last test in this file: references this file's public declarations and
+// imports unicode.zig so that its tests are included as well.
+test "teardown, followed by libraries" {
+    std.testing.refAllDecls(@This()); // Only catches public decls
+    _ = @import("unicode.zig");
+}
--- a/src/ranges.txt
+++ b/src/ranges.txt
@ -0,0 +1,209 @@
+Basic Latin 	U+0 - U+7F
+Latin-1 Supplement 	U+80 - U+FF
+Latin Extended-A 	U+100 - U+17F
+Latin Extended-B 	U+180 - U+24F
+IPA Extensions 	U+250 - U+2AF
+Spacing Modifier Letters 	U+2B0 - U+2FF
+Combining Diacritical Marks 	U+300 - U+36F
+Greek and Coptic 	U+370 - U+3FF
+Cyrillic 	U+400 - U+4FF
+Cyrillic Supplement 	U+500 - U+527
+Armenian 	U+531 - U+58A
+Hebrew 	U+591 - U+5F4
+Arabic 	U+600 - U+6FF
+Syriac 	U+700 - U+74F
+Arabic Supplement 	U+750 - U+77F
+Thaana 	U+780 - U+7B1
+NKo 	U+7C0 - U+7FA
+Samaritan 	U+800 - U+83E
+Mandaic 	U+840 - U+85E
+Devanagari 	U+900 - U+97F
+Bengali 	U+981 - U+9FB
+Gurmukhi 	U+A01 - U+A75
+Gujarati 	U+A81 - U+AF1
+Oriya 	U+B01 - U+B77
+Tamil 	U+B82 - U+BFA
+Telugu 	U+C01 - U+C7F
+Kannada 	U+C82 - U+CF2
+Malayalam 	U+D02 - U+D7F
+Sinhala 	U+D82 - U+DF4
+Thai 	U+E01 - U+E5B
+Lao 	U+E81 - U+EDD
+Tibetan 	U+F00 - U+FDA
+Myanmar 	U+1000 - U+109F
+Georgian 	U+10A0 - U+10FC
+Hangul Jamo 	U+1100 - U+11FF
+Ethiopic 	U+1200 - U+137C
+Ethiopic Supplement 	U+1380 - U+1399
+Cherokee 	U+13A0 - U+13F4
+Unified Canadian Aboriginal Syllabics 	U+1400 - U+167F
+Ogham 	U+1680 - U+169C
+Runic 	U+16A0 - U+16F0
+Tagalog 	U+1700 - U+1714
+Hanunoo 	U+1720 - U+1736
+Buhid 	U+1740 - U+1753
+Tagbanwa 	U+1760 - U+1773
+Khmer 	U+1780 - U+17F9
+Mongolian 	U+1800 - U+18AA
+Unified Canadian Aboriginal Syllabics Extended 	U+18B0 - U+18F5
+Limbu 	U+1900 - U+194F
+Tai Le 	U+1950 - U+1974
+New Tai Lue 	U+1980 - U+19DF
+Khmer Symbols 	U+19E0 - U+19FF
+Buginese 	U+1A00 - U+1A1F
+Tai Tham 	U+1A20 - U+1AAD
+Balinese 	U+1B00 - U+1B7C
+Sundanese 	U+1B80 - U+1BB9
+Batak 	U+1BC0 - U+1BFF
+Lepcha 	U+1C00 - U+1C4F
+Ol Chiki 	U+1C50 - U+1C7F
+Vedic Extensions 	U+1CD0 - U+1CF2
+Phonetic Extensions 	U+1D00 - U+1D7F
+Phonetic Extensions Supplement 	U+1D80 - U+1DBF
+Combining Diacritical Marks Supplement 	U+1DC0 - U+1DFF
+Latin Extended Additional 	U+1E00 - U+1EFF
+Greek Extended 	U+1F00 - U+1FFE
+General Punctuation 	U+2000 - U+206F
+Superscripts and Subscripts 	U+2070 - U+209C
+Currency Symbols 	U+20A0 - U+20B9
+Combining Diacritical Marks for Symbols 	U+20D0 - U+20F0
+Letterlike Symbols 	U+2100 - U+214F
+Number Forms 	U+2150 - U+2189
+Arrows 	U+2190 - U+21FF
+Mathematical Operators 	U+2200 - U+22FF
+Miscellaneous Technical 	U+2300 - U+23F3
+Control Pictures 	U+2400 - U+2426
+Optical Character Recognition 	U+2440 - U+244A
+Enclosed Alphanumerics 	U+2460 - U+24FF
+Box Drawing 	U+2500 - U+257F
+Block Elements 	U+2580 - U+259F
+Geometric Shapes 	U+25A0 - U+25FF
+Miscellaneous Symbols 	U+2600 - U+26FF
+Dingbats 	U+2701 - U+27BF
+Miscellaneous Mathematical Symbols-A 	U+27C0 - U+27EF
+Supplemental Arrows-A 	U+27F0 - U+27FF
+Braille Patterns 	U+2800 - U+28FF
+Supplemental Arrows-B 	U+2900 - U+297F
+Miscellaneous Mathematical Symbols-B 	U+2980 - U+29FF
+Supplemental Mathematical Operators 	U+2A00 - U+2AFF
+Miscellaneous Symbols and Arrows 	U+2B00 - U+2B59
+Glagolitic 	U+2C00 - U+2C5E
+Latin Extended-C 	U+2C60 - U+2C7F
+Coptic 	U+2C80 - U+2CFF
+Georgian Supplement 	U+2D00 - U+2D25
+Tifinagh 	U+2D30 - U+2D7F
+Ethiopic Extended 	U+2D80 - U+2DDE
+Cyrillic Extended-A 	U+2DE0 - U+2DFF
+Supplemental Punctuation 	U+2E00 - U+2E31
+CJK Radicals Supplement 	U+2E80 - U+2EF3
+Kangxi Radicals 	U+2F00 - U+2FD5
+Ideographic Description Characters 	U+2FF0 - U+2FFB
+CJK Symbols and Punctuation 	U+3000 - U+303F
+Hiragana 	U+3041 - U+309F
+Katakana 	U+30A0 - U+30FF
+Bopomofo 	U+3105 - U+312D
+Hangul Compatibility Jamo 	U+3131 - U+318E
+Kanbun 	U+3190 - U+319F
+Bopomofo Extended 	U+31A0 - U+31BA
+CJK Strokes 	U+31C0 - U+31E3
+Katakana Phonetic Extensions 	U+31F0 - U+31FF
+Enclosed CJK Letters and Months 	U+3200 - U+32FE
+CJK Compatibility 	U+3300 - U+33FF
+CJK Unified Ideographs Extension A 	U+3400 - U+4DB5
+Yijing Hexagram Symbols 	U+4DC0 - U+4DFF
+CJK Unified Ideographs 	U+4E00 - U+9FCB
+Yi Syllables 	U+A000 - U+A48C
+Yi Radicals 	U+A490 - U+A4C6
+Lisu 	U+A4D0 - U+A4FF
+Vai 	U+A500 - U+A62B
+Cyrillic Extended-B 	U+A640 - U+A697
+Bamum 	U+A6A0 - U+A6F7
+Modifier Tone Letters 	U+A700 - U+A71F
+Latin Extended-D 	U+A720 - U+A7FF
+Syloti Nagri 	U+A800 - U+A82B
+Common Indic Number Forms 	U+A830 - U+A839
+Phags-pa 	U+A840 - U+A877
+Saurashtra 	U+A880 - U+A8D9
+Devanagari Extended 	U+A8E0 - U+A8FB
+Kayah Li 	U+A900 - U+A92F
+Rejang 	U+A930 - U+A95F
+Hangul Jamo Extended-A 	U+A960 - U+A97C
+Javanese 	U+A980 - U+A9DF
+Cham 	U+AA00 - U+AA5F
+Myanmar Extended-A 	U+AA60 - U+AA7B
+Tai Viet 	U+AA80 - U+AADF
+Ethiopic Extended-A 	U+AB01 - U+AB2E
+Meetei Mayek 	U+ABC0 - U+ABF9
+Hangul Syllables 	U+AC00 - U+D7A3
+Hangul Jamo Extended-B 	U+D7B0 - U+D7FB
+High Surrogates 	U+D800 - U+DB7F
+High Private Use Surrogates 	U+DB80 - U+DBFF
+Low Surrogates 	U+DC00 - U+DFFF
+Private Use Area 	U+E000 - U+F8FF
+CJK Compatibility Ideographs 	U+F900 - U+FAD9
+Alphabetic Presentation Forms 	U+FB00 - U+FB4F
+Arabic Presentation Forms-A 	U+FB50 - U+FDFD
+Variation Selectors 	U+FE00 - U+FE0F
+Vertical Forms 	U+FE10 - U+FE19
+Combining Half Marks 	U+FE20 - U+FE26
+CJK Compatibility Forms 	U+FE30 - U+FE4F
+Small Form Variants 	U+FE50 - U+FE6B
+Arabic Presentation Forms-B 	U+FE70 - U+FEFF
+Halfwidth and Fullwidth Forms 	U+FF01 - U+FFEE
+Specials 	U+FFF9 - U+FFFD
+Linear B Syllabary 	U+10000 - U+1005D
+Linear B Ideograms 	U+10080 - U+100FA
+Aegean Numbers 	U+10100 - U+1013F
+Ancient Greek Numbers 	U+10140 - U+1018A
+Ancient Symbols 	U+10190 - U+1019B
+Phaistos Disc 	U+101D0 - U+101FD
+Lycian 	U+10280 - U+1029C
+Carian 	U+102A0 - U+102D0
+Old Italic 	U+10300 - U+10323
+Gothic 	U+10330 - U+1034A
+Ugaritic 	U+10380 - U+1039F
+Old Persian 	U+103A0 - U+103D5
+Deseret 	U+10400 - U+1044F
+Shavian 	U+10450 - U+1047F
+Osmanya 	U+10480 - U+104A9
+Cypriot Syllabary 	U+10800 - U+1083F
+Imperial Aramaic 	U+10840 - U+1085F
+Phoenician 	U+10900 - U+1091F
+Lydian 	U+10920 - U+1093F
+Kharoshthi 	U+10A00 - U+10A58
+Old South Arabian 	U+10A60 - U+10A7F
+Avestan 	U+10B00 - U+10B3F
+Inscriptional Parthian 	U+10B40 - U+10B5F
+Inscriptional Pahlavi 	U+10B60 - U+10B7F
+Old Turkic 	U+10C00 - U+10C48
+Rumi Numeral Symbols 	U+10E60 - U+10E7E
+Brahmi 	U+11000 - U+1106F
+Kaithi 	U+11080 - U+110C1
+Cuneiform 	U+12000 - U+1236E
+Cuneiform Numbers and Punctuation 	U+12400 - U+12473
+Egyptian Hieroglyphs 	U+13000 - U+1342E
+Bamum Supplement 	U+16800 - U+16A38
+Kana Supplement 	U+1B000 - U+1B001
+Byzantine Musical Symbols 	U+1D000 - U+1D0F5
+Musical Symbols 	U+1D100 - U+1D1DD
+Ancient Greek Musical Notation 	U+1D200 - U+1D245
+Tai Xuan Jing Symbols 	U+1D300 - U+1D356
+Counting Rod Numerals 	U+1D360 - U+1D371
+Mathematical Alphanumeric Symbols 	U+1D400 - U+1D7FF
+Mahjong Tiles 	U+1F000 - U+1F02B
+Domino Tiles 	U+1F030 - U+1F093
+Playing Cards 	U+1F0A0 - U+1F0DF
+Enclosed Alphanumeric Supplement 	U+1F100 - U+1F1FF
+Enclosed Ideographic Supplement 	U+1F200 - U+1F251
+Miscellaneous Symbols And Pictographs 	U+1F300 - U+1F5FF
+Emoticons 	U+1F601 - U+1F64F
+Transport And Map Symbols 	U+1F680 - U+1F6C5
+Alchemical Symbols 	U+1F700 - U+1F773
+CJK Unified Ideographs Extension B 	U+20000 - U+2A6D6
+CJK Unified Ideographs Extension C 	U+2A700 - U+2B734
+CJK Unified Ideographs Extension D 	U+2B740 - U+2B81D
+CJK Compatibility Ideographs Supplement 	U+2F800 - U+2FA1D
+Tags 	U+E0001 - U+E007F
+Variation Selectors Supplement 	U+E0100 - U+E01EF
+Supplementary Private Use Area-A 	U+F0000 - U+FFFFD
+Supplementary Private Use Area-B 	U+100000 - U+10FFFD
--- a/src/unicode.zig
+++ b/src/unicode.zig
@ -0,0 +1,112 @@
+const std = @import("std");
+
+// Pulled from: https://www.unicodepedia.com/groups/
+// Raw table text, embedded at compile time. Each line has the form
+// "<name> <TAB>U+<hex start> - U+<hex end>".
+const ranges = @embedFile("ranges.txt");
+// Compile-time branch budget. Used as-is for counting lines below and
+// doubled where the table is parsed.
+const eval_branch_quota_base = 18500;
+// Number of entries in the table, taken as the number of newline characters
+// in the embedded text (so every line is expected to end with '\n').
+const range_count = blk: {
+    // This should be related to the number of characters in our embedded file above
+    @setEvalBranchQuota(eval_branch_quota_base);
+    break :blk std.mem.count(u8, ranges, "\n");
+};
+/// Table of Unicode groups stored as parallel arrays, together with a cursor
+/// that lets the table be walked with `first`/`next`.
+const Ranges = struct {
+    /// Group names; entry i pairs with the codepoint arrays at the same index.
+    names: [range_count][]const u8 = undefined,
+    /// First codepoint of each group.
+    starting_codepoints: [range_count]u21 = undefined,
+    /// Last codepoint of each group.
+    ending_codepoints: [range_count]u21 = undefined,
+    /// Index of the entry the next call to `next` will hand out.
+    current_inx: usize = 0,
+    /// Length of the longest name in `names`.
+    longest_name_len: usize = 0,
+
+    const Self = @This();
+
+    /// Assembles the group stored at `index` from the parallel arrays.
+    pub fn item(self: Self, index: usize) UnicodeGroup {
+        const group = UnicodeGroup{
+            .name = self.names[index],
+            .starting_codepoint = self.starting_codepoints[index],
+            .ending_codepoint = self.ending_codepoints[index],
+        };
+        return group;
+    }
+
+    /// Moves the cursor back to the first entry.
+    pub fn reset(self: *Self) void {
+        self.current_inx = 0;
+    }
+
+    /// Returns the entry under the cursor and advances it, or null once every
+    /// entry has been handed out.
+    pub fn next(self: *Self) ?UnicodeGroup {
+        if (self.current_inx == range_count) return null;
+        // The increment runs after the return value has been built.
+        defer self.current_inx += 1;
+        return self.item(self.current_inx);
+    }
+
+    /// Rewinds the cursor and returns the first entry.
+    pub fn first(self: *Self) ?UnicodeGroup {
+        self.current_inx = 0;
+        return self.next();
+    }
+};
+
+/// Range table built once at compile time from the embedded ranges.txt.
+/// A parse failure stops compilation.
+const _all_ranges = parsed: {
+    // Parsing is given twice the budget used for counting the lines.
+    @setEvalBranchQuota(eval_branch_quota_base * 2);
+    const table = parseRanges(ranges) catch @compileError("Could not parse ranges.txt");
+    break :parsed table;
+};
+
+/// Returns a caller-owned copy of the compile-time range table with its
+/// cursor positioned at the first entry.
+pub fn all_ranges() Ranges {
+    var table: Ranges = _all_ranges;
+    table.current_inx = 0;
+    return table;
+}
+
+/// One named Unicode group and the codepoint span listed for it in ranges.txt.
+pub const UnicodeGroup = struct {
+    /// Group name as written in the table, without trailing spaces.
+    name: []const u8,
+    /// Codepoint written before the " - " separator.
+    starting_codepoint: u21,
+    /// Codepoint written after the " - " separator.
+    ending_codepoint: u21,
+};
+
+/// Parses every non-empty line of `text` into a `Ranges` table.
+///
+/// The table's arrays hold exactly `range_count` entries, so `text` must
+/// contain exactly that many non-empty lines:
+///   - error.TooManyRanges: more non-empty lines than slots (for example a
+///     final line that is not terminated by a newline).
+///   - error.RangeCountMismatch: fewer non-empty lines than slots (for
+///     example blank lines), which would leave trailing entries undefined.
+/// Any error from `parseGroup` is passed through.
+fn parseRanges(text: []const u8) !Ranges {
+    var rc = Ranges{};
+    var iterator = std.mem.splitSequence(u8, text, "\n");
+    var inx: usize = 0;
+    while (iterator.next()) |group| {
+        // The text after the final newline (and any blank line) is empty.
+        if (group.len == 0) continue;
+        // Refuse to write past the fixed-size arrays.
+        if (inx >= range_count) return error.TooManyRanges;
+        const uc = try parseGroup(group);
+        rc.names[inx] = uc.name;
+        rc.starting_codepoints[inx] = uc.starting_codepoint;
+        rc.ending_codepoints[inx] = uc.ending_codepoint;
+        rc.longest_name_len = @max(rc.longest_name_len, uc.name.len);
+        inx += 1;
+    }
+    // Every slot must be filled; iteration hands out all range_count entries.
+    if (inx != range_count) return error.RangeCountMismatch;
+    return rc;
+}
+
+/// Parses one table line of the form "<name> <TAB>U+<hex> - U+<hex>".
+///
+/// The returned name is a slice of `group_text` with trailing spaces removed.
+/// Errors:
+///   - error.NoRangeSpecifiedInGroup: the line has no tab-separated range.
+///   - error.NoEndingCodepointInGroup: the range has no " - " separator.
+///   - error.InvalidStartingCodepoint / error.InvalidEndingCodepoint: the
+///     codepoint text does not begin with "U+".
+///   - any error from std.fmt.parseUnsigned for the hex digits.
+fn parseGroup(group_text: []const u8) !UnicodeGroup {
+    // Basic Latin 	U+0 - U+7F
+    var iterator = std.mem.splitSequence(u8, group_text, "\t");
+    const name = std.mem.trimRight(u8, iterator.first(), " ");
+    const range_text = iterator.next() orelse {
+        std.log.err("failed parsing on group '{s}'", .{group_text});
+        return error.NoRangeSpecifiedInGroup;
+    };
+    var range_iterator = std.mem.splitSequence(u8, range_text, " - ");
+    const start_text = range_iterator.first();
+    const end_text = range_iterator.next() orelse return error.NoEndingCodepointInGroup;
+    // Check the "U+" prefix before slicing it off: text shorter than two
+    // bytes would index out of bounds, and any other two leading bytes would
+    // be dropped silently. Plain comparisons are used so that compile-time
+    // evaluation does no extra calls or loops.
+    if (start_text.len < 2 or start_text[0] != 'U' or start_text[1] != '+')
+        return error.InvalidStartingCodepoint;
+    if (end_text.len < 2 or end_text[0] != 'U' or end_text[1] != '+')
+        return error.InvalidEndingCodepoint;
+    return UnicodeGroup{
+        .name = name,
+        .starting_codepoint = try std.fmt.parseUnsigned(u21, start_text[2..], 16),
+        .ending_codepoint = try std.fmt.parseUnsigned(u21, end_text[2..], 16),
+    };
+}
+
+test "check ranges" {
+    var parsed_ranges = all_ranges();
+
+    // Direct table access. Entry 8 is the line:
+    // Cyrillic 	U+400 - U+4FF
+    const cyrillic_inx = 8;
+    try std.testing.expectEqual(@as(u21, 0x400), parsed_ranges.starting_codepoints[cyrillic_inx]);
+    try std.testing.expectEqual(@as(u21, 0x4ff), parsed_ranges.ending_codepoints[cyrillic_inx]);
+    try std.testing.expectEqualStrings("Cyrillic", parsed_ranges.names[cyrillic_inx]);
+
+    // Cursor access: first() rewinds and yields the first line of the table.
+    const basic_latin = parsed_ranges.first().?;
+    try std.testing.expectEqualStrings("Basic Latin", basic_latin.name);
+    try std.testing.expectEqual(@as(u21, 0x0), basic_latin.starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0x7f), basic_latin.ending_codepoint);
+
+    // next() then continues with the second line.
+    const latin_1_supplement = parsed_ranges.next().?;
+    try std.testing.expectEqualStrings("Latin-1 Supplement", latin_1_supplement.name);
+    try std.testing.expectEqual(@as(u21, 0x80), latin_1_supplement.starting_codepoint);
+    try std.testing.expectEqual(@as(u21, 0xff), latin_1_supplement.ending_codepoint);
+}
--- a/4
+++ b/4
@ -0,0 +1,4 @@
+#!/bin/sh
+scriptpath="$( cd "$(dirname "$0")" ; pwd -P )"
+# podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder-alpine "$@"
+podman run -t --rm -v "$HOME/.cache:/root/.cache" -v "${scriptpath}:/app" -w /app fontfinder "$@"