vaxis has its own handling of this - use that

This commit is contained in:
Emil Lerch 2026-06-28 07:56:24 -07:00
parent 8b85dcb9ea
commit 9f47d46d9a
Signed by: lobo
GPG key ID: A7B62D657EF764F8
4 changed files with 238 additions and 87 deletions

View file

@ -430,6 +430,32 @@ test "parse: single ack with two notes round-trips" {
try testing.expectEqualStrings("Will trim by Q3 2026.", entry.notes[1]);
}
test "round-trip: a note with emoji and accented text survives format + parse" {
    // Pins, for the ack-note flow, that non-ASCII note content (emoji,
    // accented letters) comes back byte-for-byte from an SRF
    // format -> parse cycle. This is safe by construction: multi-byte
    // UTF-8 sequences cannot collide with SRF's ASCII delimiters
    // (',' '::' newline).
    const note_text = "Trim by Q3 \u{1F389} - café conviction";
    const gpa = testing.allocator;

    const records = [_]JournalRecord{
        .{ .acknowledgment = .{
            .observation = "position_concentration",
            .target = "NVDA",
            .acknowledged_at = Date.fromYmd(2026, 6, 12),
            .state = .acknowledged,
        } },
        .{ .note = .{ .line = note_text } },
    };

    // Format the records into an in-memory writer...
    var out: std.Io.Writer.Allocating = .init(gpa);
    defer out.deinit();
    try out.writer.print("{f}", .{srf.fmt(JournalRecord, &records, .{})});

    // ...then parse exactly those bytes back into a journal.
    var journal = try parse(gpa, out.writer.buffered());
    defer journal.deinit();

    // One acknowledgment entry carrying the one note, unchanged.
    try testing.expectEqual(@as(usize, 1), journal.entries.len);
    const notes = journal.entries[0].notes;
    try testing.expectEqual(@as(usize, 1), notes.len);
    try testing.expectEqualStrings(note_text, notes[0]);
}
test "parse: notes attach to the most-recent preceding ack" {
const data =
\\#!srfv1

View file

@ -1698,7 +1698,8 @@ pub const App = struct {
}
}
pub fn drawStyledContent(_: *App, _: std.mem.Allocator, buf: []vaxis.Cell, width: u16, height: u16, lines: []const StyledLine) !void {
pub fn drawStyledContent(self: *App, _: std.mem.Allocator, buf: []vaxis.Cell, width: u16, height: u16, lines: []const StyledLine) !void {
const method = self.gwidthMethod();
for (lines, 0..) |line, row| {
if (row >= height) break;
// Fill row with style bg
@ -1713,10 +1714,41 @@ pub const App = struct {
buf[row * width + ci] = .{ .char = .{ .grapheme = graphemes[ci] }, .style = s };
}
} else {
// UTF-8 aware rendering: byte index and column index tracked separately
// Grapheme-cluster aware rendering. Segment `line.text`
// into grapheme clusters and measure each cluster's
// display width with the SAME method vaxis uses at render
// time (`caps.unicode`), then set the cell width
// explicitly and advance the display column by that width.
// That keeps three things in lockstep - our buffer
// columns, vaxis's per-cell cursor advance (it steps its
// buffer index by the cell's width, skipping the covered
// columns), and the terminal - so wide glyphs (CJK and any
// emoji: VS16, ZWJ, skin-tone, or flag sequences) don't
// desync and smear as the content scrolls. A wide cluster
// leaves its trailing column(s) as the row-fill blank;
// vaxis never emits those because the prior cell is wide.
var col: usize = 0;
var bi: usize = 0;
while (bi < line.text.len and col < width) {
var giter = vaxis.unicode.graphemeIterator(line.text);
while (giter.next()) |g| {
if (col >= width) break;
const cluster = g.bytes(line.text);
// Fast path: a single-byte cluster below 0x80 is a
// standalone ASCII char (combining marks are
// multi-byte, so they'd make the cluster longer) -
// always width 1. Skips the gwidth state machine for
// the overwhelmingly common case of ASCII table text.
const gw: u16 = if (g.len == 1 and cluster[0] < 0x80)
1
else
vaxis.gwidth.gwidth(cluster, method);
// Zero-width cluster (e.g. a leading combining mark
// with no base): skip it rather than let the next
// glyph overwrite this cell or the column desync.
if (gw == 0) continue;
// A wide cluster that would overrun the row edge is
// dropped (matches the prior truncation behavior).
if (col + gw > width) break;
var s = line.style;
// `spans` (if present) takes precedence over `alt_*`.
// Iterate forward; the LAST span that contains `col`
@ -1744,47 +1776,27 @@ pub const App = struct {
} else if (line.alt_style) |alt| {
if (col >= line.alt_start and col < line.alt_end) s = alt;
}
const byte = line.text[bi];
if (byte < 0x80) {
// ASCII: single byte, single column
buf[row * width + col] = .{ .char = .{ .grapheme = ascii_g[byte] }, .style = s };
bi += 1;
col += 1;
} else {
// Multi-byte UTF-8: determine sequence length
const seq_len: usize = if (byte >= 0xF0) 4 else if (byte >= 0xE0) 3 else if (byte >= 0xC0) 2 else 1;
var end = @min(bi + seq_len, line.text.len);
// Fold a trailing U+FE0F (bytes EF B8 8F) variation
// selector into THIS cell and report the cell as 2
// display columns wide. zfin's wide glyphs (severity
// / status-grid emoji) all carry FE0F to force emoji
// presentation; vaxis defaults a cell to width 1 and
// only measures the grapheme when width == 0, so
// without this it tracks each emoji as a single
// column. Its partial-redraw diff then advances the
// terminal cursor one column short per emoji, which
// is what smears stale glyphs across the screen as
// the findings list scrolls. Telling it the true
// width keeps the cursor accounting aligned. The
// skipped second column stays the row-fill blank;
// vaxis skips it because it advances its buffer index
// by the cell width. Only fold when both columns fit.
var cell_width: u8 = 1;
if (col + 1 < width and end + 3 <= line.text.len and
line.text[end] == 0xEF and line.text[end + 1] == 0xB8 and line.text[end + 2] == 0x8F)
{
end += 3;
cell_width = 2;
}
buf[row * width + col] = .{ .char = .{ .grapheme = line.text[bi..end], .width = cell_width }, .style = s };
bi = end;
col += cell_width;
}
buf[row * width + col] = .{ .char = .{ .grapheme = cluster, .width = @intCast(gw) }, .style = s };
col += gw;
}
}
}
}
/// Grapheme-width method `drawStyledContent` should measure clusters
/// with. Taken from the live `caps.unicode` of the wired vaxis
/// instance rather than hardcoded, so the renderer's column advance
/// agrees with vaxis's own per-cell measurement: on a Unicode-width
/// terminal both count an emoji as 2 columns, and on a legacy wcwidth
/// terminal both use the same narrower count.
/// While `vx_app` is still null (early startup / tests, when nothing
/// is actually being painted) the answer defaults to `.unicode`.
fn gwidthMethod(self: *const App) vaxis.gwidth.Method {
    return if (self.vx_app) |va| va.vx.caps.unicode else .unicode;
}
/// Render a prompt + live input buffer + blinking cursor + right-
/// aligned hint into the status-bar cell buffer. Shared between
/// `.symbol_input` and `.date_input` modes - only the prompt and
@ -2895,21 +2907,28 @@ test "resetStatus: clears an active status override" {
try testing.expectEqual(@as(usize, 0), app.status_len);
}
// drawStyledContent: wide-glyph (FE0F) width handling
// drawStyledContent: grapheme-width handling
test "drawStyledContent: folds a trailing FE0F into one width-2 cell" {
// vx_app is null in these tests, so `gwidthMethod` falls back to
// `.unicode` - the modern terminal interpretation, where emoji are
// 2 columns. A real session uses the terminal's detected method.
test "drawStyledContent: a 2-column emoji occupies one wide cell" {
var app: App = undefined;
app.vx_app = null;
const w: u16 = 8;
var buf: [w]vaxis.Cell = undefined;
const lines = [_]StyledLine{
.{ .text = "a\u{26A0}\u{FE0F}b", .style = .{} }, // a + warning-emoji + b
};
try App.drawStyledContent(undefined, undefined, &buf, w, 1, &lines);
try app.drawStyledContent(undefined, &buf, w, 1, &lines);
// col 0: plain ASCII, default width 1.
// col 0: plain ASCII, width 1.
try testing.expectEqualStrings("a", buf[0].char.grapheme);
try testing.expectEqual(@as(u8, 1), buf[0].char.width);
// col 1: both codepoints (base + FE0F) in one cell, 2 cols wide.
// col 1: the whole grapheme cluster (base + FE0F) in one cell,
// measured 2 columns wide.
try testing.expectEqualStrings("\u{26A0}\u{FE0F}", buf[1].char.grapheme);
try testing.expectEqual(@as(u8, 2), buf[1].char.width);
@ -2921,30 +2940,69 @@ test "drawStyledContent: folds a trailing FE0F into one width-2 cell" {
try testing.expectEqualStrings("b", buf[3].char.grapheme);
}
test "drawStyledContent: a multibyte glyph without FE0F stays one column" {
test "drawStyledContent: a default-presentation emoji (no FE0F) is still 2 wide" {
    // Reason for measuring every cluster instead of special-casing
    // FE0F: a user-typed emoji such as the party popper has no
    // variation selector, yet still takes 2 columns. With `vx_app`
    // null the width method is the `.unicode` fallback.
    const party_popper = "\u{1F389}";
    const w: u16 = 8;

    var app: App = undefined;
    app.vx_app = null;

    var buf: [w]vaxis.Cell = undefined;
    const lines = [_]StyledLine{
        .{ .text = "x" ++ party_popper ++ "y", .style = .{} }, // x + party-popper + y
    };
    try app.drawStyledContent(undefined, &buf, w, 1, &lines);

    // col 0: the leading ASCII char.
    try testing.expectEqualStrings("x", buf[0].char.grapheme);
    // col 1: the emoji in a single cell that reports width 2.
    try testing.expectEqualStrings(party_popper, buf[1].char.grapheme);
    try testing.expectEqual(@as(u8, 2), buf[1].char.width);
    // col 2: covered by the wide cell, left as the row-fill blank.
    try testing.expectEqualStrings(" ", buf[2].char.grapheme);
    // col 3: the next glyph lands after both emoji columns.
    try testing.expectEqualStrings("y", buf[3].char.grapheme);
}
test "drawStyledContent: a ZWJ emoji sequence is one cluster in one cell" {
    // Woman astronaut is woman + ZWJ + rocket: three codepoints that
    // form a single grapheme cluster, 2 columns wide under the
    // `.unicode` method (the fallback used while `vx_app` is null).
    const astronaut = "\u{1F469}\u{200D}\u{1F680}";
    const w: u16 = 8;

    var app: App = undefined;
    app.vx_app = null;

    var buf: [w]vaxis.Cell = undefined;
    const lines = [_]StyledLine{
        .{ .text = astronaut ++ "z", .style = .{} },
    };
    try app.drawStyledContent(undefined, &buf, w, 1, &lines);

    // col 0: the whole sequence in one cell, width 2.
    try testing.expectEqualStrings(astronaut, buf[0].char.grapheme);
    try testing.expectEqual(@as(u8, 2), buf[0].char.width);
    // col 2: the following glyph sits after the 2-wide cluster.
    try testing.expectEqualStrings("z", buf[2].char.grapheme);
}
test "drawStyledContent: a multibyte glyph that is 1 column stays one column" {
var app: App = undefined;
app.vx_app = null;
const w: u16 = 4;
var buf: [w]vaxis.Cell = undefined;
const lines = [_]StyledLine{
.{ .text = "\u{2014}x", .style = .{} }, // em-dash (1 col) + x
};
try App.drawStyledContent(undefined, undefined, &buf, w, 1, &lines);
try app.drawStyledContent(undefined, &buf, w, 1, &lines);
try testing.expectEqualStrings("\u{2014}", buf[0].char.grapheme);
try testing.expectEqual(@as(u8, 1), buf[0].char.width);
try testing.expectEqualStrings("x", buf[1].char.grapheme);
}
test "drawStyledContent: an FE0F emoji with no room for two columns is not folded" {
test "drawStyledContent: a wide emoji with no room at the edge is dropped" {
// width 2: 'a' takes col 0, leaving only col 1 - not enough room
// for a 2-wide cell, so the emoji is left at width 1 (truncation
// territory at the right edge, same as before the fold).
// for a 2-wide cell, so the emoji is truncated at the edge and
// col 1 stays the row-fill blank.
var app: App = undefined;
app.vx_app = null;
const w: u16 = 2;
var buf: [w]vaxis.Cell = undefined;
const lines = [_]StyledLine{
.{ .text = "a\u{26A0}\u{FE0F}", .style = .{} },
};
try App.drawStyledContent(undefined, undefined, &buf, w, 1, &lines);
try app.drawStyledContent(undefined, &buf, w, 1, &lines);
try testing.expectEqualStrings("a", buf[0].char.grapheme);
try testing.expectEqual(@as(u8, 1), buf[1].char.width);
try testing.expectEqualStrings(" ", buf[1].char.grapheme);
}
// symbol toggle helpers

View file

@ -72,7 +72,7 @@ pub fn handleKey(buf: []u8, len: *usize, key: vaxis.Key) Result {
return .committed;
}
if (key.codepoint == vaxis.Key.backspace) {
if (len.* > 0) len.* -= 1;
len.* = graphemeBackspaceLen(buf[0..len.*]);
return .edited;
}
// Ctrl+U: clear entire input (readline convention)
@ -120,7 +120,8 @@ pub const MultiResult = enum {
/// => `.committed`. `len.*` unchanged so the caller can flush
/// any final unfinished fragment before joining all fragments
/// and writing the journal record.
/// - **Backspace, Ctrl+U, printable ASCII**: same as `handleKey`.
/// - **Backspace** (deletes the last grapheme cluster), **Ctrl+U**,
/// and printable text input: same as `handleKey`.
pub fn handleKeyMulti(buf: []u8, len: *usize, key: vaxis.Key) MultiResult {
if (key.codepoint == vaxis.Key.escape) {
len.* = 0;
@ -145,7 +146,7 @@ pub fn handleKeyMulti(buf: []u8, len: *usize, key: vaxis.Key) MultiResult {
return .fragment;
}
if (key.codepoint == vaxis.Key.backspace) {
if (len.* > 0) len.* -= 1;
len.* = graphemeBackspaceLen(buf[0..len.*]);
return .edited;
}
if (key.matches('u', .{ .ctrl = true })) {
@ -166,24 +167,29 @@ pub fn handleKeyMulti(buf: []u8, len: *usize, key: vaxis.Key) MultiResult {
/// (which vaxis turns on), a Shift+a press reports `codepoint = 'a'`
/// (the base-layout key) with `text = "A"`; keying off the codepoint
/// alone silently downcases everything and turns shifted symbols
/// (`!@#`) back into their digits. Only single printable-ASCII text
/// bytes are taken: these buffers are ASCII (ticker symbols, ack-note
/// reasoning), so a stray multibyte grapheme or control sequence
/// shouldn't land in them. When no text is reported (legacy terminal,
/// no Kitty protocol) we fall back to the codepoint, which already
/// carries the shifted value in that mode.
/// (`!@#`) back into their digits. We take a single printable-ASCII
/// byte or any well-formed multi-byte UTF-8 grapheme (accented
/// letters, CJK, emoji), rejecting lone control bytes - so notes can
/// hold whatever the user types or pastes. When no text is reported
/// (legacy terminal, no Kitty protocol) we fall back to the codepoint,
/// which carries the shifted value in that mode (ASCII only).
fn appendPrintable(buf: []u8, len: *usize, key: vaxis.Key) bool {
if (key.text) |text| {
if (text.len == 1 and std.ascii.isPrint(text[0]) and len.* < buf.len) {
buf[len.*] = text[0];
len.* += 1;
return true;
}
// Text was reported but isn't a single printable ASCII byte
// (multibyte grapheme, control char, or the buffer is full).
// Do NOT fall through to the codepoint path: that would
// re-append a downcased/duplicate byte.
return false;
// Accept the terminal-resolved text when it's typed content: a
// single printable ASCII byte, or any well-formed multi-byte
// UTF-8 grapheme (accented letters, CJK, emoji - including
// pasted ones, which arrive as key events under bracketed
// paste). Reject a lone control byte and anything that won't
// fit. When text is present we never fall through to the
// codepoint path - that would re-append a downcased/duplicate
// byte.
if (text.len == 0) return false;
if (text.len == 1 and !std.ascii.isPrint(text[0])) return false;
if (text.len > 1 and !std.unicode.utf8ValidateSlice(text)) return false;
if (len.* + text.len > buf.len) return false;
@memcpy(buf[len.*..][0..text.len], text);
len.* += text.len;
return true;
}
if (key.codepoint < std.math.maxInt(u7) and std.ascii.isPrint(@intCast(key.codepoint)) and len.* < buf.len) {
buf[len.*] = @intCast(key.codepoint);
@ -193,6 +199,19 @@ fn appendPrintable(buf: []u8, len: *usize, key: vaxis.Key) bool {
return false;
}
/// Length `text` should be truncated to by a single Backspace: the
/// byte offset at which its final grapheme cluster starts.
/// Working in clusters rather than bytes means an emoji built from
/// several codepoints (ZWJ joins, skin-tone modifiers, a trailing
/// FE0F) is deleted as one glyph and no partial UTF-8 sequence is
/// left behind. Pure ASCII input loses exactly one byte; empty input
/// yields 0.
fn graphemeBackspaceLen(text: []const u8) usize {
    var clusters = vaxis.unicode.graphemeIterator(text);
    var cut: usize = 0;
    // Walk front to back; whatever cluster is seen last is the one to
    // drop, so its start offset becomes the new length.
    while (clusters.next()) |cluster| {
        cut = cluster.start;
    }
    return cut;
}
// Tests
const testing = std.testing;
@ -255,26 +274,62 @@ test "handleKey: legacy terminal (no text) falls back to the codepoint" {
try testing.expectEqual(@as(u8, 'A'), buf[0]);
}
test "handleKey: multibyte/control text is not appended" {
// A non-ASCII grapheme or control sequence in `text` must not
// corrupt the ASCII buffer, and must not fall through to the
// codepoint path.
test "handleKey: a multibyte UTF-8 grapheme is appended whole" {
    // An em-dash is 3 bytes of UTF-8. Multi-byte text is accepted, so
    // all three bytes must land in the buffer together.
    const em_dash = "\u{2014}";
    var buf: [16]u8 = undefined;
    var len: usize = 0;

    const result = handleKey(&buf, &len, .{ .codepoint = 0x2014, .text = em_dash });

    try testing.expectEqual(Result.edited, result);
    try testing.expectEqual(@as(usize, 3), len);
    try testing.expectEqualStrings(em_dash, buf[0..len]);
}
test "handleKey: an emoji grapheme is appended whole" {
    // The party popper encodes to 4 bytes of UTF-8.
    const party_popper = "\u{1F389}";
    var buf: [16]u8 = undefined;
    var len: usize = 0;

    const result = handleKey(&buf, &len, .{ .codepoint = 0x1F389, .text = party_popper });

    try testing.expectEqual(Result.edited, result);
    try testing.expectEqual(@as(usize, 4), len);
    try testing.expectEqualStrings(party_popper, buf[0..len]);
}
test "handleKey: a lone control byte in text is rejected" {
var buf: [16]u8 = undefined;
var len: usize = 0;
try testing.expectEqual(Result.ignored, handleKey(&buf, &len, .{ .codepoint = 0x2014, .text = "\u{2014}" }));
try testing.expectEqual(@as(usize, 0), len);
try testing.expectEqual(Result.ignored, handleKey(&buf, &len, .{ .codepoint = vaxis.Key.tab, .text = "\t" }));
try testing.expectEqual(@as(usize, 0), len);
}
test "handleKey: multibyte append respects buffer capacity" {
    // A 4-byte emoji cannot fit in a 3-byte buffer, so the key is
    // ignored and nothing is written.
    var buf: [3]u8 = undefined;
    var len: usize = 0;

    const result = handleKey(&buf, &len, .{ .codepoint = 0x1F389, .text = "\u{1F389}" });

    try testing.expectEqual(Result.ignored, result);
    try testing.expectEqual(@as(usize, 0), len);
}
test "handleKey: backspace decrements len" {
    // Three ASCII bytes in the buffer; one Backspace removes one byte.
    const seed = "abc";
    var buf: [16]u8 = undefined;
    @memcpy(buf[0..seed.len], seed);
    var len: usize = seed.len;

    const result = handleKey(&buf, &len, .{ .codepoint = vaxis.Key.backspace });

    try testing.expectEqual(Result.edited, result);
    try testing.expectEqual(@as(usize, 2), len);
}
test "handleKey: backspace removes a whole emoji grapheme cluster" {
    // Waving hand + skin-tone modifier form one cluster of 8 bytes
    // (two 4-byte codepoints). A single Backspace has to delete all
    // of it rather than leave a mangled partial sequence.
    const wave = "\u{1F44B}\u{1F3FF}";
    var buf: [16]u8 = undefined;
    @memcpy(buf[0..wave.len], wave);
    var len: usize = wave.len;

    const result = handleKey(&buf, &len, .{ .codepoint = vaxis.Key.backspace });

    try testing.expectEqual(Result.edited, result);
    try testing.expectEqual(@as(usize, 0), len);
}
test "handleKey: backspace at len=0 stays at 0" {
var buf: [16]u8 = undefined;
var len: usize = 0;
@ -387,12 +442,26 @@ test "handleKeyMulti: legacy terminal (no text) falls back to the codepoint" {
test "handleKeyMulti: backspace decrements len" {
    // Three ASCII bytes in the buffer; one Backspace removes one byte.
    const seed = "abc";
    var buf: [64]u8 = undefined;
    @memcpy(buf[0..seed.len], seed);
    var len: usize = seed.len;

    const result = handleKeyMulti(&buf, &len, .{ .codepoint = vaxis.Key.backspace });

    try testing.expectEqual(MultiResult.edited, result);
    try testing.expectEqual(@as(usize, 2), len);
}
test "handleKeyMulti: Shift+letter and an emoji both append (notes accept any grapheme)" {
    // Woman astronaut: woman + ZWJ + rocket, a multi-codepoint emoji.
    const astronaut = "\u{1F469}\u{200D}\u{1F680}";
    var buf: [64]u8 = undefined;
    var len: usize = 0;

    // Shift+a reports the base-layout codepoint 'a' with text "A";
    // the text is what must be appended.
    _ = handleKeyMulti(&buf, &len, .{ .codepoint = 'a', .text = "A", .mods = .{ .shift = true } });
    // The emoji arrives as one key event carrying the whole sequence.
    _ = handleKeyMulti(&buf, &len, .{ .codepoint = 0x1F469, .text = astronaut });
    try testing.expectEqualStrings("A" ++ astronaut, buf[0..len]);

    // One Backspace removes the entire emoji cluster, leaving just "A".
    _ = handleKeyMulti(&buf, &len, .{ .codepoint = vaxis.Key.backspace });
    try testing.expectEqualStrings("A", buf[0..len]);
}
test "handleKeyMulti: ctrl+U clears buffer" {
var buf: [64]u8 = undefined;
var len: usize = 5;

View file

@ -1473,14 +1473,12 @@ pub fn buildStyledLines(state: *State, app: *App, arena: std.mem.Allocator) ![]c
/// Per-finding-row glyph indicating severity. Each glyph string
/// includes a trailing U+FE0F variation selector to force emoji
/// (two-display-column) presentation. The selector is also the
/// signal `drawStyledContent` keys off: when it sees a sequence
/// followed by FE0F it folds both codepoints into one buffer cell
/// and marks it `width = 2`, so vaxis advances the terminal cursor
/// by the true width. Without that, vaxis would track the emoji as
/// one column and its scroll-time partial redraws would smear stale
/// glyphs. (The renderer owns the width fix now; the FE0F here just
/// guarantees the glyph it's correcting is genuinely 2 wide.)
/// (two-display-column) presentation - without it some terminals
/// draw a narrow text-style glyph. The renderer (`drawStyledContent`)
/// measures every grapheme's width with `gwidth` and sizes the cell
/// accordingly, so it handles these correctly along with any other
/// wide glyph; the FE0F here is purely about presentation, not a
/// renderer hint.
fn severityGlyph(sev: observations.Severity) []const u8 {
return switch (sev) {
.warn => "⚠️", // U+26A0 + FE0F
@ -1499,8 +1497,8 @@ fn severityGlyph(sev: observations.Severity) []const u8 {
/// in the renderer means the renderer is ready when the async
/// path lands.
/// Per-check status-grid glyph. See `severityGlyph` for the
/// FE0F-trailing convention - it forces emoji presentation and lets
/// `drawStyledContent` mark the cell two columns wide.
/// FE0F-trailing convention (emoji presentation; the renderer
/// measures width generically via gwidth).
fn checkStatusGlyph(result: observations.CheckResult) []const u8 {
return switch (result) {
.pass => "\u{FE0F}",