diff --git a/src/data/Journal.zig b/src/data/Journal.zig index 47d4ccc..33347ca 100644 --- a/src/data/Journal.zig +++ b/src/data/Journal.zig @@ -430,6 +430,32 @@ test "parse: single ack with two notes round-trips" { try testing.expectEqualStrings("Will trim by Q3 2026.", entry.notes[1]); } +test "round-trip: a note with emoji and accented text survives format + parse" { + // UTF-8 note content (emoji, accented letters) must survive the + // SRF format -> parse cycle byte-for-byte. Multi-byte sequences + // can't collide with SRF's ASCII delimiters (',' '::' newline), so + // this is safe; the test pins that guarantee for the ack-note flow. + const a = testing.allocator; + const records = [_]JournalRecord{ + .{ .acknowledgment = .{ + .observation = "position_concentration", + .target = "NVDA", + .acknowledged_at = Date.fromYmd(2026, 6, 12), + .state = .acknowledged, + } }, + .{ .note = .{ .line = "Trim by Q3 \u{1F389} - café conviction" } }, + }; + var buf: std.Io.Writer.Allocating = .init(a); + defer buf.deinit(); + try buf.writer.print("{f}", .{srf.fmt(JournalRecord, &records, .{})}); + + var journal = try parse(a, buf.writer.buffered()); + defer journal.deinit(); + try testing.expectEqual(@as(usize, 1), journal.entries.len); + try testing.expectEqual(@as(usize, 1), journal.entries[0].notes.len); + try testing.expectEqualStrings("Trim by Q3 \u{1F389} - café conviction", journal.entries[0].notes[0]); +} + test "parse: notes attach to the most-recent preceding ack" { const data = \\#!srfv1 diff --git a/src/tui.zig b/src/tui.zig index 64ab42a..a5d8006 100644 --- a/src/tui.zig +++ b/src/tui.zig @@ -1698,7 +1698,8 @@ pub const App = struct { } } - pub fn drawStyledContent(_: *App, _: std.mem.Allocator, buf: []vaxis.Cell, width: u16, height: u16, lines: []const StyledLine) !void { + pub fn drawStyledContent(self: *App, _: std.mem.Allocator, buf: []vaxis.Cell, width: u16, height: u16, lines: []const StyledLine) !void { + const method = self.gwidthMethod(); for (lines, 
0..) |line, row| { if (row >= height) break; // Fill row with style bg @@ -1713,10 +1714,41 @@ pub const App = struct { buf[row * width + ci] = .{ .char = .{ .grapheme = graphemes[ci] }, .style = s }; } } else { - // UTF-8 aware rendering: byte index and column index tracked separately + // Grapheme-cluster aware rendering. Segment `line.text` + // into grapheme clusters and measure each cluster's + // display width with the SAME method vaxis uses at render + // time (`caps.unicode`), then set the cell width + // explicitly and advance the display column by that width. + // That keeps three things in lockstep - our buffer + // columns, vaxis's per-cell cursor advance (it steps its + // buffer index by the cell's width, skipping the covered + // columns), and the terminal - so wide glyphs (CJK and any + // emoji: VS16, ZWJ, skin-tone, or flag sequences) don't + // desync and smear as the content scrolls. A wide cluster + // leaves its trailing column(s) as the row-fill blank; + // vaxis never emits those because the prior cell is wide. var col: usize = 0; - var bi: usize = 0; - while (bi < line.text.len and col < width) { + var giter = vaxis.unicode.graphemeIterator(line.text); + while (giter.next()) |g| { + if (col >= width) break; + const cluster = g.bytes(line.text); + // Fast path: a single-byte cluster below 0x80 is a + // standalone ASCII char (combining marks are + // multi-byte, so they'd make the cluster longer) - + // always width 1. Skips the gwidth state machine for + // the overwhelmingly common case of ASCII table text. + const gw: u16 = if (g.len == 1 and cluster[0] < 0x80) + 1 + else + vaxis.gwidth.gwidth(cluster, method); + // Zero-width cluster (e.g. a leading combining mark + // with no base): skip it rather than let the next + // glyph overwrite this cell or the column desync. + if (gw == 0) continue; + // A wide cluster that would overrun the row edge is + // dropped (matches the prior truncation behavior). 
+ if (col + gw > width) break; + var s = line.style; // `spans` (if present) takes precedence over `alt_*`. // Iterate forward; the LAST span that contains `col` @@ -1744,47 +1776,27 @@ pub const App = struct { } else if (line.alt_style) |alt| { if (col >= line.alt_start and col < line.alt_end) s = alt; } - const byte = line.text[bi]; - if (byte < 0x80) { - // ASCII: single byte, single column - buf[row * width + col] = .{ .char = .{ .grapheme = ascii_g[byte] }, .style = s }; - bi += 1; - col += 1; - } else { - // Multi-byte UTF-8: determine sequence length - const seq_len: usize = if (byte >= 0xF0) 4 else if (byte >= 0xE0) 3 else if (byte >= 0xC0) 2 else 1; - var end = @min(bi + seq_len, line.text.len); - // Fold a trailing U+FE0F (bytes EF B8 8F) variation - // selector into THIS cell and report the cell as 2 - // display columns wide. zfin's wide glyphs (severity - // / status-grid emoji) all carry FE0F to force emoji - // presentation; vaxis defaults a cell to width 1 and - // only measures the grapheme when width == 0, so - // without this it tracks each emoji as a single - // column. Its partial-redraw diff then advances the - // terminal cursor one column short per emoji, which - // is what smears stale glyphs across the screen as - // the findings list scrolls. Telling it the true - // width keeps the cursor accounting aligned. The - // skipped second column stays the row-fill blank; - // vaxis skips it because it advances its buffer index - // by the cell width. Only fold when both columns fit. 
- var cell_width: u8 = 1; - if (col + 1 < width and end + 3 <= line.text.len and - line.text[end] == 0xEF and line.text[end + 1] == 0xB8 and line.text[end + 2] == 0x8F) - { - end += 3; - cell_width = 2; - } - buf[row * width + col] = .{ .char = .{ .grapheme = line.text[bi..end], .width = cell_width }, .style = s }; - bi = end; - col += cell_width; - } + + buf[row * width + col] = .{ .char = .{ .grapheme = cluster, .width = @intCast(gw) }, .style = s }; + col += gw; } } } } + /// The grapheme-width method vaxis will use when it paints this + /// frame's cells. Reading it from the live `caps.unicode` (instead + /// of hardcoding) is what keeps `drawStyledContent`'s column + /// advance in agreement with vaxis's own per-cell measurement: on a + /// Unicode-width terminal both count an emoji as 2 columns; on a + /// legacy wcwidth terminal both count it the same narrower way. + /// Falls back to `.unicode` only before `vx_app` is wired (early + /// startup / tests), when nothing is actually being painted. + fn gwidthMethod(self: *const App) vaxis.gwidth.Method { + const va = self.vx_app orelse return .unicode; + return va.vx.caps.unicode; + } + /// Render a prompt + live input buffer + blinking cursor + right- /// aligned hint into the status-bar cell buffer. Shared between /// `.symbol_input` and `.date_input` modes - only the prompt and @@ -2895,21 +2907,28 @@ test "resetStatus: clears an active status override" { try testing.expectEqual(@as(usize, 0), app.status_len); } -// ── drawStyledContent: wide-glyph (FE0F) width handling ──────── +// ── drawStyledContent: grapheme-width handling ───────────────── -test "drawStyledContent: folds a trailing FE0F into one width-2 cell" { +// vx_app is null in these tests, so `gwidthMethod` falls back to +// `.unicode` - the modern terminal interpretation, where emoji are +// 2 columns. A real session uses the terminal's detected method. 
+ +test "drawStyledContent: a 2-column emoji occupies one wide cell" { + var app: App = undefined; + app.vx_app = null; const w: u16 = 8; var buf: [w]vaxis.Cell = undefined; const lines = [_]StyledLine{ .{ .text = "a\u{26A0}\u{FE0F}b", .style = .{} }, // a + warning-emoji + b }; - try App.drawStyledContent(undefined, undefined, &buf, w, 1, &lines); + try app.drawStyledContent(undefined, &buf, w, 1, &lines); - // col 0: plain ASCII, default width 1. + // col 0: plain ASCII, width 1. try testing.expectEqualStrings("a", buf[0].char.grapheme); try testing.expectEqual(@as(u8, 1), buf[0].char.width); - // col 1: both codepoints (base + FE0F) in one cell, 2 cols wide. + // col 1: the whole grapheme cluster (base + FE0F) in one cell, + // measured 2 columns wide. try testing.expectEqualStrings("\u{26A0}\u{FE0F}", buf[1].char.grapheme); try testing.expectEqual(@as(u8, 2), buf[1].char.width); @@ -2921,30 +2940,69 @@ test "drawStyledContent: folds a trailing FE0F into one width-2 cell" { try testing.expectEqualStrings("b", buf[3].char.grapheme); } -test "drawStyledContent: a multibyte glyph without FE0F stays one column" { +test "drawStyledContent: a default-presentation emoji (no FE0F) is still 2 wide" { + // The whole point of generalizing past the FE0F special-case: a + // user-typed emoji like the party popper carries no variation + // selector but is still 2 columns. The renderer must measure it. 
+ var app: App = undefined; + app.vx_app = null; + const w: u16 = 8; + var buf: [w]vaxis.Cell = undefined; + const lines = [_]StyledLine{ + .{ .text = "x\u{1F389}y", .style = .{} }, // x + party-popper + y + }; + try app.drawStyledContent(undefined, &buf, w, 1, &lines); + try testing.expectEqualStrings("x", buf[0].char.grapheme); + try testing.expectEqualStrings("\u{1F389}", buf[1].char.grapheme); + try testing.expectEqual(@as(u8, 2), buf[1].char.width); + try testing.expectEqualStrings(" ", buf[2].char.grapheme); // covered column + try testing.expectEqualStrings("y", buf[3].char.grapheme); // after the emoji +} + +test "drawStyledContent: a ZWJ emoji sequence is one cluster in one cell" { + // Woman astronaut = woman + ZWJ + rocket; a single grapheme + // cluster, width 2 under the unicode method. + var app: App = undefined; + app.vx_app = null; + const w: u16 = 8; + var buf: [w]vaxis.Cell = undefined; + const lines = [_]StyledLine{ + .{ .text = "\u{1F469}\u{200D}\u{1F680}z", .style = .{} }, + }; + try app.drawStyledContent(undefined, &buf, w, 1, &lines); + try testing.expectEqualStrings("\u{1F469}\u{200D}\u{1F680}", buf[0].char.grapheme); + try testing.expectEqual(@as(u8, 2), buf[0].char.width); + try testing.expectEqualStrings("z", buf[2].char.grapheme); // after the 2-wide cluster +} + +test "drawStyledContent: a multibyte glyph that is 1 column stays one column" { + var app: App = undefined; + app.vx_app = null; const w: u16 = 4; var buf: [w]vaxis.Cell = undefined; const lines = [_]StyledLine{ .{ .text = "\u{2014}x", .style = .{} }, // em-dash (1 col) + x }; - try App.drawStyledContent(undefined, undefined, &buf, w, 1, &lines); + try app.drawStyledContent(undefined, &buf, w, 1, &lines); try testing.expectEqualStrings("\u{2014}", buf[0].char.grapheme); try testing.expectEqual(@as(u8, 1), buf[0].char.width); try testing.expectEqualStrings("x", buf[1].char.grapheme); } -test "drawStyledContent: an FE0F emoji with no room for two columns is not folded" { +test 
"drawStyledContent: a wide emoji with no room at the edge is dropped" { // width 2: 'a' takes col 0, leaving only col 1 - not enough room - // for a 2-wide cell, so the emoji is left at width 1 (truncation - // territory at the right edge, same as before the fold). + // for a 2-wide cell, so the emoji is truncated at the edge and + // col 1 stays the row-fill blank. + var app: App = undefined; + app.vx_app = null; const w: u16 = 2; var buf: [w]vaxis.Cell = undefined; const lines = [_]StyledLine{ .{ .text = "a\u{26A0}\u{FE0F}", .style = .{} }, }; - try App.drawStyledContent(undefined, undefined, &buf, w, 1, &lines); + try app.drawStyledContent(undefined, &buf, w, 1, &lines); try testing.expectEqualStrings("a", buf[0].char.grapheme); - try testing.expectEqual(@as(u8, 1), buf[1].char.width); + try testing.expectEqualStrings(" ", buf[1].char.grapheme); } // ── symbol toggle helpers ───────────────────────────────────── diff --git a/src/tui/input_buffer.zig b/src/tui/input_buffer.zig index 1f768ef..cbbe2ac 100644 --- a/src/tui/input_buffer.zig +++ b/src/tui/input_buffer.zig @@ -72,7 +72,7 @@ pub fn handleKey(buf: []u8, len: *usize, key: vaxis.Key) Result { return .committed; } if (key.codepoint == vaxis.Key.backspace) { - if (len.* > 0) len.* -= 1; + len.* = graphemeBackspaceLen(buf[0..len.*]); return .edited; } // Ctrl+U: clear entire input (readline convention) @@ -120,7 +120,8 @@ pub const MultiResult = enum { /// => `.committed`. `len.*` unchanged so the caller can flush /// any final unfinished fragment before joining all fragments /// and writing the journal record. -/// - **Backspace, Ctrl+U, printable ASCII**: same as `handleKey`. +/// - **Backspace** (deletes the last grapheme cluster), **Ctrl+U**, +/// and printable text input: same as `handleKey`. 
pub fn handleKeyMulti(buf: []u8, len: *usize, key: vaxis.Key) MultiResult { if (key.codepoint == vaxis.Key.escape) { len.* = 0; @@ -145,7 +146,7 @@ pub fn handleKeyMulti(buf: []u8, len: *usize, key: vaxis.Key) MultiResult { return .fragment; } if (key.codepoint == vaxis.Key.backspace) { - if (len.* > 0) len.* -= 1; + len.* = graphemeBackspaceLen(buf[0..len.*]); return .edited; } if (key.matches('u', .{ .ctrl = true })) { @@ -166,24 +167,29 @@ pub fn handleKeyMulti(buf: []u8, len: *usize, key: vaxis.Key) MultiResult { /// (which vaxis turns on), a Shift+a press reports `codepoint = 'a'` /// (the base-layout key) with `text = "A"`; keying off the codepoint /// alone silently downcases everything and turns shifted symbols -/// (`!@#`) back into their digits. Only single printable-ASCII text -/// bytes are taken: these buffers are ASCII (ticker symbols, ack-note -/// reasoning), so a stray multibyte grapheme or control sequence -/// shouldn't land in them. When no text is reported (legacy terminal, -/// no Kitty protocol) we fall back to the codepoint, which already -/// carries the shifted value in that mode. +/// (`!@#`) back into their digits. We take a single printable-ASCII +/// byte or any well-formed multi-byte UTF-8 grapheme (accented +/// letters, CJK, emoji), rejecting lone control bytes - so notes can +/// hold whatever the user types or pastes. When no text is reported +/// (legacy terminal, no Kitty protocol) we fall back to the codepoint, +/// which carries the shifted value in that mode (ASCII only). fn appendPrintable(buf: []u8, len: *usize, key: vaxis.Key) bool { if (key.text) |text| { - if (text.len == 1 and std.ascii.isPrint(text[0]) and len.* < buf.len) { - buf[len.*] = text[0]; - len.* += 1; - return true; - } - // Text was reported but isn't a single printable ASCII byte - // (multibyte grapheme, control char, or the buffer is full). - // Do NOT fall through to the codepoint path: that would - // re-append a downcased/duplicate byte. 
- return false; + // Accept the terminal-resolved text when it's typed content: a + // single printable ASCII byte, or any well-formed multi-byte + // UTF-8 grapheme (accented letters, CJK, emoji - including + // pasted ones, which arrive as key events under bracketed + // paste). Reject text containing any ASCII control byte, and + // anything that won't fit. When text is present we never fall + // through to the codepoint path - that would re-append a + // downcased/duplicate byte. + if (text.len == 0) return false; + if (!std.unicode.utf8ValidateSlice(text)) return false; + for (text) |b| if (std.ascii.isControl(b)) return false; + if (len.* + text.len > buf.len) return false; + @memcpy(buf[len.*..][0..text.len], text); + len.* += text.len; + return true; } if (key.codepoint < std.math.maxInt(u7) and std.ascii.isPrint(@intCast(key.codepoint)) and len.* < buf.len) { buf[len.*] = @intCast(key.codepoint); @@ -193,6 +199,19 @@ fn appendPrintable(buf: []u8, len: *usize, key: vaxis.Key) bool { return false; } +/// New buffer length after deleting the final grapheme cluster - the +/// result of one Backspace. Cluster-aware so backspacing an emoji +/// (which can be several codepoints: ZWJ joins, skin-tone modifiers, +/// a trailing FE0F) removes the whole glyph instead of leaving a +/// mangled partial UTF-8 sequence behind. For pure ASCII this removes +/// exactly one byte. 
+fn graphemeBackspaceLen(text: []const u8) usize { + var iter = vaxis.unicode.graphemeIterator(text); + var last_start: usize = 0; + while (iter.next()) |g| last_start = g.start; + return last_start; +} + // ── Tests ───────────────────────────────────────────────────── const testing = std.testing; @@ -255,26 +274,62 @@ test "handleKey: legacy terminal (no text) falls back to the codepoint" { try testing.expectEqual(@as(u8, 'A'), buf[0]); } -test "handleKey: multibyte/control text is not appended" { - // A non-ASCII grapheme or control sequence in `text` must not - // corrupt the ASCII buffer, and must not fall through to the - // codepoint path. +test "handleKey: a multibyte UTF-8 grapheme is appended whole" { + // Em-dash (3 bytes). Multi-byte text is accepted now; the whole + // grapheme lands in the buffer. + var buf: [16]u8 = undefined; + var len: usize = 0; + const result = handleKey(&buf, &len, .{ .codepoint = 0x2014, .text = "\u{2014}" }); + try testing.expectEqual(Result.edited, result); + try testing.expectEqual(@as(usize, 3), len); + try testing.expectEqualStrings("\u{2014}", buf[0..len]); +} + +test "handleKey: an emoji grapheme is appended whole" { + // Party popper, 4 bytes. 
+ var buf: [16]u8 = undefined; + var len: usize = 0; + const result = handleKey(&buf, &len, .{ .codepoint = 0x1F389, .text = "\u{1F389}" }); + try testing.expectEqual(Result.edited, result); + try testing.expectEqual(@as(usize, 4), len); + try testing.expectEqualStrings("\u{1F389}", buf[0..len]); +} + +test "handleKey: a lone control byte in text is rejected" { var buf: [16]u8 = undefined; var len: usize = 0; - try testing.expectEqual(Result.ignored, handleKey(&buf, &len, .{ .codepoint = 0x2014, .text = "\u{2014}" })); - try testing.expectEqual(@as(usize, 0), len); try testing.expectEqual(Result.ignored, handleKey(&buf, &len, .{ .codepoint = vaxis.Key.tab, .text = "\t" })); try testing.expectEqual(@as(usize, 0), len); } +test "handleKey: multibyte append respects buffer capacity" { + // 4-byte emoji into a 3-byte buffer: doesn't fit, nothing appended. + var buf: [3]u8 = undefined; + var len: usize = 0; + try testing.expectEqual(Result.ignored, handleKey(&buf, &len, .{ .codepoint = 0x1F389, .text = "\u{1F389}" })); + try testing.expectEqual(@as(usize, 0), len); +} + test "handleKey: backspace decrements len" { var buf: [16]u8 = undefined; + @memcpy(buf[0..3], "abc"); var len: usize = 3; const result = handleKey(&buf, &len, .{ .codepoint = vaxis.Key.backspace }); try testing.expectEqual(Result.edited, result); try testing.expectEqual(@as(usize, 2), len); } +test "handleKey: backspace removes a whole emoji grapheme cluster" { + // Wave + skin-tone modifier is one cluster (8 bytes); backspace + // must remove all of it, not leave a mangled partial sequence. 
+ var buf: [16]u8 = undefined; + @memcpy(buf[0..8], "\u{1F44B}\u{1F3FF}"); + var len: usize = 8; + const result = handleKey(&buf, &len, .{ .codepoint = vaxis.Key.backspace }); + try testing.expectEqual(Result.edited, result); + try testing.expectEqual(@as(usize, 0), len); +} + test "handleKey: backspace at len=0 stays at 0" { var buf: [16]u8 = undefined; var len: usize = 0; @@ -387,12 +442,26 @@ test "handleKeyMulti: legacy terminal (no text) falls back to the codepoint" { test "handleKeyMulti: backspace decrements len" { var buf: [64]u8 = undefined; + @memcpy(buf[0..3], "abc"); var len: usize = 3; const result = handleKeyMulti(&buf, &len, .{ .codepoint = vaxis.Key.backspace }); try testing.expectEqual(MultiResult.edited, result); try testing.expectEqual(@as(usize, 2), len); } +test "handleKeyMulti: Shift+letter and an emoji both append (notes accept any grapheme)" { + var buf: [64]u8 = undefined; + var len: usize = 0; + // Shift+a -> "A" + _ = handleKeyMulti(&buf, &len, .{ .codepoint = 'a', .text = "A", .mods = .{ .shift = true } }); + // a multi-codepoint emoji (woman astronaut: woman + ZWJ + rocket) + _ = handleKeyMulti(&buf, &len, .{ .codepoint = 0x1F469, .text = "\u{1F469}\u{200D}\u{1F680}" }); + try testing.expectEqualStrings("A\u{1F469}\u{200D}\u{1F680}", buf[0..len]); + // Backspace removes the whole emoji cluster, leaving just "A". + _ = handleKeyMulti(&buf, &len, .{ .codepoint = vaxis.Key.backspace }); + try testing.expectEqualStrings("A", buf[0..len]); +} + test "handleKeyMulti: ctrl+U clears buffer" { var buf: [64]u8 = undefined; var len: usize = 5; diff --git a/src/tui/review_tab.zig b/src/tui/review_tab.zig index c1f79db..129ad31 100644 --- a/src/tui/review_tab.zig +++ b/src/tui/review_tab.zig @@ -1473,14 +1473,12 @@ pub fn buildStyledLines(state: *State, app: *App, arena: std.mem.Allocator) ![]c /// Per-finding-row glyph indicating severity. 
Each glyph string /// includes a trailing U+FE0F variation selector to force emoji -/// (two-display-column) presentation. The selector is also the -/// signal `drawStyledContent` keys off: when it sees a sequence -/// followed by FE0F it folds both codepoints into one buffer cell -/// and marks it `width = 2`, so vaxis advances the terminal cursor -/// by the true width. Without that, vaxis would track the emoji as -/// one column and its scroll-time partial redraws would smear stale -/// glyphs. (The renderer owns the width fix now; the FE0F here just -/// guarantees the glyph it's correcting is genuinely 2 wide.) +/// (two-display-column) presentation - without it some terminals +/// draw a narrow text-style glyph. The renderer (`drawStyledContent`) +/// measures every grapheme's width with `gwidth` and sizes the cell +/// accordingly, so it handles these correctly along with any other +/// wide glyph; the FE0F here is purely about presentation, not a +/// renderer hint. fn severityGlyph(sev: observations.Severity) []const u8 { return switch (sev) { .warn => "⚠️", // U+26A0 + FE0F @@ -1499,8 +1497,8 @@ fn severityGlyph(sev: observations.Severity) []const u8 { /// in the renderer means the renderer is ready when the async /// path lands. /// Per-check status-grid glyph. See `severityGlyph` for the -/// FE0F-trailing convention - it forces emoji presentation and lets -/// `drawStyledContent` mark the cell two columns wide. +/// FE0F-trailing convention (emoji presentation; the renderer +/// measures width generically via gwidth). fn checkStatusGlyph(result: observations.CheckResult) []const u8 { return switch (result) { .pass => "✅\u{FE0F}",