better output through markdown->html (then proper escaping to atom feed)

This commit is contained in:
Emil Lerch 2025-07-15 15:42:30 -07:00
parent 2f14d94dc4
commit a059264150
Signed by: lobo
GPG key ID: A7B62D657EF764F8
5 changed files with 740 additions and 19 deletions

View file

@ -15,7 +15,7 @@ repos:
- id: zig-build
- repo: local
hooks:
- id: zlint
- id: test
name: Run zig build test
entry: zig
args: ["build", "--verbose", "test"]

View file

@ -4,17 +4,156 @@ const ArrayList = std.ArrayList;
const zeit = @import("zeit");
const Release = @import("main.zig").Release;
const markdown = @import("markdown.zig");
/// Write `input` to `writer` with XML special characters replaced by entities.
///
/// Besides the five entity replacements (`<`, `>`, `&`, `"`, `'`), the visible code also:
/// - hands `ESC [ ... <letter>` sequences to `convertAnsiToHtml`, which may write
///   `<span>` / `</span>` markup straight into the output;
/// - passes tab, LF and CR through and replaces every other byte below 0x20 with a space;
/// - closes any spans still counted in `open_spans` once the input is exhausted.
///
/// Errors: whatever `writer` (or `convertAnsiToHtml`) returns.
///
/// NOTE(review): this listing appears to interleave lines from two revisions
/// (two loop headers and two `else` prongs are visible) — confirm against the
/// checked-out file before relying on the exact statement order.
fn escapeXml(writer: anytype, input: []const u8) !void {
for (input) |char| {
var i: usize = 0;
var open_spans: u8 = 0; // Track number of open spans
while (i < input.len) {
const char = input[i];
// Handle ANSI escape sequences
if (char == 0x1B and i + 1 < input.len and input[i + 1] == '[') {
// Found ANSI escape sequence, convert to HTML
i += 2; // Skip ESC and [
const code_start = i;
// Find the end of the ANSI sequence
// NOTE(review): if no terminating letter follows, this loop runs to the end
// of `input`, so the remainder of the text is consumed without any output.
while (i < input.len) {
const c = input[i];
i += 1;
// ANSI sequences end with a letter (A-Z, a-z)
if ((c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z')) {
// Extract the numeric codes
const codes = input[code_start .. i - 1];
try convertAnsiToHtml(writer, codes, c, &open_spans);
break;
}
}
// `i` already points past the sequence, so skip the `i += 1` at the bottom.
continue;
}
switch (char) {
'<' => try writer.writeAll("&lt;"),
'>' => try writer.writeAll("&gt;"),
'&' => try writer.writeAll("&amp;"),
'"' => try writer.writeAll("&quot;"),
'\'' => try writer.writeAll("&apos;"),
else => try writer.writeByte(char),
// Valid XML characters: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
0x09, 0x0A, 0x0D => try writer.writeByte(char), // Tab, LF, CR
else => {
if (char >= 0x20 and char <= 0x7F) {
// Printable ASCII (excluding already handled special chars)
try writer.writeByte(char);
} else if (char >= 0x80) {
// Extended ASCII (will be handled as UTF-8)
try writer.writeByte(char);
} else if (char < 0x20) {
// Other control characters - replace with space to preserve spacing
try writer.writeByte(' ');
} else {
// NOTE(review): `char` is a u8 and the three branches above cover 0x20-0x7F,
// 0x80-0xFF and 0x00-0x1F, so this branch looks unreachable.
// else skip completely invalid characters
const start = if (i < 10) 0 else i - 10;
std.log.warn("invalid character 0x{x} encountered, skipping. Previous {} chars: {s}", .{ char, i - start, input[start..i] });
}
},
}
i += 1;
}
// Close any remaining open spans
while (open_spans > 0) {
try writer.writeAll("</span>");
open_spans -= 1;
}
}
/// Translate one ANSI CSI sequence into inline-styled HTML `<span>` markup.
///
/// `codes` is the parameter text between `ESC [` and the final byte, and
/// `end_char` is that final byte. Only SGR sequences (`end_char == 'm'`)
/// produce output; every other sequence is dropped. `open_spans` counts the
/// `<span>` elements opened here and not yet closed, so the caller can close
/// the remainder at end of input.
///
/// Parameter handling:
/// - `0` or an empty parameter (as in `ESC[m`) resets: all open spans are
///   closed, styles collected so far in this sequence are discarded, and the
///   remaining parameters are still processed (so `0;31` yields a red span).
/// - `1` adds bold; `30`-`37`, `39` and `90`-`97` add a foreground colour.
/// - `22` closes the innermost open span and stops processing the sequence.
///   NOTE(review): the span closed is not necessarily the one that set bold;
///   fixing that needs a style stack instead of a counter.
/// - `38` / `48` (extended colours) are skipped together with their
///   sub-parameters so that e.g. `38;5;1` is not misread as "bold".
/// - Anything else, including unparsable parameters, is ignored.
///
/// Errors: only those returned by `writer`.
fn convertAnsiToHtml(writer: anytype, codes: []const u8, end_char: u8, open_spans: *u8) !void {
    // Only handle SGR (Select Graphic Rendition) sequences that end with 'm'
    if (end_char != 'm') return;

    // Fixed buffer for the collected CSS declarations; avoids allocation.
    var styles_buf: [256]u8 = undefined;
    var styles_len: usize = 0;
    var has_styles = false;

    var code_iter = std.mem.splitScalar(u8, codes, ';');
    while (code_iter.next()) |code_str| {
        const param = std.mem.trim(u8, code_str, " ");
        // An omitted parameter defaults to 0 (reset).
        const code: u8 = if (param.len == 0) 0 else (std.fmt.parseInt(u8, param, 10) catch continue);

        switch (code) {
            0 => {
                // Reset: close everything and forget styles gathered so far,
                // then keep going so later parameters still apply.
                while (open_spans.* > 0) : (open_spans.* -= 1) {
                    try writer.writeAll("</span>");
                }
                styles_len = 0;
                has_styles = false;
            },
            22 => {
                // Normal intensity: close the current span, emit nothing new.
                if (open_spans.* > 0) {
                    try writer.writeAll("</span>");
                    open_spans.* -= 1;
                }
                return;
            },
            38, 48 => {
                // Extended colour: `5;n` (palette) or `2;r;g;b` (true colour).
                // Unsupported, but the sub-parameters must be consumed so they
                // are not interpreted as independent SGR codes.
                const mode_str = code_iter.next() orelse break;
                const mode = std.fmt.parseInt(u8, std.mem.trim(u8, mode_str, " "), 10) catch continue;
                const extra: usize = switch (mode) {
                    5 => 1,
                    2 => 3,
                    else => 0,
                };
                var skipped: usize = 0;
                while (skipped < extra) : (skipped += 1) {
                    if (code_iter.next() == null) break;
                }
            },
            else => if (sgrStyle(code)) |css| {
                try appendColorToBuffer(&styles_buf, &styles_len, &has_styles, css);
            },
        }
    }

    if (has_styles) {
        try writer.writeAll("<span style=\"");
        try writer.writeAll(styles_buf[0..styles_len]);
        try writer.writeAll("\">");
        open_spans.* += 1;
    }
}

/// Map a simple SGR code to the CSS declaration it stands for, or null when
/// the code is not one this converter renders.
fn sgrStyle(code: u8) ?[]const u8 {
    return switch (code) {
        1 => "font-weight:bold", // Bold
        30 => "color:#000000", // Black
        31 => "color:#800000", // Red
        32 => "color:#008000", // Green
        33 => "color:#808000", // Yellow
        34 => "color:#000080", // Blue
        35 => "color:#800080", // Magenta
        36 => "color:#008080", // Cyan
        37 => "color:#c0c0c0", // White
        39 => "color:inherit", // Default foreground
        90 => "color:#808080", // Bright Black (Gray)
        91 => "color:#ff0000", // Bright Red
        92 => "color:#00ff00", // Bright Green
        93 => "color:#ffff00", // Bright Yellow
        94 => "color:#0000ff", // Bright Blue
        95 => "color:#ff00ff", // Bright Magenta
        96 => "color:#00ffff", // Bright Cyan
        97 => "color:#ffffff", // Bright White
        else => null,
    };
}

/// Append one CSS declaration to the style buffer, inserting a `;` separator
/// when the buffer already holds a declaration.
///
/// `styles_len` and `has_styles` are updated in place. A declaration that does
/// not fit is dropped silently and the buffer is left unchanged.
fn appendColorToBuffer(styles_buf: *[256]u8, styles_len: *usize, has_styles: *bool, color: []const u8) !void {
    const separator: []const u8 = if (has_styles.*) ";" else "";
    const needed = separator.len + color.len;
    // Strict bound kept from the original: the last byte is never used.
    if (styles_len.* + needed >= styles_buf.len) return;

    var pos = styles_len.*;
    @memcpy(styles_buf[pos..][0..separator.len], separator);
    pos += separator.len;
    @memcpy(styles_buf[pos..][0..color.len], color);

    styles_len.* = pos + color.len;
    has_styles.* = true;
}
@ -69,9 +208,19 @@ pub fn generateFeed(allocator: Allocator, releases: []const Release) ![]u8 {
try escapeXml(writer, release.provider);
try writer.writeAll("</name></author>\n");
try writer.writeAll(" <summary>");
try escapeXml(writer, release.description);
try writer.writeAll("</summary>\n");
// Convert markdown to HTML
const conversion_result = try markdown.convertMarkdownToHtml(allocator, release.description);
defer conversion_result.deinit(allocator);
// Add content with proper type attribute and XML-escaped HTML
try writer.writeAll(" <content type=\"html\">");
try escapeXml(writer, conversion_result.html);
try writer.writeAll("</content>\n");
// Add fallback metadata if markdown conversion used fallback
if (conversion_result.has_fallback) {
try writer.writeAll(" <category term=\"markdown-fallback\" label=\"Contains unprocessed markdown\"/>\n");
}
try writer.writeAll(" <category term=\"");
try escapeXml(writer, release.provider);
@ -85,6 +234,25 @@ pub fn generateFeed(allocator: Allocator, releases: []const Release) ![]u8 {
return buffer.toOwnedSlice();
}
test "XML escaping with ANSI sequences" {
    var buffer = ArrayList(u8).init(std.testing.allocator);
    defer buffer.deinit();

    // Terminal-style input: cyan text ended by a full reset (0), then bold
    // text ended by "normal intensity" (22).
    const input = "Test \x1B[36mcolored\x1B[0m text and \x1B[1mbold\x1B[22m formatting";
    try escapeXml(buffer.writer(), input);

    // Compare the whole output rather than substrings, so that an unclosed
    // span, a missing bold span or a leaked escape byte fails the test.
    const expected = "Test <span style=\"color:#008080\">colored</span> text and " ++
        "<span style=\"font-weight:bold\">bold</span> formatting";
    try std.testing.expectEqualStrings(expected, buffer.items);
}
test "XML escaping" {
const allocator = std.testing.allocator;
@ -101,7 +269,7 @@ test "XML escaping" {
try std.testing.expectEqualStrings(expected, result);
}
test "Atom feed generation" {
test "Atom feed generation with markdown" {
const allocator = std.testing.allocator;
const releases = [_]Release{
@ -110,7 +278,7 @@ test "Atom feed generation" {
.tag_name = "v1.0.0",
.published_at = "2024-01-01T00:00:00Z",
.html_url = "https://github.com/test/repo/releases/tag/v1.0.0",
.description = "Test release",
.description = "## What's Changed\n* Fixed bug\n* Added feature",
.provider = "github",
},
};
@ -121,6 +289,31 @@ test "Atom feed generation" {
try std.testing.expect(std.mem.indexOf(u8, atom_content, "test/repo") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "v1.0.0") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<feed xmlns=\"http://www.w3.org/2005/Atom\">") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<content type=\"html\">") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "&lt;h2&gt;What&amp;apos;s Changed&lt;/h2&gt;") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "&lt;ul&gt;") != null);
}
test "Atom feed with fallback markdown" {
    const gpa = std.testing.allocator;

    // The description is a fenced code block, which is expected to take the
    // converter's fallback path.
    const releases = [_]Release{
        Release{
            .repo_name = "test/repo",
            .tag_name = "v1.0.0",
            .published_at = "2024-01-01T00:00:00Z",
            .html_url = "https://github.com/test/repo/releases/tag/v1.0.0",
            .description = "```javascript\nconst x = 1;\n```",
            .provider = "github",
        },
    };

    const atom_content = try generateFeed(gpa, &releases);
    defer gpa.free(atom_content);

    // The feed must carry the fallback category and the XML-escaped <pre> tag.
    const required = [_][]const u8{ "markdown-fallback", "&lt;pre&gt;" };
    for (required) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, atom_content, needle) != null);
    }
}
test "Atom feed with special characters" {
@ -140,13 +333,10 @@ test "Atom feed with special characters" {
const atom_content = try generateFeed(allocator, &releases);
defer allocator.free(atom_content);
// Verify special characters are properly escaped
// Verify special characters are properly escaped in title
try std.testing.expect(std.mem.indexOf(u8, atom_content, "&lt;script&gt;") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "&amp; more") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "&quot;release&quot;") != null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "&lt;special&gt;") != null);
// Verify raw special characters are not present
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<script>") == null);
try std.testing.expect(std.mem.indexOf(u8, atom_content, "\"release\"") == null);
}

510
src/markdown.zig Normal file
View file

@ -0,0 +1,510 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const testing = std.testing;
/// Outcome of a markdown-to-HTML conversion.
pub const ConversionResult = struct {
    /// Generated HTML; released by `deinit`.
    html: []u8,
    /// Set by the converter when some input was emitted through its fallback path.
    has_fallback: bool,

    const Self = @This();

    /// Free `html` with `allocator`.
    pub fn deinit(self: Self, allocator: Allocator) void {
        allocator.free(self.html);
    }
};
/// Convert markdown text to HTML with fallback to <pre> blocks for unhandled content.
///
/// The input is processed line by line:
/// - blank lines close an open list and become `<br/>`;
/// - a fenced code block (``` ... ```) becomes one `<pre>` element holding the
///   escaped, otherwise untouched lines between the fences, and counts as a
///   fallback;
/// - `#` to `######` followed by a space become `<h1>` to `<h6>`;
/// - `* ` / `- ` items become `<li>` inside a `<ul>`; a change of marker
///   starts a new list;
/// - lines flagged by `hasComplexMarkdown` are emitted escaped inside `<pre>`
///   and count as a fallback;
/// - everything else becomes a `<p>` with inline formatting applied.
///
/// Returns a `ConversionResult` whose `html` is owned by the caller (release it
/// with `deinit`). `has_fallback` is true when any `<pre>` fallback was used.
/// Errors: allocation failure from `allocator`.
pub fn convertMarkdownToHtml(allocator: Allocator, markdown: []const u8) !ConversionResult {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit();

    var has_fallback = false;
    var in_list = false;
    var list_type: ?u8 = null; // '*' or '-'
    var in_code_fence = false;

    var lines = std.mem.splitScalar(u8, markdown, '\n');
    while (lines.next()) |line| {
        const trimmed = std.mem.trim(u8, line, " \t\r");

        // Inside a fence nothing is interpreted: copy lines verbatim (minus a
        // trailing CR) until the closing fence.
        if (in_code_fence) {
            if (std.mem.startsWith(u8, trimmed, "```")) {
                try result.appendSlice("</pre>\n");
                in_code_fence = false;
            } else {
                const has_cr = line.len > 0 and line[line.len - 1] == '\r';
                const raw = if (has_cr) line[0 .. line.len - 1] else line;
                try appendEscapedHtml(&result, raw);
                try result.append('\n');
            }
            continue;
        }

        if (trimmed.len == 0) {
            // Close the list first: <br/> is not allowed as a child of <ul>.
            try closeOpenList(&result, &in_list, &list_type);
            try result.appendSlice("<br/>\n");
            continue;
        }

        if (isCodeFenceOpener(trimmed)) {
            try closeOpenList(&result, &in_list, &list_type);
            has_fallback = true;
            try result.appendSlice("<pre>");
            in_code_fence = true;
            continue;
        }

        // Handle headers
        if (headingLevel(trimmed)) |level| {
            try closeOpenList(&result, &in_list, &list_type);
            const digit: u8 = '0' + level;
            try result.appendSlice("<h");
            try result.append(digit);
            try result.append('>');
            // Skip the '#' run and the single space that follows it.
            try appendEscapedHtml(&result, trimmed[level + 1 ..]);
            try result.appendSlice("</h");
            try result.append(digit);
            try result.appendSlice(">\n");
            continue;
        }

        // Handle list items
        if (std.mem.startsWith(u8, trimmed, "* ") or std.mem.startsWith(u8, trimmed, "- ")) {
            const current_marker = trimmed[0];
            if (!in_list or list_type != current_marker) {
                if (in_list) try result.appendSlice("</ul>\n");
                try result.appendSlice("<ul>\n");
                in_list = true;
                list_type = current_marker;
            }
            try result.appendSlice("<li>");
            try appendProcessedText(&result, trimmed[2..]);
            try result.appendSlice("</li>\n");
            continue;
        }

        // Anything below is not a list item.
        try closeOpenList(&result, &in_list, &list_type);

        // Check for complex markdown patterns that we don't handle
        if (hasComplexMarkdown(trimmed)) {
            has_fallback = true;
            try result.appendSlice("<pre>");
            try appendEscapedHtml(&result, trimmed);
            try result.appendSlice("</pre>\n");
            continue;
        }

        // Regular paragraph
        try result.appendSlice("<p>");
        try appendProcessedText(&result, trimmed);
        try result.appendSlice("</p>\n");
    }

    // An unterminated fence still has to produce balanced markup.
    if (in_code_fence) try result.appendSlice("</pre>\n");
    try closeOpenList(&result, &in_list, &list_type);

    return ConversionResult{
        .html = try result.toOwnedSlice(),
        .has_fallback = has_fallback,
    };
}

/// Emit `</ul>` and clear the list state if a list is currently open.
fn closeOpenList(result: *ArrayList(u8), in_list: *bool, list_type: *?u8) !void {
    if (!in_list.*) return;
    try result.appendSlice("</ul>\n");
    in_list.* = false;
    list_type.* = null;
}

/// Return the heading level (1-6) when `trimmed` starts with that many `#`
/// characters followed by a space, otherwise null.
fn headingLevel(trimmed: []const u8) ?u8 {
    var hashes: usize = 0;
    while (hashes < trimmed.len and trimmed[hashes] == '#') hashes += 1;
    if (hashes < 1 or hashes > 6) return null;
    if (hashes >= trimmed.len or trimmed[hashes] != ' ') return null;
    const level: u8 = @intCast(hashes);
    return level;
}

/// Report whether `trimmed` opens a fenced code block: three or more backticks
/// at the start and no further backtick on the line (so "```x```" stays inline).
fn isCodeFenceOpener(trimmed: []const u8) bool {
    if (!std.mem.startsWith(u8, trimmed, "```")) return false;
    var ticks: usize = 3;
    while (ticks < trimmed.len and trimmed[ticks] == '`') ticks += 1;
    return std.mem.indexOfScalar(u8, trimmed[ticks..], '`') == null;
}
/// Append `text` to `result`, rendering inline markdown as HTML.
///
/// Recognised constructs, tried at each position in this order: links
/// `[text](url)`, bold `**text**`, italic `*text*`, inline code `` `text` ``.
/// The inner text of a construct is escaped but not processed further. Any
/// character that does not start a construct is HTML-escaped and appended.
fn appendProcessedText(result: *ArrayList(u8), text: []const u8) !void {
    var pos: usize = 0;
    while (pos < text.len) {
        const rest = text[pos..];

        switch (rest[0]) {
            '[' => if (findMarkdownLink(rest)) |link| {
                try result.appendSlice("<a href=\"");
                try appendEscapedHtml(result, link.url);
                try result.appendSlice("\">");
                try appendEscapedHtml(result, link.text);
                try result.appendSlice("</a>");
                pos += link.total_len;
                continue;
            },
            '*' => {
                const next_is_star = rest.len > 1 and rest[1] == '*';
                if (next_is_star) {
                    if (findBoldText(rest)) |bold| {
                        try result.appendSlice("<strong>");
                        try appendEscapedHtml(result, bold.text);
                        try result.appendSlice("</strong>");
                        pos += bold.total_len;
                        continue;
                    }
                }
                // A lone '*' only: neither neighbour may be another '*'.
                const prev_is_star = pos > 0 and text[pos - 1] == '*';
                if (!prev_is_star and !next_is_star) {
                    if (findItalicText(rest)) |italic| {
                        try result.appendSlice("<em>");
                        try appendEscapedHtml(result, italic.text);
                        try result.appendSlice("</em>");
                        pos += italic.total_len;
                        continue;
                    }
                }
            },
            '`' => if (findInlineCode(rest)) |code| {
                try result.appendSlice("<code>");
                try appendEscapedHtml(result, code.text);
                try result.appendSlice("</code>");
                pos += code.total_len;
                continue;
            },
            else => {},
        }

        // No construct matched here: emit the single character, escaped.
        try appendEscapedHtml(result, rest[0..1]);
        pos += 1;
    }
}
/// Append `text` to `result`, replacing `<`, `>`, `&`, `"` and `'` with their
/// HTML entities. All other bytes are copied unchanged.
fn appendEscapedHtml(result: *ArrayList(u8), text: []const u8) !void {
    // Copy unescaped runs in one go and splice entities in between them.
    var run_start: usize = 0;
    for (text, 0..) |byte, idx| {
        const entity: []const u8 = switch (byte) {
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            '"' => "&quot;",
            '\'' => "&apos;",
            else => continue,
        };
        try result.appendSlice(text[run_start..idx]);
        try result.appendSlice(entity);
        run_start = idx + 1;
    }
    try result.appendSlice(text[run_start..]);
}
/// A markdown link located at the start of a slice.
const LinkInfo = struct {
    /// Link label: the bytes between the outer `[` and `]`.
    text: []const u8,
    /// Link destination: the bytes between the outer `(` and `)`.
    url: []const u8,
    /// Number of input bytes the whole `[text](url)` construct occupies.
    total_len: usize,
};

/// Find markdown link pattern [text](url) at the very start of `text`.
///
/// Brackets in the label and parentheses in the destination are matched as
/// balanced pairs, so `[Foo](https://example.org/Foo_(bar))` keeps its full
/// URL. Returns null when `text` does not start with `[`, when a delimiter is
/// left unbalanced, or when `]` is not immediately followed by `(`.
/// The returned slices point into `text`.
fn findMarkdownLink(text: []const u8) ?LinkInfo {
    if (text.len < 4 or text[0] != '[') return null;

    const bracket_pos = findBalancedClose(text, 0, '[', ']') orelse return null;

    const paren_open = bracket_pos + 1;
    if (paren_open >= text.len or text[paren_open] != '(') return null;

    const paren_pos = findBalancedClose(text, paren_open, '(', ')') orelse return null;

    return LinkInfo{
        .text = text[1..bracket_pos],
        .url = text[paren_open + 1 .. paren_pos],
        .total_len = paren_pos + 1,
    };
}

/// Return the index of the `close` byte that balances the `open` byte at
/// `open_pos`, or null when the input ends first. `text[open_pos]` must be `open`.
fn findBalancedClose(text: []const u8, open_pos: usize, open: u8, close: u8) ?usize {
    var depth: usize = 0;
    for (text[open_pos..], open_pos..) |char, i| {
        if (char == open) {
            depth += 1;
        } else if (char == close) {
            // depth is at least 1 here because the scan starts on `open`.
            depth -= 1;
            if (depth == 0) return i;
        }
    }
    return null;
}
/// A delimited inline span located at the start of a slice.
const TextInfo = struct {
    /// Bytes between the opening and closing delimiter.
    text: []const u8,
    /// Number of input bytes the span occupies, delimiters included.
    total_len: usize,
};

/// Find bold text **text** at the start of `text`.
/// The span ends at the first `**` found at index 2 or later.
fn findBoldText(text: []const u8) ?TextInfo {
    if (text.len < 4 or !std.mem.startsWith(u8, text, "**")) return null;

    const close = std.mem.indexOfPos(u8, text, 2, "**") orelse return null;
    return TextInfo{
        .text = text[2..close],
        .total_len = close + 2,
    };
}

/// Find italic text *text* at the start of `text`.
///
/// The opening `*` must be followed by a non-whitespace byte and the closing
/// `*` must be preceded by one, so arithmetic such as `2 * 3 * 4` is left
/// alone. A closing candidate preceded by whitespace is skipped and the search
/// continues. Empty emphasis (`**`) is rejected.
fn findItalicText(text: []const u8) ?TextInfo {
    if (text.len < 3 or text[0] != '*') return null;
    if (std.ascii.isWhitespace(text[1])) return null;

    var search_from: usize = 1;
    while (std.mem.indexOfScalarPos(u8, text, search_from, '*')) |close| {
        if (close == 1) return null; // nothing between the delimiters
        if (!std.ascii.isWhitespace(text[close - 1])) {
            return TextInfo{
                .text = text[1..close],
                .total_len = close + 1,
            };
        }
        search_from = close + 1;
    }
    return null;
}

/// Find inline code `text` at the start of `text`.
/// The span ends at the first backtick after the opening one.
fn findInlineCode(text: []const u8) ?TextInfo {
    if (text.len < 3 or text[0] != '`') return null;

    const close = std.mem.indexOfScalarPos(u8, text, 1, '`') orelse return null;
    return TextInfo{
        .text = text[1..close],
        .total_len = close + 1,
    };
}
/// Report whether a (trimmed) line uses markdown the converter does not render:
/// code fences, table pipes, images, block quotes, horizontal rules, or what
/// looks like raw HTML (both `<` and `>` present).
fn hasComplexMarkdown(text: []const u8) bool {
    // Markers that disqualify the line wherever they occur:
    // code blocks, tables, images.
    const anywhere = [_][]const u8{ "```", "|", "![" };
    for (anywhere) |needle| {
        if (std.mem.indexOf(u8, text, needle) != null) return true;
    }

    // Block quotes
    if (std.mem.startsWith(u8, text, "> ")) return true;

    // Horizontal rules: the line must be exactly one of these.
    const rules = [_][]const u8{ "---", "***" };
    for (rules) |rule| {
        if (std.mem.eql(u8, text, rule)) return true;
    }

    // HTML tags (already HTML, might be complex)
    const has_open = std.mem.indexOfScalar(u8, text, '<') != null;
    const has_close = std.mem.indexOfScalar(u8, text, '>') != null;
    return has_open and has_close;
}
// Tests
test "convert headers" {
    const gpa = testing.allocator;

    // One heading per level from h2 to h5; the apostrophe must come out escaped.
    const input = "## What's Changed\n### Bug Fixes\n#### Details\n##### Notes";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    const want = "<h2>What&apos;s Changed</h2>\n<h3>Bug Fixes</h3>\n<h4>Details</h4>\n<h5>Notes</h5>\n";
    try testing.expectEqualStrings(want, converted.html);
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Headers test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "convert lists" {
    const gpa = testing.allocator;

    // Switching from '*' to '-' is expected to start a second list.
    const input = "* First item\n* Second item\n- Different marker\n- Another item";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    const want = "<ul>\n<li>First item</li>\n<li>Second item</li>\n</ul>\n<ul>\n<li>Different marker</li>\n<li>Another item</li>\n</ul>\n";
    try testing.expectEqualStrings(want, converted.html);
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Lists test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "convert links" {
    const gpa = testing.allocator;

    // A single inline link inside an ordinary paragraph.
    const input = "Check out [GitHub](https://github.com) for more info.";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    const want = "<p>Check out <a href=\"https://github.com\">GitHub</a> for more info.</p>\n";
    try testing.expectEqualStrings(want, converted.html);
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Links test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "convert bold and italic" {
    const gpa = testing.allocator;

    // Both emphasis forms in one paragraph.
    const input = "This is **bold** and this is *italic* text.";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    const want = "<p>This is <strong>bold</strong> and this is <em>italic</em> text.</p>\n";
    try testing.expectEqualStrings(want, converted.html);
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Bold/Italic test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "convert inline code" {
    const gpa = testing.allocator;

    // A backtick-delimited span inside a paragraph.
    const input = "Use the `git commit` command to save changes.";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    const want = "<p>Use the <code>git commit</code> command to save changes.</p>\n";
    try testing.expectEqualStrings(want, converted.html);
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Inline code test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "fallback for complex markdown" {
    const gpa = testing.allocator;

    // A fenced code block followed by a table: neither is rendered natively.
    const input = "```javascript\nconst x = 1;\n```\n\n| Column 1 | Column 2 |\n|----------|----------|\n| Data | More |";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    try testing.expect(converted.has_fallback);
    const pre_index = std.mem.indexOf(u8, converted.html, "<pre>");
    try testing.expect(pre_index != null);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Fallback test - Input: {s}\nOutput: {s}\nHas fallback: {}\n", .{ input, converted.html, converted.has_fallback });
    }
}
test "real release note example" {
    const gpa = testing.allocator;

    // Example from actual release notes in the feed
    const input =
        \\## What's Changed
        \\
        \\* Not generating undo records for insertions into tables created by the same transaction (performance)
        \\* Fastpath intra-page navigation in B-tree (performance)
        \\* OrioleDB database cluster rewind (experimental feature)
        \\* Support of tablespaces
        \\* Support of more than 32 columns for Oriole table
        \\* Fallback to simple reindex instead of concurrent (sql syntax compatibility)
        \\
        \\**Full Changelog**: https://github.com/orioledb/orioledb/compare/beta11...beta12
    ;
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    // Heading, list, first item and bold label must all be present.
    const required = [_][]const u8{
        "<h2>What&apos;s Changed</h2>",
        "<ul>",
        "<li>Not generating undo records",
        "<strong>Full Changelog</strong>",
    };
    for (required) |needle| {
        try testing.expect(std.mem.indexOf(u8, converted.html, needle) != null);
    }
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Real example test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "mixed content with headers and lists" {
    const gpa = testing.allocator;

    // Another real example pattern
    const input =
        \\## KraftKit v0.11.6-212-g74599361 (2025-07-13T12:45:17Z)
        \\
        \\This is the pre-release version of KraftKit.
        \\
        \\## Changelog
        \\### 🤖 Bumps
        \\* 41a6a089d3ca955711a5f5291b0ef82aa14d5792: gomod(deps): Bump github.com/charmbracelet/bubbletea from 1.3.5 to 1.3.6 (@dependabot[bot])
        \\* ef77627f58e50f5ad027ff06c8d365db57feb020: gomod(deps): Bump golang.org/x/term from 0.32.0 to 0.33.0 (@dependabot[bot])
    ;
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    // Both heading levels, the list and its first item must all be present.
    const required = [_][]const u8{
        "<h2>KraftKit v0.11.6-212-g74599361",
        "<h2>Changelog</h2>",
        "<h3>🤖 Bumps</h3>",
        "<ul>",
        "<li>41a6a089d3ca955711a5f5291b0ef82aa14d5792",
    };
    for (required) |needle| {
        try testing.expect(std.mem.indexOf(u8, converted.html, needle) != null);
    }
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Mixed content test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
test "html escaping" {
    const gpa = testing.allocator;

    // A heading carrying every character that must be entity-escaped.
    const input = "## Test <script>alert('xss')</script> & \"quotes\"";
    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    const want = "<h2>Test &lt;script&gt;alert(&apos;xss&apos;)&lt;/script&gt; &amp; &quot;quotes&quot;</h2>\n";
    try testing.expectEqualStrings(want, converted.html);
    try testing.expect(!converted.has_fallback);

    // Optional dump, enabled by the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}

View file

@ -86,8 +86,11 @@ fn parseEntry(allocator: Allocator, entry_xml: []const u8) !Release {
release.published_at = updated;
}
// Parse summary (description)
if (extractTagContent(entry_xml, "summary", allocator)) |summary| {
// Parse content (description) - try content first, then fall back to summary
if (extractTagContent(entry_xml, "content", allocator)) |content| {
allocator.free(release.description);
release.description = content;
} else if (extractTagContent(entry_xml, "summary", allocator)) |summary| {
allocator.free(release.description);
release.description = summary;
}
@ -115,6 +118,23 @@ fn extractTagContent(xml: []const u8, tag_name: []const u8, allocator: Allocator
return unescapeXml(allocator, content) catch null;
}
}
// Also try with attributes (e.g., <content type="html">)
const open_tag_with_attrs = std.fmt.allocPrint(allocator, "<{s} ", .{tag_name}) catch return null;
defer allocator.free(open_tag_with_attrs);
if (std.mem.indexOf(u8, xml, open_tag_with_attrs)) |start_pos| {
// Find the end of the opening tag
if (std.mem.indexOf(u8, xml[start_pos..], ">")) |tag_end_offset| {
const content_start = start_pos + tag_end_offset + 1;
if (std.mem.indexOf(u8, xml[content_start..], close_tag)) |end_offset| {
const content_end = content_start + end_offset;
const content = xml[content_start..content_end];
return unescapeXml(allocator, content) catch null;
}
}
}
return null;
}

View file

@ -48,14 +48,14 @@ test "round trip: generate atom feed and parse it back" {
try testing.expectEqualStrings("v1.0.0", parsed_releases.items[0].tag_name);
try testing.expectEqualStrings("2024-01-01T00:00:00Z", parsed_releases.items[0].published_at);
try testing.expectEqualStrings("https://github.com/test/repo1/releases/tag/v1.0.0", parsed_releases.items[0].html_url);
try testing.expectEqualStrings("First release", parsed_releases.items[0].description);
try testing.expectEqualStrings("<p>First release</p>\n", parsed_releases.items[0].description);
try testing.expectEqualStrings("github", parsed_releases.items[0].provider);
try testing.expectEqualStrings("test/repo2", parsed_releases.items[1].repo_name);
try testing.expectEqualStrings("v2.0.0", parsed_releases.items[1].tag_name);
try testing.expectEqualStrings("2024-01-02T00:00:00Z", parsed_releases.items[1].published_at);
try testing.expectEqualStrings("https://github.com/test/repo2/releases/tag/v2.0.0", parsed_releases.items[1].html_url);
try testing.expectEqualStrings("Second release", parsed_releases.items[1].description);
try testing.expectEqualStrings("<p>Second release</p>\n", parsed_releases.items[1].description);
try testing.expectEqualStrings("github", parsed_releases.items[1].provider);
}
@ -78,10 +78,11 @@ test "parse atom feed with special characters" {
const atom_content = try atom.generateFeed(allocator, &original_releases);
defer allocator.free(atom_content);
// Verify the XML contains escaped characters
// Verify the XML contains escaped characters in the title (not in content)
try testing.expect(std.mem.indexOf(u8, atom_content, "&lt;script&gt;") != null);
try testing.expect(std.mem.indexOf(u8, atom_content, "&amp; more") != null);
try testing.expect(std.mem.indexOf(u8, atom_content, "&quot;release&quot;") != null);
// The content will be XML-escaped HTML, so quotes in HTML will be &amp;quot;
try testing.expect(std.mem.indexOf(u8, atom_content, "&amp;quot;release&amp;quot;") != null);
// Parse it back (this should unescape the characters)
var parsed_releases = try xml_parser.parseAtomFeed(allocator, atom_content);
@ -96,7 +97,7 @@ test "parse atom feed with special characters" {
try testing.expectEqual(@as(usize, 1), parsed_releases.items.len);
try testing.expectEqualStrings("test/repo<script>", parsed_releases.items[0].repo_name);
try testing.expectEqualStrings("v1.0.0 & more", parsed_releases.items[0].tag_name);
try testing.expectEqualStrings("Test \"release\" with <special> chars & symbols", parsed_releases.items[0].description);
try testing.expectEqualStrings("<pre>Test &quot;release&quot; with &lt;special&gt; chars &amp; symbols</pre>\n", parsed_releases.items[0].description);
}
test "parse malformed atom feed gracefully" {