better output through markdown->html (then proper escaping to atom feed)
This commit is contained in:
parent
2f14d94dc4
commit
a059264150
5 changed files with 740 additions and 19 deletions
|
@ -15,7 +15,7 @@ repos:
|
||||||
- id: zig-build
|
- id: zig-build
|
||||||
- repo: local
|
- repo: local
|
||||||
hooks:
|
hooks:
|
||||||
- id: zlint
|
- id: test
|
||||||
name: Run zig build test
|
name: Run zig build test
|
||||||
entry: zig
|
entry: zig
|
||||||
args: ["build", "--verbose", "test"]
|
args: ["build", "--verbose", "test"]
|
||||||
|
|
212
src/atom.zig
212
src/atom.zig
|
@ -4,17 +4,156 @@ const ArrayList = std.ArrayList;
|
||||||
const zeit = @import("zeit");
|
const zeit = @import("zeit");
|
||||||
|
|
||||||
const Release = @import("main.zig").Release;
|
const Release = @import("main.zig").Release;
|
||||||
|
const markdown = @import("markdown.zig");
|
||||||
|
|
||||||
/// Write `input` to `writer`, escaped for use in XML character data.
///
/// - `<`, `>`, `&`, `"` and `'` are replaced by the predefined XML entities.
/// - ANSI CSI sequences (`ESC [ ... <letter>`) are removed from the output.
///   Each one is handed to `convertAnsiToHtml`, which may write
///   `<span style="...">` / `</span>` markup; that markup is written as-is.
/// - Tab, LF and CR pass through. Every other byte below 0x20 (including a
///   lone ESC) is replaced by a space, because XML 1.0 does not allow it.
/// - All bytes >= 0x20 are copied unchanged; the input is not validated as UTF-8.
///
/// Spans still open when the input ends are closed. A CSI sequence with no
/// terminating letter swallows the remainder of the input.
///
/// NOTE(review): because span markup is emitted unescaped, callers that write
/// into attribute values should make sure the input carries no ANSI sequences.
fn escapeXml(writer: anytype, input: []const u8) !void {
    var i: usize = 0;
    var open_spans: u8 = 0; // number of <span> elements currently open

    while (i < input.len) {
        const char = input[i];

        // CSI introducer: ESC followed by '['.
        if (char == 0x1B and i + 1 < input.len and input[i + 1] == '[') {
            i += 2;
            const code_start = i;

            // Parameters run up to the first ASCII letter, which is the final byte.
            while (i < input.len) {
                const c = input[i];
                i += 1;
                if ((c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z')) {
                    try convertAnsiToHtml(writer, input[code_start .. i - 1], c, &open_spans);
                    break;
                }
            }
            continue;
        }

        switch (char) {
            '<' => try writer.writeAll("&lt;"),
            '>' => try writer.writeAll("&gt;"),
            '&' => try writer.writeAll("&amp;"),
            '"' => try writer.writeAll("&quot;"),
            '\'' => try writer.writeAll("&apos;"),
            // The only control characters XML 1.0 permits.
            0x09, 0x0A, 0x0D => try writer.writeByte(char),
            // Remaining C0 controls: keep the spacing, drop the byte.
            0x00...0x08, 0x0B, 0x0C, 0x0E...0x1F => try writer.writeByte(' '),
            // Printable ASCII, DEL, and UTF-8 lead/continuation bytes.
            else => try writer.writeByte(char),
        }
        i += 1;
    }

    while (open_spans > 0) : (open_spans -= 1) {
        try writer.writeAll("</span>");
    }
}
|
||||||
|
|
||||||
|
/// Translate the parameters of one ANSI CSI sequence into HTML span markup.
///
/// `codes` is the text between `ESC[` and the final byte `end_char`. Only SGR
/// sequences (`end_char == 'm'`) produce output; everything else is ignored.
/// `open_spans` counts the `<span>` elements currently open on `writer` and is
/// updated in place.
///
/// Parameters are processed left to right:
/// - `0` or an omitted parameter closes every open span and discards styles
///   collected earlier in the same sequence; later parameters still apply
///   (so `0;31` means "reset, then red").
/// - `1` adds `font-weight:bold`.
/// - `22` closes the innermost open span.
/// - `30`-`37`, `39`, `90`-`97` add a foreground `color`.
/// - `38`/`48` (extended colours) are skipped together with their
///   sub-parameters, so `38;5;31` is not mistaken for "red".
/// - Anything else, including values that do not fit in a `u8`, is ignored.
///
/// If any style was collected, one `<span style="...">` is opened at the end.
fn convertAnsiToHtml(writer: anytype, codes: []const u8, end_char: u8, open_spans: *u8) !void {
    if (end_char != 'm') return;

    const normal_colors = [8][]const u8{
        "color:#000000", // Black
        "color:#800000", // Red
        "color:#008000", // Green
        "color:#808000", // Yellow
        "color:#000080", // Blue
        "color:#800080", // Magenta
        "color:#008080", // Cyan
        "color:#c0c0c0", // White
    };
    const bright_colors = [8][]const u8{
        "color:#808080", // Bright Black (Gray)
        "color:#ff0000", // Bright Red
        "color:#00ff00", // Bright Green
        "color:#ffff00", // Bright Yellow
        "color:#0000ff", // Bright Blue
        "color:#ff00ff", // Bright Magenta
        "color:#00ffff", // Bright Cyan
        "color:#ffffff", // Bright White
    };

    // Fixed buffer for the style attribute so no allocation is needed.
    var styles_buf: [256]u8 = undefined;
    var styles_len: usize = 0;
    var has_styles = false;

    var code_iter = std.mem.splitScalar(u8, codes, ';');
    while (code_iter.next()) |code_str| {
        const param = std.mem.trim(u8, code_str, " ");
        // An omitted parameter defaults to 0, so `ESC[m` is a reset.
        const code: u8 = if (param.len == 0) 0 else std.fmt.parseInt(u8, param, 10) catch continue;

        switch (code) {
            0 => {
                while (open_spans.* > 0) : (open_spans.* -= 1) {
                    try writer.writeAll("</span>");
                }
                styles_len = 0;
                has_styles = false;
            },
            1 => try appendColorToBuffer(&styles_buf, &styles_len, &has_styles, "font-weight:bold"),
            22 => if (open_spans.* > 0) {
                // Normal intensity: close the innermost span rather than
                // emitting an explicit font-weight:normal.
                try writer.writeAll("</span>");
                open_spans.* -= 1;
            },
            30...37 => try appendColorToBuffer(&styles_buf, &styles_len, &has_styles, normal_colors[code - 30]),
            38, 48 => {
                // Extended colour: `5;n` (palette) or `2;r;g;b` (RGB). Consume
                // the sub-parameters so they are not read as SGR codes.
                const mode = std.mem.trim(u8, code_iter.next() orelse break, " ");
                const extra: usize = if (std.mem.eql(u8, mode, "5"))
                    1
                else if (std.mem.eql(u8, mode, "2"))
                    3
                else
                    0;
                for (0..extra) |_| _ = code_iter.next();
            },
            39 => try appendColorToBuffer(&styles_buf, &styles_len, &has_styles, "color:inherit"), // Default foreground
            90...97 => try appendColorToBuffer(&styles_buf, &styles_len, &has_styles, bright_colors[code - 90]),
            else => {}, // Unknown or unsupported code
        }
    }

    // Refuse to open a span that the u8 counter could not track.
    if (has_styles and open_spans.* < std.math.maxInt(u8)) {
        try writer.writeAll("<span style=\"");
        try writer.writeAll(styles_buf[0..styles_len]);
        try writer.writeAll("\">");
        open_spans.* += 1;
    }
}

/// Append one CSS declaration to the style buffer, preceded by `;` when the
/// buffer already holds a declaration.
///
/// `styles_len` and `has_styles` are updated only when the declaration fits;
/// a declaration that would fill or overflow the buffer is dropped silently.
fn appendColorToBuffer(styles_buf: *[256]u8, styles_len: *usize, has_styles: *bool, color: []const u8) !void {
    const separator: []const u8 = if (has_styles.*) ";" else "";
    const start = styles_len.*;
    const end = start + separator.len + color.len;
    if (end >= styles_buf.len) return;

    @memcpy(styles_buf[start .. start + separator.len], separator);
    @memcpy(styles_buf[start + separator.len .. end], color);
    styles_len.* = end;
    has_styles.* = true;
}
|
||||||
|
|
||||||
|
@ -69,9 +208,19 @@ pub fn generateFeed(allocator: Allocator, releases: []const Release) ![]u8 {
|
||||||
try escapeXml(writer, release.provider);
|
try escapeXml(writer, release.provider);
|
||||||
try writer.writeAll("</name></author>\n");
|
try writer.writeAll("</name></author>\n");
|
||||||
|
|
||||||
try writer.writeAll(" <summary>");
|
// Convert markdown to HTML
|
||||||
try escapeXml(writer, release.description);
|
const conversion_result = try markdown.convertMarkdownToHtml(allocator, release.description);
|
||||||
try writer.writeAll("</summary>\n");
|
defer conversion_result.deinit(allocator);
|
||||||
|
|
||||||
|
// Add content with proper type attribute and XML-escaped HTML
|
||||||
|
try writer.writeAll(" <content type=\"html\">");
|
||||||
|
try escapeXml(writer, conversion_result.html);
|
||||||
|
try writer.writeAll("</content>\n");
|
||||||
|
|
||||||
|
// Add fallback metadata if markdown conversion used fallback
|
||||||
|
if (conversion_result.has_fallback) {
|
||||||
|
try writer.writeAll(" <category term=\"markdown-fallback\" label=\"Contains unprocessed markdown\"/>\n");
|
||||||
|
}
|
||||||
|
|
||||||
try writer.writeAll(" <category term=\"");
|
try writer.writeAll(" <category term=\"");
|
||||||
try escapeXml(writer, release.provider);
|
try escapeXml(writer, release.provider);
|
||||||
|
@ -85,6 +234,25 @@ pub fn generateFeed(allocator: Allocator, releases: []const Release) ![]u8 {
|
||||||
return buffer.toOwnedSlice();
|
return buffer.toOwnedSlice();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "XML escaping with ANSI sequences" {
    var buffer = ArrayList(u8).init(std.testing.allocator);
    defer buffer.deinit();

    // Terminal-style output: a cyan word closed by a reset, then a bold word
    // closed by "normal intensity".
    const input = "Test \x1B[36mcolored\x1B[0m text and \x1B[1mbold\x1B[22m formatting";
    try escapeXml(buffer.writer(), input);

    // Pin the whole output so span content, nesting and close order are all checked.
    try std.testing.expectEqualStrings(
        "Test <span style=\"color:#008080\">colored</span> text and <span style=\"font-weight:bold\">bold</span> formatting",
        buffer.items,
    );
}
|
||||||
|
|
||||||
test "XML escaping" {
|
test "XML escaping" {
|
||||||
const allocator = std.testing.allocator;
|
const allocator = std.testing.allocator;
|
||||||
|
|
||||||
|
@ -101,7 +269,7 @@ test "XML escaping" {
|
||||||
try std.testing.expectEqualStrings(expected, result);
|
try std.testing.expectEqualStrings(expected, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Atom feed generation" {
|
test "Atom feed generation with markdown" {
|
||||||
const allocator = std.testing.allocator;
|
const allocator = std.testing.allocator;
|
||||||
|
|
||||||
const releases = [_]Release{
|
const releases = [_]Release{
|
||||||
|
@ -110,7 +278,7 @@ test "Atom feed generation" {
|
||||||
.tag_name = "v1.0.0",
|
.tag_name = "v1.0.0",
|
||||||
.published_at = "2024-01-01T00:00:00Z",
|
.published_at = "2024-01-01T00:00:00Z",
|
||||||
.html_url = "https://github.com/test/repo/releases/tag/v1.0.0",
|
.html_url = "https://github.com/test/repo/releases/tag/v1.0.0",
|
||||||
.description = "Test release",
|
.description = "## What's Changed\n* Fixed bug\n* Added feature",
|
||||||
.provider = "github",
|
.provider = "github",
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -121,6 +289,31 @@ test "Atom feed generation" {
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "test/repo") != null);
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "test/repo") != null);
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "v1.0.0") != null);
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "v1.0.0") != null);
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<feed xmlns=\"http://www.w3.org/2005/Atom\">") != null);
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<feed xmlns=\"http://www.w3.org/2005/Atom\">") != null);
|
||||||
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<content type=\"html\">") != null);
|
||||||
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<h2>What&apos;s Changed</h2>") != null);
|
||||||
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<ul>") != null);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "Atom feed with fallback markdown" {
    const allocator = std.testing.allocator;

    // A fenced code block is markdown the converter cannot render natively.
    const releases = [_]Release{
        Release{
            .repo_name = "test/repo",
            .tag_name = "v1.0.0",
            .published_at = "2024-01-01T00:00:00Z",
            .html_url = "https://github.com/test/repo/releases/tag/v1.0.0",
            .description = "```javascript\nconst x = 1;\n```",
            .provider = "github",
        },
    };

    const atom_content = try generateFeed(allocator, &releases);
    defer allocator.free(atom_content);

    // The entry is tagged with the fallback category ...
    try std.testing.expect(std.mem.indexOf(u8, atom_content, "markdown-fallback") != null);
    // ... and the <pre> fallback appears XML-escaped inside <content type="html">.
    try std.testing.expect(std.mem.indexOf(u8, atom_content, "&lt;pre&gt;") != null);
}
|
||||||
|
|
||||||
test "Atom feed with special characters" {
|
test "Atom feed with special characters" {
|
||||||
|
@ -140,13 +333,10 @@ test "Atom feed with special characters" {
|
||||||
const atom_content = try generateFeed(allocator, &releases);
|
const atom_content = try generateFeed(allocator, &releases);
|
||||||
defer allocator.free(atom_content);
|
defer allocator.free(atom_content);
|
||||||
|
|
||||||
// Verify special characters are properly escaped
|
// Verify special characters are properly escaped in title
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<script>") != null);
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<script>") != null);
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "& more") != null);
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "& more") != null);
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, ""release"") != null);
|
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<special>") != null);
|
|
||||||
|
|
||||||
// Verify raw special characters are not present
|
// Verify raw special characters are not present
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<script>") == null);
|
try std.testing.expect(std.mem.indexOf(u8, atom_content, "<script>") == null);
|
||||||
try std.testing.expect(std.mem.indexOf(u8, atom_content, "\"release\"") == null);
|
|
||||||
}
|
}
|
||||||
|
|
510
src/markdown.zig
Normal file
510
src/markdown.zig
Normal file
|
@ -0,0 +1,510 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
const ArrayList = std.ArrayList;
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
/// Outcome of `convertMarkdownToHtml`.
pub const ConversionResult = struct {
    /// Generated HTML. Owned by this value; release it with `deinit`.
    html: []u8,
    /// True when at least part of the input was emitted through the `<pre>` fallback.
    has_fallback: bool,

    /// Free `html`. `allocator` must be the allocator the conversion was run with.
    pub fn deinit(self: ConversionResult, allocator: Allocator) void {
        allocator.free(self.html);
    }
};
|
||||||
|
|
||||||
|
/// Convert a small Markdown subset to HTML, line by line.
///
/// - Blank lines become `<br/>`.
/// - ATX headings `#` .. `######` become `<h1>` .. `<h6>`; heading text is
///   HTML-escaped but gets no inline formatting.
/// - Lines starting with `* ` or `- ` become `<li>` items of a `<ul>`; a change
///   of marker starts a new list.
/// - A line starting with ``` opens a fenced code block. Everything up to the
///   closing fence is escaped and emitted inside a single `<pre>`; the info
///   string after the opening fence is dropped. Code blocks count as fallback.
/// - Any other line flagged by `hasComplexMarkdown` is escaped and wrapped in
///   `<pre>`, and counts as fallback.
/// - Everything else becomes a `<p>` with inline formatting applied by
///   `appendProcessedText`.
///
/// The caller owns the returned `ConversionResult` and must call its `deinit`
/// with the same allocator. Fails only if allocation fails.
pub fn convertMarkdownToHtml(allocator: Allocator, markdown: []const u8) !ConversionResult {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit(); // no-op after toOwnedSlice, frees on error paths

    var has_fallback = false;
    var in_list = false;
    var list_type: ?u8 = null; // '*' or '-'
    var in_code_block = false;

    var lines = std.mem.splitScalar(u8, markdown, '\n');
    while (lines.next()) |line| {
        const trimmed = std.mem.trim(u8, line, " \t\r");

        // Inside a fence nothing is interpreted; only the closing fence matters.
        if (in_code_block) {
            if (std.mem.startsWith(u8, trimmed, "```")) {
                try result.appendSlice("</pre>\n");
                in_code_block = false;
            } else {
                // Keep indentation; only drop a CR left over from CRLF input.
                try appendEscapedHtml(&result, std.mem.trim(u8, line, "\r"));
                try result.append('\n');
            }
            continue;
        }

        if (trimmed.len == 0) {
            try result.appendSlice("<br/>\n");
            continue;
        }

        // Opening fence. A line that also contains a closing ``` is not a
        // block opener and falls through to the generic fallback below.
        if (std.mem.startsWith(u8, trimmed, "```") and std.mem.indexOf(u8, trimmed[3..], "```") == null) {
            try closeOpenList(&result, &in_list, &list_type);
            has_fallback = true;
            try result.appendSlice("<pre>");
            in_code_block = true;
            continue;
        }

        if (headingLevel(trimmed)) |level| {
            try closeOpenList(&result, &in_list, &list_type);
            try result.writer().print("<h{d}>", .{level});
            try appendEscapedHtml(&result, trimmed[level + 1 ..]);
            try result.writer().print("</h{d}>\n", .{level});
            continue;
        }

        if (std.mem.startsWith(u8, trimmed, "* ") or std.mem.startsWith(u8, trimmed, "- ")) {
            const current_marker = trimmed[0];

            if (!in_list or list_type != current_marker) {
                if (in_list) try result.appendSlice("</ul>\n");
                try result.appendSlice("<ul>\n");
                in_list = true;
                list_type = current_marker;
            }

            try result.appendSlice("<li>");
            try appendProcessedText(&result, trimmed[2..]);
            try result.appendSlice("</li>\n");
            continue;
        }

        // Any other kind of line ends the current list.
        try closeOpenList(&result, &in_list, &list_type);

        if (hasComplexMarkdown(trimmed)) {
            has_fallback = true;
            try result.appendSlice("<pre>");
            try appendEscapedHtml(&result, trimmed);
            try result.appendSlice("</pre>\n");
            continue;
        }

        try result.appendSlice("<p>");
        try appendProcessedText(&result, trimmed);
        try result.appendSlice("</p>\n");
    }

    // Close whatever is still open at end of input (at most one of these).
    if (in_code_block) try result.appendSlice("</pre>\n");
    if (in_list) try result.appendSlice("</ul>\n");

    return ConversionResult{
        .html = try result.toOwnedSlice(),
        .has_fallback = has_fallback,
    };
}

/// Return the ATX heading level (1-6) of `line`, or null if it is not a heading.
/// A heading is one to six `#` characters followed by a space.
fn headingLevel(line: []const u8) ?usize {
    var level: usize = 0;
    while (level < line.len and line[level] == '#') level += 1;
    if (level == 0 or level > 6) return null;
    if (level >= line.len or line[level] != ' ') return null;
    return level;
}

/// Emit `</ul>` and reset the list state if a list is currently open.
fn closeOpenList(result: *ArrayList(u8), in_list: *bool, list_type: *?u8) !void {
    if (!in_list.*) return;
    try result.appendSlice("</ul>\n");
    in_list.* = false;
    list_type.* = null;
}
|
||||||
|
|
||||||
|
/// Append `text` to `result` as HTML, applying inline Markdown formatting.
///
/// Recognised constructs, tried at each position in this order:
/// `[text](url)` -> `<a href>`, `**text**` -> `<strong>`, `*text*` -> `<em>`,
/// `` `text` `` -> `<code>`. The inner text (and the URL) is HTML-escaped and
/// not processed further, so formatting does not nest. A marker without a
/// matching closer is emitted literally. All other bytes are HTML-escaped.
fn appendProcessedText(result: *ArrayList(u8), text: []const u8) !void {
    var i: usize = 0;
    while (i < text.len) {
        const rest = text[i..];

        switch (text[i]) {
            '[' => if (findMarkdownLink(rest)) |link_info| {
                try result.appendSlice("<a href=\"");
                try appendEscapedHtml(result, link_info.url);
                try result.appendSlice("\">");
                try appendEscapedHtml(result, link_info.text);
                try result.appendSlice("</a>");
                i += link_info.total_len;
                continue;
            },
            '*' => {
                const next_is_star = i + 1 < text.len and text[i + 1] == '*';
                if (next_is_star) {
                    if (findBoldText(rest)) |bold_info| {
                        try result.appendSlice("<strong>");
                        try appendEscapedHtml(result, bold_info.text);
                        try result.appendSlice("</strong>");
                        i += bold_info.total_len;
                        continue;
                    }
                } else if (i == 0 or text[i - 1] != '*') {
                    // A lone '*' that is not the tail of an unmatched "**".
                    if (findItalicText(rest)) |italic_info| {
                        try result.appendSlice("<em>");
                        try appendEscapedHtml(result, italic_info.text);
                        try result.appendSlice("</em>");
                        i += italic_info.total_len;
                        continue;
                    }
                }
            },
            '`' => if (findInlineCode(rest)) |code_info| {
                try result.appendSlice("<code>");
                try appendEscapedHtml(result, code_info.text);
                try result.appendSlice("</code>");
                i += code_info.total_len;
                continue;
            },
            else => {},
        }

        // No construct matched here: emit this one byte, escaped.
        try appendEscapedHtml(result, text[i .. i + 1]);
        i += 1;
    }
}
|
||||||
|
|
||||||
|
/// Append `text` to `result` with the five HTML special characters replaced
/// by entities: `<` `>` `&` `"` `'` become `&lt;` `&gt;` `&amp;` `&quot;` `&apos;`.
/// All other bytes are copied unchanged. Fails only if `result` cannot grow.
fn appendEscapedHtml(result: *ArrayList(u8), text: []const u8) !void {
    var rest = text;
    // Copy each run of ordinary bytes in one go, then the entity for the
    // special character that ended the run.
    while (std.mem.indexOfAny(u8, rest, "<>&\"'")) |pos| {
        try result.appendSlice(rest[0..pos]);
        const entity: []const u8 = switch (rest[pos]) {
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            '"' => "&quot;",
            '\'' => "&apos;",
            else => unreachable, // indexOfAny only stops on the five bytes above
        };
        try result.appendSlice(entity);
        rest = rest[pos + 1 ..];
    }
    try result.appendSlice(rest);
}
|
||||||
|
|
||||||
|
/// A Markdown link located by `findMarkdownLink`. Both slices point into the
/// text that was searched.
const LinkInfo = struct {
    /// Link label: the bytes between `[` and `]`.
    text: []const u8,
    /// Link target: the bytes between `(` and `)`.
    url: []const u8,
    /// Length of the whole `[text](url)` construct, in bytes.
    total_len: usize,
};

/// Match a `[text](url)` link at the very start of `text`.
///
/// The label ends at the first `]`, which must be followed immediately by
/// `(`; the URL ends at the first `)` after that. Returns null when `text`
/// does not start with such a construct.
fn findMarkdownLink(text: []const u8) ?LinkInfo {
    if (text.len < 4) return null;
    if (text[0] != '[') return null;

    const label_end = std.mem.indexOfScalarPos(u8, text, 1, ']') orelse return null;

    const url_start = label_end + 2;
    if (url_start > text.len or text[label_end + 1] != '(') return null;

    const url_end = std.mem.indexOfScalarPos(u8, text, url_start, ')') orelse return null;

    return .{
        .text = text[1..label_end],
        .url = text[url_start..url_end],
        .total_len = url_end + 1,
    };
}
|
||||||
|
|
||||||
|
/// A delimited inline span located by one of the `find*` helpers below.
const TextInfo = struct {
    /// Bytes between the opening and closing delimiter (points into the searched text).
    text: []const u8,
    /// Length of the whole construct including both delimiters, in bytes.
    total_len: usize,
};

/// Match `<delimiter>content<delimiter>` at the very start of `text`, ending
/// at the first later occurrence of `delimiter`. Returns null when `text`
/// does not start with `delimiter` or no closing delimiter follows.
fn findDelimited(text: []const u8, delimiter: []const u8) ?TextInfo {
    if (!std.mem.startsWith(u8, text, delimiter)) return null;
    const close = std.mem.indexOfPos(u8, text, delimiter.len, delimiter) orelse return null;
    return .{
        .text = text[delimiter.len..close],
        .total_len = close + delimiter.len,
    };
}

/// True when `content` can be the body of an emphasis span: non-empty and not
/// starting with whitespace. This keeps `2 * 3 * 4` or a bare `****` from
/// being rendered as emphasis.
fn isEmphasisContent(content: []const u8) bool {
    return content.len > 0 and !std.ascii.isWhitespace(content[0]);
}

/// Find bold text `**text**` at the start of `text`.
/// Returns null if there is no closing `**`, or if the content is empty or
/// begins with whitespace.
fn findBoldText(text: []const u8) ?TextInfo {
    const info = findDelimited(text, "**") orelse return null;
    return if (isEmphasisContent(info.text)) info else null;
}

/// Find italic text `*text*` at the start of `text`.
/// Returns null if there is no closing `*`, or if the content is empty or
/// begins with whitespace.
fn findItalicText(text: []const u8) ?TextInfo {
    const info = findDelimited(text, "*") orelse return null;
    return if (isEmphasisContent(info.text)) info else null;
}

/// Find inline code `` `text` `` at the start of `text`.
/// Returns null if `text` is shorter than three bytes or has no closing backtick.
fn findInlineCode(text: []const u8) ?TextInfo {
    if (text.len < 3) return null;
    return findDelimited(text, "`");
}
|
||||||
|
|
||||||
|
/// Check if text contains complex markdown patterns we don't handle
|
||||||
|
fn hasComplexMarkdown(text: []const u8) bool {
|
||||||
|
// Code blocks
|
||||||
|
if (std.mem.indexOf(u8, text, "```") != null) return true;
|
||||||
|
|
||||||
|
// Tables
|
||||||
|
if (std.mem.indexOf(u8, text, "|") != null) return true;
|
||||||
|
|
||||||
|
// Images
|
||||||
|
if (std.mem.indexOf(u8, text, " for more info.";
|
||||||
|
const result = try convertMarkdownToHtml(allocator, markdown);
|
||||||
|
defer result.deinit(allocator);
|
||||||
|
|
||||||
|
const expected = "<p>Check out <a href=\"https://github.com\">GitHub</a> for more info.</p>\n";
|
||||||
|
try testing.expectEqualStrings(expected, result.html);
|
||||||
|
try testing.expect(!result.has_fallback);
|
||||||
|
|
||||||
|
if (std.process.hasEnvVar(allocator, "test-debug") catch false) {
|
||||||
|
std.debug.print("Links test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "convert bold and italic" {
    const gpa = testing.allocator;
    const input = "This is **bold** and this is *italic* text.";

    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    // Both emphasis forms are rendered natively, so no fallback is expected.
    try testing.expectEqualStrings(
        "<p>This is <strong>bold</strong> and this is <em>italic</em> text.</p>\n",
        converted.html,
    );
    try testing.expect(!converted.has_fallback);

    // Opt-in diagnostics: printed only when the "test-debug" env var is set.
    const debug = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug) {
        std.debug.print("Bold/Italic test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
|
||||||
|
|
||||||
|
test "convert inline code" {
    const gpa = testing.allocator;
    const input = "Use the `git commit` command to save changes.";

    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    // A backtick pair becomes <code>; the rest of the line stays a paragraph.
    try testing.expectEqualStrings(
        "<p>Use the <code>git commit</code> command to save changes.</p>\n",
        converted.html,
    );
    try testing.expect(!converted.has_fallback);

    // Opt-in diagnostics: printed only when the "test-debug" env var is set.
    const debug = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug) {
        std.debug.print("Inline code test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
|
||||||
|
|
||||||
|
test "fallback for complex markdown" {
    const gpa = testing.allocator;

    // A fenced code block followed by a table: neither is rendered natively.
    const input = "```javascript\nconst x = 1;\n```\n\n| Column 1 | Column 2 |\n|----------|----------|\n| Data | More |";

    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    try testing.expect(converted.has_fallback);
    const pre_index = std.mem.indexOf(u8, converted.html, "<pre>");
    try testing.expect(pre_index != null);

    // Opt-in diagnostics: printed only when the "test-debug" env var is set.
    const debug = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug) {
        std.debug.print("Fallback test - Input: {s}\nOutput: {s}\nHas fallback: {}\n", .{ input, converted.html, converted.has_fallback });
    }
}
|
||||||
|
|
||||||
|
test "real release note example" {
    const allocator = testing.allocator;

    // Example taken from actual release notes in the feed.
    const markdown =
        \\## What's Changed
        \\
        \\* Not generating undo records for insertions into tables created by the same transaction (performance)
        \\* Fastpath intra-page navigation in B-tree (performance)
        \\* OrioleDB database cluster rewind (experimental feature)
        \\* Support of tablespaces
        \\* Support of more than 32 columns for Oriole table
        \\* Fallback to simple reindex instead of concurrent (sql syntax compatibility)
        \\
        \\**Full Changelog**: https://github.com/orioledb/orioledb/compare/beta11...beta12
    ;

    const result = try convertMarkdownToHtml(allocator, markdown);
    defer result.deinit(allocator);

    // Heading text is HTML-escaped, so the apostrophe appears as an entity.
    try testing.expect(std.mem.indexOf(u8, result.html, "<h2>What&apos;s Changed</h2>") != null);
    try testing.expect(std.mem.indexOf(u8, result.html, "<ul>") != null);
    try testing.expect(std.mem.indexOf(u8, result.html, "<li>Not generating undo records") != null);
    try testing.expect(std.mem.indexOf(u8, result.html, "<strong>Full Changelog</strong>") != null);
    try testing.expect(!result.has_fallback);

    // Opt-in diagnostics: printed only when the "test-debug" env var is set.
    if (std.process.hasEnvVar(allocator, "test-debug") catch false) {
        std.debug.print("Real example test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
    }
}
|
||||||
|
|
||||||
|
test "mixed content with headers and lists" {
    const gpa = testing.allocator;

    // Second real-world shape: nested heading levels followed by a commit list.
    const input =
        \\## KraftKit v0.11.6-212-g74599361 (2025-07-13T12:45:17Z)
        \\
        \\This is the pre-release version of KraftKit.
        \\
        \\## Changelog
        \\### 🤖 Bumps
        \\* 41a6a089d3ca955711a5f5291b0ef82aa14d5792: gomod(deps): Bump github.com/charmbracelet/bubbletea from 1.3.5 to 1.3.6 (@dependabot[bot])
        \\* ef77627f58e50f5ad027ff06c8d365db57feb020: gomod(deps): Bump golang.org/x/term from 0.32.0 to 0.33.0 (@dependabot[bot])
    ;

    const converted = try convertMarkdownToHtml(gpa, input);
    defer converted.deinit(gpa);

    // Every structural element must show up in the generated HTML.
    const expected_fragments = [_][]const u8{
        "<h2>KraftKit v0.11.6-212-g74599361",
        "<h2>Changelog</h2>",
        "<h3>🤖 Bumps</h3>",
        "<ul>",
        "<li>41a6a089d3ca955711a5f5291b0ef82aa14d5792",
    };
    for (expected_fragments) |fragment| {
        try testing.expect(std.mem.indexOf(u8, converted.html, fragment) != null);
    }
    try testing.expect(!converted.has_fallback);

    // Opt-in diagnostics: printed only when the "test-debug" env var is set.
    const debug = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug) {
        std.debug.print("Mixed content test - Input: {s}\nOutput: {s}\n", .{ input, converted.html });
    }
}
|
||||||
|
|
||||||
|
test "html escaping" {
|
||||||
|
const allocator = testing.allocator;
|
||||||
|
|
||||||
|
const markdown = "## Test <script>alert('xss')</script> & \"quotes\"";
|
||||||
|
const result = try convertMarkdownToHtml(allocator, markdown);
|
||||||
|
defer result.deinit(allocator);
|
||||||
|
|
||||||
|
const expected = "<h2>Test &lt;script&gt;alert('xss')&lt;/script&gt; &amp; &quot;quotes&quot;</h2>\n";
|
||||||
|
try testing.expectEqualStrings(expected, result.html);
|
||||||
|
try testing.expect(!result.has_fallback);
|
||||||
|
|
||||||
|
if (std.process.hasEnvVar(allocator, "test-debug") catch false) {
|
||||||
|
std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
|
||||||
|
}
|
||||||
|
}
|
|
@ -86,8 +86,11 @@ fn parseEntry(allocator: Allocator, entry_xml: []const u8) !Release {
|
||||||
release.published_at = updated;
|
release.published_at = updated;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse summary (description)
|
// Parse content (description) - try content first, then fall back to summary
|
||||||
if (extractTagContent(entry_xml, "summary", allocator)) |summary| {
|
if (extractTagContent(entry_xml, "content", allocator)) |content| {
|
||||||
|
allocator.free(release.description);
|
||||||
|
release.description = content;
|
||||||
|
} else if (extractTagContent(entry_xml, "summary", allocator)) |summary| {
|
||||||
allocator.free(release.description);
|
allocator.free(release.description);
|
||||||
release.description = summary;
|
release.description = summary;
|
||||||
}
|
}
|
||||||
|
@ -115,6 +118,23 @@ fn extractTagContent(xml: []const u8, tag_name: []const u8, allocator: Allocator
|
||||||
return unescapeXml(allocator, content) catch null;
|
return unescapeXml(allocator, content) catch null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Also try with attributes (e.g., <content type="html">)
|
||||||
|
const open_tag_with_attrs = std.fmt.allocPrint(allocator, "<{s} ", .{tag_name}) catch return null;
|
||||||
|
defer allocator.free(open_tag_with_attrs);
|
||||||
|
|
||||||
|
if (std.mem.indexOf(u8, xml, open_tag_with_attrs)) |start_pos| {
|
||||||
|
// Find the end of the opening tag
|
||||||
|
if (std.mem.indexOf(u8, xml[start_pos..], ">")) |tag_end_offset| {
|
||||||
|
const content_start = start_pos + tag_end_offset + 1;
|
||||||
|
if (std.mem.indexOf(u8, xml[content_start..], close_tag)) |end_offset| {
|
||||||
|
const content_end = content_start + end_offset;
|
||||||
|
const content = xml[content_start..content_end];
|
||||||
|
return unescapeXml(allocator, content) catch null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,14 +48,14 @@ test "round trip: generate atom feed and parse it back" {
|
||||||
try testing.expectEqualStrings("v1.0.0", parsed_releases.items[0].tag_name);
|
try testing.expectEqualStrings("v1.0.0", parsed_releases.items[0].tag_name);
|
||||||
try testing.expectEqualStrings("2024-01-01T00:00:00Z", parsed_releases.items[0].published_at);
|
try testing.expectEqualStrings("2024-01-01T00:00:00Z", parsed_releases.items[0].published_at);
|
||||||
try testing.expectEqualStrings("https://github.com/test/repo1/releases/tag/v1.0.0", parsed_releases.items[0].html_url);
|
try testing.expectEqualStrings("https://github.com/test/repo1/releases/tag/v1.0.0", parsed_releases.items[0].html_url);
|
||||||
try testing.expectEqualStrings("First release", parsed_releases.items[0].description);
|
try testing.expectEqualStrings("<p>First release</p>\n", parsed_releases.items[0].description);
|
||||||
try testing.expectEqualStrings("github", parsed_releases.items[0].provider);
|
try testing.expectEqualStrings("github", parsed_releases.items[0].provider);
|
||||||
|
|
||||||
try testing.expectEqualStrings("test/repo2", parsed_releases.items[1].repo_name);
|
try testing.expectEqualStrings("test/repo2", parsed_releases.items[1].repo_name);
|
||||||
try testing.expectEqualStrings("v2.0.0", parsed_releases.items[1].tag_name);
|
try testing.expectEqualStrings("v2.0.0", parsed_releases.items[1].tag_name);
|
||||||
try testing.expectEqualStrings("2024-01-02T00:00:00Z", parsed_releases.items[1].published_at);
|
try testing.expectEqualStrings("2024-01-02T00:00:00Z", parsed_releases.items[1].published_at);
|
||||||
try testing.expectEqualStrings("https://github.com/test/repo2/releases/tag/v2.0.0", parsed_releases.items[1].html_url);
|
try testing.expectEqualStrings("https://github.com/test/repo2/releases/tag/v2.0.0", parsed_releases.items[1].html_url);
|
||||||
try testing.expectEqualStrings("Second release", parsed_releases.items[1].description);
|
try testing.expectEqualStrings("<p>Second release</p>\n", parsed_releases.items[1].description);
|
||||||
try testing.expectEqualStrings("github", parsed_releases.items[1].provider);
|
try testing.expectEqualStrings("github", parsed_releases.items[1].provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,10 +78,11 @@ test "parse atom feed with special characters" {
|
||||||
const atom_content = try atom.generateFeed(allocator, &original_releases);
|
const atom_content = try atom.generateFeed(allocator, &original_releases);
|
||||||
defer allocator.free(atom_content);
|
defer allocator.free(atom_content);
|
||||||
|
|
||||||
// Verify the XML contains escaped characters
|
// Verify the XML contains escaped characters in the title (not in content)
|
||||||
try testing.expect(std.mem.indexOf(u8, atom_content, "&lt;script&gt;") != null);
|
try testing.expect(std.mem.indexOf(u8, atom_content, "&lt;script&gt;") != null);
|
||||||
try testing.expect(std.mem.indexOf(u8, atom_content, "&amp; more") != null);
|
try testing.expect(std.mem.indexOf(u8, atom_content, "&amp; more") != null);
|
||||||
try testing.expect(std.mem.indexOf(u8, atom_content, "&quot;release&quot;") != null);
|
// The content will be XML-escaped HTML, so quotes in HTML will be &amp;quot;
|
||||||
|
try testing.expect(std.mem.indexOf(u8, atom_content, "&amp;quot;release&amp;quot;") != null);
|
||||||
|
|
||||||
// Parse it back (this should unescape the characters)
|
// Parse it back (this should unescape the characters)
|
||||||
var parsed_releases = try xml_parser.parseAtomFeed(allocator, atom_content);
|
var parsed_releases = try xml_parser.parseAtomFeed(allocator, atom_content);
|
||||||
|
@ -96,7 +97,7 @@ test "parse atom feed with special characters" {
|
||||||
try testing.expectEqual(@as(usize, 1), parsed_releases.items.len);
|
try testing.expectEqual(@as(usize, 1), parsed_releases.items.len);
|
||||||
try testing.expectEqualStrings("test/repo<script>", parsed_releases.items[0].repo_name);
|
try testing.expectEqualStrings("test/repo<script>", parsed_releases.items[0].repo_name);
|
||||||
try testing.expectEqualStrings("v1.0.0 & more", parsed_releases.items[0].tag_name);
|
try testing.expectEqualStrings("v1.0.0 & more", parsed_releases.items[0].tag_name);
|
||||||
try testing.expectEqualStrings("Test \"release\" with <special> chars & symbols", parsed_releases.items[0].description);
|
try testing.expectEqualStrings("<pre>Test &quot;release&quot; with &lt;special&gt; chars &amp; symbols</pre>\n", parsed_releases.items[0].description);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "parse malformed atom feed gracefully" {
|
test "parse malformed atom feed gracefully" {
|
||||||
|
|
Loading…
Add table
Reference in a new issue