html passthrough in markdown
All checks were successful
Build and Release / build (push) Successful in 2m13s
Build and Release / sign (push) Successful in 1m29s

This commit is contained in:
Emil Lerch 2025-07-20 19:46:39 -07:00
parent 6c8cdbd8ae
commit 3b243beb7e
Signed by: lobo
GPG key ID: A7B62D657EF764F8
2 changed files with 154 additions and 14 deletions

View file

@ -9,6 +9,7 @@ const GitLab = @import("providers/GitLab.zig");
const Forgejo = @import("providers/Forgejo.zig");
const SourceHut = @import("providers/SourceHut.zig");
const config = @import("config.zig");
const utils = @import("utils.zig");
fn testPrint(comptime fmt: []const u8, args: anytype) void {
if (build_options.test_debug) {
@ -24,7 +25,7 @@ test "Atom feed validates against W3C validator" {
Release{
.repo_name = "ziglang/zig",
.tag_name = "0.14.0",
.published_at = "2024-12-19T00:00:00Z",
.published_at = try utils.parseReleaseTimestamp("2024-12-19T00:00:00Z"),
.html_url = "https://github.com/ziglang/zig/releases/tag/0.14.0",
.description = "Zig 0.14.0 release with many improvements",
.provider = "github",
@ -33,7 +34,7 @@ test "Atom feed validates against W3C validator" {
Release{
.repo_name = "example/test",
.tag_name = "v1.2.3",
.published_at = "2024-12-18T12:30:00Z",
.published_at = try utils.parseReleaseTimestamp("2024-12-18T12:30:00Z"),
.html_url = "https://github.com/example/test/releases/tag/v1.2.3",
.description = "Bug fixes and performance improvements",
.provider = "github",
@ -583,21 +584,13 @@ test "SourceHut commit date fetching" {
try testing.expect(release.repo_name.len > 0);
try testing.expect(release.tag_name.len > 0);
try testing.expect(release.html_url.len > 0);
try testing.expect(release.published_at.len > 0);
try testing.expectEqualStrings("sourcehut", release.provider);
// Check if we got a real commit date vs epoch fallback
if (std.mem.eql(u8, release.published_at, "1970-01-01T00:00:00Z")) {
if (release.published_at == 0) {
epoch_dates += 1;
testPrint(" -> Using epoch fallback date\n", .{});
} else {
valid_dates += 1;
testPrint(" -> Got real commit date\n", .{});
// Verify the date format looks reasonable (should be ISO 8601)
try testing.expect(release.published_at.len >= 19); // At least YYYY-MM-DDTHH:MM:SS
try testing.expect(std.mem.indexOf(u8, release.published_at, "T") != null);
}
} else valid_dates += 1;
}
testPrint("SourceHut commit date summary: {} valid dates, {} epoch fallbacks\n", .{ valid_dates, epoch_dates });

View file

@ -153,6 +153,13 @@ pub fn convertMarkdownToHtml(allocator: Allocator, markdown: []const u8) !Conver
list_type = null;
}
// Check if this is a safe HTML line that can be passed through
if (isSafeHtmlLine(trimmed)) {
try result.appendSlice(trimmed);
try result.appendSlice("\n");
continue;
}
// Check for complex markdown patterns that we don't handle
if (hasComplexMarkdown(trimmed)) {
has_fallback = true;
@ -373,8 +380,122 @@ fn hasComplexMarkdown(text: []const u8) bool {
// Horizontal rules
if (std.mem.eql(u8, text, "---") or std.mem.eql(u8, text, "***")) return true;
// HTML tags (already HTML, might be complex)
if (std.mem.indexOf(u8, text, "<") != null and std.mem.indexOf(u8, text, ">") != null) return true;
// Only treat as complex HTML if it contains potentially dangerous tags
if (containsDangerousHtml(text)) return true;
return false;
}
/// Decide whether a whole line is benign HTML that the converter may emit as-is.
///
/// Returns true when the line, with surrounding spaces and tabs ignored, is one of a
/// fixed set of standalone tags, or when `isSafeHtmlWithContent` accepts it.
fn isSafeHtmlLine(text: []const u8) bool {
    // A line lacking either angle bracket cannot hold an HTML tag.
    const has_open_bracket = std.mem.indexOf(u8, text, "<") != null;
    const has_close_bracket = std.mem.indexOf(u8, text, ">") != null;
    if (!(has_open_bracket and has_close_bracket)) return false;

    const candidate = std.mem.trim(u8, text, " \t");

    // Tags accepted only when they make up the entire line.
    const standalone_tags = [_][]const u8{
        "<details>",
        "</details>",
        "<summary>",
        "</summary>",
        "<br>",
        "<br/>",
        "<br />",
    };
    const is_standalone_tag = for (standalone_tags) |tag| {
        if (std.mem.eql(u8, candidate, tag)) break true;
    } else false;

    // Otherwise fall back to the `<tag>content</tag>` form.
    return is_standalone_tag or isSafeHtmlWithContent(candidate);
}
/// Report whether `text` is a single safe element of the form `<tag>content</tag>`.
///
/// Requirements for a `true` result:
///   * the opening tag is exactly one of the allow-listed names (case-sensitive,
///     no attributes);
///   * `text` ends with the matching closing tag;
///   * every `<...>` sequence inside the content is itself an attribute-free
///     allow-listed opening/closing tag or a `<br>` variant.
///
/// The last rule keeps input such as `<b><script>alert(1)</script></b>` from being
/// classified as safe. Nesting balance of the inner tags is not checked. A bare `<`
/// in the content that never reaches a `>` is rejected; a bare `>` is accepted.
/// Does not allocate.
fn isSafeHtmlWithContent(text: []const u8) bool {
    // Tags that may wrap content.
    const safe_content_tags = [_][]const u8{
        "summary",
        "code",
        "em",
        "strong",
        "b",
        "i",
    };
    // Self-contained line-break spellings additionally tolerated inside the content.
    const safe_void_tags = [_][]const u8{ "br", "br/", "br /" };

    // Shortest accepted input is an empty one-letter element such as `<b></b>`.
    if (text.len < 7) return false;
    if (text[0] != '<') return false;

    // Everything between the first '<' and the first '>' is the opening tag name.
    const open_end = std.mem.indexOfScalar(u8, text, '>') orelse return false;
    const tag_name = text[1..open_end];

    const is_safe_tag = for (safe_content_tags) |safe_tag| {
        if (std.mem.eql(u8, tag_name, safe_tag)) break true;
    } else false;
    if (!is_safe_tag) return false;

    // Compare the tail against `</tag_name>` in place instead of formatting a
    // temporary string on the heap.
    const closing_len = tag_name.len + 3; // '<' + '/' + name + '>'
    if (text.len < open_end + 1 + closing_len) return false;
    const close_start = text.len - closing_len;
    const closing = text[close_start..];
    if (!std.mem.startsWith(u8, closing, "</")) return false;
    if (closing[closing.len - 1] != '>') return false;
    if (!std.mem.eql(u8, closing[2 .. closing.len - 1], tag_name)) return false;

    // Walk the content and vet every nested tag.
    var rest = text[open_end + 1 .. close_start];
    while (std.mem.indexOfScalar(u8, rest, '<')) |lt| {
        // An unterminated '<' could swallow whatever follows; refuse it.
        const gt = std.mem.indexOfScalarPos(u8, rest, lt + 1, '>') orelse return false;
        const inner = rest[lt + 1 .. gt];

        const allowed = blk: {
            if (std.mem.startsWith(u8, inner, "/")) {
                // Closing tag: only content tags have one.
                for (safe_content_tags) |safe_tag| {
                    if (std.mem.eql(u8, inner[1..], safe_tag)) break :blk true;
                }
                break :blk false;
            }
            for (safe_content_tags) |safe_tag| {
                if (std.mem.eql(u8, inner, safe_tag)) break :blk true;
            }
            for (safe_void_tags) |void_tag| {
                if (std.mem.eql(u8, inner, void_tag)) break :blk true;
            }
            break :blk false;
        };
        if (!allowed) return false;

        rest = rest[gt + 1 ..];
    }

    return true;
}
/// Report whether `text` contains an opening tag whose name is on the deny-list.
///
/// Tag names are compared case-insensitively. A name starts right after a `<` and
/// ends at the first ASCII whitespace character, `>`, `/`, or another `<`. Ending the
/// name at any whitespace (not just a space) and at `<` means inputs such as
/// `<script\tsrc=x>` and `<<script>` are still recognised. Closing tags (`</name>`)
/// yield an empty name and never match. Text lacking either `<` or `>` is never
/// considered dangerous.
fn containsDangerousHtml(text: []const u8) bool {
    // Without both brackets there is no complete tag to inspect.
    if (std.mem.indexOfScalar(u8, text, '<') == null or std.mem.indexOfScalar(u8, text, '>') == null) {
        return false;
    }

    // Tag names that should trigger the fallback.
    const dangerous_patterns = [_][]const u8{ "script", "iframe", "object", "embed", "form", "input", "button", "select", "textarea", "style", "link", "meta" };

    var search_from: usize = 0;
    while (std.mem.indexOfScalarPos(u8, text, search_from, '<')) |lt| {
        const tag_start = lt + 1;

        // Advance to the delimiter that ends the tag name.
        var tag_end = tag_start;
        while (tag_end < text.len) : (tag_end += 1) {
            const c = text[tag_end];
            if (c == '>' or c == '/' or c == '<' or std.ascii.isWhitespace(c)) break;
        }

        const tag_name = text[tag_start..tag_end];
        for (dangerous_patterns) |dangerous| {
            if (std.ascii.eqlIgnoreCase(tag_name, dangerous)) return true;
        }

        // Resume at the delimiter so a '<' that ended this name is examined next.
        search_from = tag_end;
    }

    return false;
}
@ -612,3 +733,29 @@ test "html escaping" {
std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
}
}
test "safe HTML passthrough" {
    const allocator = testing.allocator;

    // A details/summary section should show up in the output without fallback.
    const details_markdown = "<details>\n<summary>Click to expand</summary>\nContent here\n</details>";
    const details_result = try convertMarkdownToHtml(allocator, details_markdown);
    defer details_result.deinit(allocator);

    const expected_tags = [_][]const u8{ "<details>", "<summary>", "</details>" };
    for (expected_tags) |tag| {
        try testing.expect(std.mem.indexOf(u8, details_result.html, tag) != null);
    }
    try testing.expect(!details_result.has_fallback);

    // A script element must still send the converter down its fallback path,
    // whose output is expected to contain a <pre> element.
    const script_markdown = "<script>alert('xss')</script>";
    const script_result = try convertMarkdownToHtml(allocator, script_markdown);
    defer script_result.deinit(allocator);

    try testing.expect(script_result.has_fallback);
    try testing.expect(std.mem.indexOf(u8, script_result.html, "<pre>") != null);

    // Optional diagnostics, enabled through the "test-debug" environment variable.
    const debug_enabled = std.process.hasEnvVar(allocator, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Safe HTML test - Input: {s}\nOutput: {s}\nHas fallback: {}\n", .{
            details_markdown,
            details_result.html,
            details_result.has_fallback,
        });
    }
}