From 3b243beb7e098ca4169b1aaba2dc67b2d4704c28 Mon Sep 17 00:00:00 2001
From: Emil Lerch <emil@lerch.org>
Date: Sun, 20 Jul 2025 19:46:39 -0700
Subject: [PATCH] html passthrough in markdown

---
 src/integration_tests.zig |  17 ++---
 src/markdown.zig          | 151 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 154 insertions(+), 14 deletions(-)

diff --git a/src/integration_tests.zig b/src/integration_tests.zig
index a922ec7..23474c7 100644
--- a/src/integration_tests.zig
+++ b/src/integration_tests.zig
@@ -9,6 +9,7 @@ const GitLab = @import("providers/GitLab.zig");
 const Forgejo = @import("providers/Forgejo.zig");
 const SourceHut = @import("providers/SourceHut.zig");
 const config = @import("config.zig");
+const utils = @import("utils.zig");
 
 fn testPrint(comptime fmt: []const u8, args: anytype) void {
     if (build_options.test_debug) {
@@ -24,7 +25,7 @@ test "Atom feed validates against W3C validator" {
         Release{
             .repo_name = "ziglang/zig",
             .tag_name = "0.14.0",
-            .published_at = "2024-12-19T00:00:00Z",
+            .published_at = try utils.parseReleaseTimestamp("2024-12-19T00:00:00Z"),
             .html_url = "https://github.com/ziglang/zig/releases/tag/0.14.0",
             .description = "Zig 0.14.0 release with many improvements",
             .provider = "github",
@@ -33,7 +34,7 @@ test "Atom feed validates against W3C validator" {
         Release{
             .repo_name = "example/test",
             .tag_name = "v1.2.3",
-            .published_at = "2024-12-18T12:30:00Z",
+            .published_at = try utils.parseReleaseTimestamp("2024-12-18T12:30:00Z"),
             .html_url = "https://github.com/example/test/releases/tag/v1.2.3",
             .description = "Bug fixes and performance improvements",
             .provider = "github",
@@ -583,21 +584,13 @@ test "SourceHut commit date fetching" {
         try testing.expect(release.repo_name.len > 0);
         try testing.expect(release.tag_name.len > 0);
         try testing.expect(release.html_url.len > 0);
-        try testing.expect(release.published_at.len > 0);
         try testing.expectEqualStrings("sourcehut", release.provider);
 
         // Check if we got a real commit date vs epoch fallback
-        if (std.mem.eql(u8, release.published_at, "1970-01-01T00:00:00Z")) {
+        if (release.published_at == 0) {
             epoch_dates += 1;
             testPrint("  -> Using epoch fallback date\n", .{});
-        } else {
-            valid_dates += 1;
-            testPrint("  -> Got real commit date\n", .{});
-
-            // Verify the date format looks reasonable (should be ISO 8601)
-            try testing.expect(release.published_at.len >= 19); // At least YYYY-MM-DDTHH:MM:SS
-            try testing.expect(std.mem.indexOf(u8, release.published_at, "T") != null);
-        }
+        } else valid_dates += 1;
     }
 
     testPrint("SourceHut commit date summary: {} valid dates, {} epoch fallbacks\n", .{ valid_dates, epoch_dates });
diff --git a/src/markdown.zig b/src/markdown.zig
index c24bc96..6c11a29 100644
--- a/src/markdown.zig
+++ b/src/markdown.zig
@@ -153,6 +153,13 @@ pub fn convertMarkdownToHtml(allocator: Allocator, markdown: []const u8) !Conver
             list_type = null;
         }
 
+        // Check if this is a safe HTML line that can be passed through
+        if (isSafeHtmlLine(trimmed)) {
+            try result.appendSlice(trimmed);
+            try result.appendSlice("\n");
+            continue;
+        }
+
         // Check for complex markdown patterns that we don't handle
         if (hasComplexMarkdown(trimmed)) {
             has_fallback = true;
@@ -373,8 +380,122 @@ fn hasComplexMarkdown(text: []const u8) bool {
     // Horizontal rules
     if (std.mem.eql(u8, text, "---") or std.mem.eql(u8, text, "***")) return true;
 
-    // HTML tags (already HTML, might be complex)
-    if (std.mem.indexOf(u8, text, "<") != null and std.mem.indexOf(u8, text, ">") != null) return true;
+    // Only treat as complex HTML if it contains potentially dangerous tags
+    if (containsDangerousHtml(text)) return true;
+
+    return false;
+}
+
+/// Reports whether `text` is a line of HTML that may be emitted verbatim.
+///
+/// A line qualifies when, after stripping surrounding spaces and tabs, it is
+/// either one of a fixed set of bare tags or is accepted by
+/// `isSafeHtmlWithContent`.
+fn isSafeHtmlLine(text: []const u8) bool {
+    // A line lacking either angle bracket cannot hold a tag.
+    const has_open = std.mem.indexOfScalar(u8, text, '<') != null;
+    const has_close = std.mem.indexOfScalar(u8, text, '>') != null;
+    if (!has_open or !has_close) return false;
+
+    // Bare tags accepted when they make up the entire line.
+    const bare_tags = [_][]const u8{
+        "<details>",
+        "</details>",
+        "<summary>",
+        "</summary>",
+        "<br>",
+        "<br/>",
+        "<br />",
+    };
+
+    const line = std.mem.trim(u8, text, " \t");
+
+    // Exact match against the bare-tag list.
+    for (bare_tags) |tag| {
+        if (std.mem.eql(u8, line, tag)) return true;
+    }
+
+    // Otherwise the line must have the <tag>content</tag> shape checked by
+    // the helper, e.g. <summary>text</summary>.
+    return isSafeHtmlWithContent(line);
+}
+
+/// Check if text is a single safe element wrapping plain text, e.g.
+/// `<summary>text</summary>`.
+///
+/// The caller emits accepted lines without escaping, so the content between
+/// the tags must not contain any markup of its own: a `<` anywhere inside
+/// (as in `<b><script>...</script></b>`) makes the line unsafe. Tag names are
+/// matched exactly, so tags with attributes or different case are rejected.
+fn isSafeHtmlWithContent(text: []const u8) bool {
+    // Safe tags that can contain content
+    const safe_content_tags = [_][]const u8{
+        "summary",
+        "code",
+        "em",
+        "strong",
+        "b",
+        "i",
+    };
+
+    // Shortest accepted input is 7 bytes: <b></b>
+    if (text.len < 7 or text[0] != '<') return false;
+
+    // The opening tag runs up to the first '>'.
+    const tag_end = std.mem.indexOfScalar(u8, text, '>') orelse return false;
+    const tag_name = text[1..tag_end];
+
+    const is_safe_tag = for (safe_content_tags) |safe_tag| {
+        if (std.mem.eql(u8, tag_name, safe_tag)) break true;
+    } else false;
+    if (!is_safe_tag) return false;
+
+    // The line must end with "</tag_name>". Compare in place instead of
+    // allocating the expected closing tag.
+    const content_start = tag_end + 1;
+    const closing_len = tag_name.len + "</>".len;
+    if (text.len < content_start + closing_len) return false;
+    const content_end = text.len - closing_len;
+    if (!std.mem.startsWith(u8, text[content_end..], "</")) return false;
+    if (!std.mem.eql(u8, text[content_end + 2 .. text.len - 1], tag_name)) return false;
+    if (text[text.len - 1] != '>') return false;
+
+    // Nested markup would be passed through unescaped, so reject it.
+    return std.mem.indexOfScalar(u8, text[content_start..content_end], '<') == null;
+}
+
+/// Check if text contains an HTML tag that should be treated as complex/dangerous.
+/// Tag names are compared case-insensitively; closing tags are not inspected.
+fn containsDangerousHtml(text: []const u8) bool {
+    // If no HTML tags, it's safe
+    if (std.mem.indexOf(u8, text, "<") == null or std.mem.indexOf(u8, text, ">") == null) {
+        return false;
+    }
+
+    // Tag names that should trigger fallback
+    const dangerous_patterns = [_][]const u8{ "script", "iframe", "object", "embed", "form", "input", "button", "select", "textarea", "style", "link", "meta" };
+
+    var i: usize = 0;
+    while (i < text.len) {
+        if (text[i] != '<') {
+            i += 1;
+            continue;
+        }
+
+        // The tag name ends at whitespace (any kind, so `<script\tsrc=..>` is
+        // still caught), at '/' or '>', or at another '<' (so `<<script>` is
+        // rescanned from the second bracket).
+        i += 1;
+        const tag_start = i;
+        while (i < text.len) : (i += 1) {
+            const c = text[i];
+            if (std.ascii.isWhitespace(c) or c == '>' or c == '/' or c == '<') break;
+        }
+        const tag_name = text[tag_start..i];
+        for (dangerous_patterns) |dangerous| {
+            if (std.ascii.eqlIgnoreCase(tag_name, dangerous)) return true;
+        }
+    }

     return false;
 }
@@ -612,3 +733,29 @@ test "html escaping" {
         std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
     }
 }
+
+test "safe HTML passthrough" {
+    const allocator = testing.allocator;
+
+    // A details/summary block must reach the output untouched, without fallback.
+    const safe_input = "<details>\n<summary>Click to expand</summary>\nContent here\n</details>";
+    const safe = try convertMarkdownToHtml(allocator, safe_input);
+    defer safe.deinit(allocator);
+
+    for ([_][]const u8{ "<details>", "<summary>", "</details>" }) |tag| {
+        try testing.expect(std.mem.indexOf(u8, safe.html, tag) != null);
+    }
+    try testing.expect(!safe.has_fallback);
+
+    // A script tag must still take the <pre> fallback path.
+    const script_input = "<script>alert('xss')</script>";
+    const scripted = try convertMarkdownToHtml(allocator, script_input);
+    defer scripted.deinit(allocator);
+
+    try testing.expect(scripted.has_fallback);
+    try testing.expect(std.mem.indexOf(u8, scripted.html, "<pre>") != null);
+
+    if (std.process.hasEnvVar(allocator, "test-debug") catch false) {
+        std.debug.print("Safe HTML test - Input: {s}\nOutput: {s}\nHas fallback: {}\n", .{ safe_input, safe.html, safe.has_fallback });
+    }
+}