html passthrough in markdown
This commit is contained in:
parent
6c8cdbd8ae
commit
3b243beb7e
2 changed files with 154 additions and 14 deletions
|
@ -9,6 +9,7 @@ const GitLab = @import("providers/GitLab.zig");
|
|||
const Forgejo = @import("providers/Forgejo.zig");
|
||||
const SourceHut = @import("providers/SourceHut.zig");
|
||||
const config = @import("config.zig");
|
||||
const utils = @import("utils.zig");
|
||||
|
||||
fn testPrint(comptime fmt: []const u8, args: anytype) void {
|
||||
if (build_options.test_debug) {
|
||||
|
@ -24,7 +25,7 @@ test "Atom feed validates against W3C validator" {
|
|||
Release{
|
||||
.repo_name = "ziglang/zig",
|
||||
.tag_name = "0.14.0",
|
||||
.published_at = "2024-12-19T00:00:00Z",
|
||||
.published_at = try utils.parseReleaseTimestamp("2024-12-19T00:00:00Z"),
|
||||
.html_url = "https://github.com/ziglang/zig/releases/tag/0.14.0",
|
||||
.description = "Zig 0.14.0 release with many improvements",
|
||||
.provider = "github",
|
||||
|
@ -33,7 +34,7 @@ test "Atom feed validates against W3C validator" {
|
|||
Release{
|
||||
.repo_name = "example/test",
|
||||
.tag_name = "v1.2.3",
|
||||
.published_at = "2024-12-18T12:30:00Z",
|
||||
.published_at = try utils.parseReleaseTimestamp("2024-12-18T12:30:00Z"),
|
||||
.html_url = "https://github.com/example/test/releases/tag/v1.2.3",
|
||||
.description = "Bug fixes and performance improvements",
|
||||
.provider = "github",
|
||||
|
@ -583,21 +584,13 @@ test "SourceHut commit date fetching" {
|
|||
try testing.expect(release.repo_name.len > 0);
|
||||
try testing.expect(release.tag_name.len > 0);
|
||||
try testing.expect(release.html_url.len > 0);
|
||||
try testing.expect(release.published_at.len > 0);
|
||||
try testing.expectEqualStrings("sourcehut", release.provider);
|
||||
|
||||
// Check if we got a real commit date vs epoch fallback
|
||||
if (std.mem.eql(u8, release.published_at, "1970-01-01T00:00:00Z")) {
|
||||
if (release.published_at == 0) {
|
||||
epoch_dates += 1;
|
||||
testPrint(" -> Using epoch fallback date\n", .{});
|
||||
} else {
|
||||
valid_dates += 1;
|
||||
testPrint(" -> Got real commit date\n", .{});
|
||||
|
||||
// Verify the date format looks reasonable (should be ISO 8601)
|
||||
try testing.expect(release.published_at.len >= 19); // At least YYYY-MM-DDTHH:MM:SS
|
||||
try testing.expect(std.mem.indexOf(u8, release.published_at, "T") != null);
|
||||
}
|
||||
} else valid_dates += 1;
|
||||
}
|
||||
|
||||
testPrint("SourceHut commit date summary: {} valid dates, {} epoch fallbacks\n", .{ valid_dates, epoch_dates });
|
||||
|
|
151
src/markdown.zig
151
src/markdown.zig
|
@ -153,6 +153,13 @@ pub fn convertMarkdownToHtml(allocator: Allocator, markdown: []const u8) !Conver
|
|||
list_type = null;
|
||||
}
|
||||
|
||||
// Check if this is a safe HTML line that can be passed through
|
||||
if (isSafeHtmlLine(trimmed)) {
|
||||
try result.appendSlice(trimmed);
|
||||
try result.appendSlice("\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for complex markdown patterns that we don't handle
|
||||
if (hasComplexMarkdown(trimmed)) {
|
||||
has_fallback = true;
|
||||
|
@ -373,8 +380,122 @@ fn hasComplexMarkdown(text: []const u8) bool {
|
|||
// Horizontal rules
|
||||
if (std.mem.eql(u8, text, "---") or std.mem.eql(u8, text, "***")) return true;
|
||||
|
||||
// HTML tags (already HTML, might be complex)
|
||||
if (std.mem.indexOf(u8, text, "<") != null and std.mem.indexOf(u8, text, ">") != null) return true;
|
||||
// Only treat as complex HTML if it contains potentially dangerous tags
|
||||
if (containsDangerousHtml(text)) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Report whether `text` is a line of whitelisted HTML that may be emitted
/// verbatim.
///
/// A line qualifies when, after trimming spaces and tabs, it is exactly one of
/// a fixed set of structural tags (`<details>`, `</details>`, `<summary>`,
/// `</summary>`, `<br>`, `<br/>`, `<br />`), or when `isSafeHtmlWithContent`
/// accepts it. A line missing either `<` or `>` is never treated as HTML.
fn isSafeHtmlLine(text: []const u8) bool {
    // Cheap rejection: without both angle brackets there is no tag to inspect.
    const has_open_bracket = std.mem.indexOfScalar(u8, text, '<') != null;
    const has_close_bracket = std.mem.indexOfScalar(u8, text, '>') != null;
    if (!(has_open_bracket and has_close_bracket)) return false;

    // Tags that are accepted only when they make up the whole line.
    const passthrough_tags = [_][]const u8{
        "<details>",
        "</details>",
        "<summary>",
        "</summary>",
        "<br>",
        "<br/>",
        "<br />",
    };

    const candidate = std.mem.trim(u8, text, " \t");

    const is_bare_tag = for (passthrough_tags) |tag| {
        if (std.mem.eql(u8, candidate, tag)) break true;
    } else false;

    // Otherwise fall back to the `<tag>content</tag>` form.
    return is_bare_tag or isSafeHtmlWithContent(candidate);
}
|
||||
|
||||
/// Check whether `text` is a single whitelisted element wrapping content,
/// such as `<summary>text</summary>`.
///
/// Returns true only when all of the following hold:
/// - `text` begins with `<tag>`, where `tag` is exactly one of `summary`,
///   `code`, `em`, `strong`, `b` or `i` (case-sensitive, no attributes);
/// - `text` ends with the matching `</tag>`;
/// - every tag nested inside the content is an attribute-free opening or
///   closing form of those same tags, or a `<br>` / `<br/>` / `<br />`.
///
/// The nested-tag rule exists because lines accepted here are written to the
/// output without escaping; validating only the outer pair would let markup
/// such as `<b><script>...</script></b>` through untouched.
///
/// Performs no allocation.
fn isSafeHtmlWithContent(text: []const u8) bool {
    // Shortest acceptable input is an empty one-letter element: `<b></b>`.
    if (text.len < 7) return false;
    if (text[0] != '<') return false;

    // Everything between '<' and the first '>' must be a bare whitelisted
    // name, which also rejects attributes (`<b onclick=...>`).
    const open_end = std.mem.indexOfScalarPos(u8, text, 1, '>') orelse return false;
    const tag_name = text[1..open_end];
    if (!isPassthroughContentTag(tag_name)) return false;

    // The line must end with "</" ++ tag_name ++ ">", located by slicing
    // instead of formatting a temporary string.
    const content_start = open_end + 1;
    const closing_len = tag_name.len + 3;
    if (text.len < content_start + closing_len) return false;
    const closing_start = text.len - closing_len;
    const closing = text[closing_start..];
    if (!std.mem.startsWith(u8, closing, "</")) return false;
    if (!std.mem.eql(u8, closing[2 .. closing_len - 1], tag_name)) return false;
    if (closing[closing_len - 1] != '>') return false;

    return hasOnlyPassthroughTags(text[content_start..closing_start]);
}

/// Report whether `name` is one of the element names allowed to wrap content.
fn isPassthroughContentTag(name: []const u8) bool {
    const safe_content_tags = [_][]const u8{
        "summary",
        "code",
        "em",
        "strong",
        "b",
        "i",
    };
    for (safe_content_tags) |safe_tag| {
        if (std.mem.eql(u8, name, safe_tag)) return true;
    }
    return false;
}

/// Report whether every `<...>` sequence in `content` is an attribute-free
/// whitelisted tag (opening or closing) or a line break. A `<` with no
/// following `>` is rejected.
fn hasOnlyPassthroughTags(content: []const u8) bool {
    var pos: usize = 0;
    while (std.mem.indexOfScalarPos(u8, content, pos, '<')) |open| {
        const close = std.mem.indexOfScalarPos(u8, content, open + 1, '>') orelse return false;
        const inner = content[open + 1 .. close];

        const is_line_break = std.mem.eql(u8, inner, "br") or
            std.mem.eql(u8, inner, "br/") or
            std.mem.eql(u8, inner, "br /");
        if (!is_line_break) {
            const name = if (std.mem.startsWith(u8, inner, "/")) inner[1..] else inner;
            if (!isPassthroughContentTag(name)) return false;
        }

        pos = close + 1;
    }
    return true;
}
|
||||
|
||||
/// Report whether `text` contains an opening tag whose name is on the
/// dangerous list (`script`, `iframe`, `object`, `embed`, `form`, `input`,
/// `button`, `select`, `textarea`, `style`, `link`, `meta`).
///
/// Names are compared case-insensitively. A tag name runs from just after `<`
/// to the first ASCII whitespace, `/`, `>` or `<`, so `<script\tsrc=x>` and
/// `<<script>` are both recognised. Closing tags (`</script>`) yield an empty
/// name and are not matched on their own.
///
/// Text lacking either `<` or `>` is reported as not dangerous.
fn containsDangerousHtml(text: []const u8) bool {
    // Without both brackets there is no complete tag on this line.
    if (std.mem.indexOfScalar(u8, text, '<') == null or
        std.mem.indexOfScalar(u8, text, '>') == null)
    {
        return false;
    }

    const dangerous_patterns = [_][]const u8{ "script", "iframe", "object", "embed", "form", "input", "button", "select", "textarea", "style", "link", "meta" };

    // Characters that end a tag name. '<' is included so a stray '<' directly
    // before a real tag cannot be absorbed into that tag's name.
    const name_terminators = " \t\r\n\x0c/><";

    var pos: usize = 0;
    while (std.mem.indexOfScalarPos(u8, text, pos, '<')) |open| {
        const name_start = open + 1;
        const name_end = std.mem.indexOfAnyPos(u8, text, name_start, name_terminators) orelse text.len;
        const tag_name = text[name_start..name_end];

        for (dangerous_patterns) |dangerous| {
            if (std.ascii.eqlIgnoreCase(tag_name, dangerous)) return true;
        }

        // Resume at the terminator so a terminating '<' is examined next.
        pos = name_end;
    }

    return false;
}
|
||||
|
@ -612,3 +733,29 @@ test "html escaping" {
|
|||
std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
|
||||
}
|
||||
}
|
||||
|
||||
test "safe HTML passthrough" {
    const gpa = testing.allocator;

    // A <details>/<summary> block should come through as HTML, with no
    // fallback rendering.
    const collapsible_source = "<details>\n<summary>Click to expand</summary>\nContent here\n</details>";
    const collapsible = try convertMarkdownToHtml(gpa, collapsible_source);
    defer collapsible.deinit(gpa);

    const expected_tags = [_][]const u8{ "<details>", "<summary>", "</details>" };
    for (expected_tags) |tag| {
        try testing.expect(std.mem.indexOf(u8, collapsible.html, tag) != null);
    }
    try testing.expect(!collapsible.has_fallback);

    // A <script> element must still take the fallback path, whose output
    // contains a <pre> element.
    const script_source = "<script>alert('xss')</script>";
    const scripted = try convertMarkdownToHtml(gpa, script_source);
    defer scripted.deinit(gpa);

    try testing.expect(scripted.has_fallback);
    try testing.expect(std.mem.indexOf(u8, scripted.html, "<pre>") != null);

    // Optional diagnostics, enabled by the "test-debug" environment variable;
    // a failed lookup is treated as "not set".
    const debug_requested = std.process.hasEnvVar(gpa, "test-debug") catch false;
    if (debug_requested) {
        std.debug.print("Safe HTML test - Input: {s}\nOutput: {s}\nHas fallback: {}\n", .{ collapsible_source, collapsible.html, collapsible.has_fallback });
    }
}
|
||||
|
|
Loading…
Add table
Reference in a new issue