html passthrough in markdown
This commit is contained in:
parent
6c8cdbd8ae
commit
3b243beb7e
2 changed files with 154 additions and 14 deletions
|
@ -9,6 +9,7 @@ const GitLab = @import("providers/GitLab.zig");
|
||||||
const Forgejo = @import("providers/Forgejo.zig");
|
const Forgejo = @import("providers/Forgejo.zig");
|
||||||
const SourceHut = @import("providers/SourceHut.zig");
|
const SourceHut = @import("providers/SourceHut.zig");
|
||||||
const config = @import("config.zig");
|
const config = @import("config.zig");
|
||||||
|
const utils = @import("utils.zig");
|
||||||
|
|
||||||
fn testPrint(comptime fmt: []const u8, args: anytype) void {
|
fn testPrint(comptime fmt: []const u8, args: anytype) void {
|
||||||
if (build_options.test_debug) {
|
if (build_options.test_debug) {
|
||||||
|
@ -24,7 +25,7 @@ test "Atom feed validates against W3C validator" {
|
||||||
Release{
|
Release{
|
||||||
.repo_name = "ziglang/zig",
|
.repo_name = "ziglang/zig",
|
||||||
.tag_name = "0.14.0",
|
.tag_name = "0.14.0",
|
||||||
.published_at = "2024-12-19T00:00:00Z",
|
.published_at = try utils.parseReleaseTimestamp("2024-12-19T00:00:00Z"),
|
||||||
.html_url = "https://github.com/ziglang/zig/releases/tag/0.14.0",
|
.html_url = "https://github.com/ziglang/zig/releases/tag/0.14.0",
|
||||||
.description = "Zig 0.14.0 release with many improvements",
|
.description = "Zig 0.14.0 release with many improvements",
|
||||||
.provider = "github",
|
.provider = "github",
|
||||||
|
@ -33,7 +34,7 @@ test "Atom feed validates against W3C validator" {
|
||||||
Release{
|
Release{
|
||||||
.repo_name = "example/test",
|
.repo_name = "example/test",
|
||||||
.tag_name = "v1.2.3",
|
.tag_name = "v1.2.3",
|
||||||
.published_at = "2024-12-18T12:30:00Z",
|
.published_at = try utils.parseReleaseTimestamp("2024-12-18T12:30:00Z"),
|
||||||
.html_url = "https://github.com/example/test/releases/tag/v1.2.3",
|
.html_url = "https://github.com/example/test/releases/tag/v1.2.3",
|
||||||
.description = "Bug fixes and performance improvements",
|
.description = "Bug fixes and performance improvements",
|
||||||
.provider = "github",
|
.provider = "github",
|
||||||
|
@ -583,21 +584,13 @@ test "SourceHut commit date fetching" {
|
||||||
try testing.expect(release.repo_name.len > 0);
|
try testing.expect(release.repo_name.len > 0);
|
||||||
try testing.expect(release.tag_name.len > 0);
|
try testing.expect(release.tag_name.len > 0);
|
||||||
try testing.expect(release.html_url.len > 0);
|
try testing.expect(release.html_url.len > 0);
|
||||||
try testing.expect(release.published_at.len > 0);
|
|
||||||
try testing.expectEqualStrings("sourcehut", release.provider);
|
try testing.expectEqualStrings("sourcehut", release.provider);
|
||||||
|
|
||||||
// Check if we got a real commit date vs epoch fallback
|
// Check if we got a real commit date vs epoch fallback
|
||||||
if (std.mem.eql(u8, release.published_at, "1970-01-01T00:00:00Z")) {
|
if (release.published_at == 0) {
|
||||||
epoch_dates += 1;
|
epoch_dates += 1;
|
||||||
testPrint(" -> Using epoch fallback date\n", .{});
|
testPrint(" -> Using epoch fallback date\n", .{});
|
||||||
} else {
|
} else valid_dates += 1;
|
||||||
valid_dates += 1;
|
|
||||||
testPrint(" -> Got real commit date\n", .{});
|
|
||||||
|
|
||||||
// Verify the date format looks reasonable (should be ISO 8601)
|
|
||||||
try testing.expect(release.published_at.len >= 19); // At least YYYY-MM-DDTHH:MM:SS
|
|
||||||
try testing.expect(std.mem.indexOf(u8, release.published_at, "T") != null);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
testPrint("SourceHut commit date summary: {} valid dates, {} epoch fallbacks\n", .{ valid_dates, epoch_dates });
|
testPrint("SourceHut commit date summary: {} valid dates, {} epoch fallbacks\n", .{ valid_dates, epoch_dates });
|
||||||
|
|
151
src/markdown.zig
151
src/markdown.zig
|
@ -153,6 +153,13 @@ pub fn convertMarkdownToHtml(allocator: Allocator, markdown: []const u8) !Conver
|
||||||
list_type = null;
|
list_type = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if this is a safe HTML line that can be passed through
|
||||||
|
if (isSafeHtmlLine(trimmed)) {
|
||||||
|
try result.appendSlice(trimmed);
|
||||||
|
try result.appendSlice("\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Check for complex markdown patterns that we don't handle
|
// Check for complex markdown patterns that we don't handle
|
||||||
if (hasComplexMarkdown(trimmed)) {
|
if (hasComplexMarkdown(trimmed)) {
|
||||||
has_fallback = true;
|
has_fallback = true;
|
||||||
|
@ -373,8 +380,122 @@ fn hasComplexMarkdown(text: []const u8) bool {
|
||||||
// Horizontal rules
|
// Horizontal rules
|
||||||
if (std.mem.eql(u8, text, "---") or std.mem.eql(u8, text, "***")) return true;
|
if (std.mem.eql(u8, text, "---") or std.mem.eql(u8, text, "***")) return true;
|
||||||
|
|
||||||
// HTML tags (already HTML, might be complex)
|
// Only treat as complex HTML if it contains potentially dangerous tags
|
||||||
if (std.mem.indexOf(u8, text, "<") != null and std.mem.indexOf(u8, text, ">") != null) return true;
|
if (containsDangerousHtml(text)) return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report whether `text` is a line of whitelisted HTML that may be emitted as-is.
///
/// A line qualifies when, after stripping surrounding spaces and tabs, it is
/// exactly one of a fixed set of bare tags (`<details>`, `<summary>`, their
/// closing forms, and the `<br>` variants), or when `isSafeHtmlWithContent`
/// accepts it. Lines lacking either `<` or `>` are never considered HTML.
fn isSafeHtmlLine(text: []const u8) bool {
    // Without both angle brackets this cannot be an HTML line.
    const has_open_bracket = std.mem.indexOf(u8, text, "<") != null;
    const has_close_bracket = std.mem.indexOf(u8, text, ">") != null;
    if (!(has_open_bracket and has_close_bracket)) return false;

    // Bare tags that are accepted only when they make up the entire line.
    const bare_tags = [_][]const u8{
        "<details>",
        "</details>",
        "<summary>",
        "</summary>",
        "<br>",
        "<br/>",
        "<br />",
    };

    const candidate = std.mem.trim(u8, text, " \t");
    for (bare_tags) |bare_tag| {
        if (std.mem.eql(u8, candidate, bare_tag)) return true;
    }

    // Otherwise accept only the `<tag>content</tag>` form of a safe tag.
    return isSafeHtmlWithContent(candidate);
}
|
||||||
|
|
||||||
|
/// Check whether `text` has the shape `<tag>content</tag>` for a whitelisted tag.
///
/// Accepted tags are `summary`, `code`, `em`, `strong`, `b` and `i`. The
/// opening tag must be bare (no attributes; the comparison is case-sensitive),
/// the text must end with the matching closing tag, and the content between
/// the two must not contain `<`. Nested markup is rejected so that an element
/// such as `<script>` cannot be smuggled inside an otherwise safe wrapper.
///
/// Performs no allocation.
fn isSafeHtmlWithContent(text: []const u8) bool {
    // Tags that are allowed to wrap plain-text content.
    const safe_content_tags = [_][]const u8{
        "summary",
        "code",
        "em",
        "strong",
        "b",
        "i",
    };

    // Shortest possible match is an empty single-letter element: `<b></b>`.
    if (text.len < 7) return false;
    if (text[0] != '<') return false;

    // The tag name is everything between the leading '<' and the first '>'.
    const tag_end = std.mem.indexOfScalar(u8, text, '>') orelse return false;
    const tag_name = text[1..tag_end];

    const is_safe_tag = for (safe_content_tags) |safe_tag| {
        if (std.mem.eql(u8, tag_name, safe_tag)) break true;
    } else false;
    if (!is_safe_tag) return false;

    // Match the `</tag>` suffix piecewise instead of formatting it on the heap.
    const open_len = tag_end + 1; // "<tag>"
    const close_len = tag_name.len + 3; // "</tag>"
    if (text.len < open_len + close_len) return false;

    const closing = text[text.len - close_len ..];
    if (!std.mem.startsWith(u8, closing, "</")) return false;
    if (!std.mem.eql(u8, closing[2 .. closing.len - 1], tag_name)) return false;
    if (closing[closing.len - 1] != '>') return false;

    // Content must be markup-free: any '<' could start a nested element.
    const content = text[open_len .. text.len - close_len];
    return std.mem.indexOfScalar(u8, content, '<') == null;
}
|
||||||
|
|
||||||
|
/// Check whether `text` contains an opening HTML tag from a fixed denylist.
///
/// Returns `false` immediately when `text` lacks either `<` or `>`. Otherwise
/// every `<` is treated as a potential tag start; the name that follows is
/// compared case-insensitively against the denylist. A tag name ends at any
/// ASCII whitespace, `>`, `/`, or a further `<`, so inputs such as
/// `<script\tsrc=x>` and `<<script>` are still recognised. Closing tags
/// (`</name>`) yield an empty name and are therefore ignored.
fn containsDangerousHtml(text: []const u8) bool {
    // Without both angle brackets there is no tag to inspect.
    if (std.mem.indexOf(u8, text, "<") == null or std.mem.indexOf(u8, text, ">") == null) {
        return false;
    }

    // Tag names that should trigger the fallback path.
    const dangerous_patterns = [_][]const u8{
        "script", "iframe", "object", "embed",    "form",  "input",
        "button", "select", "textarea", "style", "link",  "meta",
    };

    var i: usize = 0;
    while (i < text.len) {
        if (text[i] != '<') {
            i += 1;
            continue;
        }

        // Step past '<' and collect the tag name that follows it.
        i += 1;
        const tag_start = i;
        while (i < text.len) : (i += 1) {
            const c = text[i];
            // A second '<' restarts tag detection on the next outer iteration.
            if (c == '>' or c == '/' or c == '<' or std.ascii.isWhitespace(c)) break;
        }

        if (i > tag_start) {
            const tag_name = text[tag_start..i];
            for (dangerous_patterns) |dangerous| {
                if (std.ascii.eqlIgnoreCase(tag_name, dangerous)) return true;
            }
        }
    }

    return false;
}
|
||||||
|
@ -612,3 +733,29 @@ test "html escaping" {
|
||||||
std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
|
std.debug.print("HTML escaping test - Input: {s}\nOutput: {s}\n", .{ markdown, result.html });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "safe HTML passthrough" {
    const allocator = testing.allocator;

    // Whitelisted details/summary markup must reach the output without fallback.
    const safe_input = "<details>\n<summary>Click to expand</summary>\nContent here\n</details>";
    const safe_result = try convertMarkdownToHtml(allocator, safe_input);
    defer safe_result.deinit(allocator);

    const expected_tags = [_][]const u8{ "<details>", "<summary>", "</details>" };
    for (expected_tags) |expected_tag| {
        try testing.expect(std.mem.indexOf(u8, safe_result.html, expected_tag) != null);
    }
    try testing.expect(!safe_result.has_fallback);

    // A script element must still take the fallback path, whose output contains <pre>.
    const unsafe_input = "<script>alert('xss')</script>";
    const unsafe_result = try convertMarkdownToHtml(allocator, unsafe_input);
    defer unsafe_result.deinit(allocator);

    try testing.expect(unsafe_result.has_fallback);
    try testing.expect(std.mem.indexOf(u8, unsafe_result.html, "<pre>") != null);

    // Optional diagnostics, printed only when the "test-debug" env var is set.
    const debug_enabled = std.process.hasEnvVar(allocator, "test-debug") catch false;
    if (debug_enabled) {
        std.debug.print("Safe HTML test - Input: {s}\nOutput: {s}\nHas fallback: {}\n", .{ safe_input, safe_result.html, safe_result.has_fallback });
    }
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue