return message if no text available
This commit is contained in:
parent
a64e35ed75
commit
22aad3ed8f
2 changed files with 46 additions and 3 deletions
|
@ -322,14 +322,22 @@ pub const Message = struct {
|
|||
, .{text_content.?});
|
||||
}
|
||||
|
||||
var final_text = text_content orelse try allocator.dupe(u8, "no text or html versions available");
|
||||
|
||||
// If text is empty (e.g., HTML with only images without alt tags), provide fallback
|
||||
if (final_text.len == 0) {
|
||||
allocator.free(final_text);
|
||||
final_text = try allocator.dupe(u8, "Message contains only image data without alt tags");
|
||||
}
|
||||
|
||||
return .{
|
||||
.text = text_content orelse "no text or html versions available",
|
||||
.html = html_content orelse
|
||||
.text = final_text,
|
||||
.html = html_content orelse try allocator.dupe(u8,
|
||||
\\<html>
|
||||
\\<head><title>No text or HTML version available</title></head>
|
||||
\\<body>No text or HTML versions available</body>
|
||||
\\</html>
|
||||
,
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,25 @@ pub fn htmlToText(allocator: std.mem.Allocator, html: []const u8) ![]const u8 {
|
|||
} else if (i + 8 <= html.len and std.mem.eql(u8, html[i .. i + 8], "</style>")) {
|
||||
in_style = false;
|
||||
i += 7;
|
||||
} else if (i + 4 <= html.len and std.mem.eql(u8, html[i .. i + 4], "<img")) {
|
||||
// Extract alt attribute from img tags
|
||||
const tag_end = std.mem.indexOfScalarPos(u8, html, i, '>') orelse html.len;
|
||||
const tag_content = html[i..tag_end];
|
||||
if (std.mem.indexOf(u8, tag_content, "alt=\"")) |alt_start| {
|
||||
const alt_value_start = alt_start + 5;
|
||||
if (std.mem.indexOfScalarPos(u8, tag_content, alt_value_start, '"')) |alt_end| {
|
||||
const alt_text = tag_content[alt_value_start..alt_end];
|
||||
if (alt_text.len > 0) {
|
||||
try result.append(allocator, '[');
|
||||
for (alt_text) |c| {
|
||||
try result.append(allocator, c);
|
||||
}
|
||||
try result.append(allocator, ']');
|
||||
}
|
||||
}
|
||||
}
|
||||
i = tag_end;
|
||||
in_tag = false;
|
||||
} else if ((i + 3 <= html.len and std.mem.eql(u8, html[i .. i + 3], "<br")) or
|
||||
(i + 3 <= html.len and std.mem.eql(u8, html[i .. i + 3], "<p>")) or
|
||||
(i + 4 <= html.len and std.mem.eql(u8, html[i .. i + 4], "<div")))
|
||||
|
@ -117,3 +136,19 @@ test "htmlToText - plain text" {
|
|||
defer allocator.free(text);
|
||||
try std.testing.expectEqualStrings("Just plain text", text);
|
||||
}
|
||||
|
||||
test "htmlToText - extracts img alt attributes" {
    // Two adjacent <img> tags, with the alt attribute placed after and
    // before src respectively; each alt value is expected in the output
    // wrapped in square brackets, in document order.
    const markup = "<img src=\"test.jpg\" alt=\"Test Image\"><img alt=\"Another\" src=\"x.png\">";

    const test_gpa = std.testing.allocator;
    const rendered = try htmlToText(test_gpa, markup);
    defer test_gpa.free(rendered);

    try std.testing.expectEqualStrings("[Test Image][Another]", rendered);
}
|
||||
|
||||
test "htmlToText - img without alt" {
    // An <img> tag that carries no alt attribute is expected to contribute
    // nothing to the converted text.
    const markup = "<img src=\"test.jpg\">";

    const test_gpa = std.testing.allocator;
    const rendered = try htmlToText(test_gpa, markup);
    defer test_gpa.free(rendered);

    try std.testing.expectEqualStrings("", rendered);
}
|
||||
|
|
Loading…
Add table
Reference in a new issue