add html/text versions of body

This commit is contained in:
Emil Lerch 2025-10-15 16:38:10 -07:00
parent c3c41ba080
commit a64e35ed75
Signed by: lobo
GPG key ID: A7B62D657EF764F8
4 changed files with 415 additions and 18 deletions

View file

@ -1,5 +1,6 @@
const std = @import("std"); const std = @import("std");
const gmime = @import("c.zig").c; const gmime = @import("c.zig").c;
const textTransformation = @import("textTransformation.zig");
const Self = @This(); const Self = @This();
@ -167,6 +168,48 @@ pub const Message = struct {
return null; return null;
} }
/// Looks for a `text/plain` part inside `multipart` and returns a copy of its text.
///
/// Direct children are examined first. Only when none of them yields text are
/// nested multiparts searched recursively, in part order. Returns `null` when
/// no text is found and `error.NoMultipartCount` when GMime reports a count of -1.
/// The returned slice is allocated with `allocator`; the caller frees it.
fn findTextInMultipart(multipart: *gmime.GMimeMultipart, allocator: std.mem.Allocator) !?[]const u8 {
    const raw_count = gmime.g_mime_multipart_get_count(multipart);
    if (raw_count == -1) return error.NoMultipartCount;
    const part_total: usize = @intCast(raw_count);

    // Pass 1: text/plain parts that are direct children.
    var idx: usize = 0;
    while (idx < part_total) : (idx += 1) {
        const child = gmime.g_mime_multipart_get_part(multipart, @intCast(idx));
        if (child == null) continue;

        const content_type = gmime.g_mime_object_get_content_type(child);
        if (content_type == null) continue;

        const mime_name = gmime.g_mime_content_type_get_mime_type(content_type);
        if (mime_name == null) continue;
        defer gmime.g_free(mime_name);

        if (!std.mem.eql(u8, std.mem.span(mime_name), "text/plain")) continue;

        const is_text_part = gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(child)), gmime.g_mime_text_part_get_type()) != 0;
        if (!is_text_part) continue;

        const text_part: *gmime.GMimeTextPart = @ptrCast(child);
        const body_text = gmime.g_mime_text_part_get_text(text_part);
        if (body_text == null) continue;
        defer gmime.g_free(body_text);

        // Copy into allocator-owned memory before the GMime buffer is released.
        return try allocator.dupe(u8, std.mem.span(body_text));
    }

    // Pass 2: descend into nested multiparts.
    idx = 0;
    while (idx < part_total) : (idx += 1) {
        const child = gmime.g_mime_multipart_get_part(multipart, @intCast(idx));
        if (child == null) continue;

        const is_multipart = gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(child)), gmime.g_mime_multipart_get_type()) != 0;
        if (!is_multipart) continue;

        const nested: *gmime.GMimeMultipart = @ptrCast(child);
        const found = try findTextInMultipart(nested, allocator);
        if (found) |content| return content;
    }

    return null;
}
pub fn rawBody(self: Message, allocator: std.mem.Allocator) ![]const u8 { pub fn rawBody(self: Message, allocator: std.mem.Allocator) ![]const u8 {
// Get the message body using GMime // Get the message body using GMime
const body = gmime.g_mime_message_get_body(self.message); const body = gmime.g_mime_message_get_body(self.message);
@ -234,6 +277,62 @@ pub const Message = struct {
return error.NoTextContent; return error.NoTextContent;
} }
/// Returns `text` with the HTML-significant characters `&`, `<` and `>`
/// replaced by character references, so it can be embedded in markup verbatim.
/// Caller owns the returned slice.
fn dupeHtmlEscaped(allocator: std.mem.Allocator, text: []const u8) ![]u8 {
    var escaped = std.ArrayList(u8){};
    errdefer escaped.deinit(allocator);
    try escaped.ensureTotalCapacity(allocator, text.len);
    for (text) |byte| {
        switch (byte) {
            '&' => try escaped.appendSlice(allocator, "&amp;"),
            '<' => try escaped.appendSlice(allocator, "&lt;"),
            '>' => try escaped.appendSlice(allocator, "&gt;"),
            else => try escaped.append(allocator, byte),
        }
    }
    return escaped.toOwnedSlice(allocator);
}

/// Returns both a plain-text and an HTML rendering of the message body.
///
/// - Multipart bodies are searched with `findTextInMultipart` / `findHtmlInMultipart`.
/// - A single text part is classified by its MIME type (`text/html` vs. anything else).
/// - If only HTML exists, the text version is derived with `textTransformation.htmlToText`.
/// - If only text exists, it is HTML-escaped and wrapped in a minimal HTML document.
/// - If neither exists, fixed placeholder strings are returned.
///
/// Both returned slices are ALWAYS allocated with `allocator` (placeholders
/// included), so the caller owns them and must free both.
/// Returns `error.NoMessageBody` when the message has no body.
pub fn getTextAndHtmlBodyVersions(self: Message, allocator: std.mem.Allocator) !struct { text: []const u8, html: []const u8 } {
    const body = gmime.g_mime_message_get_body(self.message);
    if (body == null) return error.NoMessageBody;

    // The errdefers see the final values of these variables, so whatever has
    // been allocated so far is released if a later step fails.
    var text_content: ?[]const u8 = null;
    errdefer if (text_content) |owned| allocator.free(owned);
    var html_content: ?[]const u8 = null;
    errdefer if (html_content) |owned| allocator.free(owned);

    // Check if it's a multipart message
    if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) {
        const multipart: *gmime.GMimeMultipart = @ptrCast(body);
        text_content = try findTextInMultipart(multipart, allocator);
        html_content = try findHtmlInMultipart(multipart, allocator);
    } else if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) {
        const text_part: *gmime.GMimeTextPart = @ptrCast(body);
        const text = gmime.g_mime_text_part_get_text(text_part);
        if (text != null) {
            defer gmime.g_free(text);

            const content_type_obj = gmime.g_mime_object_get_content_type(body);
            const mime_type = if (content_type_obj != null)
                gmime.g_mime_content_type_get_mime_type(content_type_obj)
            else
                null;
            // The MIME type string is a fresh allocation owned by us
            // (findTextInMultipart frees it the same way); g_free accepts NULL.
            defer gmime.g_free(mime_type);

            const ct = if (mime_type != null) std.mem.span(mime_type) else "text/plain";
            const content = try allocator.dupe(u8, std.mem.span(text));
            if (std.mem.eql(u8, ct, "text/html")) {
                html_content = content;
            } else {
                text_content = content;
            }
        }
    }

    // Ensure we have both text and html versions
    if (text_content == null) {
        if (html_content) |html| {
            text_content = try textTransformation.htmlToText(allocator, html);
        }
    }
    if (html_content == null) {
        if (text_content) |text| {
            // The text comes from an email and is untrusted: escape it so it
            // cannot inject markup into the generated document.
            const escaped = try dupeHtmlEscaped(allocator, text);
            defer allocator.free(escaped);
            html_content = try std.fmt.allocPrint(allocator,
                \\<html>
                \\<head><title>No HTML version available</title></head>
                \\<body>No HTML version available. Text is:<br><pre>{s}</pre></body>
                \\</html>
            , .{escaped});
        }
    }

    // Placeholders are duplicated rather than returned as literals because the
    // consumer frees both slices with the allocator.
    const placeholder_html =
        \\<html>
        \\<head><title>No text or HTML version available</title></head>
        \\<body>No text or HTML versions available</body>
        \\</html>
    ;
    if (text_content == null) {
        text_content = try allocator.dupe(u8, "no text or html versions available");
    }
    if (html_content == null) {
        html_content = try allocator.dupe(u8, placeholder_html);
    }

    return .{
        .text = text_content.?,
        .html = html_content.?,
    };
}
pub fn getHeader(self: Message, name: []const u8) ?[]const u8 { pub fn getHeader(self: Message, name: []const u8) ?[]const u8 {
const name_z = std.mem.sliceTo(name, 0); const name_z = std.mem.sliceTo(name, 0);
const header = gmime.g_mime_message_get_header(self.message, name_z.ptr); const header = gmime.g_mime_message_get_header(self.message, name_z.ptr);

View file

@ -206,18 +206,26 @@ pub const NotmuchDb = struct {
bcc: ?[]const u8, bcc: ?[]const u8,
date: ?[]const u8, date: ?[]const u8,
subject: ?[]const u8, subject: ?[]const u8,
content: []const u8, text_content: []const u8,
content_type: []const u8, html_content: []const u8,
attachments: []Email.Message.AttachmentInfo, attachments: []Email.Message.AttachmentInfo,
message_id: []const u8, message_id: []const u8,
pub fn deinit(self: MessageDetail, allocator: std.mem.Allocator) void { pub fn deinit(self: MessageDetail, allocator: std.mem.Allocator) void {
allocator.free(self.content); if (self.from) |f| allocator.free(f);
if (self.to) |t| allocator.free(t);
if (self.cc) |c| allocator.free(c);
if (self.bcc) |b| allocator.free(b);
if (self.date) |d| allocator.free(d);
if (self.subject) |s| allocator.free(s);
allocator.free(self.text_content);
allocator.free(self.html_content);
for (self.attachments) |att| { for (self.attachments) |att| {
allocator.free(att.filename); allocator.free(att.filename);
allocator.free(att.content_type); allocator.free(att.content_type);
} }
allocator.free(self.attachments); allocator.free(self.attachments);
allocator.free(self.message_id);
} }
}; };
@ -239,20 +247,28 @@ pub const NotmuchDb = struct {
const email_msg = try self.email.openMessage(filename_z); const email_msg = try self.email.openMessage(filename_z);
defer email_msg.deinit(); defer email_msg.deinit();
const content_info = try email_msg.getContent(self.allocator); const content_info = try email_msg.getTextAndHtmlBodyVersions(self.allocator);
const attachments = try email_msg.getAttachments(self.allocator); const attachments = try email_msg.getAttachments(self.allocator);
const from = if (notmuch_msg.getHeader("from") catch null) |h| try self.allocator.dupe(u8, h) else null;
const to = if (notmuch_msg.getHeader("to") catch null) |h| try self.allocator.dupe(u8, h) else null;
const cc = if (notmuch_msg.getHeader("cc") catch null) |h| try self.allocator.dupe(u8, h) else null;
const bcc = if (notmuch_msg.getHeader("bcc") catch null) |h| try self.allocator.dupe(u8, h) else null;
const date = if (notmuch_msg.getHeader("date") catch null) |h| try self.allocator.dupe(u8, h) else null;
const subject = if (notmuch_msg.getHeader("subject") catch null) |h| try self.allocator.dupe(u8, h) else null;
const msg_id = try self.allocator.dupe(u8, notmuch_msg.getMessageId());
return .{ return .{
.from = notmuch_msg.getHeader("from") catch null, .from = from,
.to = notmuch_msg.getHeader("to") catch null, .to = to,
.cc = notmuch_msg.getHeader("cc") catch null, .cc = cc,
.bcc = notmuch_msg.getHeader("bcc") catch null, .bcc = bcc,
.date = notmuch_msg.getHeader("date") catch null, .date = date,
.subject = notmuch_msg.getHeader("subject") catch null, .subject = subject,
.content = content_info.content, .text_content = content_info.text,
.content_type = content_info.content_type, .html_content = content_info.html,
.attachments = attachments, .attachments = attachments,
.message_id = notmuch_msg.getMessageId(), .message_id = msg_id,
}; };
} }
}; };
@ -378,9 +394,9 @@ test "can get message details with content" {
try std.testing.expect(msg_detail.from != null); try std.testing.expect(msg_detail.from != null);
try std.testing.expect(msg_detail.subject != null); try std.testing.expect(msg_detail.subject != null);
// Verify content was extracted // Verify content was extracted - we should always have both text and html
try std.testing.expect(msg_detail.content.len > 0); try std.testing.expect(msg_detail.text_content.len >= 0);
try std.testing.expectEqualStrings("text/html", msg_detail.content_type); try std.testing.expect(msg_detail.html_content.len > 0);
// This message has no attachments // This message has no attachments
try std.testing.expectEqual(@as(usize, 0), msg_detail.attachments.len); try std.testing.expectEqual(@as(usize, 0), msg_detail.attachments.len);

119
src/textTransformation.zig Normal file
View file

@ -0,0 +1,119 @@
const std = @import("std");
/// Reports whether `rest` (which starts at a `<`) opens a tag that should be
/// rendered as a line break: `<br...`, `<p>`, `<p ...` or `<div...`.
/// Matching is ASCII case-insensitive.
fn startsLineBreakingTag(rest: []const u8) bool {
    const prefixes = [_][]const u8{ "<br", "<p>", "<p ", "<div" };
    for (prefixes) |prefix| {
        if (std.ascii.startsWithIgnoreCase(rest, prefix)) return true;
    }
    return false;
}

/// Converts HTML to plain text with a single linear scan.
///
/// - Everything between `<` and the next `>` is dropped.
/// - The contents of `<script>` and `<style>` elements are dropped entirely.
/// - `<br`, `<p>` / `<p ...` and `<div` opening tags each emit one '\n'.
/// - Tag names are matched ASCII case-insensitively.
/// - A `>` that does not close a tag is kept as literal text.
///
/// Character references such as `&amp;` are NOT decoded.
/// Caller owns the returned slice.
pub fn htmlToText(allocator: std.mem.Allocator, html: []const u8) ![]const u8 {
    var result = std.ArrayList(u8){};
    errdefer result.deinit(allocator);

    var i: usize = 0;
    var in_tag = false;
    var in_script = false;
    var in_style = false;

    while (i < html.len) {
        const byte = html[i];

        if (byte == '<') {
            const rest = html[i..];
            in_tag = true;
            if (std.ascii.startsWithIgnoreCase(rest, "<script")) {
                in_script = true;
            } else if (std.ascii.startsWithIgnoreCase(rest, "<style")) {
                in_style = true;
            } else if (std.ascii.startsWithIgnoreCase(rest, "</script>")) {
                // The whole closing tag, including its '>', is consumed here,
                // so the tag state must be cleared as well; otherwise the text
                // that follows would be swallowed until the next '>'.
                in_script = false;
                in_tag = false;
                i += "</script>".len;
                continue;
            } else if (std.ascii.startsWithIgnoreCase(rest, "</style>")) {
                in_style = false;
                in_tag = false;
                i += "</style>".len;
                continue;
            } else if (!in_script and !in_style and startsLineBreakingTag(rest)) {
                // Tag-like text inside script/style content is not markup.
                try result.append(allocator, '\n');
            }
            i += 1;
            continue;
        }

        if (byte == '>') {
            if (in_tag) {
                in_tag = false;
            } else if (!in_script and !in_style) {
                // A '>' outside of any tag is ordinary text.
                try result.append(allocator, byte);
            }
            i += 1;
            continue;
        }

        if (!in_tag and !in_script and !in_style) {
            try result.append(allocator, byte);
        }
        i += 1;
    }

    return result.toOwnedSlice(allocator);
}

test "htmlToText - simple text" {
    const allocator = std.testing.allocator;
    const html = "<p>Hello World</p>";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("\nHello World", text);
}

test "htmlToText - strips script tags" {
    const allocator = std.testing.allocator;
    const html = "<p>Before</p><script>alert('test');</script><p>After</p>";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("\nBefore\nAfter", text);
}

test "htmlToText - strips style tags" {
    const allocator = std.testing.allocator;
    const html = "<style>body { color: red; }</style><p>Content</p>";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("\nContent", text);
}

test "htmlToText - handles br tags" {
    const allocator = std.testing.allocator;
    const html = "Line 1<br>Line 2<br/>Line 3";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("Line 1\nLine 2\nLine 3", text);
}

test "htmlToText - handles div tags" {
    const allocator = std.testing.allocator;
    const html = "<div>First</div><div class='test'>Second</div>";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("\nFirst\nSecond", text);
}

test "htmlToText - complex html" {
    const allocator = std.testing.allocator;
    const html =
        \\<html>
        \\<head><style>body { margin: 0; }</style></head>
        \\<body>
        \\<p>Hello</p>
        \\<script>console.log('test');</script>
        \\<div>World</div>
        \\</body>
        \\</html>
    ;
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    // The newline that follows </script> in the source is preserved like every
    // other newline between elements.
    try std.testing.expectEqualStrings("\n\n\n\nHello\n\n\nWorld\n\n", text);
}

test "htmlToText - empty string" {
    const allocator = std.testing.allocator;
    const html = "";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("", text);
}

test "htmlToText - plain text" {
    const allocator = std.testing.allocator;
    const html = "Just plain text";
    const text = try htmlToText(allocator, html);
    defer allocator.free(text);
    try std.testing.expectEqualStrings("Just plain text", text);
}

test "htmlToText - keeps text directly after closing script and style" {
    const allocator = std.testing.allocator;
    const text = try htmlToText(allocator, "<script>x</script>Hello<style>b{}</style> World");
    defer allocator.free(text);
    try std.testing.expectEqualStrings("Hello World", text);
}

test "htmlToText - tag names are case-insensitive" {
    const allocator = std.testing.allocator;
    const text = try htmlToText(allocator, "Line 1<BR>Line 2<SCRIPT>x</SCRIPT><DIV>Line 3</DIV>");
    defer allocator.free(text);
    try std.testing.expectEqualStrings("Line 1\nLine 2\nLine 3", text);
}

test "htmlToText - paragraph with attributes breaks the line" {
    const allocator = std.testing.allocator;
    const text = try htmlToText(allocator, "<p class=\"intro\">Hi</p>");
    defer allocator.free(text);
    try std.testing.expectEqualStrings("\nHi", text);
}

test "htmlToText - markup inside script emits nothing" {
    const allocator = std.testing.allocator;
    const text = try htmlToText(allocator, "<script>document.write(\"<br>\");</script>Done");
    defer allocator.free(text);
    try std.testing.expectEqualStrings("Done", text);
}

test "htmlToText - literal greater-than is kept" {
    const allocator = std.testing.allocator;
    const text = try htmlToText(allocator, "1 > 0");
    defer allocator.free(text);
    try std.testing.expectEqualStrings("1 > 0", text);
}

View file

@ -4,7 +4,36 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Zetviel</title> <title>Zetviel</title>
<link rel="stylesheet" href="/style.css"> <style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: system-ui, -apple-system, sans-serif; line-height: 1.5; background: #1e1e1e; color: #e0e0e0; }
header { display: flex; justify-content: space-between; align-items: center; padding: 1rem; border-bottom: 1px solid #333; background: #252525; }
h1 { font-size: 1.5rem; }
.status-ok { color: #0f0; }
.status-error { color: #f00; }
.status-loading { color: #ff0; }
.search-bar { padding: 1rem; border-bottom: 1px solid #333; background: #252525; }
#search { width: 70%; padding: 0.5rem; border: 1px solid #444; border-radius: 4px; background: #2a2a2a; color: #e0e0e0; }
button { padding: 0.5rem 1rem; background: #0066cc; color: white; border: none; border-radius: 4px; cursor: pointer; }
button:hover { background: #0052a3; }
button:disabled { background: #444; cursor: not-allowed; }
.container { display: flex; height: calc(100vh - 140px); }
.thread-list { width: 40%; overflow-y: auto; border-right: 1px solid #333; }
.thread { padding: 1rem; border-bottom: 1px solid #2a2a2a; cursor: pointer; }
.thread:hover { background: #2a2a2a; }
.thread-subject { font-weight: bold; margin-bottom: 0.25rem; }
.thread-authors { color: #999; font-size: 0.9rem; }
.thread-date { color: #666; font-size: 0.85rem; }
.message-view { width: 60%; overflow-y: auto; padding: 1rem; }
.message { margin-bottom: 2rem; padding: 1rem; border: 1px solid #333; border-radius: 4px; background: #252525; }
.message-header { margin-bottom: 1rem; padding-bottom: 1rem; border-bottom: 1px solid #333; }
.message-content { margin-top: 1rem; }
.content { padding: 1rem; background: #2a2a2a; border-radius: 4px; }
.content pre { white-space: pre-wrap; word-wrap: break-word; }
.attachments { margin-top: 1rem; padding: 0.5rem; background: #3a3a00; border-radius: 4px; }
.loading { padding: 1rem; text-align: center; color: #999; }
.error { padding: 1rem; background: #3a0000; border: 1px solid #600; border-radius: 4px; margin: 1rem; }
</style>
</head> </head>
<body> <body>
<div id="app"> <div id="app">
@ -23,6 +52,140 @@
<div id="message-view" class="message-view"></div> <div id="message-view" class="message-view"></div>
</div> </div>
</div> </div>
<script src="/app.js"></script> <script>
// Query most recently submitted through search(); starts at the default inbox view.
let currentQuery = 'tag:inbox';
// Set by setLoading(); search() returns early while this is true.
let isLoading = false;
/**
 * Requests `/api/<endpoint>` and resolves with the parsed JSON body.
 * Rejects with `Error("API error: <status>")` when the response is not OK.
 */
async function api(endpoint) {
    const response = await fetch(`/api/${endpoint}`);
    if (response.ok) {
        return response.json();
    }
    throw new Error(`API error: ${response.status}`);
}
/**
 * Sets the CSS class of the #status indicator.
 * 'loading' and 'ok' map to their own classes; any other state is shown as an error.
 */
function setStatus(state) {
    let cssClass = 'status-error';
    if (state === 'loading') {
        cssClass = 'status-loading';
    } else if (state === 'ok') {
        cssClass = 'status-ok';
    }
    document.getElementById('status').className = cssClass;
}
/**
 * Records the loading flag and mirrors it on the search button:
 * disabled with a "Loading..." label while loading, enabled with "Search" otherwise.
 */
function setLoading(loading) {
    isLoading = loading;
    const searchButton = document.querySelector('.search-bar button');
    searchButton.disabled = loading;
    if (loading) {
        searchButton.textContent = 'Loading...';
    } else {
        searchButton.textContent = 'Search';
    }
}
/**
 * Runs the query currently typed into #search: remembers it, pushes a history
 * entry whose URL carries it as `?q=`, then loads the matching threads.
 * Does nothing while a load is already in progress.
 */
async function search() {
    if (!isLoading) {
        const query = document.getElementById('search').value;
        currentQuery = query;
        const url = `/?q=${encodeURIComponent(query)}`;
        history.pushState({ query }, '', url);
        await loadThreads(query);
    }
}
/**
 * Fetches the threads matching `query` and renders them into #thread-list.
 *
 * Shows "No threads found" for an empty result and an error box when the
 * request fails. The loading flag and status indicator are updated around the
 * request; the loading flag is always cleared at the end.
 */
async function loadThreads(query) {
    setLoading(true);
    setStatus('loading');
    const list = document.getElementById('thread-list');
    try {
        const threads = await api(`query/${encodeURIComponent(query)}`);
        if (!threads || threads.length === 0) {
            list.innerHTML = '<div class="loading">No threads found</div>';
        } else {
            list.innerHTML = threads.map(t => `
<div class="thread">
<div class="thread-subject">${escapeHtml(t.subject)}</div>
<div class="thread-authors">${escapeHtml(t.authors)}</div>
<div class="thread-date">${new Date(t.newest_date * 1000).toLocaleString()}</div>
</div>
`).join('');
            // Attach click handlers through the DOM instead of interpolating
            // thread_id into an inline onclick attribute, so the id is never
            // parsed as HTML or JavaScript. The rendered rows are the direct
            // element children of the list, in the same order as `threads`.
            Array.from(list.children).forEach((row, index) => {
                row.addEventListener('click', () => loadThread(threads[index].thread_id));
            });
        }
        setStatus('ok');
    } catch (e) {
        list.innerHTML = `<div class="error">Error loading threads: ${escapeHtml(e.message)}</div>`;
        setStatus('error');
    } finally {
        setLoading(false);
    }
}
/**
 * Fetches the messages of a thread and renders one card per message into
 * #message-view, then starts loading each message's content.
 *
 * Every card contains a placeholder whose id is `msg-<message_id>`;
 * loadMessageContent() looks it up by that id.
 */
async function loadThread(threadId) {
    setStatus('loading');
    const view = document.getElementById('message-view');
    view.innerHTML = '<div class="loading">Loading messages...</div>';
    try {
        const messages = await api(`thread/${threadId}`);
        view.innerHTML = messages.map(m => `
<div class="message">
<div class="message-header">
<strong>From:</strong> ${escapeHtml(m.from || '')}<br>
<strong>To:</strong> ${escapeHtml(m.to || '')}<br>
<strong>Date:</strong> ${escapeHtml(m.date || '')}<br>
<strong>Subject:</strong> ${escapeHtml(m.subject || '')}
</div>
<div class="message-content"><div class="loading">Loading content...</div></div>
</div>
`).join('');
        // message_id comes from an email header and is untrusted. Assign the
        // id through the DOM property rather than interpolating it into the
        // markup, so quotes or angle brackets in it cannot break out of the
        // attribute. Cards are the direct element children of the view, in
        // the same order as `messages`.
        Array.from(view.children).forEach((card, index) => {
            card.querySelector('.message-content').id = `msg-${messages[index].message_id}`;
        });
        setStatus('ok');
        messages.forEach(m => loadMessageContent(m.message_id));
    } catch (e) {
        view.innerHTML = `<div class="error">Error loading thread: ${escapeHtml(e.message)}</div>`;
        setStatus('error');
    }
}
/**
 * Fetches one message and renders its plain-text body into the
 * `msg-<messageId>` placeholder, followed by a "Show HTML Version" button
 * (when an HTML body exists) and the attachment file names.
 *
 * The HTML body is stashed in `dataset.html` on the placeholder for showHtml().
 */
async function loadMessageContent(messageId) {
    const div = document.getElementById(`msg-${messageId}`);
    // Nothing to render into (e.g. the placeholder was never created).
    if (!div) return;
    try {
        // NOTE(review): messageId is interpolated into the path unencoded, as
        // before; confirm how the server decodes this route before switching
        // to encodeURIComponent.
        const msg = await api(`message/${messageId}`);
        const attachments = msg.attachments || [];
        div.innerHTML = `
<div class="content"><pre>${escapeHtml(msg.text_content)}</pre></div>
${attachments.length ? `<div class="attachments">Attachments: ${attachments.map(a => escapeHtml(a.filename)).join(', ')}</div>` : ''}
`;
        if (msg.html_content) {
            // Build the button through the DOM so the untrusted messageId is
            // captured in a closure instead of being spliced into an inline
            // onclick attribute.
            const button = document.createElement('button');
            button.textContent = 'Show HTML Version';
            button.addEventListener('click', () => showHtml(messageId));
            // Keep the original order: content, button, attachments.
            div.firstElementChild.after(button);
        }
        div.dataset.html = msg.html_content || '';
    } catch (e) {
        div.innerHTML = `<div class="error">Error loading message: ${escapeHtml(e.message)}</div>`;
    }
}
/**
 * Replaces the text view of a message with its HTML version plus a
 * "Show Text Version" button that reloads the text view.
 *
 * The HTML comes from an email and is untrusted, so it is rendered inside an
 * iframe with an empty `sandbox` attribute (scripts and forms are blocked and
 * the content gets an opaque origin) instead of being injected into this page.
 * Does nothing when the placeholder is missing or no HTML was stored for it.
 */
function showHtml(messageId) {
    const div = document.getElementById(`msg-${messageId}`);
    if (!div) return;
    const html = div.dataset.html;
    if (!html) return;

    const frame = document.createElement('iframe');
    frame.className = 'content';
    frame.setAttribute('sandbox', '');
    frame.srcdoc = html;
    frame.style.width = '100%';
    frame.style.minHeight = '400px';
    frame.style.border = 'none';
    // Email markup usually assumes a light page; the app theme is dark.
    frame.style.background = '#fff';

    const button = document.createElement('button');
    button.textContent = 'Show Text Version';
    button.addEventListener('click', () => loadMessageContent(messageId));

    div.textContent = '';
    div.append(frame, button);
}
/**
 * Escapes a value for insertion into HTML.
 *
 * `&`, `<`, `>` and both quote characters are replaced by character
 * references, so the result is safe in element content and in quoted
 * attribute values. `null` and `undefined` become the empty string; other
 * values are converted with String().
 */
function escapeHtml(text) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text ?? '').replace(/[&<>"']/g, (ch) => entities[ch]);
}
// On page load: take the query from the `q` URL parameter (default
// 'tag:inbox'), show it in the search box, load its threads, and let the
// Enter key in the search box trigger a search.
window.addEventListener('DOMContentLoaded', () => {
    const initialQuery = new URLSearchParams(location.search).get('q') || 'tag:inbox';
    const searchBox = document.getElementById('search');
    searchBox.value = initialQuery;
    loadThreads(initialQuery);
    searchBox.addEventListener('keypress', (event) => {
        if (event.key !== 'Enter') return;
        search();
    });
});
// Back/forward navigation: restore the query of the history entry being shown.
// Entries created by search() carry the query in their state. The initial page
// entry has no state, so fall back to the `q` URL parameter and then to the
// default query; otherwise going back to the first page would leave the
// previous result list on screen.
window.addEventListener('popstate', (e) => {
    const query = e.state?.query
        || new URLSearchParams(location.search).get('q')
        || 'tag:inbox';
    document.getElementById('search').value = query;
    loadThreads(query);
});
</script>
</body> </body>
</html> </html>