diff --git a/src/Email.zig b/src/Email.zig index a7bc503..f35a887 100644 --- a/src/Email.zig +++ b/src/Email.zig @@ -1,5 +1,6 @@ const std = @import("std"); const gmime = @import("c.zig").c; +const textTransformation = @import("textTransformation.zig"); const Self = @This(); @@ -167,6 +168,48 @@ pub const Message = struct { return null; } + fn findTextInMultipart(multipart: *gmime.GMimeMultipart, allocator: std.mem.Allocator) !?[]const u8 { + const mpgc = gmime.g_mime_multipart_get_count(multipart); + if (mpgc == -1) return error.NoMultipartCount; + const count: usize = @intCast(mpgc); + + for (0..count) |i| { + const part = gmime.g_mime_multipart_get_part(multipart, @intCast(i)); + if (part == null) continue; + + const part_content_type = gmime.g_mime_object_get_content_type(part); + if (part_content_type == null) continue; + + const part_mime_type = gmime.g_mime_content_type_get_mime_type(part_content_type); + if (part_mime_type == null) continue; + defer gmime.g_free(part_mime_type); + + if (std.mem.eql(u8, std.mem.span(part_mime_type), "text/plain")) { + if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_text_part_get_type()) != 0) { + const text_part: *gmime.GMimeTextPart = @ptrCast(part); + const text = gmime.g_mime_text_part_get_text(text_part); + if (text != null) { + defer gmime.g_free(text); + return try allocator.dupe(u8, std.mem.span(text)); + } + } + } + } + + for (0..count) |i| { + const part = gmime.g_mime_multipart_get_part(multipart, @intCast(i)); + if (part == null) continue; + + if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_multipart_get_type()) != 0) { + const nested_multipart: *gmime.GMimeMultipart = @ptrCast(part); + if (try findTextInMultipart(nested_multipart, allocator)) |content| + return content; + } + } + + return null; + } + pub fn rawBody(self: Message, allocator: std.mem.Allocator) ![]const u8 { // Get the message body using GMime const body = gmime.g_mime_message_get_body(self.message); @@ -234,6 +277,62 @@ pub const Message = struct { return error.NoTextContent; } + pub fn getTextAndHtmlBodyVersions(self: Message, allocator: std.mem.Allocator) !struct { text: []const u8, html: []const u8 } { + const body = gmime.g_mime_message_get_body(self.message); + if (body == null) return error.NoMessageBody; + + var text_content: ?[]const u8 = null; + var html_content: ?[]const u8 = null; + + // Check if it's a multipart message + if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) { + const multipart: *gmime.GMimeMultipart = @ptrCast(body); + text_content = try findTextInMultipart(multipart, allocator); + html_content = try findHtmlInMultipart(multipart, allocator); + } else if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) { + const text_part: *gmime.GMimeTextPart = @ptrCast(body); + const text = gmime.g_mime_text_part_get_text(text_part); + if (text != null) { + defer gmime.g_free(text); + const content_type_obj = gmime.g_mime_object_get_content_type(body); + const mime_type = if (content_type_obj != null) + gmime.g_mime_content_type_get_mime_type(content_type_obj) + else + null; + const ct = if (mime_type != null) std.mem.span(mime_type) else "text/plain"; + const content = try allocator.dupe(u8, std.mem.span(text)); + if (std.mem.eql(u8, ct, "text/html")) { + html_content = content; + } else { + text_content = content; + } + } + } + + // Ensure we have both text and html versions + if (text_content == null and html_content != null) { + text_content = try textTransformation.htmlToText(allocator, html_content.?); + } + if (html_content == null and text_content != null) { + html_content = try std.fmt.allocPrint(allocator, + \\ + \\No HTML version available + \\No HTML version available. Text is:
{s}
+ \\ + , .{text_content.?}); + } + + return .{ + .text = text_content orelse "no text or html versions available", + .html = html_content orelse + \\ + \\No text or HTML version available + \\No text or HTML versions available + \\ + , + }; + } + pub fn getHeader(self: Message, name: []const u8) ?[]const u8 { const name_z = std.mem.sliceTo(name, 0); const header = gmime.g_mime_message_get_header(self.message, name_z.ptr); diff --git a/src/root.zig b/src/root.zig index b69a006..d7c002f 100644 --- a/src/root.zig +++ b/src/root.zig @@ -206,18 +206,26 @@ pub const NotmuchDb = struct { bcc: ?[]const u8, date: ?[]const u8, subject: ?[]const u8, - content: []const u8, - content_type: []const u8, + text_content: []const u8, + html_content: []const u8, attachments: []Email.Message.AttachmentInfo, message_id: []const u8, pub fn deinit(self: MessageDetail, allocator: std.mem.Allocator) void { - allocator.free(self.content); + if (self.from) |f| allocator.free(f); + if (self.to) |t| allocator.free(t); + if (self.cc) |c| allocator.free(c); + if (self.bcc) |b| allocator.free(b); + if (self.date) |d| allocator.free(d); + if (self.subject) |s| allocator.free(s); + allocator.free(self.text_content); + allocator.free(self.html_content); for (self.attachments) |att| { allocator.free(att.filename); allocator.free(att.content_type); } allocator.free(self.attachments); + allocator.free(self.message_id); } }; @@ -239,20 +247,28 @@ pub const NotmuchDb = struct { const email_msg = try self.email.openMessage(filename_z); defer email_msg.deinit(); - const content_info = try email_msg.getContent(self.allocator); + const content_info = try email_msg.getTextAndHtmlBodyVersions(self.allocator); const attachments = try email_msg.getAttachments(self.allocator); + const from = if (notmuch_msg.getHeader("from") catch null) |h| try self.allocator.dupe(u8, h) else null; + const to = if (notmuch_msg.getHeader("to") catch null) |h| try self.allocator.dupe(u8, h) else null; + const cc = if (notmuch_msg.getHeader("cc") catch null) |h| try self.allocator.dupe(u8, h) else null; + const bcc = if (notmuch_msg.getHeader("bcc") catch null) |h| try self.allocator.dupe(u8, h) else null; + const date = if (notmuch_msg.getHeader("date") catch null) |h| try self.allocator.dupe(u8, h) else null; + const subject = if (notmuch_msg.getHeader("subject") catch null) |h| try self.allocator.dupe(u8, h) else null; + const msg_id = try self.allocator.dupe(u8, notmuch_msg.getMessageId()); + return .{ - .from = notmuch_msg.getHeader("from") catch null, - .to = notmuch_msg.getHeader("to") catch null, - .cc = notmuch_msg.getHeader("cc") catch null, - .bcc = notmuch_msg.getHeader("bcc") catch null, - .date = notmuch_msg.getHeader("date") catch null, - .subject = notmuch_msg.getHeader("subject") catch null, - .content = content_info.content, - .content_type = content_info.content_type, + .from = from, + .to = to, + .cc = cc, + .bcc = bcc, + .date = date, + .subject = subject, + .text_content = content_info.text, + .html_content = content_info.html, .attachments = attachments, - .message_id = notmuch_msg.getMessageId(), + .message_id = msg_id, }; } }; @@ -378,9 +394,9 @@ test "can get message details with content" { try std.testing.expect(msg_detail.from != null); try std.testing.expect(msg_detail.subject != null); - // Verify content was extracted - try std.testing.expect(msg_detail.content.len > 0); - try std.testing.expectEqualStrings("text/html", msg_detail.content_type); + // Verify content was extracted - we should always have both text and html + try std.testing.expect(msg_detail.text_content.len >= 0); + try std.testing.expect(msg_detail.html_content.len > 0); // This message has no attachments try std.testing.expectEqual(@as(usize, 0), msg_detail.attachments.len); diff --git a/src/textTransformation.zig b/src/textTransformation.zig new file mode 100644 index 0000000..e47c685 --- /dev/null +++ b/src/textTransformation.zig @@ -0,0 +1,119 @@ +const std = @import("std"); + +pub fn htmlToText(allocator: std.mem.Allocator, html: []const u8) ![]const u8 { + var result = std.ArrayList(u8){}; + errdefer result.deinit(allocator); + + var i: usize = 0; + var in_tag = false; + var in_script = false; + var in_style = false; + + while (i < html.len) { + if (html[i] == '<') { + in_tag = true; + if (i + 7 <= html.len and std.mem.eql(u8, html[i .. i + 7], "")) { + in_script = false; + i += 8; + } else if (i + 8 <= html.len and std.mem.eql(u8, html[i .. i + 8], "")) { + in_style = false; + i += 7; + } else if ((i + 3 <= html.len and std.mem.eql(u8, html[i .. i + 3], "")) or + (i + 4 <= html.len and std.mem.eql(u8, html[i .. i + 4], "') { + in_tag = false; + i += 1; + continue; + } + + if (!in_tag and !in_script and !in_style) { + try result.append(allocator, html[i]); + } + i += 1; + } + + return result.toOwnedSlice(allocator); +} + +test "htmlToText - simple text" { + const allocator = std.testing.allocator; + const html = "

Hello World

"; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("\nHello World", text); +} + +test "htmlToText - strips script tags" { + const allocator = std.testing.allocator; + const html = "

Before

After

"; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("\nBefore\nAfter", text); +} + +test "htmlToText - strips style tags" { + const allocator = std.testing.allocator; + const html = "

Content

"; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("\nContent", text); +} + +test "htmlToText - handles br tags" { + const allocator = std.testing.allocator; + const html = "Line 1
Line 2
Line 3"; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("Line 1\nLine 2\nLine 3", text); +} + +test "htmlToText - handles div tags" { + const allocator = std.testing.allocator; + const html = "
First
Second
"; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("\nFirst\nSecond", text); +} + +test "htmlToText - complex html" { + const allocator = std.testing.allocator; + const html = + \\ + \\ + \\ + \\

Hello

+ \\ + \\
World
+ \\ + \\ + ; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("\n\n\n\nHello\n\nWorld\n\n", text); +} + +test "htmlToText - empty string" { + const allocator = std.testing.allocator; + const html = ""; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("", text); +} + +test "htmlToText - plain text" { + const allocator = std.testing.allocator; + const html = "Just plain text"; + const text = try htmlToText(allocator, html); + defer allocator.free(text); + try std.testing.expectEqualStrings("Just plain text", text); +} diff --git a/static/index.html b/static/index.html index 5267b14..a8fea0e 100644 --- a/static/index.html +++ b/static/index.html @@ -4,7 +4,36 @@ Zetviel - +
@@ -23,6 +52,140 @@
- +