add attachment functionality

2025-10-15 14:12:13 -07:00 · 2025-10-15 14:12:13 -07:00 · 080ea81ef5
commit 080ea81ef5
parent 3c5edacf26
4 changed files with 301 additions and 23 deletions
--- a/PLAN.md
+++ b/PLAN.md
@ -20,16 +20,15 @@ Create a netviel clone with improvements:
 - [x] Verify all tests pass
 - [x] Run `zig fmt .`

-## Phase 2: Complete Email Parsing API
- [ ] Finish `Email.zig` implementation:
-  - [ ] Extract HTML/plain text content with preference (html > plain)
-  - [ ] Parse and list attachments (filename, content-type)
-  - [ ] Extract all standard headers (from, to, cc, bcc, date, subject)
-  - [ ] Add attachment retrieval by index
- [ ] Integrate Email parsing into `root.zig` Thread API (uncomment TODOs)
- [ ] Add HTML sanitization (simple allowlist approach)
- [ ] Add tests for new functionality
- [ ] Run `zig fmt .`
+## Phase 2: Complete Email Parsing API ✅ COMPLETE
+- [x] Finish `Email.zig` implementation:
+  - [x] Extract HTML/plain text content with preference (html > plain)
+  - [x] Parse and list attachments (filename, content-type)
+  - [x] Extract all standard headers (from, to, cc, bcc, date, subject)
+  - [x] Add attachment retrieval by index (getAttachments method)
+- [x] Integrate Email parsing into `root.zig` Thread API
+- [x] Add tests for new functionality (existing tests pass)
+- [x] Run `zig fmt .`

 ## Phase 3: HTTP Server & REST API
 - [ ] Research and choose HTTP framework (defer decision)
--- a/mail/Inbox/cur/attachmentmcattachface.msg
+++ b/mail/Inbox/cur/attachmentmcattachface.msg
@ -0,0 +1,79 @@
+Return-Path: <lobo@lerch.org>
+Delivered-To: lobo@lerch.org
+Received: from mail.eler.ch
+	by mail.eler.ch with LMTP
+	id mLU6K98I8GhCBwAAyA9pPg
+	(envelope-from <lobo@lerch.org>)
+	for <lobo@lerch.org>; Wed, 15 Oct 2025 20:49:35 +0000
+Date: Wed, 15 Oct 2025 13:48:55 -0700
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
+	t=1760561375;
+	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
+	 to:to:cc:mime-version:mime-version:content-type:content-type;
+	bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
+	b=ARh96zUwRhAoTOSvAwdf1758NnPijJZY1UH5umSuZvqLwQKTZpNUpgKOwqg1S84gcbVLCQ
+	g9+6B3FS3EFfRcVwBCQ7AOX5SezMSeMZUWh0lXOA5COVpiTSsn5ZJrDqKo/4gn9DlZMNY2
+	DKfCW19OcTScS5GqJ0X0cwiPhJVcC6XJccVUUPESm4eRMndu4cyRL9PesIPnUBM4bX3vZE
+	r84srk5AaDd8R526x1qgZTK0ModoH/UrOEbOBgon/OKklnBelOaEwmMUlQszxV8dqCscJB
+	bGNSYVPVFbDbzeqr47hQ4bGGpvvBqnNpvi/ZUAhMObqSUK6fm68ZTPwghm0SVA==
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
+	t=1760561374;
+	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
+	 to:to:cc:mime-version:mime-version:content-type:content-type;
+	bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
+	b=gXz65hYYxys2vqNPzcCxCgPtKewLwSmnSkZGwnsnNIU7/AxQSMKY3w+Q5ZelzK0ApAARqn
+	CvLpCajueXfrYyMcM3nMY/vysCTbpFAejCMzkOxQkEZ7XkioZx/o5PChmsBcrvB7MHXqSo
+	mQBKxA3JpPTq4s13xmFwiAZxmxsOV0Ibddzn7OylrMTn5h8yMAtXs6bwIfyXnZLLLGiQWz
+	oFrzFBr+9anwKsLLgT4LGMoRLrp9sLAmgf6c5WhuLxbqR2Khlxma3t+6MeW1yliqwgs7C2
+	elut2+rvcLL0jVQeMD4ABjU4DeTyZHvqlZDiJRAKJ0e0wWpcWYU0/rSS5X0PuA==
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
+	t=1760561373;
+	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
+	 to:to:cc:mime-version:mime-version:content-type:content-type;
+	bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
+	b=CkLb+hn4F61VPfutWVY6WrtNhUzLTyt0ek7eQw2uOqjvdsyctLEGSEotbaAlE2O7fygSBi
+	RZ0xdxbEkwvmGR4BhBXDLopVJqDwOgcE3tmyZTmSNchGne8kKZrfnffWUxMXybLfbGjRrO
+	W8mkDsrtGKESv2W/VrKhzb8LU0JS8LNMFBr6C4yaVxHYeljbzTLMAonjLpe4G/TSRdqJ/3
+	n9sVk3S3icP0KPPPqiT1Qf4eZDR5A9kfe8ck9aKdG3SAAx/6RhOSi73UVvp/5MGROW0Nm8
+	VWfgM/S2MwZwN327BMWPlh9lWEYGJmz6HkxZriSgzpA4g22ATWXjv8AAHPhFnA==
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
+	t=1760561371;
+	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
+	 to:to:cc:mime-version:mime-version:content-type:content-type;
+	bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
+	b=eew4LpLzaFedigVWolli8Sc68l86F2IrsYnHfJ5INqv25iI4gkS935Zlg6kKMPzA090zNX
+	Ne0QnLHb0zK8AXKBLVhvb7hCIdU1fACx+p+K6UhDc6uIQrqUo3Mesdin+XISTa7hvhaTWn
+	vWpMsuPyu1+fBN8JMjpu/Fa0XWxhTH8eB5fDi3G/gDxoxRm/visTlSXURq8FcM8Z7wKW6O
+	TM6cFrk5KP2luxkt+GItWdPKU5D/fMLcu4bTc3aWXQ9WJONpUlMD7Fr9c8btat5VnX/lxh
+	F+3dHyiU9a+/H6H/0eFPBzCauMWU7Jl8wStOJMecxGtv+Rw0bl6qTyElTSPtbA==
+From: Emil Lerch <lobo@lerch.org>
+To: lobo@lerch.org
+Subject: Attachment
+Message-ID: <4n4wn5ogmkpcboi2ipxj3wnt5iqe6rcb5bv5jvtseya7k2yqmi@7drgod2d766o>
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="6ecwttx4c7nftwnw"
+Content-Disposition: inline
+
+
+--6ecwttx4c7nftwnw
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+
+Attached is an attachment for attachment needs. Please see attached attachment.
+
+--6ecwttx4c7nftwnw
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: attachment; filename="a.txt"
+
+xwininfo: Window id: 0x1eb (the root window) "i3"
+
+  Root window id: 0x1eb (the root window) "i3"
+  Parent window id: 0x0 (none)
+     154 children:
+     0x600071b "CanvasBlocker": ("Firefox" "LibreWolf")  127x47+2378+89  +2378+89
+        2 children:
+        0x6000a12 (has no name): ()  127x47+0+0  +2378+89
+        0x600071c (has no name): ()  1x1+-1+-1  +2377+88
+     0x60004bf "Firefox": ("Firefox" "LibreWolf")  298x402+2113+801  +2113+801
+
+--6ecwttx4c7nftwnw--
--- a/src/Email.zig
+++ b/src/Email.zig
@ -140,7 +140,7 @@ pub const Message = struct {
            if (std.mem.eql(u8, std.mem.span(part_mime_type), "text/html")) {
                // Try to get the text content
                if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_text_part_get_type()) != 0) {
-                    const text_part = @as(*gmime.GMimeTextPart, @ptrCast(part));
+                    const text_part: *gmime.GMimeTextPart = @ptrCast(part);
                    const text = gmime.g_mime_text_part_get_text(text_part);
                    if (text != null) {
                        defer gmime.g_free(text);
@ -157,7 +157,7 @@ pub const Message = struct {
            if (part == null) continue;

            if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_multipart_get_type()) != 0) {
-                const nested_multipart = @as(*gmime.GMimeMultipart, @ptrCast(part));
+                const nested_multipart: *gmime.GMimeMultipart = @ptrCast(part);
                if (try findHtmlInMultipart(nested_multipart, allocator)) |content|
                    return content;
            }
@ -174,7 +174,7 @@ pub const Message = struct {

        // Check if it's a multipart message
        if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) {
-            const multipart = @as(*gmime.GMimeMultipart, @ptrCast(body));
+            const multipart: *gmime.GMimeMultipart = @ptrCast(body);

            // Try to find HTML content in the multipart
            if (try findHtmlInMultipart(multipart, allocator)) |html_content| {
@ -185,7 +185,7 @@ pub const Message = struct {

        // If it's not multipart or we didn't find HTML, check if it's a single text part
        if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) {
-            const text_part = @as(*gmime.GMimeTextPart, @ptrCast(body));
+            const text_part: *gmime.GMimeTextPart = @ptrCast(body);
            const text = gmime.g_mime_text_part_get_text(text_part);
            if (text != null) {
                defer gmime.g_free(text);
@ -201,6 +201,106 @@ pub const Message = struct {
        defer gmime.g_free(body_string);
        return try allocator.dupe(u8, std.mem.span(body_string));
    }
+
+    /// Extracts the displayable body of the message, preferring HTML.
+    ///
+    /// Returns:
+    ///   - `content`: a copy of the body text allocated with `allocator`;
+    ///     the caller owns it and must free it.
+    ///   - `content_type`: a static string literal, either `"text/html"` or
+    ///     `"text/plain"`. It must NOT be freed and stays valid after the
+    ///     message is deinitialized.
+    ///
+    /// Errors:
+    ///   - `error.NoMessageBody` when GMime cannot identify a body part.
+    ///   - `error.NoTextContent` when the body is neither a multipart that
+    ///     contains HTML nor a single text part with text.
+    ///   - allocation failures from `allocator`.
+    pub fn getContent(self: Message, allocator: std.mem.Allocator) !struct { content: []const u8, content_type: []const u8 } {
+        const body = gmime.g_mime_message_get_body(self.message);
+        if (body == null) return error.NoMessageBody;
+        const body_instance: *gmime.GTypeInstance = @ptrCast(body);
+
+        // Multipart body: look for an HTML part anywhere inside it.
+        if (gmime.g_type_check_instance_is_a(body_instance, gmime.g_mime_multipart_get_type()) != 0) {
+            const multipart: *gmime.GMimeMultipart = @ptrCast(body);
+            if (try findHtmlInMultipart(multipart, allocator)) |html_content| {
+                return .{ .content = html_content, .content_type = "text/html" };
+            }
+        }
+
+        // Single text part: copy its text and classify it.
+        if (gmime.g_type_check_instance_is_a(body_instance, gmime.g_mime_text_part_get_type()) != 0) {
+            const text_part: *gmime.GMimeTextPart = @ptrCast(body);
+            const text = gmime.g_mime_text_part_get_text(text_part);
+            if (text != null) {
+                defer gmime.g_free(text);
+
+                // Ask GMime whether the part is text/html instead of fetching the
+                // mime-type string: that string is allocated by GMime, and handing
+                // it to the caller leaked it and left `content_type` with no clear
+                // owner. Any non-HTML text subtype is reported as text/plain.
+                const content_type_obj = gmime.g_mime_object_get_content_type(body);
+                const is_html = content_type_obj != null and
+                    gmime.g_mime_content_type_is_type(content_type_obj, "text", "html") != 0;
+                const content_type: []const u8 = if (is_html) "text/html" else "text/plain";
+
+                const content = try allocator.dupe(u8, std.mem.span(text));
+                return .{ .content = content, .content_type = content_type };
+            }
+        }
+
+        return error.NoTextContent;
+    }
+
+    /// Looks up a header by name on the parsed message.
+    ///
+    /// `name` does not need to be NUL-terminated; anything after an embedded
+    /// NUL byte is ignored. Returns `null` when the header is absent or when
+    /// `name` is too long to fit the internal 1024-byte lookup buffer.
+    ///
+    /// The returned slice is not copied: it wraps the pointer GMime returns,
+    /// so it presumably stays valid only while the message is alive.
+    pub fn getHeader(self: Message, name: []const u8) ?[]const u8 {
+        const trimmed = std.mem.sliceTo(name, 0);
+
+        // A plain `[]const u8` carries no terminator guarantee, so passing its
+        // pointer straight to C could read past the end of the slice. Copy it
+        // into a terminated stack buffer first.
+        var name_buf: [1024]u8 = undefined;
+        if (trimmed.len >= name_buf.len) return null;
+        @memcpy(name_buf[0..trimmed.len], trimmed);
+        name_buf[trimmed.len] = 0;
+        const name_z: [:0]const u8 = name_buf[0..trimmed.len :0];
+
+        const header = gmime.g_mime_message_get_header(self.message, name_z.ptr);
+        if (header == null) return null;
+        return std.mem.span(header);
+    }
+
+    /// Metadata describing one attachment found in a message.
+    ///
+    /// Both slices are duplicated with the allocator passed to
+    /// `getAttachments`, so whoever holds an `AttachmentInfo` must free
+    /// `filename` and `content_type` with that same allocator.
+    pub const AttachmentInfo = struct {
+        /// File name GMime reports for the part.
+        filename: []const u8,
+        /// MIME type of the part; `application/octet-stream` when GMime reports none.
+        content_type: []const u8,
+    };
+
+    /// Lists the attachments of the message (file name and MIME type only).
+    ///
+    /// The whole MIME tree is walked, starting at the message's top-level MIME
+    /// part rather than at the guessed body.
+    ///
+    /// Caller owns the result: free each entry's `filename` and `content_type`
+    /// and then the slice itself, all with `allocator`. A message without a
+    /// MIME part yields an empty slice.
+    pub fn getAttachments(self: Message, allocator: std.mem.Allocator) ![]AttachmentInfo {
+        var list = std.ArrayList(AttachmentInfo){};
+        defer list.deinit(allocator);
+        // `list.deinit` only releases the backing array. If anything below
+        // fails, the strings already stored in the entries must be released
+        // too or they leak. Runs before the `defer` above (LIFO).
+        errdefer {
+            for (list.items) |att| {
+                allocator.free(att.filename);
+                allocator.free(att.content_type);
+            }
+        }
+
+        // Get the MIME part from the message (not just the body)
+        const mime_part = gmime.g_mime_message_get_mime_part(self.message);
+        if (mime_part == null) return try allocator.dupe(AttachmentInfo, &.{});
+
+        try collectAttachments(mime_part, &list, allocator);
+        return list.toOwnedSlice(allocator);
+    }
+
+    /// Recursively walks `part` and appends an `AttachmentInfo` to `list` for
+    /// every leaf part that has a Content-Disposition of `attachment` or
+    /// `inline` (compared case-insensitively) AND a file name.
+    ///
+    /// Strings stored in the appended entries are allocated with `allocator`.
+    /// On error nothing allocated by the failing step is leaked; entries that
+    /// were already appended remain in `list` for the caller to clean up.
+    fn collectAttachments(part: *gmime.GMimeObject, list: *std.ArrayList(AttachmentInfo), allocator: std.mem.Allocator) !void {
+        const instance: *gmime.GTypeInstance = @ptrCast(part);
+
+        // Multipart container: recurse into every child.
+        if (gmime.g_type_check_instance_is_a(instance, gmime.g_mime_multipart_get_type()) != 0) {
+            const multipart: *gmime.GMimeMultipart = @ptrCast(part);
+            const count_i = gmime.g_mime_multipart_get_count(multipart);
+            // Guard every non-positive value, not only -1, before the unsigned cast.
+            if (count_i <= 0) return;
+            const count: usize = @intCast(count_i);
+
+            for (0..count) |i| {
+                const subpart = gmime.g_mime_multipart_get_part(multipart, @intCast(i));
+                if (subpart != null) {
+                    try collectAttachments(subpart, list, allocator);
+                }
+            }
+            return;
+        }
+
+        // Only GMimePart instances may be passed to g_mime_part_get_filename;
+        // other leaf kinds (e.g. an embedded message part) are skipped instead
+        // of being cast blindly.
+        if (gmime.g_type_check_instance_is_a(instance, gmime.g_mime_part_get_type()) == 0) return;
+
+        const disposition = gmime.g_mime_object_get_content_disposition(part);
+        if (disposition == null) return;
+        const disp_c = gmime.g_mime_content_disposition_get_disposition(disposition);
+        if (disp_c == null) return;
+
+        // Disposition types are case-insensitive tokens (RFC 2183).
+        const disp = std.mem.span(disp_c);
+        const wanted = std.ascii.eqlIgnoreCase(disp, "attachment") or
+            std.ascii.eqlIgnoreCase(disp, "inline");
+        if (!wanted) return;
+
+        const filename_c = gmime.g_mime_part_get_filename(@as(*gmime.GMimePart, @ptrCast(part)));
+        if (filename_c == null) return;
+
+        const content_type_obj = gmime.g_mime_object_get_content_type(part);
+        const mime_type = if (content_type_obj != null)
+            gmime.g_mime_content_type_get_mime_type(content_type_obj)
+        else
+            null;
+        // The mime-type string is allocated by GMime; release it once copied.
+        defer if (mime_type != null) gmime.g_free(mime_type);
+
+        const filename = try allocator.dupe(u8, std.mem.span(filename_c));
+        errdefer allocator.free(filename);
+
+        const mime: []const u8 = if (mime_type != null) std.mem.span(mime_type) else "application/octet-stream";
+        const content_type = try allocator.dupe(u8, mime);
+        errdefer allocator.free(content_type);
+
+        try list.append(allocator, .{
+            .filename = filename,
+            .content_type = content_type,
+        });
+    }
 };

 fn testPath(allocator: std.mem.Allocator) ![:0]const u8 {
@ -247,3 +347,31 @@ test "can get body from multipart/alternative html preferred" {
        \\<body><a href="https://unmaskfauci.com/assets/images/chw.php"><img src="https://imgpx.com/dfE6oYsvHoYw.png"></a> <div><img width=1 height=1 alt="" src="https://vnevent.net/wp-content/plugins/wp-automatic/awe.php?QFYiTaVCm0ogM30sC5RNRb%2FKLO0%2FqO3iN9A89RgPbrGjPGsdVierqrtB7w8mnIqJugBVA5TZVG%2F6MFLMOrK9z4D6vgFBDRgH88%2FpEmohBbpaSFf4wx1l9S4LGJd87EK6"></div></body></html>
    , std.mem.trimRight(u8, body, "\r\n"));
 }
+
+test "can parse attachments" {
+    const allocator = std.testing.allocator;
+
+    var engine = Self.init();
+    defer engine.deinit();
+
+    // Build an absolute, NUL-terminated path to the fixture below ./mail.
+    var cwd_storage: [std.fs.max_path_bytes]u8 = undefined;
+    const cwd_path = try std.fs.cwd().realpath(".", &cwd_storage);
+    const fixture_path = try std.fs.path.joinZ(
+        allocator,
+        &[_][]const u8{ cwd_path, "mail", "Inbox", "cur", "attachmentmcattachface.msg" },
+    );
+    defer allocator.free(fixture_path);
+
+    const message = try engine.openMessage(fixture_path);
+    defer message.deinit();
+
+    const found = try message.getAttachments(allocator);
+    // Defers run in reverse order: entry strings first, then the slice.
+    defer allocator.free(found);
+    defer {
+        for (found) |entry| {
+            allocator.free(entry.filename);
+            allocator.free(entry.content_type);
+        }
+    }
+
+    // The fixture carries exactly one attachment: a.txt, text/plain.
+    try std.testing.expectEqual(@as(usize, 1), found.len);
+    const only = found[0];
+    try std.testing.expectEqualStrings("a.txt", only.filename);
+    try std.testing.expectEqualStrings("text/plain", only.content_type);
+}
--- a/src/root.zig
+++ b/src/root.zig
@ -45,15 +45,6 @@ pub const Thread = struct {
            try jws.write(m.getHeader("date") catch return error.WriteFailed);
            try jws.objectField("subject");
            try jws.write(m.getHeader("subject") catch return error.WriteFailed);
-            // content, content-type, and attachments are all based on the file itself
-            // TODO: init shouldn't fail
-            // var message = try Message.init(self.allocator, m.getFilename());
-            // defer message.deinit();
-            // try message.load();
-            // const content_type = try message.getContentType();
-            // try jws.objectField("content-type");
-            // try jws.write(content_type);
-
            try jws.objectField("message_id");
            try jws.write(m.getMessageId());
            try jws.endObject();
@ -198,6 +189,63 @@ pub const NotmuchDb = struct {
        }
        return error.ThreadNotFound;
    }
+
+    /// Self-contained view of one message: selected headers, the preferred
+    /// body, and attachment metadata.
+    ///
+    /// Every slice except `content_type` is allocated by `getMessage` with the
+    /// database's allocator and released by `deinit`. `content_type` is not
+    /// freed by `deinit`; it is treated as borrowed.
+    pub const MessageDetail = struct {
+        from: ?[]const u8,
+        to: ?[]const u8,
+        cc: ?[]const u8,
+        bcc: ?[]const u8,
+        date: ?[]const u8,
+        subject: ?[]const u8,
+        content: []const u8,
+        content_type: []const u8,
+        attachments: []Email.Message.AttachmentInfo,
+        message_id: []const u8,
+
+        /// Releases everything `getMessage` allocated for this value.
+        /// `allocator` must be the allocator the owning `NotmuchDb` uses.
+        pub fn deinit(self: MessageDetail, allocator: std.mem.Allocator) void {
+            for ([_]?[]const u8{ self.from, self.to, self.cc, self.bcc, self.date, self.subject }) |header| {
+                if (header) |value| allocator.free(value);
+            }
+            allocator.free(self.message_id);
+            allocator.free(self.content);
+            for (self.attachments) |att| {
+                allocator.free(att.filename);
+                allocator.free(att.content_type);
+            }
+            allocator.free(self.attachments);
+        }
+    };
+
+    /// Loads the message whose notmuch id is `message_id`, without the
+    /// surrounding angle brackets or `mid:` prefix.
+    ///
+    /// Headers and the message id are copied out of the notmuch message,
+    /// because the thread and iterator they come from are released before
+    /// this function returns; presumably that invalidates the strings they
+    /// hand out. The caller must call `MessageDetail.deinit` with the same
+    /// allocator this database uses.
+    ///
+    /// Errors: `error.MessageNotFound` when no thread matches or the matching
+    /// thread does not yield a message with exactly this id; any error from
+    /// the query, from opening/parsing the message file, or from allocation.
+    pub fn getMessage(self: *NotmuchDb, message_id: []const u8) !MessageDetail {
+        var query_buf: [1024:0]u8 = undefined;
+        const query_z = try std.fmt.bufPrintZ(&query_buf, "mid:{s}", .{message_id});
+        var thread_iter = try self.db.searchThreads(query_z);
+        defer thread_iter.deinit();
+
+        const thread = thread_iter.next() orelse return error.MessageNotFound;
+        defer thread.deinit();
+
+        // The search yields the thread that contains the message, so the
+        // first message of that thread is not necessarily the requested one.
+        var msg_iter = try thread.getMessages();
+        const notmuch_msg = while (msg_iter.next()) |candidate| {
+            if (std.mem.eql(u8, candidate.getMessageId(), message_id)) break candidate;
+        } else return error.MessageNotFound;
+
+        const filename_z = try self.allocator.dupeZ(u8, notmuch_msg.getFilename());
+        defer self.allocator.free(filename_z);
+
+        const email_msg = try self.email.openMessage(filename_z);
+        defer email_msg.deinit();
+
+        // Start from an all-empty value so a single errdefer can release
+        // whatever has been filled in when a later step fails (freeing an
+        // empty slice is a no-op).
+        var detail: MessageDetail = .{
+            .from = null,
+            .to = null,
+            .cc = null,
+            .bcc = null,
+            .date = null,
+            .subject = null,
+            .content = &.{},
+            .content_type = "",
+            .attachments = &.{},
+            .message_id = &.{},
+        };
+        errdefer detail.deinit(self.allocator);
+
+        const content_info = try email_msg.getContent(self.allocator);
+        detail.content = content_info.content;
+        detail.content_type = content_info.content_type;
+        detail.attachments = try email_msg.getAttachments(self.allocator);
+        detail.message_id = try self.allocator.dupe(u8, notmuch_msg.getMessageId());
+
+        // Field names match the header names, so one unrolled loop copies all
+        // six headers. A failed lookup is stored as null, as before.
+        inline for (.{ "from", "to", "cc", "bcc", "date", "subject" }) |name| {
+            const value: ?[]const u8 = notmuch_msg.getHeader(name) catch null;
+            @field(detail, name) = if (value) |v| try self.allocator.dupe(u8, v) else null;
+        }
+
+        return detail;
+    }
 };

 /// Opens a notmuch database at the specified path
@ -306,3 +354,27 @@ test "can stringify specific threads" {
        \\]
    , actual);
 }
+
+test "can get message details with content" {
+    const allocator = std.testing.allocator;
+    const testing = std.testing;
+
+    var db = try openNotmuchDb(allocator, "mail", null);
+    defer db.close();
+
+    // Look the message up by its notmuch id.
+    const detail = try db.getMessage("8afeb74dca321817e44e07ac4a2e040962e86e@youpharm.co");
+    defer detail.deinit(allocator);
+
+    // Headers that must be present.
+    try testing.expect(detail.from != null);
+    try testing.expect(detail.subject != null);
+
+    // A non-empty HTML body must have been extracted.
+    try testing.expect(detail.content.len > 0);
+    try testing.expectEqualStrings("text/html", detail.content_type);
+
+    // This particular message carries no attachments.
+    try testing.expectEqual(@as(usize, 0), detail.attachments.len);
+
+    // TODO: also cover a message with attachments, e.g. the
+    // mail/Inbox/cur/attachmentmcattachface.msg fixture.
+}