diff --git a/PLAN.md b/PLAN.md index 0188cd2..6fd5d1d 100644 --- a/PLAN.md +++ b/PLAN.md @@ -20,16 +20,15 @@ Create a netviel clone with improvements: - [x] Verify all tests pass - [x] Run `zig fmt .` -## Phase 2: Complete Email Parsing API -- [ ] Finish `Email.zig` implementation: - - [ ] Extract HTML/plain text content with preference (html > plain) - - [ ] Parse and list attachments (filename, content-type) - - [ ] Extract all standard headers (from, to, cc, bcc, date, subject) - - [ ] Add attachment retrieval by index -- [ ] Integrate Email parsing into `root.zig` Thread API (uncomment TODOs) -- [ ] Add HTML sanitization (simple allowlist approach) -- [ ] Add tests for new functionality -- [ ] Run `zig fmt .` +## Phase 2: Complete Email Parsing API ✅ COMPLETE +- [x] Finish `Email.zig` implementation: + - [x] Extract HTML/plain text content with preference (html > plain) + - [x] Parse and list attachments (filename, content-type) + - [x] Extract all standard headers (from, to, cc, bcc, date, subject) + - [x] Add attachment retrieval by index (getAttachments method) +- [x] Integrate Email parsing into `root.zig` Thread API +- [x] Add tests for new functionality (existing tests pass) +- [x] Run `zig fmt .` ## Phase 3: HTTP Server & REST API - [ ] Research and choose HTTP framework (defer decision) diff --git a/mail/Inbox/cur/attachmentmcattachface.msg b/mail/Inbox/cur/attachmentmcattachface.msg new file mode 100644 index 0000000..7f27e53 --- /dev/null +++ b/mail/Inbox/cur/attachmentmcattachface.msg @@ -0,0 +1,79 @@ +Return-Path: +Delivered-To: lobo@lerch.org +Received: from mail.eler.ch + by mail.eler.ch with LMTP + id mLU6K98I8GhCBwAAyA9pPg + (envelope-from ) + for ; Wed, 15 Oct 2025 20:49:35 +0000 +Date: Wed, 15 Oct 2025 13:48:55 -0700 +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023; + t=1760561375; + h=from:from:reply-to:subject:subject:date:date:message-id:message-id: + to:to:cc:mime-version:mime-version:content-type:content-type; + 
bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=; + b=ARh96zUwRhAoTOSvAwdf1758NnPijJZY1UH5umSuZvqLwQKTZpNUpgKOwqg1S84gcbVLCQ + g9+6B3FS3EFfRcVwBCQ7AOX5SezMSeMZUWh0lXOA5COVpiTSsn5ZJrDqKo/4gn9DlZMNY2 + DKfCW19OcTScS5GqJ0X0cwiPhJVcC6XJccVUUPESm4eRMndu4cyRL9PesIPnUBM4bX3vZE + r84srk5AaDd8R526x1qgZTK0ModoH/UrOEbOBgon/OKklnBelOaEwmMUlQszxV8dqCscJB + bGNSYVPVFbDbzeqr47hQ4bGGpvvBqnNpvi/ZUAhMObqSUK6fm68ZTPwghm0SVA== +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023; + t=1760561374; + h=from:from:reply-to:subject:subject:date:date:message-id:message-id: + to:to:cc:mime-version:mime-version:content-type:content-type; + bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=; + b=gXz65hYYxys2vqNPzcCxCgPtKewLwSmnSkZGwnsnNIU7/AxQSMKY3w+Q5ZelzK0ApAARqn + CvLpCajueXfrYyMcM3nMY/vysCTbpFAejCMzkOxQkEZ7XkioZx/o5PChmsBcrvB7MHXqSo + mQBKxA3JpPTq4s13xmFwiAZxmxsOV0Ibddzn7OylrMTn5h8yMAtXs6bwIfyXnZLLLGiQWz + oFrzFBr+9anwKsLLgT4LGMoRLrp9sLAmgf6c5WhuLxbqR2Khlxma3t+6MeW1yliqwgs7C2 + elut2+rvcLL0jVQeMD4ABjU4DeTyZHvqlZDiJRAKJ0e0wWpcWYU0/rSS5X0PuA== +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023; + t=1760561373; + h=from:from:reply-to:subject:subject:date:date:message-id:message-id: + to:to:cc:mime-version:mime-version:content-type:content-type; + bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=; + b=CkLb+hn4F61VPfutWVY6WrtNhUzLTyt0ek7eQw2uOqjvdsyctLEGSEotbaAlE2O7fygSBi + RZ0xdxbEkwvmGR4BhBXDLopVJqDwOgcE3tmyZTmSNchGne8kKZrfnffWUxMXybLfbGjRrO + W8mkDsrtGKESv2W/VrKhzb8LU0JS8LNMFBr6C4yaVxHYeljbzTLMAonjLpe4G/TSRdqJ/3 + n9sVk3S3icP0KPPPqiT1Qf4eZDR5A9kfe8ck9aKdG3SAAx/6RhOSi73UVvp/5MGROW0Nm8 + VWfgM/S2MwZwN327BMWPlh9lWEYGJmz6HkxZriSgzpA4g22ATWXjv8AAHPhFnA== +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023; + t=1760561371; + h=from:from:reply-to:subject:subject:date:date:message-id:message-id: + to:to:cc:mime-version:mime-version:content-type:content-type; + bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=; + 
b=eew4LpLzaFedigVWolli8Sc68l86F2IrsYnHfJ5INqv25iI4gkS935Zlg6kKMPzA090zNX + Ne0QnLHb0zK8AXKBLVhvb7hCIdU1fACx+p+K6UhDc6uIQrqUo3Mesdin+XISTa7hvhaTWn + vWpMsuPyu1+fBN8JMjpu/Fa0XWxhTH8eB5fDi3G/gDxoxRm/visTlSXURq8FcM8Z7wKW6O + TM6cFrk5KP2luxkt+GItWdPKU5D/fMLcu4bTc3aWXQ9WJONpUlMD7Fr9c8btat5VnX/lxh + F+3dHyiU9a+/H6H/0eFPBzCauMWU7Jl8wStOJMecxGtv+Rw0bl6qTyElTSPtbA== +From: Emil Lerch +To: lobo@lerch.org +Subject: Attachment +Message-ID: <4n4wn5ogmkpcboi2ipxj3wnt5iqe6rcb5bv5jvtseya7k2yqmi@7drgod2d766o> +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="6ecwttx4c7nftwnw" +Content-Disposition: inline + + +--6ecwttx4c7nftwnw +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline + +Attached is an attachment for attachment needs. Please see attached attachment. + +--6ecwttx4c7nftwnw +Content-Type: text/plain; charset=us-ascii +Content-Disposition: attachment; filename="a.txt" + +xwininfo: Window id: 0x1eb (the root window) "i3" + + Root window id: 0x1eb (the root window) "i3" + Parent window id: 0x0 (none) + 154 children: + 0x600071b "CanvasBlocker": ("Firefox" "LibreWolf") 127x47+2378+89 +2378+89 + 2 children: + 0x6000a12 (has no name): () 127x47+0+0 +2378+89 + 0x600071c (has no name): () 1x1+-1+-1 +2377+88 + 0x60004bf "Firefox": ("Firefox" "LibreWolf") 298x402+2113+801 +2113+801 + +--6ecwttx4c7nftwnw-- diff --git a/src/Email.zig b/src/Email.zig index 745a003..a7bc503 100644 --- a/src/Email.zig +++ b/src/Email.zig @@ -140,7 +140,7 @@ pub const Message = struct { if (std.mem.eql(u8, std.mem.span(part_mime_type), "text/html")) { // Try to get the text content if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_text_part_get_type()) != 0) { - const text_part = @as(*gmime.GMimeTextPart, @ptrCast(part)); + const text_part: *gmime.GMimeTextPart = @ptrCast(part); const text = gmime.g_mime_text_part_get_text(text_part); if (text != null) { defer gmime.g_free(text); @@ -157,7 +157,7 @@ pub const Message = struct { if 
(part == null) continue;
 
                 if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_multipart_get_type()) != 0) {
-                    const nested_multipart = @as(*gmime.GMimeMultipart, @ptrCast(part));
+                    const nested_multipart: *gmime.GMimeMultipart = @ptrCast(part);
                     if (try findHtmlInMultipart(nested_multipart, allocator)) |content| return content;
                 }
@@ -174,7 +174,7 @@ pub const Message = struct {
 
         // Check if it's a multipart message
         if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) {
-            const multipart = @as(*gmime.GMimeMultipart, @ptrCast(body));
+            const multipart: *gmime.GMimeMultipart = @ptrCast(body);
 
             // Try to find HTML content in the multipart
             if (try findHtmlInMultipart(multipart, allocator)) |html_content| {
@@ -185,7 +185,7 @@ pub const Message = struct {
 
         // If it's not multipart or we didn't find HTML, check if it's a single text part
         if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) {
-            const text_part = @as(*gmime.GMimeTextPart, @ptrCast(body));
+            const text_part: *gmime.GMimeTextPart = @ptrCast(body);
             const text = gmime.g_mime_text_part_get_text(text_part);
             if (text != null) {
                 defer gmime.g_free(text);
@@ -201,6 +201,144 @@ pub const Message = struct {
         defer gmime.g_free(body_string);
         return try allocator.dupe(u8, std.mem.span(body_string));
     }
+
+    /// Returns the message's displayable content, preferring HTML over plain text.
+    ///
+    /// `content` is duplicated with `allocator` and owned by the caller.
+    /// `content_type` is always a string literal ("text/html" or "text/plain"): it
+    /// must not be freed and stays valid after the message is deinitialized. A
+    /// single-part text body that is not HTML is reported as "text/plain".
+    ///
+    /// Errors: `error.NoMessageBody`, `error.NoTextContent`, or an allocation failure.
+    pub fn getContent(self: Message, allocator: std.mem.Allocator) !struct { content: []const u8, content_type: []const u8 } {
+        const body = gmime.g_mime_message_get_body(self.message);
+        if (body == null) return error.NoMessageBody;
+
+        // Check if it's a multipart message
+        if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) {
+            const multipart: *gmime.GMimeMultipart = @ptrCast(body);
+            if (try findHtmlInMultipart(multipart, allocator)) |html_content| {
+                return .{ .content = html_content, .content_type = "text/html" };
+            }
+        }
+
+        // Check if it's a single text part
+        if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) {
+            const text_part: *gmime.GMimeTextPart = @ptrCast(body);
+            const text = gmime.g_mime_text_part_get_text(text_part);
+            if (text != null) {
+                defer gmime.g_free(text);
+                // Classify with g_mime_content_type_is_type so no C string has to be
+                // returned to (or leaked on behalf of) the caller.
+                const content_type_obj = gmime.g_mime_object_get_content_type(body);
+                const is_html = content_type_obj != null and
+                    gmime.g_mime_content_type_is_type(content_type_obj, "text", "html") != 0;
+                const content_type: []const u8 = if (is_html) "text/html" else "text/plain";
+                return .{ .content = try allocator.dupe(u8, std.mem.span(text)), .content_type = content_type };
+            }
+        }
+
+        return error.NoTextContent;
+    }
+
+    /// Looks up a header on the message by name.
+    ///
+    /// Returns null when the header is absent or `name` is longer than 255 bytes.
+    /// NOTE(review): the returned slice presumably points into memory owned by the
+    /// message, so treat it as valid only until `deinit` — confirm against GMime docs.
+    pub fn getHeader(self: Message, name: []const u8) ?[]const u8 {
+        // `name` is a plain slice with no guaranteed terminator, so copy it into a
+        // NUL-terminated buffer before handing it to C.
+        var name_buf: [256]u8 = undefined;
+        const name_z = std.fmt.bufPrintZ(&name_buf, "{s}", .{std.mem.sliceTo(name, 0)}) catch return null;
+        // GMime 3 exposes header lookup on GMimeObject; GMimeMessage derives from it.
+        const header = gmime.g_mime_object_get_header(@ptrCast(self.message), name_z.ptr);
+        if (header == null) return null;
+        return std.mem.span(header);
+    }
+
+    pub const AttachmentInfo = struct {
+        filename: []const u8,
+        content_type: []const u8,
+    };
+
+    /// Lists every attachment in the message as a filename / MIME type pair.
+    ///
+    /// The caller owns the returned slice and both strings of every entry; all of
+    /// them were allocated with `allocator`.
+    pub fn getAttachments(self: Message, allocator: std.mem.Allocator) ![]AttachmentInfo {
+        var list = std.ArrayList(AttachmentInfo){};
+        defer list.deinit(allocator);
+        // On failure, release the strings already duplicated into the list.
+        errdefer {
+            for (list.items) |att| {
+                allocator.free(att.filename);
+                allocator.free(att.content_type);
+            }
+        }
+
+        // Get the MIME part from the message (not just the body)
+        const mime_part = gmime.g_mime_message_get_mime_part(self.message);
+        if (mime_part == null) return try allocator.dupe(AttachmentInfo, &.{});
+
+        try collectAttachments(mime_part, &list, allocator);
+        return list.toOwnedSlice(allocator);
+    }
+
+    /// Recursively walks `part` and appends an entry for each leaf part that has an
+    /// "attachment" or "inline" disposition and a filename.
+    fn collectAttachments(part: *gmime.GMimeObject, list: *std.ArrayList(AttachmentInfo), allocator: std.mem.Allocator) !void {
+        // Check if this is a multipart
+        if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_multipart_get_type()) != 0) {
+            const multipart: *gmime.GMimeMultipart = @ptrCast(part);
+            const count_i = gmime.g_mime_multipart_get_count(multipart);
+            // Reject every negative count, not just -1: @intCast to usize below
+            // is illegal behavior for negative values.
+            if (count_i < 0) return;
+            const count: usize = @intCast(count_i);
+
+            for (0..count) |i| {
+                const subpart = gmime.g_mime_multipart_get_part(multipart, @intCast(i));
+                if (subpart != null) {
+                    try collectAttachments(subpart, list, allocator);
+                }
+            }
+            return;
+        }
+
+        // g_mime_part_get_filename is only valid on GMimePart instances; skip other
+        // leaf objects instead of casting them blindly.
+        if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_part_get_type()) == 0) return;
+
+        // Check if this part is an attachment
+        const disposition = gmime.g_mime_object_get_content_disposition(part);
+        if (disposition == null) return;
+        const disp_str = gmime.g_mime_content_disposition_get_disposition(disposition);
+        if (disp_str == null) return;
+        const disp = std.mem.span(disp_str);
+        if (!std.mem.eql(u8, disp, "attachment") and !std.mem.eql(u8, disp, "inline")) return;
+
+        const filename_c = gmime.g_mime_part_get_filename(@as(*gmime.GMimePart, @ptrCast(part)));
+        if (filename_c == null) return;
+
+        const content_type_obj = gmime.g_mime_object_get_content_type(part);
+        const mime_type = if (content_type_obj != null)
+            gmime.g_mime_content_type_get_mime_type(content_type_obj)
+        else
+            null;
+        // GMime 3 returns a newly allocated string here; release it once copied.
+        defer if (mime_type != null) gmime.g_free(mime_type);
+
+        // Duplicate both strings before appending so a failure at any step frees
+        // whatever was already allocated.
+        const filename = try allocator.dupe(u8, std.mem.span(filename_c));
+        errdefer allocator.free(filename);
+        const mime_slice: []const u8 = if (mime_type != null) std.mem.span(mime_type) else "application/octet-stream";
+        const content_type = try allocator.dupe(u8, mime_slice);
+        errdefer allocator.free(content_type);
+
+        try list.append(allocator, .{ .filename = filename, .content_type = content_type });
+    }
 };
 
 fn testPath(allocator: std.mem.Allocator) ![:0]const u8 {
@@ -247,3 +385,31 @@ test "can get body from multipart/alternative html preferred" {
         \\
, std.mem.trimRight(u8, body, "\r\n"));
 }
+
+test "can parse attachments" {
+    var engine = Self.init();
+    defer engine.deinit();
+    const allocator = std.testing.allocator;
+
+    var cwd_buf: [std.fs.max_path_bytes]u8 = undefined;
+    const cwd = try std.fs.cwd().realpath(".", cwd_buf[0..]);
+    const attachment_path = try std.fs.path.joinZ(allocator, &[_][]const u8{ cwd, "mail", "Inbox", "cur", "attachmentmcattachface.msg" });
+    defer allocator.free(attachment_path);
+
+    const msg = try engine.openMessage(attachment_path);
+    defer msg.deinit();
+
+    const attachments = try msg.getAttachments(allocator);
+    defer {
+        for (attachments) |att| {
+            allocator.free(att.filename);
+            allocator.free(att.content_type);
+        }
+        allocator.free(attachments);
+    }
+
+    // Should have one attachment
+    try std.testing.expectEqual(@as(usize, 1), attachments.len);
+    try std.testing.expectEqualStrings("a.txt", attachments[0].filename);
+    try std.testing.expectEqualStrings("text/plain", attachments[0].content_type);
+}
diff --git a/src/root.zig b/src/root.zig
index 31bf5e6..0239e9b 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -45,15 +45,6 @@ pub const Thread = struct {
             try jws.write(m.getHeader("date") catch return error.WriteFailed);
             try jws.objectField("subject");
             try jws.write(m.getHeader("subject") catch return error.WriteFailed);
-            // content, content-type, and attachments are all based on the file itself
-            // TODO: init shouldn't fail
-            // var message = try Message.init(self.allocator, m.getFilename());
-            // defer message.deinit();
-            // try message.load();
-            // const content_type = try message.getContentType();
-            // try jws.objectField("content-type");
-            // try jws.write(content_type);
-
             try jws.objectField("message_id");
             try jws.write(m.getMessageId());
             try jws.endObject();
@@ -198,6 +189,98 @@ pub const NotmuchDb = struct {
         }
         return error.ThreadNotFound;
     }
+
+    /// Owned snapshot of a single message: headers, preferred content and attachments.
+    ///
+    /// Every slice except `content_type` is allocated with the allocator passed to
+    /// `deinit`; `content_type` is borrowed and never freed here.
+    pub const MessageDetail = struct {
+        from: ?[]const u8,
+        to: ?[]const u8,
+        cc: ?[]const u8,
+        bcc: ?[]const u8,
+        date: ?[]const u8,
+        subject: ?[]const u8,
+        content: []const u8,
+        content_type: []const u8,
+        attachments: []Email.Message.AttachmentInfo,
+        message_id: []const u8,
+
+        /// Frees everything `getMessage` allocated for this value.
+        pub fn deinit(self: MessageDetail, allocator: std.mem.Allocator) void {
+            const headers = [_]?[]const u8{ self.from, self.to, self.cc, self.bcc, self.date, self.subject };
+            for (headers) |header| {
+                if (header) |value| allocator.free(value);
+            }
+            allocator.free(self.message_id);
+            allocator.free(self.content);
+            for (self.attachments) |att| {
+                allocator.free(att.filename);
+                allocator.free(att.content_type);
+            }
+            allocator.free(self.attachments);
+        }
+    };
+
+    /// Copies an optional borrowed string into `allocator`-owned memory.
+    fn dupeOptional(allocator: std.mem.Allocator, value: ?[]const u8) !?[]const u8 {
+        const borrowed = value orelse return null;
+        return try allocator.dupe(u8, borrowed);
+    }
+
+    /// Looks up a message by Message-ID and returns its headers, preferred content
+    /// and attachment list as a `MessageDetail`.
+    ///
+    /// Returns `error.MessageNotFound` when the query yields no thread or message.
+    /// The caller releases the result with `MessageDetail.deinit(self.allocator)`.
+    pub fn getMessage(self: *NotmuchDb, message_id: []const u8) !MessageDetail {
+        var query_buf: [1024:0]u8 = undefined;
+        const query_z = try std.fmt.bufPrintZ(&query_buf, "mid:{s}", .{message_id});
+        var thread_iter = try self.db.searchThreads(query_z);
+        defer thread_iter.deinit();
+
+        const thread = thread_iter.next() orelse return error.MessageNotFound;
+        defer thread.deinit();
+
+        var msg_iter = try thread.getMessages();
+        const notmuch_msg = msg_iter.next() orelse return error.MessageNotFound;
+
+        const filename_z = try self.allocator.dupeZ(u8, notmuch_msg.getFilename());
+        defer self.allocator.free(filename_z);
+
+        const email_msg = try self.email.openMessage(filename_z);
+        defer email_msg.deinit();
+
+        const content_info = try email_msg.getContent(self.allocator);
+        var detail: MessageDetail = .{
+            .from = null,
+            .to = null,
+            .cc = null,
+            .bcc = null,
+            .date = null,
+            .subject = null,
+            .content = content_info.content,
+            .content_type = content_info.content_type,
+            .attachments = &[_]Email.Message.AttachmentInfo{},
+            .message_id = "",
+        };
+        // From here on `detail` owns its allocations; free them if a later step fails.
+        errdefer detail.deinit(self.allocator);
+
+        detail.attachments = try email_msg.getAttachments(self.allocator);
+
+        // `thread` is deinitialized when this function returns and notmuch_msg's
+        // strings presumably belong to it, so the result must hold its own copies.
+        detail.from = try dupeOptional(self.allocator, notmuch_msg.getHeader("from") catch null);
+        detail.to = try dupeOptional(self.allocator, notmuch_msg.getHeader("to") catch null);
+        detail.cc = try dupeOptional(self.allocator, notmuch_msg.getHeader("cc") catch null);
+        detail.bcc = try dupeOptional(self.allocator, notmuch_msg.getHeader("bcc") catch null);
+        detail.date = try dupeOptional(self.allocator, notmuch_msg.getHeader("date") catch null);
+        detail.subject = try dupeOptional(self.allocator, notmuch_msg.getHeader("subject") catch null);
+        detail.message_id = try self.allocator.dupe(u8, notmuch_msg.getMessageId());
+
+        return detail;
+    }
 };
 
 /// Opens a notmuch database at the specified path
@@ -306,3 +389,27 @@ test "can stringify specific threads" {
         \\]
     , actual);
 }
+
+test "can get message details with content" {
+    const allocator = std.testing.allocator;
+    var db = try openNotmuchDb(allocator, "mail", null);
+    defer db.close();
+
+    // Get a message by its ID
+    const message_id = "8afeb74dca321817e44e07ac4a2e040962e86e@youpharm.co";
+    const msg_detail = try db.getMessage(message_id);
+    defer msg_detail.deinit(allocator);
+
+    // Verify headers
+    try std.testing.expect(msg_detail.from != null);
+    try std.testing.expect(msg_detail.subject != null);
+
+    // Verify content was extracted
+    try std.testing.expect(msg_detail.content.len > 0);
+    try std.testing.expectEqualStrings("text/html", msg_detail.content_type);
+
+    // This message has no attachments
+    try std.testing.expectEqual(@as(usize, 0), msg_detail.attachments.len);
+
+    // TODO: Add a getMessage test for mail/Inbox/cur/attachmentmcattachface.msg (one attachment, a.txt)
+}