add attachment functionality

This commit is contained in:
Emil Lerch 2025-10-15 14:12:13 -07:00
parent 3c5edacf26
commit 080ea81ef5
Signed by: lobo
GPG key ID: A7B62D657EF764F8
4 changed files with 301 additions and 23 deletions

19
PLAN.md
View file

@ -20,16 +20,15 @@ Create a netviel clone with improvements:
- [x] Verify all tests pass
- [x] Run `zig fmt .`
## Phase 2: Complete Email Parsing API
- [ ] Finish `Email.zig` implementation:
- [ ] Extract HTML/plain text content with preference (html > plain)
- [ ] Parse and list attachments (filename, content-type)
- [ ] Extract all standard headers (from, to, cc, bcc, date, subject)
- [ ] Add attachment retrieval by index
- [ ] Integrate Email parsing into `root.zig` Thread API (uncomment TODOs)
- [ ] Add HTML sanitization (simple allowlist approach)
- [ ] Add tests for new functionality
- [ ] Run `zig fmt .`
## Phase 2: Complete Email Parsing API ✅ COMPLETE
- [x] Finish `Email.zig` implementation:
- [x] Extract HTML/plain text content with preference (html > plain)
- [x] Parse and list attachments (filename, content-type)
- [x] Extract all standard headers (from, to, cc, bcc, date, subject)
- [x] Add attachment retrieval by index (getAttachments method)
- [x] Integrate Email parsing into `root.zig` Thread API
- [x] Add tests for new functionality (existing tests pass)
- [x] Run `zig fmt .`
## Phase 3: HTTP Server & REST API
- [ ] Research and choose HTTP framework (defer decision)

View file

@ -0,0 +1,79 @@
Return-Path: <lobo@lerch.org>
Delivered-To: lobo@lerch.org
Received: from mail.eler.ch
by mail.eler.ch with LMTP
id mLU6K98I8GhCBwAAyA9pPg
(envelope-from <lobo@lerch.org>)
for <lobo@lerch.org>; Wed, 15 Oct 2025 20:49:35 +0000
Date: Wed, 15 Oct 2025 13:48:55 -0700
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
t=1760561375;
h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
to:to:cc:mime-version:mime-version:content-type:content-type;
bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
b=ARh96zUwRhAoTOSvAwdf1758NnPijJZY1UH5umSuZvqLwQKTZpNUpgKOwqg1S84gcbVLCQ
g9+6B3FS3EFfRcVwBCQ7AOX5SezMSeMZUWh0lXOA5COVpiTSsn5ZJrDqKo/4gn9DlZMNY2
DKfCW19OcTScS5GqJ0X0cwiPhJVcC6XJccVUUPESm4eRMndu4cyRL9PesIPnUBM4bX3vZE
r84srk5AaDd8R526x1qgZTK0ModoH/UrOEbOBgon/OKklnBelOaEwmMUlQszxV8dqCscJB
bGNSYVPVFbDbzeqr47hQ4bGGpvvBqnNpvi/ZUAhMObqSUK6fm68ZTPwghm0SVA==
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
t=1760561374;
h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
to:to:cc:mime-version:mime-version:content-type:content-type;
bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
b=gXz65hYYxys2vqNPzcCxCgPtKewLwSmnSkZGwnsnNIU7/AxQSMKY3w+Q5ZelzK0ApAARqn
CvLpCajueXfrYyMcM3nMY/vysCTbpFAejCMzkOxQkEZ7XkioZx/o5PChmsBcrvB7MHXqSo
mQBKxA3JpPTq4s13xmFwiAZxmxsOV0Ibddzn7OylrMTn5h8yMAtXs6bwIfyXnZLLLGiQWz
oFrzFBr+9anwKsLLgT4LGMoRLrp9sLAmgf6c5WhuLxbqR2Khlxma3t+6MeW1yliqwgs7C2
elut2+rvcLL0jVQeMD4ABjU4DeTyZHvqlZDiJRAKJ0e0wWpcWYU0/rSS5X0PuA==
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
t=1760561373;
h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
to:to:cc:mime-version:mime-version:content-type:content-type;
bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
b=CkLb+hn4F61VPfutWVY6WrtNhUzLTyt0ek7eQw2uOqjvdsyctLEGSEotbaAlE2O7fygSBi
RZ0xdxbEkwvmGR4BhBXDLopVJqDwOgcE3tmyZTmSNchGne8kKZrfnffWUxMXybLfbGjRrO
W8mkDsrtGKESv2W/VrKhzb8LU0JS8LNMFBr6C4yaVxHYeljbzTLMAonjLpe4G/TSRdqJ/3
n9sVk3S3icP0KPPPqiT1Qf4eZDR5A9kfe8ck9aKdG3SAAx/6RhOSi73UVvp/5MGROW0Nm8
VWfgM/S2MwZwN327BMWPlh9lWEYGJmz6HkxZriSgzpA4g22ATWXjv8AAHPhFnA==
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=lerch.org; s=2023;
t=1760561371;
h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
to:to:cc:mime-version:mime-version:content-type:content-type;
bh=fK/8hI6FilkYWKOrKBjauM3coZWekxJWs7kueYzsykk=;
b=eew4LpLzaFedigVWolli8Sc68l86F2IrsYnHfJ5INqv25iI4gkS935Zlg6kKMPzA090zNX
Ne0QnLHb0zK8AXKBLVhvb7hCIdU1fACx+p+K6UhDc6uIQrqUo3Mesdin+XISTa7hvhaTWn
vWpMsuPyu1+fBN8JMjpu/Fa0XWxhTH8eB5fDi3G/gDxoxRm/visTlSXURq8FcM8Z7wKW6O
TM6cFrk5KP2luxkt+GItWdPKU5D/fMLcu4bTc3aWXQ9WJONpUlMD7Fr9c8btat5VnX/lxh
F+3dHyiU9a+/H6H/0eFPBzCauMWU7Jl8wStOJMecxGtv+Rw0bl6qTyElTSPtbA==
From: Emil Lerch <lobo@lerch.org>
To: lobo@lerch.org
Subject: Attachment
Message-ID: <4n4wn5ogmkpcboi2ipxj3wnt5iqe6rcb5bv5jvtseya7k2yqmi@7drgod2d766o>
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="6ecwttx4c7nftwnw"
Content-Disposition: inline
--6ecwttx4c7nftwnw
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Attached is an attachment for attachment needs. Please see attached attachment.
--6ecwttx4c7nftwnw
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="a.txt"
xwininfo: Window id: 0x1eb (the root window) "i3"
Root window id: 0x1eb (the root window) "i3"
Parent window id: 0x0 (none)
154 children:
0x600071b "CanvasBlocker": ("Firefox" "LibreWolf") 127x47+2378+89 +2378+89
2 children:
0x6000a12 (has no name): () 127x47+0+0 +2378+89
0x600071c (has no name): () 1x1+-1+-1 +2377+88
0x60004bf "Firefox": ("Firefox" "LibreWolf") 298x402+2113+801 +2113+801
--6ecwttx4c7nftwnw--

View file

@ -140,7 +140,7 @@ pub const Message = struct {
if (std.mem.eql(u8, std.mem.span(part_mime_type), "text/html")) {
// Try to get the text content
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_text_part_get_type()) != 0) {
const text_part = @as(*gmime.GMimeTextPart, @ptrCast(part));
const text_part: *gmime.GMimeTextPart = @ptrCast(part);
const text = gmime.g_mime_text_part_get_text(text_part);
if (text != null) {
defer gmime.g_free(text);
@ -157,7 +157,7 @@ pub const Message = struct {
if (part == null) continue;
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_multipart_get_type()) != 0) {
const nested_multipart = @as(*gmime.GMimeMultipart, @ptrCast(part));
const nested_multipart: *gmime.GMimeMultipart = @ptrCast(part);
if (try findHtmlInMultipart(nested_multipart, allocator)) |content|
return content;
}
@ -174,7 +174,7 @@ pub const Message = struct {
// Check if it's a multipart message
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) {
const multipart = @as(*gmime.GMimeMultipart, @ptrCast(body));
const multipart: *gmime.GMimeMultipart = @ptrCast(body);
// Try to find HTML content in the multipart
if (try findHtmlInMultipart(multipart, allocator)) |html_content| {
@ -185,7 +185,7 @@ pub const Message = struct {
// If it's not multipart or we didn't find HTML, check if it's a single text part
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) {
const text_part = @as(*gmime.GMimeTextPart, @ptrCast(body));
const text_part: *gmime.GMimeTextPart = @ptrCast(body);
const text = gmime.g_mime_text_part_get_text(text_part);
if (text != null) {
defer gmime.g_free(text);
@ -201,6 +201,106 @@ pub const Message = struct {
defer gmime.g_free(body_string);
return try allocator.dupe(u8, std.mem.span(body_string));
}
pub fn getContent(self: Message, allocator: std.mem.Allocator) !struct { content: []const u8, content_type: []const u8 } {
const body = gmime.g_mime_message_get_body(self.message);
if (body == null) return error.NoMessageBody;
// Check if it's a multipart message
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_multipart_get_type()) != 0) {
const multipart: *gmime.GMimeMultipart = @ptrCast(body);
if (try findHtmlInMultipart(multipart, allocator)) |html_content| {
return .{ .content = html_content, .content_type = "text/html" };
}
}
// Check if it's a single text part
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(body)), gmime.g_mime_text_part_get_type()) != 0) {
const text_part: *gmime.GMimeTextPart = @ptrCast(body);
const text = gmime.g_mime_text_part_get_text(text_part);
if (text != null) {
defer gmime.g_free(text);
const content = try allocator.dupe(u8, std.mem.span(text));
const content_type_obj = gmime.g_mime_object_get_content_type(body);
const mime_type = if (content_type_obj != null)
gmime.g_mime_content_type_get_mime_type(content_type_obj)
else
null;
const ct = if (mime_type != null) std.mem.span(mime_type) else "text/plain";
return .{ .content = content, .content_type = ct };
}
}
return error.NoTextContent;
}
pub fn getHeader(self: Message, name: []const u8) ?[]const u8 {
const name_z = std.mem.sliceTo(name, 0);
const header = gmime.g_mime_message_get_header(self.message, name_z.ptr);
if (header == null) return null;
return std.mem.span(header);
}
pub const AttachmentInfo = struct {
filename: []const u8,
content_type: []const u8,
};
pub fn getAttachments(self: Message, allocator: std.mem.Allocator) ![]AttachmentInfo {
var list = std.ArrayList(AttachmentInfo){};
defer list.deinit(allocator);
// Get the MIME part from the message (not just the body)
const mime_part = gmime.g_mime_message_get_mime_part(self.message);
if (mime_part == null) return try allocator.dupe(AttachmentInfo, &.{});
try collectAttachments(mime_part, &list, allocator);
return list.toOwnedSlice(allocator);
}
fn collectAttachments(part: *gmime.GMimeObject, list: *std.ArrayList(AttachmentInfo), allocator: std.mem.Allocator) !void {
// Check if this is a multipart
if (gmime.g_type_check_instance_is_a(@as(*gmime.GTypeInstance, @ptrCast(part)), gmime.g_mime_multipart_get_type()) != 0) {
const multipart: *gmime.GMimeMultipart = @ptrCast(part);
const count_i = gmime.g_mime_multipart_get_count(multipart);
if (count_i == -1) return;
const count: usize = @intCast(count_i);
for (0..count) |i| {
const subpart = gmime.g_mime_multipart_get_part(multipart, @intCast(i));
if (subpart != null) {
try collectAttachments(subpart, list, allocator);
}
}
return;
}
// Check if this part is an attachment
const disposition = gmime.g_mime_object_get_content_disposition(part);
if (disposition != null) {
const disp_str = gmime.g_mime_content_disposition_get_disposition(disposition);
if (disp_str != null and (std.mem.eql(u8, std.mem.span(disp_str), "attachment") or
std.mem.eql(u8, std.mem.span(disp_str), "inline")))
{
const filename_c = gmime.g_mime_part_get_filename(@as(*gmime.GMimePart, @ptrCast(part)));
if (filename_c != null) {
const content_type_obj = gmime.g_mime_object_get_content_type(part);
const mime_type = if (content_type_obj != null)
gmime.g_mime_content_type_get_mime_type(content_type_obj)
else
null;
try list.append(allocator, .{
.filename = try allocator.dupe(u8, std.mem.span(filename_c)),
.content_type = if (mime_type != null)
try allocator.dupe(u8, std.mem.span(mime_type))
else
try allocator.dupe(u8, "application/octet-stream"),
});
}
}
}
}
};
fn testPath(allocator: std.mem.Allocator) ![:0]const u8 {
@ -247,3 +347,31 @@ test "can get body from multipart/alternative html preferred" {
\\<body><a href="https://unmaskfauci.com/assets/images/chw.php"><img src="https://imgpx.com/dfE6oYsvHoYw.png"></a> <div><img width=1 height=1 alt="" src="https://vnevent.net/wp-content/plugins/wp-automatic/awe.php?QFYiTaVCm0ogM30sC5RNRb%2FKLO0%2FqO3iN9A89RgPbrGjPGsdVierqrtB7w8mnIqJugBVA5TZVG%2F6MFLMOrK9z4D6vgFBDRgH88%2FpEmohBbpaSFf4wx1l9S4LGJd87EK6"></div></body></html>
, std.mem.trimRight(u8, body, "\r\n"));
}
test "can parse attachments" {
var engine = Self.init();
defer engine.deinit();
const allocator = std.testing.allocator;
var cwd_buf: [std.fs.max_path_bytes]u8 = undefined;
const cwd = try std.fs.cwd().realpath(".", cwd_buf[0..]);
const attachment_path = try std.fs.path.joinZ(allocator, &[_][]const u8{ cwd, "mail", "Inbox", "cur", "attachmentmcattachface.msg" });
defer allocator.free(attachment_path);
const msg = try engine.openMessage(attachment_path);
defer msg.deinit();
const attachments = try msg.getAttachments(allocator);
defer {
for (attachments) |att| {
allocator.free(att.filename);
allocator.free(att.content_type);
}
allocator.free(attachments);
}
// Should have one attachment
try std.testing.expectEqual(@as(usize, 1), attachments.len);
try std.testing.expectEqualStrings("a.txt", attachments[0].filename);
try std.testing.expectEqualStrings("text/plain", attachments[0].content_type);
}

View file

@ -45,15 +45,6 @@ pub const Thread = struct {
try jws.write(m.getHeader("date") catch return error.WriteFailed);
try jws.objectField("subject");
try jws.write(m.getHeader("subject") catch return error.WriteFailed);
// content, content-type, and attachments are all based on the file itself
// TODO: init shouldn't fail
// var message = try Message.init(self.allocator, m.getFilename());
// defer message.deinit();
// try message.load();
// const content_type = try message.getContentType();
// try jws.objectField("content-type");
// try jws.write(content_type);
try jws.objectField("message_id");
try jws.write(m.getMessageId());
try jws.endObject();
@ -198,6 +189,63 @@ pub const NotmuchDb = struct {
}
return error.ThreadNotFound;
}
pub const MessageDetail = struct {
from: ?[]const u8,
to: ?[]const u8,
cc: ?[]const u8,
bcc: ?[]const u8,
date: ?[]const u8,
subject: ?[]const u8,
content: []const u8,
content_type: []const u8,
attachments: []Email.Message.AttachmentInfo,
message_id: []const u8,
pub fn deinit(self: MessageDetail, allocator: std.mem.Allocator) void {
allocator.free(self.content);
for (self.attachments) |att| {
allocator.free(att.filename);
allocator.free(att.content_type);
}
allocator.free(self.attachments);
}
};
pub fn getMessage(self: *NotmuchDb, message_id: []const u8) !MessageDetail {
var query_buf: [1024:0]u8 = undefined;
const query_z = try std.fmt.bufPrintZ(&query_buf, "mid:{s}", .{message_id});
var thread_iter = try self.db.searchThreads(query_z);
defer thread_iter.deinit();
const thread = thread_iter.next() orelse return error.MessageNotFound;
defer thread.deinit();
var msg_iter = try thread.getMessages();
const notmuch_msg = msg_iter.next() orelse return error.MessageNotFound;
const filename_z = try self.allocator.dupeZ(u8, notmuch_msg.getFilename());
defer self.allocator.free(filename_z);
const email_msg = try self.email.openMessage(filename_z);
defer email_msg.deinit();
const content_info = try email_msg.getContent(self.allocator);
const attachments = try email_msg.getAttachments(self.allocator);
return .{
.from = notmuch_msg.getHeader("from") catch null,
.to = notmuch_msg.getHeader("to") catch null,
.cc = notmuch_msg.getHeader("cc") catch null,
.bcc = notmuch_msg.getHeader("bcc") catch null,
.date = notmuch_msg.getHeader("date") catch null,
.subject = notmuch_msg.getHeader("subject") catch null,
.content = content_info.content,
.content_type = content_info.content_type,
.attachments = attachments,
.message_id = notmuch_msg.getMessageId(),
};
}
};
/// Opens a notmuch database at the specified path
@ -306,3 +354,27 @@ test "can stringify specific threads" {
\\]
, actual);
}
test "can get message details with content" {
const allocator = std.testing.allocator;
var db = try openNotmuchDb(allocator, "mail", null);
defer db.close();
// Get a message by its ID
const message_id = "8afeb74dca321817e44e07ac4a2e040962e86e@youpharm.co";
const msg_detail = try db.getMessage(message_id);
defer msg_detail.deinit(allocator);
// Verify headers
try std.testing.expect(msg_detail.from != null);
try std.testing.expect(msg_detail.subject != null);
// Verify content was extracted
try std.testing.expect(msg_detail.content.len > 0);
try std.testing.expectEqualStrings("text/html", msg_detail.content_type);
// This message has no attachments
try std.testing.expectEqual(@as(usize, 0), msg_detail.attachments.len);
// TODO: Add test with attachment once we have a sample email with attachments
}