stt/src/main.zig

//! STT with callback-based event handling

const std = @import("std");
const builtin = @import("builtin");
const stt = @import("stt.zig");

/// Shutdown request flag: stored by the SIGINT handler and polled by the
/// main loop.
var should_exit: std.atomic.Value(bool) = .init(false);

// SAFETY: assigned in main before any callback or child process can use it
/// Process-wide speech handler. It has to be a global so the SIGCHLD signal
/// action can reach the list of child processes it needs to reclaim.
var handler: SpeechHandler = undefined;

/// Event handler for the STT session.
///
/// Prints recognized speech, keeps per-session counters and, when
/// `exec_program` is set, spawns that program once per utterance while
/// tracking the resulting child processes so they can be reaped later.
const SpeechHandler = struct {
    /// Allocator backing the process-tracking list and the child handles.
    allocator: std.mem.Allocator,
    /// Utterances delivered to `onSpeech` (not counted in test builds).
    speech_count: u32 = 0,
    /// Errors reported via `onError` plus non-recoverable detailed errors.
    error_count: u32 = 0,
    /// Reported by `getStats`; nothing in this struct increments it.
    warning_count: u32 = 0,
    /// Detailed errors that were flagged as recoverable.
    recoverable_error_count: u32 = 0,
    /// Program spawned as `<program> <text>` for each utterance, if any.
    exec_program: ?[]const u8 = null,
    /// Tracking entries for spawned processes; entries whose `child` is null
    /// have already been reaped and only await consolidation.
    child_processes: std.ArrayList(*Process) = .{},
    /// Guards `reclaimProcessesPosix` against concurrent entry.
    reclaiming: std.atomic.Value(bool) = std.atomic.Value(bool).init(false),

    /// Number of tracked processes tolerated before a reclaim pass does work.
    const max_children = 5;

    /// Tracking entry for one spawned program.
    /// `child` becomes null once the process has been reaped, `start` is the
    /// spawn time in seconds, and `id` is a copy of the pid taken right after
    /// spawning (only used for the shutdown log message).
    const Process = struct { child: ?*std.process.Child, start: i64, id: std.process.Child.Id };

    /// Handle detected speech: print it and run `exec_program` if configured.
    /// `ctx` must point at a `SpeechHandler`.
    fn onSpeech(ctx: *anyopaque, text: []const u8) void {
        if (builtin.is_test) return; // Suppress output during tests

        const self: *SpeechHandler = @ptrCast(@alignCast(ctx));
        self.speech_count += 1;

        // Print with timestamp for better experience
        const timestamp = std.time.timestamp();
        var stdout_buffer: [1024]u8 = undefined;
        var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
        const stdout = &stdout_writer.interface;
        defer stdout.flush() catch std.log.warn("Caught error writing speech data to stdout", .{});
        stdout.print("[{}] Speech {}->{?s}: {s}\n", .{
            timestamp,
            self.speech_count,
            self.exec_program,
            text,
        }) catch std.log.warn("Caught error writing speech data to stdout", .{});

        // Execute program if specified
        if (self.exec_program) |program| self.exec(text) catch |err| {
            std.log.err("Failed to execute program '{s}': {}", .{ program, err });
        };
    }

    /// Spawn `exec_program` with `text` as its single argument and start
    /// tracking it. Must only be called when `exec_program` is non-null.
    ///
    /// On any failure up to and including the spawn, everything allocated
    /// here is released again. Once the process is running it stays tracked
    /// even if the subsequent reclaim pass reports an error.
    fn exec(self: *SpeechHandler, text: []const u8) !void {
        const program = self.exec_program.?; // should only be called when exec_program is not null

        // We need to be able to clean up at some point in the future, but we
        // don't care about these processes otherwise.
        const process = spawned: {
            const proc = try self.allocator.create(Process);
            errdefer self.allocator.destroy(proc);
            const child = try self.allocator.create(std.process.Child);
            proc.* = .{
                .start = std.time.timestamp(),
                .child = child,
                // SAFETY: assigned right after the spawn block, before any read
                .id = undefined,
            };
            // Release the child handle on failure, unless the SIGCHLD action
            // already destroyed it and cleared the field.
            errdefer if (proc.child) |c| self.allocator.destroy(c);
            child.* = std.process.Child.init(&[_][]const u8{ program, text }, self.allocator);
            try self.child_processes.append(self.allocator, proc);
            errdefer _ = self.child_processes.pop();
            try child.spawn();
            try child.waitForSpawn();
            break :spawned proc;
        };
        // NOTE(review): the SIGCHLD action may reap a very short-lived child
        // before we get here; in that case the pid copy is not needed.
        if (process.child) |c| process.id = c.id;
        try self.reclaimProcessesPosix(false);
    }

    /// Reap finished children, kill ones that ran for 10 seconds or more,
    /// and compact the tracking list.
    ///
    /// With `reap_all == false` nothing happens until more than
    /// `max_children` entries are tracked, and the newest entry is skipped
    /// because it was only just started. With `reap_all == true` every
    /// remaining child is waited on and all tracking memory is released;
    /// failures are logged instead of aborting the cleanup.
    fn reclaimProcessesPosix(self: *SpeechHandler, reap_all: bool) !void {
        // We could end up called by two threads at the same time (via SIGCHLD
        // and an actual speech event). The flag makes the second caller bail.
        if (self.reclaiming.cmpxchgStrong(false, true, .acquire, .acquire)) |_| return;
        defer self.reclaiming.store(false, .release);
        if (!reap_all and self.child_processes.items.len <= max_children) return;
        std.log.debug("Reclaiming memory from {s} processes", .{if (reap_all) "ALL" else "completed"});

        // If we're not reaping everything, we can just as well skip the last
        // one as we just started it. (When not reaping everything the list
        // holds more than max_children entries here, so this cannot underflow.)
        const end = self.child_processes.items.len - @as(usize, if (reap_all) 0 else 1);
        const now = std.time.timestamp();
        for (self.child_processes.items[0..end]) |proc| {
            const child = proc.child orelse continue;
            // Only processes that are at least 10 seconds old are handled
            // here; on shutdown the younger ones are waited on further down.
            if (proc.start + 10 > now) continue;

            const proc_exists = posixPidRunning(child.*) catch |err| {
                switch (err) {
                    error.ProcessNotFound => unreachable, // handled in posixPidRunning
                    error.PermissionDenied => {
                        std.log.err("Permission denied trying to reap pid {d}", .{child.id});
                        continue; // keep it on the list; the OS will deal with it when we exit
                    },
                    error.Unexpected => @panic("Unexpected error getting pid information. This should not happen"),
                }
            };
            if (!proc_exists) {
                // Effectively deinit(); the termination value is not needed.
                if (child.wait()) |_| {} else |err| {
                    if (!reap_all) return err;
                    std.log.err("Failed to wait on pid {d}: {}", .{ child.id, err });
                }
                self.allocator.destroy(child);
                proc.child = null;
                continue;
            }

            std.log.warn("Process ran longer than 10 seconds, killing pid {d}", .{child.id});
            proc.child = null; // avoid race condition between the kill below and the SIGCHLD processing
            // Nobody else can reach the handle any more, so it must be
            // released whether or not the kill succeeds.
            defer self.allocator.destroy(child);
            if (child.kill()) |_| {} else |err| {
                // really should work at this point
                std.log.err("Failed to kill pid {d}: {}", .{ child.id, err });
            }
        }

        if (reap_all) {
            std.log.debug("Shutting down, waiting for processes to finish", .{});
            for (self.child_processes.items) |proc| {
                if (proc.child) |c| {
                    // The pid is read from the copy stored on the tracking
                    // entry at spawn time rather than from the child handle.
                    std.log.info("Waiting on pid {d}", .{proc.id});
                    if (c.wait()) |_| {} else |err|
                        std.log.err("Failed to wait on pid {d}: {}", .{ proc.id, err });
                    self.allocator.destroy(c);
                }
                self.allocator.destroy(proc);
            }
            // Leaves a valid empty list behind (deinit would leave it
            // undefined while the SIGCHLD action may still iterate it).
            self.child_processes.clearAndFree(self.allocator);
            std.log.debug("All processes finished", .{});
            return;
        }

        // TODO: What's the right number here? We want to clear out memory from
        // the array list
        if (self.child_processes.items.len > 20) {
            std.log.debug("consolidating process tracking array", .{});
            // Compact in place: no allocation, so no way to lose entries.
            var kept: usize = 0;
            for (self.child_processes.items) |proc| {
                if (proc.child != null) {
                    self.child_processes.items[kept] = proc;
                    kept += 1;
                } else {
                    self.allocator.destroy(proc);
                }
            }
            self.child_processes.shrinkAndFree(self.allocator, kept);
        }
    }

    /// Report whether the pid of `process` still exists.
    /// Returns false when the pid is not found; other `kill` errors are
    /// passed on to the caller.
    fn posixPidRunning(process: std.process.Child) std.posix.KillError!bool {
        // From https://man7.org/linux/man-pages/man2/kill.2.html:
        //
        //     If sig is 0, then no signal is sent, but existence and permission
        //     checks are still performed; this can be used to check for the
        //     existence of a process ID or process group ID that the caller is
        //     permitted to signal.
        std.posix.kill(process.id, 0) catch |err| {
            if (err == error.ProcessNotFound) return false;
            return err; // Permission denied
        };
        return true; // process is running
    }

    /// Wait for all remaining child processes and free the tracking memory.
    pub fn deinit(self: *SpeechHandler) void {
        self.reclaimProcessesPosix(true) catch |err| std.log.err("Error reclaiming processes: {}", .{err});
    }

    /// Handle basic errors (fallback for compatibility).
    /// `ctx` must point at a `SpeechHandler`.
    fn onError(ctx: *anyopaque, error_code: stt.Error, message: []const u8) void {
        if (builtin.is_test) return; // Suppress output during tests

        const self: *SpeechHandler = @ptrCast(@alignCast(ctx));
        self.error_count += 1;

        // Print error with timestamp
        const timestamp = std.time.timestamp();
        std.log.err("[{}] Error #{} ({}): {s}", .{ timestamp, self.error_count, error_code, message });
    }

    /// Handle detailed errors with comprehensive information.
    /// `ctx` must point at a `SpeechHandler`.
    fn onDetailedError(ctx: *anyopaque, error_info: stt.ErrorInfo) void {
        const self: *SpeechHandler = @ptrCast(@alignCast(ctx));

        logDetail(self, error_info) catch |e|
            std.log.err("Error writing error {}. Original message: {s}", .{ e, error_info.message });
    }

    /// Count `error_info` in the statistics and log it as one multi-line
    /// message. Fails if the formatted message does not fit the fixed buffer.
    fn logDetail(self: *SpeechHandler, error_info: stt.ErrorInfo) !void {
        const log = std.log.scoped(.stt);
        // Categorize the error for statistics
        if (error_info.recoverable)
            self.recoverable_error_count += 1
        else
            self.error_count += 1;

        if (builtin.is_test) return; // Suppress output during tests

        // Format complete error message in a buffer
        var buffer: [2048]u8 = undefined;
        var stream = std.io.fixedBufferStream(&buffer);
        const writer = stream.writer();

        try writer.print("{s}", .{error_info.message});
        try writer.print("\n\tCode: {}", .{error_info.error_code});

        if (error_info.context) |context|
            try writer.print("\n\tContext: {s}", .{context});

        if (error_info.system_error) |sys_err|
            try writer.print("\n\tSystem Error: {} ({any})", .{ sys_err, error_info.error_code });

        if (error_info.recovery_suggestion) |suggestion| {
            if (std.mem.eql(u8, "Ready to start speech recognition", suggestion)) {
                // This is a status notice, not an error: undo exactly the
                // counter that was bumped above and stay silent.
                if (error_info.recoverable)
                    self.recoverable_error_count -= 1
                else
                    self.error_count -= 1;
                return;
            }
            try writer.print("\n\tSuggestion: {s}", .{suggestion});
        }

        if (error_info.recoverable)
            try writer.print("\n\tStatus: Recoverable - system will attempt to continue", .{})
        else
            try writer.print("\n\tStatus: Fatal - intervention may be required", .{});

        const message = stream.getWritten();

        // Determine and call appropriate log function once
        switch (error_info.error_code) {
            stt.Error.InternalError => if (error_info.recoverable) {
                log.info("{s}", .{message});
            } else {
                log.warn("{s}", .{message});
            },
            stt.Error.OutOfMemory,
            stt.Error.ModelLoadError,
            stt.Error.InitializationFailed,
            => log.err("{s}", .{message}),
            else => if (error_info.recoverable)
                log.warn("{s}", .{message})
            else
                log.err("{s}", .{message}),
        }
    }

    /// Get comprehensive statistics for summary.
    /// `total_issues` is the sum of the error, warning and recoverable counters.
    fn getStats(self: *const SpeechHandler) struct {
        speech_count: u32,
        error_count: u32,
        warning_count: u32,
        recoverable_error_count: u32,
        total_issues: u32,
    } {
        const total_issues = self.error_count + self.warning_count + self.recoverable_error_count;
        return .{
            .speech_count = self.speech_count,
            .error_count = self.error_count,
            .warning_count = self.warning_count,
            .recoverable_error_count = self.recoverable_error_count,
            .total_issues = total_issues,
        };
    }
};

/// SIGINT handler: requests a graceful shutdown by raising `should_exit`.
/// Any other signal number is ignored.
fn signalHandler(sig: i32) callconv(.c) void {
    if (sig != std.posix.SIG.INT) return;
    should_exit.store(true, .release);
}

/// SIGCHLD action: reaps the child process named in `info`, releases its
/// handle on the global `handler`, and then runs a regular reclaim pass.
///
/// NOTE: `info` only works correctly if std.posix.SA.SIGINFO is in the flags
/// the action was installed with.
fn signalAction(sig: i32, info: *const std.posix.siginfo_t, _: ?*anyopaque) callconv(.c) void {
    if (sig != std.posix.SIG.CHLD) return;

    const pid = info.fields.common.first.piduid.pid;
    std.log.debug("SIGCHLD on pid {d}", .{pid});
    for (handler.child_processes.items) |proc| {
        const child = proc.child orelse continue;
        if (child.id != pid) continue;

        // wait() can report an error (for instance when the spawn itself
        // failed). Leave the entry alone in that case instead of bringing
        // the whole program down from inside a signal handler.
        const term = child.wait() catch |err| {
            std.log.err("Failed to reap pid {d}: {}", .{ pid, err });
            continue;
        };
        switch (term) {
            .Exited => |code| if (code > 0)
                std.log.warn("Child process exited with non-zero return code {d}", .{code}),
            // A child can also be terminated by a signal; it has been reaped
            // by wait() all the same.
            else => std.log.warn("Child process {d} terminated abnormally: {any}", .{ pid, term }),
        }
        // Once wait() has returned the process is gone, so the handle is
        // released regardless of how the process ended.
        handler.allocator.destroy(child);
        proc.child = null;
    }
    handler.reclaimProcessesPosix(false) catch |err| {
        std.log.err("Caught error reclaiming processes. This is fatal, shutting down. Error: {}", .{err});
        signalHandler(std.posix.SIG.INT);
    };
}

/// Program entry point.
///
/// Resolves the ALSA configuration and the Vosk model path, parses
/// `--model=<path>` and `--exec=<program>`, starts an STT session and then
/// idles until SIGINT arrives or the session stops listening. A summary of
/// the handler statistics is logged on the way out.
///
/// Startup failures of the STT session are logged with hints and then
/// returned, so the process exits with a non-zero status.
pub fn main() !void {
    const stdout = std.fs.File.stdout();
    const stderr = std.fs.File.stderr();

    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Check and set ALSA_CONFIG_PATH if not set
    if (std.posix.getenv("ALSA_CONFIG_PATH") == null) {
        std.fs.cwd().access("alsa.conf", .{}) catch {
            _ = stderr.writeAll("Error: alsa.conf file not found. Please put alsa.conf in the current directory or set ALSA_CONFIG_PATH\n") catch {};
            std.process.exit(1);
        };
        const c = @cImport({
            @cInclude("stdlib.h");
        });
        _ = c.setenv("ALSA_CONFIG_PATH", "alsa.conf", 1);
    }

    // Parse command line arguments
    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    var model_path: ?[]const u8 = null;
    var exec_program: ?[]const u8 = null;

    // Parse --model and --exec arguments
    for (args[1..]) |arg| {
        if (std.mem.startsWith(u8, arg, "--model=")) {
            model_path = arg[8..]; // Skip "--model="
        } else if (std.mem.startsWith(u8, arg, "--exec=")) {
            exec_program = arg[7..]; // Skip "--exec="
        }
    }

    // Create handler with statistics tracking. This must happen before the
    // SIGCHLD action is installed, because that action reads the global.
    handler = SpeechHandler{
        .allocator = allocator,
        .exec_program = exec_program,
    };
    defer handler.deinit();
    const speech_handler = stt.SpeechEventHandler{
        .onSpeechFn = SpeechHandler.onSpeech,
        .onErrorFn = SpeechHandler.onError,
        .onDetailedErrorFn = SpeechHandler.onDetailedError,
        .ctx = &handler,
    };

    // Install signal handlers: SIGINT requests shutdown, SIGCHLD reaps
    // finished child processes.
    const sigintact = std.posix.Sigaction{
        .handler = .{ .handler = signalHandler },
        .mask = std.posix.sigemptyset(),
        .flags = 0,
    };
    std.posix.sigaction(std.posix.SIG.INT, &sigintact, null);
    const sigchldact = std.posix.Sigaction{
        .handler = .{ .sigaction = signalAction },
        .mask = std.posix.sigemptyset(),
        // SIGINFO is required for the action to receive the child's pid
        .flags = std.posix.SA.NOCLDSTOP | std.posix.SA.SIGINFO,
    };
    std.posix.sigaction(std.posix.SIG.CHLD, &sigchldact, null);

    // If no model specified, try default locations
    const default_paths = [_][]const u8{
        "vosk-model-small-en-us-0.15",
        "zig-out/bin/vosk-model-small-en-us-0.15",
        "/usr/share/vosk/models/vosk-model-small-en-us-0.15",
    };

    if (model_path == null) {
        for (default_paths) |path| {
            std.fs.cwd().access(path, .{}) catch continue;
            model_path = path;
            break;
        }
    }

    // Check if model path exists
    if (model_path == null) {
        _ = try stderr.writeAll("Error: Vosk model not found.\n\n");
        _ = try stderr.writeAll("Usage: stt [--model=<path>] [--exec=<program>]\n\n");
        _ = try stderr.writeAll("Locations searched:\n");
        inline for (default_paths) |path|
            _ = try stderr.writeAll("\t" ++ path ++ "\n");
        _ = try stderr.writeAll("Please download the model. A fine model can be downloaded from:\n");
        _ = try stderr.writeAll("\thttps://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip\n");
        std.process.exit(1);
    }

    std.fs.cwd().access(model_path.?, .{}) catch {
        std.log.err("Model path does not exist: {s}", .{model_path.?});
        std.process.exit(1);
    };

    // Initialize STT session with resolved model path
    const options = stt.Options{
        .model_path = model_path.?,
        .audio_device = "default", // Use ALSA default device from alsa.conf
        .event_handler = speech_handler,
        .sample_rate = 16000, // Standard sample rate for speech recognition
        .channels = 2, // Stereo input (will be converted to mono internally)
        .buffer_size = 256, // Existing buffer size for low latency
    };

    std.log.debug("Initializing STT library...", .{});
    var session = stt.Session.init(allocator, options) catch |err| {
        std.log.err("Failed to initialize STT library: {}", .{err});
        std.log.err("Please ensure:", .{});
        std.log.err("  - Audio device '{s}' is available", .{options.audio_device});
        std.log.err("  - Model directory exists at: {s}", .{options.model_path});
        std.log.err("  - You have permission to access the audio device", .{});
        // Propagate so the process does not report success to its caller;
        // the deferred cleanups above still run.
        return err;
    };
    defer session.deinit();

    std.log.info("Program to execute on speech detection: {?s}", .{exec_program});
    std.log.info("STT library initialized successfully with configuration:", .{});
    std.log.info("  Model path: {s}", .{options.model_path});
    std.log.info("  Audio device: {s}", .{options.audio_device});
    std.log.info("  Sample rate: {} Hz", .{options.sample_rate});
    std.log.info("  Channels: {} (converted to mono)", .{options.channels});
    std.log.info("  Buffer size: {} frames", .{options.buffer_size});
    std.log.info("", .{});

    // Start listening for speech with error handling
    _ = stdout.writeAll("Starting speech recognition...\n") catch {};
    session.start_listening() catch |err| {
        std.log.err("Failed to start listening: {}", .{err});
        switch (err) {
            stt.Error.AudioDeviceError => {
                std.log.err("Audio device error. Please check:", .{});
                std.log.err("  - Device '{s}' exists and is accessible", .{options.audio_device});
                std.log.err("  - No other application is using the device", .{});
                std.log.err("  - You have permission to access audio devices", .{});
            },
            stt.Error.ThreadingError => {
                std.log.err("Threading error. System may be under heavy load.", .{});
            },
            else => {
                std.log.err("Unexpected error during startup.", .{});
            },
        }
        // Propagate for a non-zero exit status; session.deinit still runs.
        return err;
    };
    defer session.stop_listening();

    std.log.info("Speech recognition started successfully", .{});
    _ = stdout.writeAll("Listening for speech... (Press Ctrl+C to exit)\n") catch {};
    _ = stdout.writeAll("Speak into your microphone to see speech recognition results\n") catch {};
    _ = stdout.writeAll("------------------------------------------------------------\n") catch {};

    // Main loop - wait for Ctrl+C signal
    while (!should_exit.load(.acquire)) {
        // Sleep for a short time to avoid busy waiting
        std.Thread.sleep(100 * std.time.ns_per_ms); // 100ms

        // Check if session is still listening (in case of errors)
        if (!session.is_listening()) {
            std.log.err("Speech recognition stopped unexpectedly.", .{});
            break;
        }
    }

    _ = stdout.writeAll("\n----------------------------------------\n") catch {};
    _ = stdout.writeAll("Shutdown signal received, stopping...\n") catch {};

    // Get final statistics from handler
    const stats = handler.getStats();
    std.log.info("Demo Session Summary:", .{});
    std.log.info("  Speech detections: {}", .{stats.speech_count});
    std.log.info("  Fatal errors: {}", .{stats.error_count});
    std.log.info("  Recoverable errors: {}", .{stats.recoverable_error_count});
    std.log.info("  Total issues: {}\n", .{stats.total_issues});

    // Printed separately since the lines above are info calls and this is an
    // error call
    if (stats.error_count > 0)
        std.log.err("✗ {d} fatal errors occurred during speech recognition.", .{stats.error_count});

    _ = stdout.writeAll("Session completed successfully.\n") catch {};
}

test "handler callbacks" {
    var handler_under_test: SpeechHandler = .{ .allocator = std.testing.allocator };
    const events: stt.SpeechEventHandler = .{
        .onSpeechFn = SpeechHandler.onSpeech,
        .onErrorFn = SpeechHandler.onError,
        .ctx = &handler_under_test,
    };

    // Smoke test: both callbacks must be dispatchable through the event
    // handler without crashing.
    events.onSpeech("test speech");
    events.onError(stt.Error.AudioDeviceError, "test error");

    // Reaching this point is the success criterion.
    try std.testing.expect(true);
}