// stt/src/main.zig

const std = @import("std");
const c = @cImport({
    @cInclude("vosk_api.h");
    @cInclude("alsa/asoundlib.h");
});

/// Audio sample rate in Hz, shared by the Vosk recognizer and the ALSA capture setup.
const SAMPLE_RATE: comptime_int = 16_000;
/// Length of the capture buffer, counted in 16-bit samples.
const BUFFER_SIZE: comptime_int = 4_000;

/// Capture microphone audio through ALSA and stream it to a Vosk recognizer,
/// printing every recognized utterance to stderr.
///
/// Each setup failure is reported on stderr and then returned as an error so
/// the process exits with a non-zero status. The capture loop only ends when
/// ALSA reports a read error that `snd_pcm_recover` cannot handle; in that
/// case the recognizer is flushed and `error.AudioReadFailed` is returned.
pub fn main() !void {
    // Initialize Vosk; a negative log level suppresses its info messages.
    c.vosk_set_log_level(-1);
    const model = c.vosk_model_new("vosk-model-small-en-us-0.15");
    if (model == null) {
        std.debug.print("Failed to load model\n", .{});
        return error.ModelLoadFailed;
    }
    defer c.vosk_model_free(model);

    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE);
    if (rec == null) {
        std.debug.print("Failed to create recognizer\n", .{});
        return error.RecognizerCreateFailed;
    }
    defer c.vosk_recognizer_free(rec);

    // Open the default capture device in non-blocking mode first
    // (presumably so a busy device fails right away instead of hanging).
    var handle: ?*c.snd_pcm_t = null;
    const open_err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
    if (open_err < 0) {
        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(open_err)});
        std.debug.print("Make sure no other applications are using the microphone\n", .{});
        return error.AudioOpenFailed;
    }
    defer _ = c.snd_pcm_close(handle);

    // Switch to blocking mode so each read waits for the requested frames.
    const block_err = c.snd_pcm_nonblock(handle, 0);
    if (block_err < 0) {
        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(block_err)});
        return error.AudioConfigFailed;
    }

    // Mono, signed 16-bit little-endian, interleaved access, software
    // resampling allowed, 100000 us requested latency.
    const params_err = c.snd_pcm_set_params(
        handle,
        c.SND_PCM_FORMAT_S16_LE,
        c.SND_PCM_ACCESS_RW_INTERLEAVED,
        1,
        SAMPLE_RATE,
        1,
        100000,
    );
    if (params_err < 0) {
        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(params_err)});
        return error.AudioConfigFailed;
    }

    std.debug.print("Audio configured successfully\n", .{});
    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});

    // With one channel a frame is a single i16 sample; each read requests
    // BUFFER_SIZE / 2 frames, so only the first half of the buffer is filled.
    var buffer: [BUFFER_SIZE]i16 = undefined;
    // Successful reads since the last level report; reset at 50 so it can
    // never overflow however long the program runs.
    var reads_since_report: u8 = 0;

    while (true) {
        const frames = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE / 2);
        if (frames < 0) {
            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(@intCast(frames))});
            const recover_err = c.snd_pcm_recover(handle, @intCast(frames), 0);
            if (recover_err < 0) {
                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(recover_err)});
                break;
            }
            continue;
        }

        const frame_total: usize = @intCast(frames);
        const samples = buffer[0..frame_total];

        reads_since_report += 1;
        if (reads_since_report == 50) {
            reads_since_report = 0;
            // Show we're getting audio data
            std.debug.print("Audio: {} frames, max level: {}\n", .{ frame_total, peakLevel(samples) });
        }

        // Vosk takes the chunk as raw bytes, so pass the length in bytes.
        const byte_len = frame_total * @sizeOf(i16);
        const result = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&buffer), @intCast(byte_len));
        if (result > 0) {
            // Positive: an utterance has ended and its result can be fetched.
            const text = c.vosk_recognizer_result(rec);
            std.debug.print("RECOGNIZED: {s}\n", .{text});
        } else if (result < 0) {
            // Negative: the recognizer hit an internal error on this chunk;
            // there is no result to fetch, so keep listening.
            std.debug.print("Recognizer error while processing audio\n", .{});
        }
    }

    // The loop only ends on an unrecoverable read error. Flush whatever
    // speech the recognizer still holds before reporting the failure.
    std.debug.print("RECOGNIZED: {s}\n", .{c.vosk_recognizer_final_result(rec)});
    return error.AudioReadFailed;
}

/// Return the largest absolute sample value in `samples` (0 for an empty slice).
fn peakLevel(samples: []const i16) u16 {
    var peak: u16 = 0;
    for (samples) |sample| {
        // @abs on i16 yields u16, so the minimum i16 value is represented exactly.
        peak = @max(peak, @abs(sample));
    }
    return peak;
}