const std = @import("std");
const c = @cImport({
    @cInclude("vosk_api.h");
    @cInclude("alsa/asoundlib.h");
});

// Sample rate in Hz, handed to both the Vosk recognizer and the ALSA device.
const SAMPLE_RATE = 16000;
// Capacity of the capture buffer, in 16-bit samples. Each read requests
// BUFFER_SIZE / 2 frames, so only the first half is ever filled.
const BUFFER_SIZE = 4000;

/// Returns the largest absolute sample value in `samples` (0 for an empty slice).
/// The result is `u16` because `@abs` of the most negative `i16` does not fit in `i16`.
fn peakLevel(samples: []const i16) u16 {
    var peak: u16 = 0;
    for (samples) |sample| {
        peak = @max(peak, @abs(sample));
    }
    return peak;
}

/// Captures mono 16-bit audio from the default ALSA device and feeds it to a
/// Vosk recognizer, printing each result the recognizer reports as complete.
///
/// Setup failures are reported on stderr and end the program by returning
/// normally. The capture loop runs until an audio error cannot be recovered.
pub fn main() !void {
    // Initialize Vosk
    c.vosk_set_log_level(-1);

    const model = c.vosk_model_new("vosk-model-small-en-us-0.15");
    if (model == null) {
        std.debug.print("Failed to load model\n", .{});
        return;
    }
    defer c.vosk_model_free(model);

    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE);
    if (rec == null) {
        std.debug.print("Failed to create recognizer\n", .{});
        return;
    }
    defer c.vosk_recognizer_free(rec);

    // Try to open default capture device
    var handle: ?*c.snd_pcm_t = null;
    const open_err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
    if (open_err < 0) {
        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(open_err)});
        std.debug.print("Make sure no other applications are using the microphone\n", .{});
        return;
    }
    defer _ = c.snd_pcm_close(handle);

    // Set to blocking mode (the device was opened with SND_PCM_NONBLOCK).
    const block_err = c.snd_pcm_nonblock(handle, 0);
    if (block_err < 0) {
        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(block_err)});
        return;
    }

    // Configure audio - try simple parameters first.
    // The three trailing arguments are: channels, rate, soft_resample, latency (us).
    const params_err = c.snd_pcm_set_params(
        handle,
        c.SND_PCM_FORMAT_S16_LE,
        c.SND_PCM_ACCESS_RW_INTERLEAVED,
        1,
        SAMPLE_RATE,
        1,
        100000,
    );
    if (params_err < 0) {
        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(params_err)});
        return;
    }

    std.debug.print("Audio configured successfully\n", .{});
    // The line break inside this message is written as an escape; a raw
    // newline inside a Zig string literal does not compile.
    std.debug.print("Listening... \n(Ctrl+C to exit)\n", .{});

    var buffer: [BUFFER_SIZE]i16 = undefined;
    // Number of successful reads so far; wraps instead of trapping on overflow.
    var read_count: u32 = 0;

    while (true) {
        const frames = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE / 2);
        if (frames < 0) {
            const read_err: c_int = @intCast(frames);
            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(read_err)});

            const recover_err = c.snd_pcm_recover(handle, read_err, 0);
            if (recover_err < 0) {
                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(recover_err)});
                break;
            }
            continue;
        }

        // Mono stream: one frame is exactly one i16 sample.
        const sample_count: usize = @intCast(frames);
        const samples = buffer[0..sample_count];

        read_count +%= 1;
        if (read_count % 50 == 0) {
            // Show we're getting audio data
            std.debug.print("Audio: {} frames, max level: {}\n", .{ frames, peakLevel(samples) });
        }

        // The recognizer takes a byte pointer and a length in bytes.
        const status = c.vosk_recognizer_accept_waveform(
            rec,
            @ptrCast(&buffer),
            @intCast(samples.len * @sizeOf(i16)),
        );
        if (status < 0) {
            // A negative status is a recognizer failure, not a finished
            // utterance; report it and keep listening.
            std.debug.print("Recognizer failed to process audio chunk\n", .{});
            continue;
        }
        if (status > 0) {
            const text = c.vosk_recognizer_result(rec);
            std.debug.print("RECOGNIZED: {s}\n", .{text});
        }
    }
}