91 lines
3 KiB
Zig
91 lines
3 KiB
Zig
const std = @import("std");
|
|
const c = @cImport({
|
|
@cInclude("vosk_api.h");
|
|
@cInclude("alsa/asoundlib.h");
|
|
});
|
|
|
|
/// Audio sample rate; `main` passes it to both `vosk_recognizer_new` and
/// `snd_pcm_set_params`, so the recognizer and the capture device agree.
const SAMPLE_RATE = 16000;
|
|
/// Length, in `i16` elements, of the capture buffer declared in `main`.
/// Each `snd_pcm_readi` call there requests `BUFFER_SIZE / 2` frames.
const BUFFER_SIZE = 4000;
|
|
|
|
/// Returns the largest absolute sample value in `samples`, or 0 for an empty slice.
fn peakLevel(samples: []const i16) u16 {
    var peak: u16 = 0;
    for (samples) |sample| {
        // @abs on an i16 yields a u16, so even the most negative sample fits.
        peak = @max(peak, @abs(sample));
    }
    return peak;
}

/// Captures mono 16-bit audio from the default ALSA device and prints each
/// result the Vosk recognizer reports.
///
/// The model is loaded from the relative path "vosk-model-small-en-us-0.15".
/// Every fatal condition prints a diagnostic to stderr and then returns a
/// named error, so the process exits with a non-zero status instead of
/// reporting success. The capture loop only ends through such an error (or an
/// external signal).
pub fn main() !void {
    // Initialize Vosk
    c.vosk_set_log_level(-1);
    const model = c.vosk_model_new("vosk-model-small-en-us-0.15");
    if (model == null) {
        std.debug.print("Failed to load model\n", .{});
        return error.ModelLoadFailed;
    }
    defer c.vosk_model_free(model);

    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE);
    if (rec == null) {
        std.debug.print("Failed to create recognizer\n", .{});
        return error.RecognizerCreateFailed;
    }
    defer c.vosk_recognizer_free(rec);

    // Open the default capture device in non-blocking mode first; it is
    // switched to blocking mode right after a successful open.
    var handle: ?*c.snd_pcm_t = null;
    const open_err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
    if (open_err < 0) {
        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(open_err)});
        std.debug.print("Make sure no other applications are using the microphone\n", .{});
        return error.AudioDeviceOpenFailed;
    }
    defer _ = c.snd_pcm_close(handle);

    // Set to blocking mode
    const block_err = c.snd_pcm_nonblock(handle, 0);
    if (block_err < 0) {
        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(block_err)});
        return error.AudioConfigFailed;
    }

    // Configure audio: signed 16-bit little-endian, interleaved, 1 channel,
    // SAMPLE_RATE, soft resampling enabled, latency argument 100000.
    const params_err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 1, SAMPLE_RATE, 1, 100000);
    if (params_err < 0) {
        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(params_err)});
        return error.AudioConfigFailed;
    }

    std.debug.print("Audio configured successfully\n", .{});
    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});

    // With one channel a frame is a single i16 sample, so each read fills at
    // most the first half of `buffer`.
    const frames_per_read = BUFFER_SIZE / 2;
    // Print a level report on every 50th successful read.
    const reads_per_report = 50;

    var buffer: [BUFFER_SIZE]i16 = undefined;
    // Kept modulo `reads_per_report` so it can never overflow.
    var reads_since_report: u32 = 0;

    while (true) {
        const frames_read = c.snd_pcm_readi(handle, &buffer, frames_per_read);
        if (frames_read < 0) {
            const read_err: c_int = @intCast(frames_read);
            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(read_err)});
            const recover_err = c.snd_pcm_recover(handle, read_err, 0);
            if (recover_err < 0) {
                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(recover_err)});
                return error.AudioReadFailed;
            }
            continue;
        }
        const frame_total: usize = @intCast(frames_read);

        reads_since_report = (reads_since_report + 1) % reads_per_report;
        if (reads_since_report == 0) {
            // Show we're getting audio data
            const max_sample = peakLevel(buffer[0..frame_total]);
            std.debug.print("Audio: {} frames, max level: {}\n", .{ frame_total, max_sample });
        }

        // The length argument is in bytes, hence frames * sizeof(i16).
        const status = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&buffer), @intCast(frame_total * @sizeOf(i16)));
        if (status < 0) {
            // A negative return is a recognizer failure, not a finished
            // utterance; report it and keep capturing.
            std.debug.print("Recognizer failed to process audio chunk\n", .{});
        } else if (status > 0) {
            const text = c.vosk_recognizer_result(rec);
            std.debug.print("RECOGNIZED: {s}\n", .{text});
        }
    }
}
|