diff --git a/src/stt.zig b/src/stt.zig index 987d99d..9122b65 100644 --- a/src/stt.zig +++ b/src/stt.zig @@ -418,6 +418,12 @@ pub const AlsaCapture = struct { err = c.snd_pcm_hw_params_set_rate_near(self.pcm_handle, hw_params, &actual_rate, null); if (err < 0) return Error.SetSampleRateError; + // Update sample rate if hardware doesn't support requested rate + if (actual_rate != self.sample_rate) { + std.log.info("Hardware doesn't support {}Hz, using {}Hz", .{ self.sample_rate, actual_rate }); + self.sample_rate = actual_rate; + } + // Set buffer size var actual_buffer_size: c.snd_pcm_uframes_t = self.buffer_size; err = c.snd_pcm_hw_params_set_buffer_size_near(self.pcm_handle, hw_params, &actual_buffer_size); @@ -602,7 +608,7 @@ pub const Session = struct { } // Initialize Vosk audio buffer (larger buffer for processing) - const vosk_audio_buffer = AudioBuffer.init(allocator, options.sample_rate * 2) catch { + const vosk_audio_buffer = AudioBuffer.init(allocator, alsa_capture.sample_rate * 2) catch { const error_info = ErrorInfo.init(Error.OutOfMemory, "Failed to allocate Vosk audio buffer during initialization"); options.event_handler.onDetailedError(error_info); return Error.OutOfMemory; @@ -946,8 +952,15 @@ pub const Session = struct { return Error.InvalidState; } + // Resample to 16kHz if needed + var resampled_buffer: [4096]i16 = undefined; + const final_audio = if (self.alsa_capture.?.sample_rate != 16000) blk: { + const resampled_count = AudioConverter.resample(audio_data, &resampled_buffer, self.alsa_capture.?.sample_rate, 16000); + break :blk resampled_buffer[0..resampled_count]; + } else audio_data; + // Convert i16 samples to bytes for Vosk - const audio_bytes = std.mem.sliceAsBytes(audio_data); + const audio_bytes = std.mem.sliceAsBytes(final_audio); // Feed audio to Vosk recognizer const accept_result = c.vosk_recognizer_accept_waveform(self.vosk_recognizer, audio_bytes.ptr, @intCast(audio_bytes.len));