Set the Vosk recognizer sample rate to the actual hardware capture rate and remove resampling
This commit is contained in:
parent
43cf222a98
commit
e2490ec3e3
3 changed files with 6 additions and 85 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
.zig-cache/
|
||||
zig-out/
|
||||
alsa-*.conf
|
||||
alsa.conf
|
||||
|
|
69
src/stt.zig
69
src/stt.zig
|
@ -277,39 +277,6 @@ pub const AudioConverter = struct {
|
|||
|
||||
return frames;
|
||||
}
|
||||
|
||||
/// Resample 16-bit PCM samples from `input_rate` to `output_rate` using
/// linear interpolation between neighbouring input samples.
///
/// Note: this is a basic implementation with no anti-aliasing filter. For
/// production use, consider a more sophisticated algorithm.
///
/// - `input_samples`: source samples, read only.
/// - `output_samples`: destination buffer; at most `output_samples.len`
///   samples are written.
/// - `input_rate` / `output_rate`: sample rates of the source and destination.
///
/// Returns the number of samples written to `output_samples`. Returns 0 when
/// the rates differ and either of them is zero, since no conversion ratio
/// can be formed.
pub fn resample(input_samples: []const i16, output_samples: []i16, input_rate: u32, output_rate: u32) usize {
    // Identical rates: plain copy, truncated to the smaller of the two buffers.
    if (input_rate == output_rate) {
        const copy_len = @min(input_samples.len, output_samples.len);
        @memcpy(output_samples[0..copy_len], input_samples[0..copy_len]);
        return copy_len;
    }

    // With input_rate == 0 the ratio is 0 and the length computation below
    // would hand inf/NaN to @intFromFloat, which is illegal behavior.
    if (input_rate == 0 or output_rate == 0) return 0;

    // Number of input samples advanced per output sample.
    const ratio = @as(f64, @floatFromInt(input_rate)) / @as(f64, @floatFromInt(output_rate));
    const ideal_len: usize = @intFromFloat(@as(f64, @floatFromInt(input_samples.len)) / ratio);
    const output_len = @min(output_samples.len, ideal_len);

    const sample_min: f64 = std.math.minInt(i16);
    const sample_max: f64 = std.math.maxInt(i16);

    for (output_samples[0..output_len], 0..) |*out, i| {
        const src_pos = @as(f64, @floatFromInt(i)) * ratio;
        const src_idx: usize = @intFromFloat(src_pos);

        // Defensive: floating-point rounding could push the position past the
        // input. Report only the samples that were actually produced.
        if (src_idx >= input_samples.len) return i;

        if (src_idx + 1 < input_samples.len) {
            // Blend the two neighbouring samples by the fractional position.
            const frac = src_pos - @as(f64, @floatFromInt(src_idx));
            const sample1: f64 = @floatFromInt(input_samples[src_idx]);
            const sample2: f64 = @floatFromInt(input_samples[src_idx + 1]);
            const interpolated = sample1 + (sample2 - sample1) * frac;
            out.* = @intFromFloat(@max(@min(interpolated, sample_max), sample_min));
        } else {
            // Last input sample has no right-hand neighbour; use it as is.
            out.* = input_samples[src_idx];
        }
    }

    return output_len;
}
|
||||
};
|
||||
|
||||
/// ALSA audio capture configuration and state
|
||||
|
@ -664,8 +631,8 @@ pub const Session = struct {
|
|||
return Error.ModelLoadError;
|
||||
}
|
||||
|
||||
// Create Vosk recognizer
|
||||
self.vosk_recognizer = c.vosk_recognizer_new(self.vosk_model, @floatFromInt(self.options.sample_rate));
|
||||
// Create Vosk recognizer using actual hardware sample rate
|
||||
self.vosk_recognizer = c.vosk_recognizer_new(self.vosk_model, @floatFromInt(self.alsa_capture.?.sample_rate));
|
||||
if (self.vosk_recognizer == null) {
|
||||
if (self.vosk_model) |model| {
|
||||
c.vosk_model_free(model);
|
||||
|
@ -952,12 +919,8 @@ pub const Session = struct {
|
|||
return Error.InvalidState;
|
||||
}
|
||||
|
||||
// Resample to 16kHz if needed
|
||||
var resampled_buffer: [4096]i16 = undefined;
|
||||
const final_audio = if (self.alsa_capture.?.sample_rate != 16000) blk: {
|
||||
const resampled_count = AudioConverter.resample(audio_data, &resampled_buffer, self.alsa_capture.?.sample_rate, 16000);
|
||||
break :blk resampled_buffer[0..resampled_count];
|
||||
} else audio_data;
|
||||
// Use audio data directly without resampling
|
||||
const final_audio = audio_data;
|
||||
|
||||
// Convert i16 samples to bytes for Vosk
|
||||
const audio_bytes = std.mem.sliceAsBytes(final_audio);
|
||||
|
@ -1088,7 +1051,7 @@ pub const Session = struct {
|
|||
|
||||
// Reinitialize recognizer (model should still be valid)
|
||||
if (self.vosk_model) |model| {
|
||||
self.vosk_recognizer = c.vosk_recognizer_new(model, @floatFromInt(self.options.sample_rate));
|
||||
self.vosk_recognizer = c.vosk_recognizer_new(model, @floatFromInt(self.alsa_capture.?.sample_rate));
|
||||
if (self.vosk_recognizer == null) {
|
||||
const error_info = ErrorInfo.init(Error.ModelLoadError, "Failed to reinitialize Vosk recognizer");
|
||||
self.options.event_handler.onDetailedError(error_info);
|
||||
|
@ -1510,28 +1473,6 @@ test "AudioConverter stereo to mono conversion" {
|
|||
try testing.expect(mono_samples[2] == 550); // (500 + 600) / 2
|
||||
}
|
||||
|
||||
// Checks AudioConverter.resample for the pass-through case and a 2:1 downsample.
test "AudioConverter sample rate conversion" {
    const testing = std.testing;

    const input_samples = [_]i16{ 100, 200, 300, 400 };

    // Equal input and output rates: samples are copied through unchanged.
    var output_samples: [4]i16 = undefined;
    const converted = AudioConverter.resample(&input_samples, &output_samples, 16000, 16000);
    try testing.expectEqual(@as(usize, 4), converted);
    try testing.expectEqualSlices(i16, &input_samples, output_samples[0..converted]);

    // 2:1 downsampling: every output position lands exactly on an even input
    // index, so the expected values are input samples 0 and 2.
    var downsampled: [2]i16 = undefined;
    const downsampled_count = AudioConverter.resample(&input_samples, &downsampled, 16000, 8000);
    try testing.expectEqual(@as(usize, 2), downsampled_count);
    try testing.expectEqualSlices(i16, &[_]i16{ 100, 300 }, downsampled[0..downsampled_count]);
}
|
||||
|
||||
test "AlsaCapture initialization" {
|
||||
const testing = std.testing;
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
|
|
21
src/test.zig
21
src/test.zig
|
@ -228,27 +228,6 @@ test "AudioConverter stereo to mono conversion" {
|
|||
try testing.expect(overflow_mono[0] == std.math.maxInt(i16)); // Should clamp to max
|
||||
}
|
||||
|
||||
// Checks stt.AudioConverter.resample for pass-through, 2:1 downsampling and
// 1:2 upsampling, verifying both the returned counts and the sample values.
test "AudioConverter sample rate conversion" {
    const input_samples = [_]i16{ 100, 200, 300, 400 };

    // Same sample rate (no conversion): output equals input.
    var output_samples: [4]i16 = undefined;
    const converted = stt.AudioConverter.resample(&input_samples, &output_samples, 44100, 44100);
    try testing.expectEqual(@as(usize, 4), converted);
    try testing.expectEqualSlices(i16, &input_samples, output_samples[0..converted]);

    // Downsampling (44100 -> 22050, 2:1 ratio): output positions fall on
    // input indices 0 and 2.
    var downsampled: [2]i16 = undefined;
    const downsampled_count = stt.AudioConverter.resample(&input_samples, &downsampled, 44100, 22050);
    try testing.expectEqual(@as(usize, 2), downsampled_count);
    try testing.expectEqualSlices(i16, &[_]i16{ 100, 300 }, downsampled[0..downsampled_count]);

    // Upsampling (22050 -> 44100, 1:2 ratio): the midpoint is interpolated,
    // and positions at or past the last input sample repeat it.
    const small_input = [_]i16{ 100, 200 };
    var upsampled: [4]i16 = undefined;
    const upsampled_count = stt.AudioConverter.resample(&small_input, &upsampled, 22050, 44100);
    try testing.expectEqual(@as(usize, 4), upsampled_count);
    try testing.expectEqualSlices(i16, &[_]i16{ 100, 150, 200, 200 }, upsampled[0..upsampled_count]);
}
|
||||
|
||||
test "Session initialization error handling" {
|
||||
var test_handler = TestEventHandler.init(test_allocator);
|
||||
defer test_handler.deinit();
|
||||
|
|
Loading…
Add table
Reference in a new issue