add 250ms pre-roll
All checks were successful
Generic zig build / build (push) Successful in 23s

This commit is contained in:
Emil Lerch 2025-11-04 09:37:48 -08:00
parent 899fc6977d
commit 5543e122ea
Signed by: lobo
GPG key ID: A7B62D657EF764F8

View file

@ -976,6 +976,8 @@ pub const Session = struct {
const silence_duration_ms: u64 = 300;
const min_speech_duration_ms: u64 = 300;
const samples_per_ms = 16; // This assumes 16kHz audio
const preroll_ms: u64 = 250; // Capture 250ms before speech starts
const preroll_samples = preroll_ms * samples_per_ms;
std.log.info(
"Listening with silence threshold {}. If wrong, use --measure-levels to find approriate val",
@ -988,6 +990,16 @@ pub const Session = struct {
};
defer self.allocator.free(speech_buffer);
// Pre-roll ring buffer: holds the most recent preroll_samples samples read while not in speech
var preroll_buffer = self.allocator.alloc(i16, preroll_samples) catch {
const error_info = ErrorInfo.init(Error.OutOfMemory, "Failed to allocate pre-roll buffer");
self.options.event_handler.onDetailedError(error_info);
return;
};
defer self.allocator.free(preroll_buffer);
var preroll_pos: usize = 0;
var preroll_filled: bool = false;
var in_speech = false;
var silence_samples: usize = 0;
var speech_samples: usize = 0;
@ -1024,6 +1036,20 @@ pub const Session = struct {
speech_pos = 0;
speech_max_amplitude = 0;
speech_min_amplitude = std.math.maxInt(u16);
// Copy pre-roll buffer to speech buffer
if (preroll_filled) {
// Ring has wrapped, so preroll_pos is the oldest sample: copy from preroll_pos to the end first
const first_part_len = preroll_samples - preroll_pos;
@memcpy(speech_buffer[0..first_part_len], preroll_buffer[preroll_pos..]);
// Newest samples: copy from the start of the ring up to (not including) preroll_pos
@memcpy(speech_buffer[first_part_len..preroll_samples], preroll_buffer[0..preroll_pos]);
speech_pos = preroll_samples;
} else if (preroll_pos > 0) {
// Buffer not full yet, just copy what we have
@memcpy(speech_buffer[0..preroll_pos], preroll_buffer[0..preroll_pos]);
speech_pos = preroll_pos;
}
}
if (max_amplitude > speech_max_amplitude) speech_max_amplitude = max_amplitude;
if (max_amplitude < speech_min_amplitude) speech_min_amplitude = max_amplitude;
@ -1063,6 +1089,13 @@ pub const Session = struct {
c.vosk_recognizer_reset(rec);
}
}
} else {
// Not in speech - update pre-roll buffer
for (vosk_buffer[0..samples_read]) |sample| {
preroll_buffer[preroll_pos] = sample;
preroll_pos = (preroll_pos + 1) % preroll_samples;
if (preroll_pos == 0) preroll_filled = true;
}
}
// Sleep to ease CPU pressure