This commit is contained in:
parent
899fc6977d
commit
5543e122ea
1 changed files with 33 additions and 0 deletions
33
src/stt.zig
33
src/stt.zig
|
|
@ -976,6 +976,8 @@ pub const Session = struct {
|
|||
const silence_duration_ms: u64 = 300;
|
||||
const min_speech_duration_ms: u64 = 300;
|
||||
const samples_per_ms = 16; // This assumes 16kHz audio
|
||||
const preroll_ms: u64 = 250; // Capture 250ms before speech starts
|
||||
const preroll_samples = preroll_ms * samples_per_ms;
|
||||
|
||||
std.log.info(
|
||||
"Listening with silence threshold {}. If wrong, use --measure-levels to find approriate val",
|
||||
|
|
@ -988,6 +990,16 @@ pub const Session = struct {
|
|||
};
|
||||
defer self.allocator.free(speech_buffer);
|
||||
|
||||
// Pre-roll ring buffer
|
||||
var preroll_buffer = self.allocator.alloc(i16, preroll_samples) catch {
|
||||
const error_info = ErrorInfo.init(Error.OutOfMemory, "Failed to allocate pre-roll buffer");
|
||||
self.options.event_handler.onDetailedError(error_info);
|
||||
return;
|
||||
};
|
||||
defer self.allocator.free(preroll_buffer);
|
||||
var preroll_pos: usize = 0;
|
||||
var preroll_filled: bool = false;
|
||||
|
||||
var in_speech = false;
|
||||
var silence_samples: usize = 0;
|
||||
var speech_samples: usize = 0;
|
||||
|
|
@ -1024,6 +1036,20 @@ pub const Session = struct {
|
|||
speech_pos = 0;
|
||||
speech_max_amplitude = 0;
|
||||
speech_min_amplitude = std.math.maxInt(u16);
|
||||
|
||||
// Copy pre-roll buffer to speech buffer
|
||||
if (preroll_filled) {
|
||||
// Copy from preroll_pos to end
|
||||
const first_part_len = preroll_samples - preroll_pos;
|
||||
@memcpy(speech_buffer[0..first_part_len], preroll_buffer[preroll_pos..]);
|
||||
// Copy from start to preroll_pos
|
||||
@memcpy(speech_buffer[first_part_len..preroll_samples], preroll_buffer[0..preroll_pos]);
|
||||
speech_pos = preroll_samples;
|
||||
} else if (preroll_pos > 0) {
|
||||
// Buffer not full yet, just copy what we have
|
||||
@memcpy(speech_buffer[0..preroll_pos], preroll_buffer[0..preroll_pos]);
|
||||
speech_pos = preroll_pos;
|
||||
}
|
||||
}
|
||||
if (max_amplitude > speech_max_amplitude) speech_max_amplitude = max_amplitude;
|
||||
if (max_amplitude < speech_min_amplitude) speech_min_amplitude = max_amplitude;
|
||||
|
|
@ -1063,6 +1089,13 @@ pub const Session = struct {
|
|||
c.vosk_recognizer_reset(rec);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Not in speech - update pre-roll buffer
|
||||
for (vosk_buffer[0..samples_read]) |sample| {
|
||||
preroll_buffer[preroll_pos] = sample;
|
||||
preroll_pos = (preroll_pos + 1) % preroll_samples;
|
||||
if (preroll_pos == 0) preroll_filled = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Sleep to ease CPU pressure
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue