add silence threshold for great good
All checks were successful
Generic zig build / build (push) Successful in 22s

This commit is contained in:
Emil Lerch 2025-10-29 17:25:59 -07:00
parent 07308a548a
commit e82b119b36
Signed by: lobo
GPG key ID: A7B62D657EF764F8
2 changed files with 24 additions and 5 deletions

View file

@ -418,6 +418,7 @@ pub fn main() !void {
var model_path: ?[]const u8 = null;
var exec_program: ?[]const u8 = null;
var measure_levels = false;
var silence_threshold: ?i16 = null;
// Parse arguments
for (args[1..]) |arg| {
@ -428,6 +429,7 @@ pub fn main() !void {
_ = try stdout.writeAll("OPTIONS:\n");
_ = try stdout.writeAll(" --model=<path> Path to Vosk model directory\n");
_ = try stdout.writeAll(" --exec=<program> Program to execute with recognized text\n");
_ = try stdout.writeAll(" --silence-threshold=<n> Silence detection threshold (default: 300)\n");
_ = try stdout.writeAll(" --measure-levels Display real-time audio level histogram\n");
_ = try stdout.writeAll(" --help, -h Show this help message\n\n");
_ = try stdout.writeAll("EXAMPLES:\n");
@ -445,6 +447,16 @@ pub fn main() !void {
exec_program = arg[7..]; // Skip "--exec="
} else if (std.mem.eql(u8, arg, "--measure-levels")) {
measure_levels = true;
} else if (std.mem.startsWith(u8, arg, "--silence-threshold=")) {
const threshold_str = arg[20..]; // Skip "--silence-threshold="
silence_threshold = std.fmt.parseInt(i16, threshold_str, 10) catch {
std.log.err("Invalid silence threshold: {s}", .{threshold_str});
return error.InvalidArgument;
};
if (silence_threshold.? < 0) {
std.log.err("Invalid silence threshold: {s}", .{threshold_str});
return error.InvalidArgument;
}
}
}
@ -516,6 +528,7 @@ pub fn main() !void {
.event_handler = speech_handler,
.sample_rate = 16000, // Standard sample rate for speech recognition
.buffer_size = 256, // Existing buffer size for low latency
.silence_threshold = silence_threshold orelse 300,
};
std.log.debug("Initializing STT library...", .{});

View file

@ -606,6 +606,8 @@ pub const Options = struct {
// channels: u32 = 2,
/// Audio buffer size in frames (default: 256)
buffer_size: u32 = 256,
/// Silence detection threshold (default: 300)
silence_threshold: i16 = 300,
};
/// Main STT session handle
@ -970,11 +972,15 @@ pub const Session = struct {
defer self.allocator.free(vosk_buffer);
// Silence detection parameters
const silence_threshold: i16 = 300;
const silence_threshold: i16 = self.options.silence_threshold;
const silence_duration_ms: u64 = 500;
const min_speech_duration_ms: u64 = 300;
const samples_per_ms = 16; // This assumes 16kHz audio
std.log.info(
"Listening with silence threshold {}. If wrong, use --measure-levels to find approriate val",
.{silence_threshold},
);
var speech_buffer = self.allocator.alloc(i16, 16000 * 10) catch {
const error_info = ErrorInfo.init(Error.OutOfMemory, "Failed to allocate speech buffer");
self.options.event_handler.onDetailedError(error_info);