add silence threshold for great good
All checks were successful
Generic zig build / build (push) Successful in 22s
All checks were successful
Generic zig build / build (push) Successful in 22s
This commit is contained in:
parent
07308a548a
commit
e82b119b36
2 changed files with 24 additions and 5 deletions
21
src/main.zig
21
src/main.zig
|
|
@ -418,6 +418,7 @@ pub fn main() !void {
|
||||||
var model_path: ?[]const u8 = null;
|
var model_path: ?[]const u8 = null;
|
||||||
var exec_program: ?[]const u8 = null;
|
var exec_program: ?[]const u8 = null;
|
||||||
var measure_levels = false;
|
var measure_levels = false;
|
||||||
|
var silence_threshold: ?i16 = null;
|
||||||
|
|
||||||
// Parse arguments
|
// Parse arguments
|
||||||
for (args[1..]) |arg| {
|
for (args[1..]) |arg| {
|
||||||
|
|
@ -426,10 +427,11 @@ pub fn main() !void {
|
||||||
_ = try stdout.writeAll("USAGE:\n");
|
_ = try stdout.writeAll("USAGE:\n");
|
||||||
_ = try stdout.writeAll(" stt [OPTIONS]\n\n");
|
_ = try stdout.writeAll(" stt [OPTIONS]\n\n");
|
||||||
_ = try stdout.writeAll("OPTIONS:\n");
|
_ = try stdout.writeAll("OPTIONS:\n");
|
||||||
_ = try stdout.writeAll(" --model=<path> Path to Vosk model directory\n");
|
_ = try stdout.writeAll(" --model=<path> Path to Vosk model directory\n");
|
||||||
_ = try stdout.writeAll(" --exec=<program> Program to execute with recognized text\n");
|
_ = try stdout.writeAll(" --exec=<program> Program to execute with recognized text\n");
|
||||||
_ = try stdout.writeAll(" --measure-levels Display real-time audio level histogram\n");
|
_ = try stdout.writeAll(" --silence-threshold=<n> Silence detection threshold (default: 300)\n");
|
||||||
_ = try stdout.writeAll(" --help, -h Show this help message\n\n");
|
_ = try stdout.writeAll(" --measure-levels Display real-time audio level histogram\n");
|
||||||
|
_ = try stdout.writeAll(" --help, -h Show this help message\n\n");
|
||||||
_ = try stdout.writeAll("EXAMPLES:\n");
|
_ = try stdout.writeAll("EXAMPLES:\n");
|
||||||
_ = try stdout.writeAll(" stt\n");
|
_ = try stdout.writeAll(" stt\n");
|
||||||
_ = try stdout.writeAll(" stt --model=../share/vosk/models/vosk-model-small-en-us-0.15\n");
|
_ = try stdout.writeAll(" stt --model=../share/vosk/models/vosk-model-small-en-us-0.15\n");
|
||||||
|
|
@ -445,6 +447,16 @@ pub fn main() !void {
|
||||||
exec_program = arg[7..]; // Skip "--exec="
|
exec_program = arg[7..]; // Skip "--exec="
|
||||||
} else if (std.mem.eql(u8, arg, "--measure-levels")) {
|
} else if (std.mem.eql(u8, arg, "--measure-levels")) {
|
||||||
measure_levels = true;
|
measure_levels = true;
|
||||||
|
} else if (std.mem.startsWith(u8, arg, "--silence-threshold=")) {
|
||||||
|
const threshold_str = arg[20..]; // Skip "--silence-threshold="
|
||||||
|
silence_threshold = std.fmt.parseInt(i16, threshold_str, 10) catch {
|
||||||
|
std.log.err("Invalid silence threshold: {s}", .{threshold_str});
|
||||||
|
return error.InvalidArgument;
|
||||||
|
};
|
||||||
|
if (silence_threshold.? < 0) {
|
||||||
|
std.log.err("Invalid silence threshold: {s}", .{threshold_str});
|
||||||
|
return error.InvalidArgument;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -516,6 +528,7 @@ pub fn main() !void {
|
||||||
.event_handler = speech_handler,
|
.event_handler = speech_handler,
|
||||||
.sample_rate = 16000, // Standard sample rate for speech recognition
|
.sample_rate = 16000, // Standard sample rate for speech recognition
|
||||||
.buffer_size = 256, // Existing buffer size for low latency
|
.buffer_size = 256, // Existing buffer size for low latency
|
||||||
|
.silence_threshold = silence_threshold orelse 300,
|
||||||
};
|
};
|
||||||
|
|
||||||
std.log.debug("Initializing STT library...", .{});
|
std.log.debug("Initializing STT library...", .{});
|
||||||
|
|
|
||||||
|
|
@ -606,6 +606,8 @@ pub const Options = struct {
|
||||||
// channels: u32 = 2,
|
// channels: u32 = 2,
|
||||||
/// Audio buffer size in frames (default: 256)
|
/// Audio buffer size in frames (default: 256)
|
||||||
buffer_size: u32 = 256,
|
buffer_size: u32 = 256,
|
||||||
|
/// Silence detection threshold (default: 300)
|
||||||
|
silence_threshold: i16 = 300,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Main STT session handle
|
/// Main STT session handle
|
||||||
|
|
@ -970,11 +972,15 @@ pub const Session = struct {
|
||||||
defer self.allocator.free(vosk_buffer);
|
defer self.allocator.free(vosk_buffer);
|
||||||
|
|
||||||
// Silence detection parameters
|
// Silence detection parameters
|
||||||
const silence_threshold: i16 = 300;
|
const silence_threshold: i16 = self.options.silence_threshold;
|
||||||
const silence_duration_ms: u64 = 500;
|
const silence_duration_ms: u64 = 500;
|
||||||
const min_speech_duration_ms: u64 = 300;
|
const min_speech_duration_ms: u64 = 300;
|
||||||
const samples_per_ms = 16; // This assumes 16kHz audio
|
const samples_per_ms = 16; // This assumes 16kHz audio
|
||||||
|
|
||||||
|
std.log.info(
|
||||||
|
"Listening with silence threshold {}. If wrong, use --measure-levels to find approriate val",
|
||||||
|
.{silence_threshold},
|
||||||
|
);
|
||||||
var speech_buffer = self.allocator.alloc(i16, 16000 * 10) catch {
|
var speech_buffer = self.allocator.alloc(i16, 16000 * 10) catch {
|
||||||
const error_info = ErrorInfo.init(Error.OutOfMemory, "Failed to allocate speech buffer");
|
const error_info = ErrorInfo.init(Error.OutOfMemory, "Failed to allocate speech buffer");
|
||||||
self.options.event_handler.onDetailedError(error_info);
|
self.options.event_handler.onDetailedError(error_info);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue