AI-generated; not yet reviewed
This commit is contained in:
parent
4b9f838328
commit
446c146ded
3 changed files with 1688 additions and 114 deletions
74
build.zig
74
build.zig
|
@ -4,7 +4,6 @@ pub fn build(b: *std.Build) void {
|
||||||
const target = b.standardTargetOptions(.{});
|
const target = b.standardTargetOptions(.{});
|
||||||
const optimize = b.standardOptimizeOption(.{});
|
const optimize = b.standardOptimizeOption(.{});
|
||||||
|
|
||||||
// Select Vosk dependency based on target
|
|
||||||
const vosk_dep_name = selectVoskDependency(target.result);
|
const vosk_dep_name = selectVoskDependency(target.result);
|
||||||
const vosk_dep = b.dependency(vosk_dep_name, .{});
|
const vosk_dep = b.dependency(vosk_dep_name, .{});
|
||||||
const alsa_dep = b.dependency("alsa", .{
|
const alsa_dep = b.dependency("alsa", .{
|
||||||
|
@ -24,24 +23,49 @@ pub fn build(b: *std.Build) void {
|
||||||
install_model.step.dependOn(&model_step.step);
|
install_model.step.dependOn(&model_step.step);
|
||||||
b.getInstallStep().dependOn(&install_model.step);
|
b.getInstallStep().dependOn(&install_model.step);
|
||||||
|
|
||||||
const exe = b.addExecutable(.{
|
// Create the STT library
|
||||||
|
const stt_lib = b.addLibrary(.{
|
||||||
.name = "stt",
|
.name = "stt",
|
||||||
|
.linkage = .static,
|
||||||
|
.root_module = b.createModule(.{
|
||||||
|
.root_source_file = b.path("src/root.zig"),
|
||||||
|
.target = target,
|
||||||
|
.optimize = optimize,
|
||||||
|
.link_libc = true,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Link with Vosk library
|
||||||
|
stt_lib.addIncludePath(vosk_dep.path(""));
|
||||||
|
stt_lib.addLibraryPath(vosk_dep.path(""));
|
||||||
|
stt_lib.linkSystemLibrary("vosk");
|
||||||
|
|
||||||
|
const alsa_lib = alsa_dep.artifact("asound");
|
||||||
|
stt_lib.linkLibrary(alsa_lib);
|
||||||
|
stt_lib.addIncludePath(alsa_dep.path("zig-out/include"));
|
||||||
|
|
||||||
|
b.installArtifact(stt_lib);
|
||||||
|
|
||||||
|
// Create the demo executable
|
||||||
|
const exe = b.addExecutable(.{
|
||||||
|
.name = "stt-demo",
|
||||||
.root_module = b.createModule(.{
|
.root_module = b.createModule(.{
|
||||||
.root_source_file = b.path("src/main.zig"),
|
.root_source_file = b.path("src/main.zig"),
|
||||||
.target = target,
|
.target = target,
|
||||||
.optimize = optimize,
|
.optimize = optimize,
|
||||||
|
.link_libc = true,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
exe.linkLibC();
|
exe.linkLibrary(stt_lib);
|
||||||
|
exe.linkLibrary(alsa_lib);
|
||||||
|
exe.addIncludePath(alsa_dep.path("zig-out/include"));
|
||||||
|
|
||||||
|
// Link with Vosk for the executable
|
||||||
exe.addIncludePath(vosk_dep.path(""));
|
exe.addIncludePath(vosk_dep.path(""));
|
||||||
exe.addLibraryPath(vosk_dep.path(""));
|
exe.addLibraryPath(vosk_dep.path(""));
|
||||||
exe.linkSystemLibrary("vosk");
|
exe.linkSystemLibrary("vosk");
|
||||||
|
|
||||||
const alsa_lib = alsa_dep.artifact("asound");
|
|
||||||
exe.linkLibrary(alsa_lib);
|
|
||||||
exe.addIncludePath(alsa_dep.path("zig-out/include"));
|
|
||||||
|
|
||||||
b.installArtifact(exe);
|
b.installArtifact(exe);
|
||||||
|
|
||||||
const run_step = b.step("run", "Run the app");
|
const run_step = b.step("run", "Run the app");
|
||||||
|
@ -52,21 +76,47 @@ pub fn build(b: *std.Build) void {
|
||||||
if (b.args) |args| {
|
if (b.args) |args| {
|
||||||
run_cmd.addArgs(args);
|
run_cmd.addArgs(args);
|
||||||
}
|
}
|
||||||
// Creates a step for unit testing. This only builds the test executable
|
// Creates a step for unit testing the library
|
||||||
// but does not run it.
|
const lib_unit_tests = b.addTest(.{
|
||||||
|
.root_module = b.createModule(.{
|
||||||
|
.root_source_file = b.path("src/root.zig"),
|
||||||
|
.target = target,
|
||||||
|
.optimize = optimize,
|
||||||
|
.link_libc = true,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Link the same dependencies as the library
|
||||||
|
lib_unit_tests.linkLibrary(alsa_lib);
|
||||||
|
lib_unit_tests.addIncludePath(alsa_dep.path("zig-out/include"));
|
||||||
|
lib_unit_tests.addIncludePath(vosk_dep.path(""));
|
||||||
|
lib_unit_tests.addLibraryPath(vosk_dep.path(""));
|
||||||
|
lib_unit_tests.linkSystemLibrary("vosk");
|
||||||
|
|
||||||
|
const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
|
||||||
|
|
||||||
|
// Creates a step for unit testing the demo application
|
||||||
const exe_unit_tests = b.addTest(.{
|
const exe_unit_tests = b.addTest(.{
|
||||||
.root_module = b.createModule(.{
|
.root_module = b.createModule(.{
|
||||||
.root_source_file = b.path("src/main.zig"),
|
.root_source_file = b.path("src/main.zig"),
|
||||||
.target = target,
|
.target = target,
|
||||||
.optimize = optimize,
|
.optimize = optimize,
|
||||||
|
.link_libc = true,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
exe_unit_tests.linkLibrary(stt_lib);
|
||||||
|
exe_unit_tests.linkLibrary(alsa_lib);
|
||||||
|
exe_unit_tests.addIncludePath(alsa_dep.path("zig-out/include"));
|
||||||
|
exe_unit_tests.addIncludePath(vosk_dep.path(""));
|
||||||
|
exe_unit_tests.addLibraryPath(vosk_dep.path(""));
|
||||||
|
exe_unit_tests.linkSystemLibrary("vosk");
|
||||||
|
|
||||||
const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
|
const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
|
||||||
|
|
||||||
// Similar to creating the run step earlier, this exposes a `test` step to
|
// Test step that runs both library and demo tests
|
||||||
// the `zig build --help` menu, providing a way for the user to request
|
|
||||||
// running the unit tests.
|
|
||||||
const test_step = b.step("test", "Run unit tests");
|
const test_step = b.step("test", "Run unit tests");
|
||||||
|
test_step.dependOn(&run_lib_unit_tests.step);
|
||||||
test_step.dependOn(&run_exe_unit_tests.step);
|
test_step.dependOn(&run_exe_unit_tests.step);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
172
src/main.zig
172
src/main.zig
|
@ -1,103 +1,105 @@
|
||||||
const std = @import("std");
|
//! STT Library Demo Application
|
||||||
const c = @cImport({
|
//!
|
||||||
@cInclude("vosk_api.h");
|
//! This demonstrates how to use the STT library for speech recognition.
|
||||||
@cInclude("alsa/asoundlib.h");
|
//! It will be updated in subsequent tasks to use the actual Vosk integration.
|
||||||
});
|
|
||||||
|
|
||||||
const VOSK_SAMPLE_RATE = 16000;
|
const std = @import("std");
|
||||||
const BUFFER_SIZE = 256;
|
const stt = @import("root.zig");
|
||||||
|
|
||||||
|
/// Event-handler callbacks used by the demo application.
///
/// Both callbacks only write to stderr through `std.debug.print`. The struct
/// declares no fields, so the opaque context pointer carries no state.
const DemoHandler = struct {
    /// Speech callback: prints the recognized `text` as "Detected: <text>".
    /// `ctx` is accepted to match the callback signature and is not used.
    fn onSpeech(ctx: *anyopaque, text: []const u8) void {
        _ = ctx; // stateless handler: nothing to recover from the context
        std.debug.print("Detected: {s}\n", .{text});
    }

    /// Error callback: prints `error_code` followed by the supplied `message`.
    /// `ctx` is accepted to match the callback signature and is not used.
    fn onError(ctx: *anyopaque, error_code: stt.SttError, message: []const u8) void {
        _ = ctx; // stateless handler: nothing to recover from the context
        std.debug.print("Error {}: {s}\n", .{ error_code, message });
    }
};
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||||
defer _ = gpa.deinit();
|
defer _ = gpa.deinit();
|
||||||
|
const allocator = gpa.allocator();
|
||||||
|
|
||||||
// Set ALSA config path to our local alsa.conf
|
std.debug.print("STT Library Demo\n", .{});
|
||||||
_ = c.setenv("ALSA_CONFIG_PATH", "alsa.conf", 1);
|
std.debug.print("================\n", .{});
|
||||||
|
|
||||||
// Initialize Vosk
|
// Create demo handler
|
||||||
c.vosk_set_log_level(-1);
|
var demo_handler = DemoHandler{};
|
||||||
const model = c.vosk_model_new("zig-out/bin/vosk-model-small-en-us-0.15");
|
const speech_handler = stt.SpeechEventHandler{
|
||||||
if (model == null) {
|
.onSpeechFn = DemoHandler.onSpeech,
|
||||||
std.debug.print("Failed to load model\n", .{});
|
.onErrorFn = DemoHandler.onError,
|
||||||
|
.ctx = &demo_handler,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Initialize STT session with configuration
|
||||||
|
const options = stt.SttOptions{
|
||||||
|
.model_path = "zig-out/bin/vosk-model-small-en-us-0.15",
|
||||||
|
.audio_device = "hw:3,0",
|
||||||
|
.event_handler = speech_handler,
|
||||||
|
.sample_rate = 16000,
|
||||||
|
.channels = 2,
|
||||||
|
.buffer_size = 256,
|
||||||
|
};
|
||||||
|
|
||||||
|
var session = stt.SttSession.init(allocator, options) catch |err| {
|
||||||
|
std.debug.print("Failed to initialize STT library: {}\n", .{err});
|
||||||
return;
|
return;
|
||||||
}
|
};
|
||||||
defer c.vosk_model_free(model);
|
defer session.deinit();
|
||||||
|
|
||||||
const rec = c.vosk_recognizer_new(model, VOSK_SAMPLE_RATE);
|
std.debug.print("STT library initialized successfully\n", .{});
|
||||||
if (rec == null) {
|
std.debug.print("Model path: {s}\n", .{options.model_path});
|
||||||
std.debug.print("Failed to create recognizer\n", .{});
|
std.debug.print("Audio device: {s}\n", .{options.audio_device});
|
||||||
|
std.debug.print("Sample rate: {} Hz\n", .{options.sample_rate});
|
||||||
|
std.debug.print("Channels: {}\n", .{options.channels});
|
||||||
|
std.debug.print("Buffer size: {} frames\n", .{options.buffer_size});
|
||||||
|
std.debug.print("\n", .{});
|
||||||
|
|
||||||
|
// Start listening for speech
|
||||||
|
session.start_listening() catch |err| {
|
||||||
|
std.debug.print("Failed to start listening: {}\n", .{err});
|
||||||
return;
|
return;
|
||||||
}
|
};
|
||||||
defer c.vosk_recognizer_free(rec);
|
|
||||||
|
|
||||||
// Try to open hardware capture device directly
|
std.debug.print("Listening for speech... (Press Enter to exit)\n", .{});
|
||||||
var handle: ?*c.snd_pcm_t = null;
|
|
||||||
var err = c.snd_pcm_open(&handle, "hw:3,0", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
|
|
||||||
if (err < 0) {
|
|
||||||
std.debug.print("Cannot open audio device: {s}\n", .{c.snd_strerror(err)});
|
|
||||||
std.debug.print("Make sure no other applications are using the microphone\n", .{});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
defer _ = c.snd_pcm_close(handle);
|
|
||||||
|
|
||||||
// Set to blocking mode
|
// Wait for user input to exit (simulating Ctrl+C behavior)
|
||||||
err = c.snd_pcm_nonblock(handle, 0);
|
// In subsequent tasks, this will be replaced with actual audio processing
|
||||||
if (err < 0) {
|
const stdin = std.fs.File.stdin();
|
||||||
std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(err)});
|
var buffer: [1]u8 = undefined;
|
||||||
return;
|
_ = stdin.read(&buffer) catch {};
|
||||||
|
|
||||||
|
std.debug.print("\nStopping speech recognition...\n", .{});
|
||||||
|
session.stop_listening();
|
||||||
|
|
||||||
|
std.debug.print("Demo completed successfully\n", .{});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure audio parameters
|
// Test the demo functionality
|
||||||
err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 2, VOSK_SAMPLE_RATE, 1, 100000);
|
test "demo handler functionality" {
|
||||||
if (err < 0) {
|
const testing = std.testing;
|
||||||
std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(err)});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prepare the PCM device
|
var demo_handler = DemoHandler{};
|
||||||
err = c.snd_pcm_prepare(handle);
|
const speech_handler = stt.SpeechEventHandler{
|
||||||
if (err < 0) {
|
.onSpeechFn = DemoHandler.onSpeech,
|
||||||
std.debug.print("Cannot prepare audio: {s}\n", .{c.snd_strerror(err)});
|
.onErrorFn = DemoHandler.onError,
|
||||||
return;
|
.ctx = &demo_handler,
|
||||||
}
|
};
|
||||||
|
|
||||||
// Start the PCM stream
|
// Test that callbacks can be invoked without crashing
|
||||||
err = c.snd_pcm_start(handle);
|
speech_handler.onSpeech("test speech");
|
||||||
if (err < 0) {
|
speech_handler.onError(stt.SttError.AudioDeviceError, "test error");
|
||||||
std.debug.print("Cannot start audio: {s}\n", .{c.snd_strerror(err)});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std.debug.print("Audio configured successfully\n", .{});
|
// If we get here without crashing, the test passes
|
||||||
std.debug.print("Listening... (Ctrl+C to exit)\n", .{});
|
try testing.expect(true);
|
||||||
|
|
||||||
var buffer: [BUFFER_SIZE * 2]i16 = undefined; // stereo
|
|
||||||
var accumulator: [VOSK_SAMPLE_RATE]i16 = undefined; // 1 second buffer
|
|
||||||
var acc_pos: usize = 0;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
const frames_read = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE);
|
|
||||||
if (frames_read < 0) {
|
|
||||||
_ = c.snd_pcm_recover(handle, @intCast(frames_read), 1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert stereo to mono and accumulate
|
|
||||||
for (0..@intCast(frames_read)) |i| {
|
|
||||||
if (acc_pos < accumulator.len) {
|
|
||||||
accumulator[acc_pos] = buffer[i * 2]; // left channel
|
|
||||||
acc_pos += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process when we have enough data (0.1 seconds)
|
|
||||||
if (acc_pos >= VOSK_SAMPLE_RATE / 10) {
|
|
||||||
const result = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&accumulator), @intCast(acc_pos * 2));
|
|
||||||
if (result != 0) {
|
|
||||||
const text = c.vosk_recognizer_result(rec);
|
|
||||||
std.debug.print("{s}\n", .{text});
|
|
||||||
}
|
|
||||||
acc_pos = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
1550
src/root.zig
1550
src/root.zig
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue