ai generated - no review yet

2025-09-10 12:19:56 -07:00 · 2025-09-10 12:19:56 -07:00 · 446c146ded
commit 446c146ded
parent 4b9f838328
3 changed files with 1688 additions and 114 deletions
--- a/build.zig
+++ b/build.zig
@@ -4,7 +4,6 @@ pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

-    // Select Vosk dependency based on target
    const vosk_dep_name = selectVoskDependency(target.result);
    const vosk_dep = b.dependency(vosk_dep_name, .{});
    const alsa_dep = b.dependency("alsa", .{
@@ -24,24 +23,49 @@ pub fn build(b: *std.Build) void {
    install_model.step.dependOn(&model_step.step);
    b.getInstallStep().dependOn(&install_model.step);

-    const exe = b.addExecutable(.{
+    // Create the STT library
+    const stt_lib = b.addLibrary(.{
        .name = "stt",
+        .linkage = .static,
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/root.zig"),
+            .target = target,
+            .optimize = optimize,
+            .link_libc = true,
+        }),
+    });
+
+    // Link with Vosk library
+    stt_lib.addIncludePath(vosk_dep.path(""));
+    stt_lib.addLibraryPath(vosk_dep.path(""));
+    stt_lib.linkSystemLibrary("vosk");
+
+    const alsa_lib = alsa_dep.artifact("asound");
+    stt_lib.linkLibrary(alsa_lib);
+    stt_lib.addIncludePath(alsa_dep.path("zig-out/include"));
+
+    b.installArtifact(stt_lib);
+
+    // Create the demo executable
+    const exe = b.addExecutable(.{
+        .name = "stt-demo",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = target,
            .optimize = optimize,
+            .link_libc = true,
        }),
    });

-    exe.linkLibC();
+    exe.linkLibrary(stt_lib);
+    exe.linkLibrary(alsa_lib);
+    exe.addIncludePath(alsa_dep.path("zig-out/include"));
+
+    // Link with Vosk for the executable
    exe.addIncludePath(vosk_dep.path(""));
    exe.addLibraryPath(vosk_dep.path(""));
    exe.linkSystemLibrary("vosk");

-    const alsa_lib = alsa_dep.artifact("asound");
-    exe.linkLibrary(alsa_lib);
-    exe.addIncludePath(alsa_dep.path("zig-out/include"));
-
    b.installArtifact(exe);

    const run_step = b.step("run", "Run the app");
@@ -52,21 +76,47 @@ pub fn build(b: *std.Build) void {
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }
-    // Creates a step for unit testing. This only builds the test executable
-    // but does not run it.
+    // Creates a step for unit testing the library
+    const lib_unit_tests = b.addTest(.{
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/root.zig"),
+            .target = target,
+            .optimize = optimize,
+            .link_libc = true,
+        }),
+    });
+
+    // Link the same dependencies as the library
+    lib_unit_tests.linkLibrary(alsa_lib);
+    lib_unit_tests.addIncludePath(alsa_dep.path("zig-out/include"));
+    lib_unit_tests.addIncludePath(vosk_dep.path(""));
+    lib_unit_tests.addLibraryPath(vosk_dep.path(""));
+    lib_unit_tests.linkSystemLibrary("vosk");
+
+    const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
+
+    // Creates a step for unit testing the demo application
    const exe_unit_tests = b.addTest(.{
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = target,
            .optimize = optimize,
+            .link_libc = true,
        }),
    });
+
+    exe_unit_tests.linkLibrary(stt_lib);
+    exe_unit_tests.linkLibrary(alsa_lib);
+    exe_unit_tests.addIncludePath(alsa_dep.path("zig-out/include"));
+    exe_unit_tests.addIncludePath(vosk_dep.path(""));
+    exe_unit_tests.addLibraryPath(vosk_dep.path(""));
+    exe_unit_tests.linkSystemLibrary("vosk");
+
    const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);

-    // Similar to creating the run step earlier, this exposes a `test` step to
-    // the `zig build --help` menu, providing a way for the user to request
-    // running the unit tests.
+    // Test step that runs both library and demo tests
    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_lib_unit_tests.step);
    test_step.dependOn(&run_exe_unit_tests.step);
 }

--- a/src/main.zig
+++ b/src/main.zig
@@ -1,103 +1,105 @@
-const std = @import("std");
-const c = @cImport({
-    @cInclude("vosk_api.h");
-    @cInclude("alsa/asoundlib.h");
-});
+//! STT Library Demo Application
+//!
+//! This demonstrates how to use the STT library for speech recognition.
+//! It will be updated in subsequent tasks to use the actual Vosk integration.

-const VOSK_SAMPLE_RATE = 16000;
-const BUFFER_SIZE = 256;
+const std = @import("std");
+const stt = @import("root.zig");
+
+/// Field-less demo event handler whose callbacks print recognition results and errors via `std.debug.print`.
+const DemoHandler = struct {
+    /// Speech callback: prints `text` as "Detected: <text>". `ctx` is cast to `*DemoHandler` and then discarded.
+    fn onSpeech(ctx: *anyopaque, text: []const u8) void {
+        const self: *DemoHandler = @ptrCast(@alignCast(ctx));
+        _ = self; // Explicit discard: DemoHandler declares no fields, so there is no state to read
+
+        std.debug.print("Detected: {s}\n", .{text});
+    }
+
+    /// Error callback: prints `error_code` and `message` as "Error <code>: <message>". `ctx` is cast and then discarded.
+    fn onError(ctx: *anyopaque, error_code: stt.SttError, message: []const u8) void {
+        const self: *DemoHandler = @ptrCast(@alignCast(ctx));
+        _ = self; // Explicit discard: DemoHandler declares no fields, so there is no state to read
+
+        std.debug.print("Error {}: {s}\n", .{ error_code, message });
+    }
+};

 pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();

-    // Set ALSA config path to our local alsa.conf
-    _ = c.setenv("ALSA_CONFIG_PATH", "alsa.conf", 1);
+    std.debug.print("STT Library Demo\n", .{});
+    std.debug.print("================\n", .{});

-    // Initialize Vosk
-    c.vosk_set_log_level(-1);
-    const model = c.vosk_model_new("zig-out/bin/vosk-model-small-en-us-0.15");
-    if (model == null) {
-        std.debug.print("Failed to load model\n", .{});
+    // Bind DemoHandler's two callbacks and a pointer to the instance into a stt.SpeechEventHandler
+    var demo_handler = DemoHandler{};
+    const speech_handler = stt.SpeechEventHandler{
+        .onSpeechFn = DemoHandler.onSpeech,
+        .onErrorFn = DemoHandler.onError,
+        .ctx = &demo_handler,
+    };
+
+    // Session configuration; the model path, audio device name and audio parameters are hard-coded for the demo
+    const options = stt.SttOptions{
+        .model_path = "zig-out/bin/vosk-model-small-en-us-0.15",
+        .audio_device = "hw:3,0",
+        .event_handler = speech_handler,
+        .sample_rate = 16000,
+        .channels = 2,
+        .buffer_size = 256,
+    };
+
+    var session = stt.SttSession.init(allocator, options) catch |err| {
+        std.debug.print("Failed to initialize STT library: {}\n", .{err});
        return;
-    }
-    defer c.vosk_model_free(model);
+    };
+    defer session.deinit();

-    const rec = c.vosk_recognizer_new(model, VOSK_SAMPLE_RATE);
-    if (rec == null) {
-        std.debug.print("Failed to create recognizer\n", .{});
+    std.debug.print("STT library initialized successfully\n", .{});
+    std.debug.print("Model path: {s}\n", .{options.model_path});
+    std.debug.print("Audio device: {s}\n", .{options.audio_device});
+    std.debug.print("Sample rate: {} Hz\n", .{options.sample_rate});
+    std.debug.print("Channels: {}\n", .{options.channels});
+    std.debug.print("Buffer size: {} frames\n", .{options.buffer_size});
+    std.debug.print("\n", .{});
+
+    // Start listening for speech
+    session.start_listening() catch |err| {
+        std.debug.print("Failed to start listening: {}\n", .{err});
        return;
-    }
-    defer c.vosk_recognizer_free(rec);
+    };

-    // Try to open hardware capture device directly
-    var handle: ?*c.snd_pcm_t = null;
-    var err = c.snd_pcm_open(&handle, "hw:3,0", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
-    if (err < 0) {
-        std.debug.print("Cannot open audio device: {s}\n", .{c.snd_strerror(err)});
-        std.debug.print("Make sure no other applications are using the microphone\n", .{});
-        return;
-    }
-    defer _ = c.snd_pcm_close(handle);
+    std.debug.print("Listening for speech... (Press Enter to exit)\n", .{});

-    // Set to blocking mode
-    err = c.snd_pcm_nonblock(handle, 0);
-    if (err < 0) {
-        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(err)});
-        return;
-    }
+    // Wait on a one-byte stdin read; a read error also ends the wait because it is ignored via `catch {}`
+    // In subsequent tasks, this will be replaced with actual audio processing
+    const stdin = std.fs.File.stdin();
+    var buffer: [1]u8 = undefined;
+    _ = stdin.read(&buffer) catch {};

-    // Configure audio parameters
-    err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 2, VOSK_SAMPLE_RATE, 1, 100000);
-    if (err < 0) {
-        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(err)});
-        return;
-    }
+    std.debug.print("\nStopping speech recognition...\n", .{});
+    session.stop_listening();

-    // Prepare the PCM device
-    err = c.snd_pcm_prepare(handle);
-    if (err < 0) {
-        std.debug.print("Cannot prepare audio: {s}\n", .{c.snd_strerror(err)});
-        return;
-    }
-
-    // Start the PCM stream
-    err = c.snd_pcm_start(handle);
-    if (err < 0) {
-        std.debug.print("Cannot start audio: {s}\n", .{c.snd_strerror(err)});
-        return;
-    }
-
-    std.debug.print("Audio configured successfully\n", .{});
-    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});
-
-    var buffer: [BUFFER_SIZE * 2]i16 = undefined; // stereo
-    var accumulator: [VOSK_SAMPLE_RATE]i16 = undefined; // 1 second buffer
-    var acc_pos: usize = 0;
-
-    while (true) {
-        const frames_read = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE);
-        if (frames_read < 0) {
-            _ = c.snd_pcm_recover(handle, @intCast(frames_read), 1);
-            continue;
-        }
-
-        // Convert stereo to mono and accumulate
-        for (0..@intCast(frames_read)) |i| {
-            if (acc_pos < accumulator.len) {
-                accumulator[acc_pos] = buffer[i * 2]; // left channel
-                acc_pos += 1;
-            }
-        }
-
-        // Process when we have enough data (0.1 seconds)
-        if (acc_pos >= VOSK_SAMPLE_RATE / 10) {
-            const result = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&accumulator), @intCast(acc_pos * 2));
-            if (result != 0) {
-                const text = c.vosk_recognizer_result(rec);
-                std.debug.print("{s}\n", .{text});
-            }
-            acc_pos = 0;
-        }
-    }
+    std.debug.print("Demo completed successfully\n", .{});
+}
+
+// Smoke test: wires DemoHandler into a stt.SpeechEventHandler and invokes each callback once.
+test "demo handler functionality" {
+    const testing = std.testing;
+
+    var demo_handler = DemoHandler{};
+    const speech_handler = stt.SpeechEventHandler{
+        .onSpeechFn = DemoHandler.onSpeech,
+        .onErrorFn = DemoHandler.onError,
+        .ctx = &demo_handler,
+    };
+
+    // Invoke both callbacks through the handler; their printed output is not captured or checked
+    speech_handler.onSpeech("test speech");
+    speech_handler.onError(stt.SttError.AudioDeviceError, "test error");
+
+    // Always-true assertion: reaching this line without a crash is the only pass criterion
+    try testing.expect(true);
 }
--- a/src/root.zig
+++ b/src/root.zig