From ecaf55ebe9e946e0ef05e5ffccb0ac07dab29106 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Tue, 9 Sep 2025 10:15:16 -0700 Subject: [PATCH] AI generated first pass --- .gitignore | 2 ++ .mise.toml | 3 ++ LICENSE | 21 ++++++++++++ README.md | 36 ++++++++++++++++++++ build.zig | 32 ++++++++++++++++++ build.zig.zon | 81 +++++++++++++++++++++++++++++++++++++++++++++ flake.lock | 61 ++++++++++++++++++++++++++++++++++ flake.nix | 32 ++++++++++++++++++ src/main.zig | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/root.zig | 23 +++++++++++++ 10 files changed, 382 insertions(+) create mode 100644 .gitignore create mode 100644 .mise.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 build.zig create mode 100644 build.zig.zon create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 src/main.zig create mode 100644 src/root.zig diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3389c86 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.zig-cache/ +zig-out/ diff --git a/.mise.toml b/.mise.toml new file mode 100644 index 0000000..1deab06 --- /dev/null +++ b/.mise.toml @@ -0,0 +1,3 @@ +[tools] +zig = "0.15.1" +zls = "0.15.0" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..506dadd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Emil Lerch + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..beb2633
--- /dev/null
+++ b/README.md
@@ -0,0 +1,36 @@
+# Real-time Speech Recognition with Vosk and Zig
+
+This project implements a minimal real-time speech-to-text application using Vosk and Zig.
+
+## Setup
+
+### Prerequisites
+- Zig 0.15.1 (configured via mise)
+- Nix development environment with C compilation tools, ALSA, and audio libraries
+
+### Vosk Model Download
+The application uses the Vosk small English model for speech recognition:
+- **Source**: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
+- **Size**: ~50MB
+- **Language**: English only
+- **Accuracy**: Good for simple sentences and commands
+
+### Installation Steps
+1. Enter nix development environment: `nix develop`
+2. Download and extract the Vosk model: `wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip && unzip vosk-model-small-en-us-0.15.zip`
+3. Download `vosk-linux-x86_64-0.3.45.zip` from https://github.com/alphacep/vosk-api/releases and extract it in the project root (`build.zig` looks for headers and the library in `vosk-linux-x86_64-0.3.45/`)
+4. Build application: `zig build`
+5. 
Run: `./zig-out/bin/stt`
+
+## Usage
+The application will:
+- Initialize audio capture from default microphone
+- Load the Vosk speech recognition model
+- Process audio in real-time
+- Output recognized text to terminal
+- Exit on Ctrl+C
+
+## Dependencies
+- Vosk C API library
+- ALSA for audio capture
+- Standard C libraries for audio processing
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..9ee6d0c
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,32 @@
+const std = @import("std");
+
+/// Builds the `stt` executable from src/main.zig, links it against libc, Vosk
+/// and ALSA, installs it, and registers a `run` step for it.
+pub fn build(b: *std.Build) void {
+    const exe = b.addExecutable(.{
+        .name = "stt",
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/main.zig"),
+            .target = b.standardTargetOptions(.{}),
+            .optimize = b.standardOptimizeOption(.{}),
+        }),
+    });
+
+    // One directory serves as both the Vosk header and library search path.
+    const vosk_dir = b.path("vosk-linux-x86_64-0.3.45");
+    exe.linkLibC();
+    exe.addIncludePath(vosk_dir);
+    exe.addLibraryPath(vosk_dir);
+    exe.linkSystemLibrary("vosk");
+    exe.linkSystemLibrary("asound");
+
+    b.installArtifact(exe);
+
+    // The run command waits for the install step and receives `b.args`, if any.
+    const run_cmd = b.addRunArtifact(exe);
+    run_cmd.step.dependOn(b.getInstallStep());
+    if (b.args) |args| run_cmd.addArgs(args);
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..f36eeea
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,81 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save `, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = ._1_stt_2,
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+ .version = "0.0.0", + // Together with name, this represents a globally unique package + // identifier. This field is generated by the Zig toolchain when the + // package is first created, and then *never changes*. This allows + // unambiguous detection of one package being an updated version of + // another. + // + // When forking a Zig project, this id should be regenerated (delete the + // field and run `zig build`) if the upstream project is still maintained. + // Otherwise, the fork is *hostile*, attempting to take control over the + // original project's identity. Thus it is recommended to leave the comment + // on the following line intact, so that it shows up in code reviews that + // modify the field. + .fingerprint = 0xe6cb5784eea38627, // Changing this has security and trust implications. + // Tracks the earliest Zig version that the package considers to be a + // supported use case. + .minimum_zig_version = "0.15.1", + // This field is optional. + // Each dependency must either provide a `url` and `hash`, or a `path`. + // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. + // Once all dependencies are fetched, `zig build` no longer requires + // internet connectivity. + .dependencies = .{ + // See `zig fetch --save ` for a command-line interface for adding dependencies. + //.example = .{ + // // When updating this field to a new URL, be sure to delete the corresponding + // // `hash`, otherwise you are communicating that you expect to find the old hash at + // // the new URL. If the contents of a URL change this will result in a hash mismatch + // // which will prevent zig from using it. + // .url = "https://example.com/foo.tar.gz", + // + // // This is computed from the file contents of the directory of files that is + // // obtained after fetching `url` and applying the inclusion rules given by + // // `paths`. 
+ // // + // // This field is the source of truth; packages do not come from a `url`; they + // // come from a `hash`. `url` is just one of many possible mirrors for how to + // // obtain a package matching this `hash`. + // // + // // Uses the [multihash](https://multiformats.io/multihash/) format. + // .hash = "...", + // + // // When this is provided, the package is found in a directory relative to the + // // build root. In this case the package's hash is irrelevant and therefore not + // // computed. This field and `url` are mutually exclusive. + // .path = "foo", + // + // // When this is set to `true`, a package is declared to be lazily + // // fetched. This makes the dependency only get fetched if it is + // // actually used. + // .lazy = false, + //}, + }, + // Specifies the set of files and directories that are included in this package. + // Only files and directories listed here are included in the `hash` that + // is computed for this package. Only files listed here will remain on disk + // when using the zig package manager. As a rule of thumb, one should list + // files required for compilation plus any license(s). + // Paths are relative to the build root. Use the empty string (`""`) to refer to + // the build root itself. + // A directory listed here means that all files within, recursively, are included. + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... 
+ //"LICENSE", + //"README.md", + }, +} diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..e5bbead --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1757347588, + "narHash": "sha256-tLdkkC6XnsY9EOZW9TlpesTclELy8W7lL2ClL+nma8o=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "b599843bad24621dcaa5ab60dac98f9b0eb1cabe", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..55103fa --- /dev/null +++ b/flake.nix @@ -0,0 +1,32 @@ +{ + description = "Vosk speech recognition development environment"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + in + { + devShells.default = pkgs.mkShell { + buildInputs = with pkgs; [ + clang + llvm + cmake + pkg-config + zlib + alsa-lib + 
alsa-plugins
+            SDL2
+          ];
+
+          shellHook = ''
+            export ALSA_PLUGIN_DIR=${pkgs.alsa-plugins}/lib/alsa-lib
+          '';
+        };
+      });
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..2b8f132
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,91 @@
+const std = @import("std");
+const c = @cImport({
+    @cInclude("vosk_api.h");
+    @cInclude("alsa/asoundlib.h");
+});
+
+/// Capture rate in Hz, shared by the ALSA device and the Vosk recognizer.
+const SAMPLE_RATE = 16000;
+/// Capacity of the capture buffer in 16-bit samples (one sample per mono frame).
+const BUFFER_SIZE = 4000;
+
+/// Streams audio from the default ALSA capture device into a Vosk recognizer
+/// and prints each result. Runs until the process is interrupted. Any setup or
+/// unrecoverable capture failure is reported and then returned as an error, so
+/// the process exits with a non-zero status instead of reporting success.
+pub fn main() !void {
+    // Initialize Vosk; log level -1 suppresses the library's info messages.
+    c.vosk_set_log_level(-1);
+    const model = c.vosk_model_new("vosk-model-small-en-us-0.15");
+    if (model == null) {
+        std.debug.print("Failed to load model\n", .{});
+        return error.ModelLoadFailed;
+    }
+    defer c.vosk_model_free(model);
+
+    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE);
+    if (rec == null) {
+        std.debug.print("Failed to create recognizer\n", .{});
+        return error.RecognizerInitFailed;
+    }
+    defer c.vosk_recognizer_free(rec);
+
+    // Try to open default capture device (non-blocking open; made blocking below)
+    var handle: ?*c.snd_pcm_t = null;
+    var err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
+    if (err < 0) {
+        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(err)});
+        std.debug.print("Make sure no other applications are using the microphone\n", .{});
+        return error.AudioDeviceOpenFailed;
+    }
+    defer _ = c.snd_pcm_close(handle);
+
+    // Set to blocking mode
+    err = c.snd_pcm_nonblock(handle, 0);
+    if (err < 0) {
+        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(err)});
+        return error.AudioConfigFailed;
+    }
+
+    // Configure audio: S16_LE interleaved, 1 channel, soft resampling allowed, 100 ms latency.
+    err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 1, SAMPLE_RATE, 1, 100000);
+    if (err < 0) {
+        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(err)});
+        return error.AudioConfigFailed;
+    }
+
+    std.debug.print("Audio configured successfully\n", .{});
+    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});
+
+    var buffer: [BUFFER_SIZE]i16 = undefined;
+    var frame_count: u32 = 0;
+
+    while (true) {
+        // Read at most BUFFER_SIZE / 2 frames; a negative return is an ALSA error code.
+        const frames = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE / 2);
+        if (frames < 0) {
+            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(@intCast(frames))});
+            err = c.snd_pcm_recover(handle, @intCast(frames), 0);
+            if (err < 0) {
+                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(err)});
+                return error.AudioReadFailed;
+            }
+            continue;
+        }
+
+        frame_count += 1;
+        if (frame_count % 50 == 0) {
+            // Show we're getting audio data
+            var max_sample: u16 = 0;
+            for (0..@intCast(frames)) |i| max_sample = @max(max_sample, @abs(buffer[i]));
+            std.debug.print("Audio: {} frames, max level: {}\n", .{ frames, max_sample });
+        }
+
+        // Vosk expects the data length in bytes, hence frames * @sizeOf(i16) for mono.
+        const result = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&buffer), @intCast(frames * @sizeOf(i16)));
+        if (result != 0) {
+            const text = c.vosk_recognizer_result(rec);
+            std.debug.print("RECOGNIZED: {s}\n", .{text});
+        }
+    }
+}
diff --git a/src/root.zig b/src/root.zig
new file mode 100644
index 0000000..94c7cd0
--- /dev/null
+++ b/src/root.zig
@@ -0,0 +1,23 @@
+//! By convention, root.zig is the root source file when making a library.
+const std = @import("std");
+
+pub fn bufferedPrint() !void {
+    // Stdout is for the actual output of your application, for example if you
+    // are implementing gzip, then only the compressed bytes should be sent to
+    // stdout, not any debugging messages.
+    var stdout_buffer: [1024]u8 = undefined;
+    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
+    const stdout = &stdout_writer.interface;
+
+    try stdout.print("Run `zig build test` to run the tests.\n", .{});
+
+    try stdout.flush(); // Don't forget to flush!
+}
+
+pub fn add(a: i32, b: i32) i32 {
+    return a + b;
+}
+
+test "basic add functionality" {
+    try std.testing.expect(add(3, 7) == 10);
+}