AI generated first pass

This commit is contained in:
Emil Lerch 2025-09-09 10:15:16 -07:00
parent 3b0f4c12a7
commit ecaf55ebe9
Signed by: lobo
GPG key ID: A7B62D657EF764F8
10 changed files with 382 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
.zig-cache/
zig-out/

3
.mise.toml Normal file
View file

@ -0,0 +1,3 @@
[tools]
zig = "0.15.1"
zls = "0.15.0"

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Emil Lerch
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

36
README.md Normal file
View file

@ -0,0 +1,36 @@
# Real-time Speech Recognition with Vosk and Zig
This project implements a minimal real-time speech-to-text application using Vosk and Zig.
## Setup
### Prerequisites
- Zig 0.15.1 (configured via mise)
- Nix development environment with C compilation tools, ALSA, and audio libraries
### Vosk Model Download
The application uses the Vosk small English model for speech recognition:
- **Source**: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
- **Size**: ~50MB
- **Language**: English only
- **Accuracy**: Good for simple sentences and commands
### Installation Steps
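1. Enter nix development environment: `nix develop`
2. Download Vosk model: `wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip`
3. Extract model: `unzip vosk-model-small-en-us-0.15.zip`
4. Download the Vosk C library release `vosk-linux-x86_64-0.3.45` (published on the vosk-api GitHub releases page) and extract it into the project root; `build.zig` looks for the header and the library in that directory
5. Build application: `zig build`
6. Run from the project root, because the model directory is loaded by relative path: `./zig-out/bin/stt`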
## Usage
The application will:
- Initialize audio capture from default microphone
- Load the Vosk speech recognition model
- Process audio in real-time
- Output recognized text to terminal
- Exit on Ctrl+C
## Dependencies
- Vosk C API library
- ALSA for audio capture
- Standard C libraries for audio processing

32
build.zig Normal file
View file

@ -0,0 +1,32 @@
const std = @import("std");
/// Declares the build graph: the `stt` executable (linked against libc,
/// Vosk, and ALSA), its installation, and a `run` step that launches it.
pub fn build(b: *std.Build) void {
    // Used as both the include path and the library search path for Vosk,
    // relative to the build root.
    const vosk_dir = b.path("vosk-linux-x86_64-0.3.45");

    const stt = b.addExecutable(.{
        .name = "stt",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = b.standardTargetOptions(.{}),
            .optimize = b.standardOptimizeOption(.{}),
        }),
    });

    stt.linkLibC();
    stt.addIncludePath(vosk_dir);
    stt.addLibraryPath(vosk_dir);
    stt.linkSystemLibrary("vosk");
    stt.linkSystemLibrary("asound");

    b.installArtifact(stt);

    // `zig build run -- <args>`: install first, then run with any forwarded arguments.
    const launch = b.addRunArtifact(stt);
    launch.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| launch.addArgs(forwarded);

    b.step("run", "Run the app").dependOn(&launch.step);
}

81
build.zig.zon Normal file
View file

@ -0,0 +1,81 @@
.{
// This is the default name used by packages depending on this one. For
// example, when a user runs `zig fetch --save <url>`, this field is used
// as the key in the `dependencies` table. Although the user can choose a
// different name, most users will stick with this provided value.
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = ._1_stt_2,
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// Together with name, this represents a globally unique package
// identifier. This field is generated by the Zig toolchain when the
// package is first created, and then *never changes*. This allows
// unambiguous detection of one package being an updated version of
// another.
//
// When forking a Zig project, this id should be regenerated (delete the
// field and run `zig build`) if the upstream project is still maintained.
// Otherwise, the fork is *hostile*, attempting to take control over the
// original project's identity. Thus it is recommended to leave the comment
// on the following line intact, so that it shows up in code reviews that
// modify the field.
.fingerprint = 0xe6cb5784eea38627, // Changing this has security and trust implications.
// Tracks the earliest Zig version that the package considers to be a
// supported use case.
.minimum_zig_version = "0.15.1",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL. If the contents of a URL change this will result in a hash mismatch
// // which will prevent zig from using it.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
//
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
    "build.zig",
    "build.zig.zon",
    "src",
    // Ship the license text with the package, following the rule of thumb
    // stated above (files required for compilation plus any licenses).
    "LICENSE",
},
}

61
flake.lock generated Normal file
View file

@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1757347588,
"narHash": "sha256-tLdkkC6XnsY9EOZW9TlpesTclELy8W7lL2ClL+nma8o=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "b599843bad24621dcaa5ab60dac98f9b0eb1cabe",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

32
flake.nix Normal file
View file

@ -0,0 +1,32 @@
{
  description = "Vosk speech recognition development environment";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};

        # Packages made available inside the development shell.
        shellInputs = [
          pkgs.clang
          pkgs.llvm
          pkgs.cmake
          pkgs.pkg-config
          pkgs.zlib
          pkgs.alsa-lib
          pkgs.alsa-plugins
          pkgs.SDL2
        ];
      in
      {
        devShells.default = pkgs.mkShell {
          buildInputs = shellInputs;

          # Export the plugin directory shipped by the alsa-plugins package.
          shellHook = ''
            export ALSA_PLUGIN_DIR=${pkgs.alsa-plugins}/lib/alsa-lib
          '';
        };
      });
}

91
src/main.zig Normal file
View file

@ -0,0 +1,91 @@
const std = @import("std");
const c = @cImport({
@cInclude("vosk_api.h");
@cInclude("alsa/asoundlib.h");
});
/// Audio sample rate; the same value is handed to the Vosk recognizer and to
/// the ALSA capture configuration so both sides agree.
const SAMPLE_RATE = 16000;
/// Length of the capture buffer in 16-bit samples. Each read in `main`
/// requests `BUFFER_SIZE / 2` frames, so only half of the buffer is filled.
const BUFFER_SIZE = 4000;
/// Entry point: streams audio from the default ALSA capture device into a
/// Vosk recognizer and prints each result the recognizer reports as ready.
///
/// All diagnostics are written with `std.debug.print`. Any setup failure
/// (model, recognizer, audio device) and any unrecoverable capture or
/// recognition failure is reported and then returned as an error, so the
/// process exits with a non-zero status instead of pretending success.
/// Without such a failure the capture loop never terminates on its own.
pub fn main() !void {
    // Initialize Vosk. A negative log level turns off the library's info output.
    c.vosk_set_log_level(-1);

    // The model directory name is relative, so it is looked up from the
    // current working directory.
    const model = c.vosk_model_new("vosk-model-small-en-us-0.15") orelse {
        std.debug.print("Failed to load model\n", .{});
        return error.ModelLoadFailed;
    };
    defer c.vosk_model_free(model);

    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE) orelse {
        std.debug.print("Failed to create recognizer\n", .{});
        return error.RecognizerCreateFailed;
    };
    defer c.vosk_recognizer_free(rec);

    // Try to open the default capture device. It is opened non-blocking and
    // switched to blocking right after — presumably so that a busy device
    // makes the open fail instead of hang (NOTE(review): confirm intent).
    var handle: ?*c.snd_pcm_t = null;
    var err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
    if (err < 0) {
        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(err)});
        std.debug.print("Make sure no other applications are using the microphone\n", .{});
        return error.AudioDeviceOpenFailed;
    }
    defer _ = c.snd_pcm_close(handle);

    // Set to blocking mode so each read waits for audio.
    err = c.snd_pcm_nonblock(handle, 0);
    if (err < 0) {
        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(err)});
        return error.AudioDeviceConfigFailed;
    }

    // Configure audio: signed 16-bit little-endian, interleaved access,
    // one channel, SAMPLE_RATE, software resampling allowed, 100000 us latency.
    err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 1, SAMPLE_RATE, 1, 100000);
    if (err < 0) {
        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(err)});
        return error.AudioDeviceConfigFailed;
    }

    std.debug.print("Audio configured successfully\n", .{});
    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});

    // A level report is printed on every 50th successful read.
    const report_interval = 50;
    var reads_since_report: u32 = 0;
    var buffer: [BUFFER_SIZE]i16 = undefined;

    while (true) {
        const frames = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE / 2);
        if (frames < 0) {
            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(@intCast(frames))});
            err = c.snd_pcm_recover(handle, @intCast(frames), 0);
            if (err < 0) {
                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(err)});
                return error.AudioCaptureFailed;
            }
            continue;
        }

        // Mono capture: one frame is exactly one i16 sample.
        const frame_total: usize = @intCast(frames);

        // The counter is reset on every report, so it can never overflow
        // however long the program keeps listening.
        reads_since_report += 1;
        if (reads_since_report == report_interval) {
            reads_since_report = 0;

            // Show we're getting audio data: peak absolute sample of this read.
            var max_sample: u16 = 0;
            for (buffer[0..frame_total]) |sample| {
                max_sample = @max(max_sample, @abs(sample));
            }
            std.debug.print("Audio: {} frames, max level: {}\n", .{ frames, max_sample });
        }

        // The waveform length is given in bytes, not samples.
        const status = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&buffer), @intCast(frame_total * @sizeOf(i16)));
        if (status < 0) {
            // A negative status signals a recognizer failure, not a finished
            // utterance, so it must not be printed as a result.
            std.debug.print("Recognizer failed to process audio\n", .{});
            return error.RecognitionFailed;
        }
        if (status > 0) {
            const text = c.vosk_recognizer_result(rec);
            std.debug.print("RECOGNIZED: {s}\n", .{text});
        }
    }
}

23
src/root.zig Normal file
View file

@ -0,0 +1,23 @@
//! By convention, root.zig is the root source file when making a library.
const std = @import("std");
/// Writes a one-line hint about running the test suite to stdout, going
/// through a stack-allocated buffer that is flushed before returning.
/// Any write or flush failure is propagated to the caller.
pub fn bufferedPrint() !void {
    // Stdout is reserved for the program's real output (a gzip
    // implementation would send only the compressed bytes there);
    // debugging messages do not belong on it.
    var scratch: [1024]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&scratch);
    const out = &file_writer.interface;

    try out.writeAll("Run `zig build test` to run the tests.\n");
    // Nothing is guaranteed to reach stdout until the buffer is flushed.
    try out.flush();
}
/// Returns the sum of `a` and `b`.
pub fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}
// Sanity check for `add` using one small pair of positive operands.
test "basic add functionality" {
    const expected: i32 = 10;
    try std.testing.expectEqual(expected, add(3, 7));
}