AI generated first pass
This commit is contained in:
parent
3b0f4c12a7
commit
ecaf55ebe9
10 changed files with 382 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
.zig-cache/
|
||||||
|
zig-out/
|
3
.mise.toml
Normal file
3
.mise.toml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[tools]
|
||||||
|
zig = "0.15.1"
|
||||||
|
zls = "0.15.0"
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 Emil Lerch
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# Real-time Speech Recognition with Vosk and Zig
|
||||||
|
|
||||||
|
This project implements a minimal real-time speech-to-text application using Vosk and Zig.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
- Zig 0.15.1 (configured via mise)
|
||||||
|
- Nix development environment with C compilation tools, ALSA, and audio libraries
|
||||||
|
|
||||||
|
### Vosk Model Download
|
||||||
|
The application uses the Vosk small English model for speech recognition:
|
||||||
|
- **Source**: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
|
||||||
|
- **Size**: ~50MB
|
||||||
|
- **Language**: English only
|
||||||
|
- **Accuracy**: Good for simple sentences and commands
|
||||||
|
|
||||||
|
### Installation Steps
|
||||||
|
1. Enter nix development environment: `nix develop`
|
||||||
|
2. Download Vosk model: `wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip`
|
||||||
|
3. Extract model: `unzip vosk-model-small-en-us-0.15.zip`
|
||||||
|
4. Build application: `zig build`
|
||||||
|
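5. Run from the directory that contains the extracted `vosk-model-small-en-us-0.15/` folder (the model is loaded by relative path): `./zig-out/bin/stt`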
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
The application will:
|
||||||
|
- Initialize audio capture from default microphone
|
||||||
|
- Load the Vosk speech recognition model
|
||||||
|
- Process audio in real-time
|
||||||
|
- Output recognized text to terminal
|
||||||
|
- Exit on Ctrl+C
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
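- Vosk C API library (the build expects its header and shared library in `vosk-linux-x86_64-0.3.45/` at the repository root)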
|
||||||
|
- ALSA for audio capture
|
||||||
|
- Standard C libraries for audio processing
|
32
build.zig
Normal file
32
build.zig
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Declares the build graph for the `stt` executable.
///
/// The executable is built from `src/main.zig`, links libc plus the `vosk`
/// and `asound` system libraries, and looks for the Vosk header and library
/// in the `vosk-linux-x86_64-0.3.45` directory under the build root.
/// A `run` step is also registered; it installs first and forwards any
/// arguments given after `--` to the program.
pub fn build(b: *std.Build) void {
    // Root module carries the target/optimize choices and all link settings.
    const stt_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = b.standardTargetOptions(.{}),
        .optimize = b.standardOptimizeOption(.{}),
        .link_libc = true,
    });

    // The Vosk directory serves as both header and library search path.
    const vosk_dir = b.path("vosk-linux-x86_64-0.3.45");
    stt_module.addIncludePath(vosk_dir);
    stt_module.addLibraryPath(vosk_dir);
    stt_module.linkSystemLibrary("vosk", .{});
    stt_module.linkSystemLibrary("asound", .{});

    const exe = b.addExecutable(.{
        .name = "stt",
        .root_module = stt_module,
    });
    b.installArtifact(exe);

    // `zig build run [-- args...]`
    const run_cmd = b.addRunArtifact(exe);
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| run_cmd.addArgs(args);

    b.step("run", "Run the app").dependOn(&run_cmd.step);
}
|
81
build.zig.zon
Normal file
81
build.zig.zon
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
.{
|
||||||
|
// This is the default name used by packages depending on this one. For
|
||||||
|
// example, when a user runs `zig fetch --save <url>`, this field is used
|
||||||
|
// as the key in the `dependencies` table. Although the user can choose a
|
||||||
|
// different name, most users will stick with this provided value.
|
||||||
|
//
|
||||||
|
// It is redundant to include "zig" in this name because it is already
|
||||||
|
// within the Zig package namespace.
|
||||||
|
.name = ._1_stt_2,
|
||||||
|
// This is a [Semantic Version](https://semver.org/).
|
||||||
|
// In a future version of Zig it will be used for package deduplication.
|
||||||
|
.version = "0.0.0",
|
||||||
|
// Together with name, this represents a globally unique package
|
||||||
|
// identifier. This field is generated by the Zig toolchain when the
|
||||||
|
// package is first created, and then *never changes*. This allows
|
||||||
|
// unambiguous detection of one package being an updated version of
|
||||||
|
// another.
|
||||||
|
//
|
||||||
|
// When forking a Zig project, this id should be regenerated (delete the
|
||||||
|
// field and run `zig build`) if the upstream project is still maintained.
|
||||||
|
// Otherwise, the fork is *hostile*, attempting to take control over the
|
||||||
|
// original project's identity. Thus it is recommended to leave the comment
|
||||||
|
// on the following line intact, so that it shows up in code reviews that
|
||||||
|
// modify the field.
|
||||||
|
.fingerprint = 0xe6cb5784eea38627, // Changing this has security and trust implications.
|
||||||
|
// Tracks the earliest Zig version that the package considers to be a
|
||||||
|
// supported use case.
|
||||||
|
.minimum_zig_version = "0.15.1",
|
||||||
|
// This field is optional.
|
||||||
|
// Each dependency must either provide a `url` and `hash`, or a `path`.
|
||||||
|
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
|
||||||
|
// Once all dependencies are fetched, `zig build` no longer requires
|
||||||
|
// internet connectivity.
|
||||||
|
.dependencies = .{
|
||||||
|
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
|
||||||
|
//.example = .{
|
||||||
|
// // When updating this field to a new URL, be sure to delete the corresponding
|
||||||
|
// // `hash`, otherwise you are communicating that you expect to find the old hash at
|
||||||
|
// // the new URL. If the contents of a URL change this will result in a hash mismatch
|
||||||
|
// // which will prevent zig from using it.
|
||||||
|
// .url = "https://example.com/foo.tar.gz",
|
||||||
|
//
|
||||||
|
// // This is computed from the file contents of the directory of files that is
|
||||||
|
// // obtained after fetching `url` and applying the inclusion rules given by
|
||||||
|
// // `paths`.
|
||||||
|
// //
|
||||||
|
// // This field is the source of truth; packages do not come from a `url`; they
|
||||||
|
// // come from a `hash`. `url` is just one of many possible mirrors for how to
|
||||||
|
// // obtain a package matching this `hash`.
|
||||||
|
// //
|
||||||
|
// // Uses the [multihash](https://multiformats.io/multihash/) format.
|
||||||
|
// .hash = "...",
|
||||||
|
//
|
||||||
|
// // When this is provided, the package is found in a directory relative to the
|
||||||
|
// // build root. In this case the package's hash is irrelevant and therefore not
|
||||||
|
// // computed. This field and `url` are mutually exclusive.
|
||||||
|
// .path = "foo",
|
||||||
|
//
|
||||||
|
// // When this is set to `true`, a package is declared to be lazily
|
||||||
|
// // fetched. This makes the dependency only get fetched if it is
|
||||||
|
// // actually used.
|
||||||
|
// .lazy = false,
|
||||||
|
//},
|
||||||
|
},
|
||||||
|
// Specifies the set of files and directories that are included in this package.
|
||||||
|
// Only files and directories listed here are included in the `hash` that
|
||||||
|
// is computed for this package. Only files listed here will remain on disk
|
||||||
|
// when using the zig package manager. As a rule of thumb, one should list
|
||||||
|
// files required for compilation plus any license(s).
|
||||||
|
// Paths are relative to the build root. Use the empty string (`""`) to refer to
|
||||||
|
// the build root itself.
|
||||||
|
// A directory listed here means that all files within, recursively, are included.
|
||||||
|
.paths = .{
|
||||||
|
"build.zig",
|
||||||
|
"build.zig.zon",
|
||||||
|
"src",
|
||||||
|
// For example...
|
||||||
|
//"LICENSE",
|
||||||
|
//"README.md",
|
||||||
|
},
|
||||||
|
}
|
61
flake.lock
generated
Normal file
61
flake.lock
generated
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1757347588,
|
||||||
|
"narHash": "sha256-tLdkkC6XnsY9EOZW9TlpesTclELy8W7lL2ClL+nma8o=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "b599843bad24621dcaa5ab60dac98f9b0eb1cabe",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
32
flake.nix
Normal file
32
flake.nix
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# Development shell for the Vosk speech-to-text project: C toolchain,
# ALSA libraries/plugins and SDL2, for every default flake-utils system.
{
  description = "Vosk speech recognition development environment";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        # Directory exported to the shell as ALSA_PLUGIN_DIR.
        alsaPluginDir = "${pkgs.alsa-plugins}/lib/alsa-lib";
      in
      {
        devShells.default = pkgs.mkShell {
          buildInputs = [
            pkgs.clang
            pkgs.llvm
            pkgs.cmake
            pkgs.pkg-config
            pkgs.zlib
            pkgs.alsa-lib
            pkgs.alsa-plugins
            pkgs.SDL2
          ];

          shellHook = ''
            export ALSA_PLUGIN_DIR=${alsaPluginDir}
          '';
        };
      });
}
|
91
src/main.zig
Normal file
91
src/main.zig
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const c = @cImport({
|
||||||
|
@cInclude("vosk_api.h");
|
||||||
|
@cInclude("alsa/asoundlib.h");
|
||||||
|
});
|
||||||
|
|
||||||
|
const SAMPLE_RATE = 16000;
|
||||||
|
const BUFFER_SIZE = 4000;
|
||||||
|
|
||||||
|
/// Captures mono 16-bit audio from the default ALSA capture device at
/// `SAMPLE_RATE` and feeds it to a Vosk recognizer, printing every result the
/// recognizer reports. Runs until interrupted or until an audio read error
/// cannot be recovered.
///
/// Errors (each is also described on stderr before returning, so the process
/// exits with a non-zero status instead of silently reporting success):
/// - `error.ModelLoadFailed`: the model directory could not be loaded.
/// - `error.RecognizerCreateFailed`: the recognizer could not be created.
/// - `error.AudioDeviceOpenFailed`: the default capture device could not be opened.
/// - `error.AudioConfigFailed`: blocking mode or stream parameters were rejected.
/// - `error.AudioReadFailed`: a read error could not be recovered.
pub fn main() !void {
    // Initialize Vosk. The model path is relative to the working directory.
    c.vosk_set_log_level(-1);
    const model = c.vosk_model_new("vosk-model-small-en-us-0.15");
    if (model == null) {
        std.debug.print("Failed to load model\n", .{});
        return error.ModelLoadFailed;
    }
    defer c.vosk_model_free(model);

    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE);
    if (rec == null) {
        std.debug.print("Failed to create recognizer\n", .{});
        return error.RecognizerCreateFailed;
    }
    defer c.vosk_recognizer_free(rec);

    // Open the default capture device in non-blocking mode first; it is
    // switched to blocking mode right after a successful open.
    var handle: ?*c.snd_pcm_t = null;
    var err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
    if (err < 0) {
        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(err)});
        std.debug.print("Make sure no other applications are using the microphone\n", .{});
        return error.AudioDeviceOpenFailed;
    }
    defer _ = c.snd_pcm_close(handle);

    // Set to blocking mode
    err = c.snd_pcm_nonblock(handle, 0);
    if (err < 0) {
        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(err)});
        return error.AudioConfigFailed;
    }

    // Configure audio: signed 16-bit little-endian, interleaved, one channel,
    // SAMPLE_RATE Hz, soft resampling enabled, 100000 us requested latency.
    err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 1, SAMPLE_RATE, 1, 100000);
    if (err < 0) {
        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(err)});
        return error.AudioConfigFailed;
    }

    std.debug.print("Audio configured successfully\n", .{});
    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});

    var buffer: [BUFFER_SIZE]i16 = undefined;
    var frame_count: u32 = 0;

    while (true) {
        // One frame is one i16 sample (single channel), so each read fills at
        // most the first half of `buffer`.
        const frames = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE / 2);
        if (frames < 0) {
            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(@intCast(frames))});
            err = c.snd_pcm_recover(handle, @intCast(frames), 0);
            if (err < 0) {
                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(err)});
                return error.AudioReadFailed;
            }
            continue;
        }

        // `frames` is non-negative here, so the conversion cannot fail.
        const sample_total: usize = @intCast(frames);
        const captured = buffer[0..sample_total];

        frame_count += 1;
        if (frame_count % 50 == 0) {
            // Show we're getting audio data: report the peak magnitude of
            // the chunk on every 50th read.
            var max_sample: u16 = 0;
            for (captured) |sample| {
                max_sample = @max(max_sample, @abs(sample));
            }
            std.debug.print("Audio: {} frames, max level: {}\n", .{ frames, max_sample });
        }

        // The length handed to Vosk is in bytes, hence samples * sizeof(i16).
        const byte_len: c_int = @intCast(sample_total * @sizeOf(i16));
        const result = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&buffer), byte_len);
        if (result != 0) {
            const text = c.vosk_recognizer_result(rec);
            std.debug.print("RECOGNIZED: {s}\n", .{text});
        }
    }
}
|
23
src/root.zig
Normal file
23
src/root.zig
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
//! By convention, root.zig is the root source file when making a library.
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Writes a one-line hint about `zig build test` to standard output through
/// a 1024-byte stack buffer and flushes it before returning.
///
/// Any error from writing or flushing is returned to the caller.
pub fn bufferedPrint() !void {
    // Stdout carries the program's real output; diagnostics belong elsewhere.
    var scratch: [1024]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&scratch);
    const out = &file_writer.interface;

    try out.writeAll("Run `zig build test` to run the tests.\n");

    // Buffered data only reaches the file once it is flushed.
    try out.flush();
}
|
||||||
|
|
||||||
|
/// Returns the sum of `a` and `b`.
pub fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}
|
||||||
|
|
||||||
|
test "basic add functionality" {
    // 3 + 7 must come out as exactly 10.
    const sum = add(3, 7);
    try std.testing.expect(sum == 10);
}
|
Loading…
Add table
Reference in a new issue