AI generated first pass
This commit is contained in:
parent
3b0f4c12a7
commit
ecaf55ebe9
10 changed files with 382 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
.zig-cache/
|
||||
zig-out/
|
3
.mise.toml
Normal file
3
.mise.toml
Normal file
|
@ -0,0 +1,3 @@
|
|||
[tools]
|
||||
zig = "0.15.1"
|
||||
zls = "0.15.0"
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2025 Emil Lerch
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
# Real-time Speech Recognition with Vosk and Zig
|
||||
|
||||
This project implements a minimal real-time speech-to-text application using Vosk and Zig.
|
||||
|
||||
## Setup
|
||||
|
||||
### Prerequisites
|
||||
- Zig 0.15.1 (configured via mise)
|
||||
- Nix development environment with C compilation tools, ALSA, and audio libraries
|
||||
|
||||
### Vosk Model Download
|
||||
The application uses the Vosk small English model for speech recognition:
|
||||
- **Source**: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
|
||||
- **Size**: ~50MB
|
||||
- **Language**: English only
|
||||
- **Accuracy**: Good for simple sentences and commands
|
||||
|
||||
### Installation Steps
|
||||
1. Enter nix development environment: `nix develop`
|
||||
2. Download Vosk model: `wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip`
|
||||
3. Extract model: `unzip vosk-model-small-en-us-0.15.zip`
|
||||
4. Build application: `zig build`
|
||||
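5. Run from the directory containing the extracted `vosk-model-small-en-us-0.15` folder (the model is loaded by relative path): `./zig-out/bin/stt`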
|
||||
|
||||
## Usage
|
||||
The application will:
|
||||
- Initialize audio capture from default microphone
|
||||
- Load the Vosk speech recognition model
|
||||
- Process audio in real-time
|
||||
- Output recognized text to terminal
|
||||
- Exit on Ctrl+C
|
||||
|
||||
## Dependencies
|
||||
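- Vosk C API library (`build.zig` expects the prebuilt `vosk-linux-x86_64-0.3.45` directory in the project root for both the header and the library)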
|
||||
- ALSA for audio capture
|
||||
- Standard C libraries for audio processing
|
32
build.zig
Normal file
32
build.zig
Normal file
|
@ -0,0 +1,32 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Build-script entry point: declares the `stt` executable and a `run` step.
///
/// The executable is built from `src/main.zig`, links libc, and links the
/// `vosk` and `asound` system libraries. The `vosk-linux-x86_64-0.3.45`
/// directory (relative to the build root) is used as both the include search
/// path and the library search path.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Named once so the include path and library path cannot drift apart.
    const vosk_dir = b.path("vosk-linux-x86_64-0.3.45");

    // Link settings are applied to the module itself rather than through the
    // forwarding helpers on the compile step.
    const stt_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
        .link_libc = true,
    });
    stt_module.addIncludePath(vosk_dir);
    stt_module.addLibraryPath(vosk_dir);
    stt_module.linkSystemLibrary("vosk", .{});
    stt_module.linkSystemLibrary("asound", .{});

    const exe = b.addExecutable(.{
        .name = "stt",
        .root_module = stt_module,
    });
    b.installArtifact(exe);

    // `zig build run -- <args>` installs first, then runs the installed
    // artifact with any extra command-line arguments forwarded.
    const run_cmd = b.addRunArtifact(exe);
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);
}
|
81
build.zig.zon
Normal file
81
build.zig.zon
Normal file
|
@ -0,0 +1,81 @@
|
|||
.{
|
||||
// This is the default name used by packages depending on this one. For
|
||||
// example, when a user runs `zig fetch --save <url>`, this field is used
|
||||
// as the key in the `dependencies` table. Although the user can choose a
|
||||
// different name, most users will stick with this provided value.
|
||||
//
|
||||
// It is redundant to include "zig" in this name because it is already
|
||||
// within the Zig package namespace.
|
||||
.name = ._1_stt_2,
|
||||
// This is a [Semantic Version](https://semver.org/).
|
||||
// In a future version of Zig it will be used for package deduplication.
|
||||
.version = "0.0.0",
|
||||
// Together with name, this represents a globally unique package
|
||||
// identifier. This field is generated by the Zig toolchain when the
|
||||
// package is first created, and then *never changes*. This allows
|
||||
// unambiguous detection of one package being an updated version of
|
||||
// another.
|
||||
//
|
||||
// When forking a Zig project, this id should be regenerated (delete the
|
||||
// field and run `zig build`) if the upstream project is still maintained.
|
||||
// Otherwise, the fork is *hostile*, attempting to take control over the
|
||||
// original project's identity. Thus it is recommended to leave the comment
|
||||
// on the following line intact, so that it shows up in code reviews that
|
||||
// modify the field.
|
||||
.fingerprint = 0xe6cb5784eea38627, // Changing this has security and trust implications.
|
||||
// Tracks the earliest Zig version that the package considers to be a
|
||||
// supported use case.
|
||||
.minimum_zig_version = "0.15.1",
|
||||
// This field is optional.
|
||||
// Each dependency must either provide a `url` and `hash`, or a `path`.
|
||||
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
|
||||
// Once all dependencies are fetched, `zig build` no longer requires
|
||||
// internet connectivity.
|
||||
.dependencies = .{
|
||||
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
|
||||
//.example = .{
|
||||
// // When updating this field to a new URL, be sure to delete the corresponding
|
||||
// // `hash`, otherwise you are communicating that you expect to find the old hash at
|
||||
// // the new URL. If the contents of a URL change this will result in a hash mismatch
|
||||
// // which will prevent zig from using it.
|
||||
// .url = "https://example.com/foo.tar.gz",
|
||||
//
|
||||
// // This is computed from the file contents of the directory of files that is
|
||||
// // obtained after fetching `url` and applying the inclusion rules given by
|
||||
// // `paths`.
|
||||
// //
|
||||
// // This field is the source of truth; packages do not come from a `url`; they
|
||||
// // come from a `hash`. `url` is just one of many possible mirrors for how to
|
||||
// // obtain a package matching this `hash`.
|
||||
// //
|
||||
// // Uses the [multihash](https://multiformats.io/multihash/) format.
|
||||
// .hash = "...",
|
||||
//
|
||||
// // When this is provided, the package is found in a directory relative to the
|
||||
// // build root. In this case the package's hash is irrelevant and therefore not
|
||||
// // computed. This field and `url` are mutually exclusive.
|
||||
// .path = "foo",
|
||||
//
|
||||
// // When this is set to `true`, a package is declared to be lazily
|
||||
// // fetched. This makes the dependency only get fetched if it is
|
||||
// // actually used.
|
||||
// .lazy = false,
|
||||
//},
|
||||
},
|
||||
// Specifies the set of files and directories that are included in this package.
|
||||
// Only files and directories listed here are included in the `hash` that
|
||||
// is computed for this package. Only files listed here will remain on disk
|
||||
// when using the zig package manager. As a rule of thumb, one should list
|
||||
// files required for compilation plus any license(s).
|
||||
// Paths are relative to the build root. Use the empty string (`""`) to refer to
|
||||
// the build root itself.
|
||||
// A directory listed here means that all files within, recursively, are included.
|
||||
.paths = .{
|
||||
"build.zig",
|
||||
"build.zig.zon",
|
||||
"src",
|
||||
// For example...
|
||||
//"LICENSE",
|
||||
//"README.md",
|
||||
},
|
||||
}
|
61
flake.lock
generated
Normal file
61
flake.lock
generated
Normal file
|
@ -0,0 +1,61 @@
|
|||
{
|
||||
"nodes": {
|
||||
"flake-utils": {
|
||||
"inputs": {
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1731533236,
|
||||
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1757347588,
|
||||
"narHash": "sha256-tLdkkC6XnsY9EOZW9TlpesTclELy8W7lL2ClL+nma8o=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "b599843bad24621dcaa5ab60dac98f9b0eb1cabe",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"flake-utils": "flake-utils",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
32
flake.nix
Normal file
32
flake.nix
Normal file
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
description = "Vosk speech recognition development environment";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, flake-utils }:
|
||||
flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
in
|
||||
{
|
||||
devShells.default = pkgs.mkShell {
|
||||
buildInputs = with pkgs; [
|
||||
clang
|
||||
llvm
|
||||
cmake
|
||||
pkg-config
|
||||
zlib
|
||||
alsa-lib
|
||||
alsa-plugins
|
||||
SDL2
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
export ALSA_PLUGIN_DIR=${pkgs.alsa-plugins}/lib/alsa-lib
|
||||
'';
|
||||
};
|
||||
});
|
||||
}
|
91
src/main.zig
Normal file
91
src/main.zig
Normal file
|
@ -0,0 +1,91 @@
|
|||
const std = @import("std");
|
||||
const c = @cImport({
|
||||
@cInclude("vosk_api.h");
|
||||
@cInclude("alsa/asoundlib.h");
|
||||
});
|
||||
|
||||
const SAMPLE_RATE = 16000;
|
||||
const BUFFER_SIZE = 4000;
|
||||
|
||||
/// Streams microphone audio into a Vosk recognizer and prints each result.
///
/// Loads the model from the relative path `vosk-model-small-en-us-0.15`, opens
/// the "default" ALSA capture device as mono signed 16-bit little-endian audio
/// at `SAMPLE_RATE` Hz, then loops forever feeding captured samples to Vosk.
///
/// Every fatal failure is reported on stderr and returned as an error so the
/// process exits with a non-zero status:
///   - `error.ModelLoadFailed`: the model could not be loaded.
///   - `error.RecognizerCreateFailed`: the recognizer could not be created.
///   - `error.AudioOpenFailed`: the capture device could not be opened.
///   - `error.AudioConfigFailed`: blocking mode or stream parameters were rejected.
///   - `error.AudioReadFailed`: a read error could not be recovered.
pub fn main() !void {
    // Initialize Vosk. NOTE(review): a negative level is expected to silence
    // Vosk's own logging — confirm against vosk_api.h.
    c.vosk_set_log_level(-1);

    const model = c.vosk_model_new("vosk-model-small-en-us-0.15");
    if (model == null) {
        std.debug.print("Failed to load model\n", .{});
        return error.ModelLoadFailed;
    }
    defer c.vosk_model_free(model);

    const rec = c.vosk_recognizer_new(model, SAMPLE_RATE);
    if (rec == null) {
        std.debug.print("Failed to create recognizer\n", .{});
        return error.RecognizerCreateFailed;
    }
    defer c.vosk_recognizer_free(rec);

    // Open the default capture device in non-blocking mode; it is switched to
    // blocking mode immediately afterwards.
    var handle: ?*c.snd_pcm_t = null;
    var err = c.snd_pcm_open(&handle, "default", c.SND_PCM_STREAM_CAPTURE, c.SND_PCM_NONBLOCK);
    if (err < 0) {
        std.debug.print("Cannot open default audio device: {s}\n", .{c.snd_strerror(err)});
        std.debug.print("Make sure no other applications are using the microphone\n", .{});
        return error.AudioOpenFailed;
    }
    defer _ = c.snd_pcm_close(handle);

    // Set to blocking mode so each read waits for audio.
    err = c.snd_pcm_nonblock(handle, 0);
    if (err < 0) {
        std.debug.print("Cannot set blocking mode: {s}\n", .{c.snd_strerror(err)});
        return error.AudioConfigFailed;
    }

    // Configure audio: S16_LE, interleaved access, 1 channel, SAMPLE_RATE Hz,
    // soft resampling enabled, 100000 us requested latency.
    err = c.snd_pcm_set_params(handle, c.SND_PCM_FORMAT_S16_LE, c.SND_PCM_ACCESS_RW_INTERLEAVED, 1, SAMPLE_RATE, 1, 100000);
    if (err < 0) {
        std.debug.print("Cannot configure audio: {s}\n", .{c.snd_strerror(err)});
        return error.AudioConfigFailed;
    }

    std.debug.print("Audio configured successfully\n", .{});
    std.debug.print("Listening... (Ctrl+C to exit)\n", .{});

    var buffer: [BUFFER_SIZE]i16 = undefined;
    // Number of successful reads so far; drives the periodic level report.
    var read_count: u32 = 0;

    while (true) {
        // The stream is mono, so one frame is one i16 sample. Each read asks
        // for half of the buffer's capacity.
        const frames = c.snd_pcm_readi(handle, &buffer, BUFFER_SIZE / 2);
        if (frames < 0) {
            std.debug.print("Audio read error: {s}\n", .{c.snd_strerror(@intCast(frames))});
            err = c.snd_pcm_recover(handle, @intCast(frames), 0);
            if (err < 0) {
                std.debug.print("Cannot recover from error: {s}\n", .{c.snd_strerror(err)});
                return error.AudioReadFailed;
            }
            continue;
        }

        read_count += 1;
        if (read_count % 50 == 0) {
            // Show we're getting audio data: report the peak magnitude of
            // the samples captured by this read.
            var max_sample: u16 = 0;
            for (buffer[0..@intCast(frames)]) |sample| {
                max_sample = @max(max_sample, @abs(sample));
            }
            std.debug.print("Audio: {} frames, max level: {}\n", .{ frames, max_sample });
        }

        // Vosk takes the audio as raw bytes, so the length is the sample
        // count times the sample size.
        const result = c.vosk_recognizer_accept_waveform(rec, @ptrCast(&buffer), @intCast(frames * @sizeOf(i16)));
        if (result > 0) {
            // A positive return means an utterance is complete.
            const text = c.vosk_recognizer_result(rec);
            std.debug.print("RECOGNIZED: {s}\n", .{text});
        } else if (result < 0) {
            // NOTE(review): negative is treated as a failure inside the
            // recognizer (confirm against vosk_api.h); there is no result
            // to fetch, so keep listening.
            std.debug.print("Recognizer failed to process audio\n", .{});
        }
    }
}
|
23
src/root.zig
Normal file
23
src/root.zig
Normal file
|
@ -0,0 +1,23 @@
|
|||
//! By convention, root.zig is the root source file when making a library.
|
||||
const std = @import("std");
|
||||
|
||||
/// Writes a one-line hint about `zig build test` to stdout through a
/// 1024-byte buffered writer, then flushes it.
///
/// Returns any error reported by the print or flush call.
pub fn bufferedPrint() !void {
    // Stdout is reserved for the program's real output (for a gzip tool that
    // would be the compressed bytes only); debugging messages do not belong
    // on it.
    var scratch: [1024]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&scratch);
    const out = &file_writer.interface;

    try out.print("Run `zig build test` to run the tests.\n", .{});

    // The text sits in `scratch` until it is flushed.
    try out.flush();
}
|
||||
|
||||
/// Returns the sum of `a` and `b`.
pub fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}
|
||||
|
||||
// Checks that `add` returns 10 for the operands 3 and 7.
test "basic add functionality" {
    const sum = add(3, 7);
    try std.testing.expect(sum == 10);
}
|
Loading…
Add table
Reference in a new issue