import { initWhisperVad } from 'whisper.rn'

// Create a VAD context from the model file; each option sits on its own line
// so the trailing comments cannot swallow the code that follows them.
const vadContext = await initWhisperVad({
  filePath: require('./assets/ggml-silero-v6.2.0.bin'), // VAD model file
  useGpu: true, // Use GPU acceleration (iOS only)
  nThreads: 4, // Number of threads for processing
})
/** Options accepted when initializing a VAD context. */
type VadContextOptions = {
  /** Path to VAD model or require() asset */
  filePath: string | number
  /** Is filePath a bundle asset (for string paths) */
  isBundleAsset?: boolean
  /** Use GPU acceleration (iOS only, default: true) */
  useGpu?: boolean
  /** Processing threads (default: 2 for 4-core, 4 for 8+) */
  nThreads?: number
}
Detect speech in WAV files, base64 audio, or bundled assets:
// Run speech detection on an audio file, then log each detected segment.
const segments = await vadContext.detectSpeech('file:///path/to/audio.wav', {
  threshold: 0.5, // Speech probability threshold (0.0-1.0)
  minSpeechDurationMs: 250, // Minimum speech duration in ms
  minSilenceDurationMs: 100, // Minimum silence duration in ms
  maxSpeechDurationS: 30, // Maximum speech duration in seconds
  speechPadMs: 30, // Padding around speech segments in ms
  samplesOverlap: 0.1, // Overlap between analysis windows
})

for (const segment of segments) {
  console.log(`Speech: ${segment.t0}ms - ${segment.t1}ms`)
}
/** Tuning options for a speech-detection call; every field is optional. */
type VadOptions = {
  /** Probability threshold (0.0-1.0, default: 0.5) */
  threshold?: number
  /** Min speech duration in ms (default: 250) */
  minSpeechDurationMs?: number
  /** Min silence to end speech in ms (default: 100) */
  minSilenceDurationMs?: number
  /** Max continuous speech in seconds (default: 30) */
  maxSpeechDurationS?: number
  /** Padding before/after speech in ms (default: 30) */
  speechPadMs?: number
  /** Analysis window overlap (0.0-1.0, default: 0.1) */
  samplesOverlap?: number
}
// Release single VAD context
await vadContext.release()

// Release all VAD contexts
import { releaseAllWhisperVad } from 'whisper.rn'
await releaseAllWhisperVad()
Always release VAD contexts when done to free native resources.