import React, { useCallback, useEffect, useRef, useState } from 'react';
import { View, Text, Button, ScrollView, Switch } from 'react-native';
import RNFS from 'react-native-fs';
import { initWhisper, initWhisperVad } from 'whisper.rn';
import type { WhisperContext, WhisperVadContext } from 'whisper.rn';
import {
RealtimeTranscriber,
RingBufferVad,
VAD_PRESETS,
AudioPcmStreamAdapter,
type RealtimeTranscribeEvent,
type RealtimeVadEvent,
} from 'whisper.rn/realtime-transcription';
/** Upper bound on retained log lines so a long session cannot grow state without limit. */
const MAX_STORED_LOG_LINES = 200;

/** Number of most recent log lines rendered on screen. */
const VISIBLE_LOG_LINES = 10;

/**
 * Awaits `resource.release()` and reports (rather than propagates) any failure.
 * Used on teardown paths where one failing release must not prevent the others.
 */
async function releaseSafely(
  label: string,
  resource: { release(): unknown } | null,
): Promise<void> {
  if (!resource) return;
  try {
    await resource.release();
  } catch (error) {
    console.warn(`Failed to release ${label}:`, error);
  }
}

/**
 * Demo screen for realtime speech transcription.
 *
 * Flow: "Initialize" loads the Whisper and VAD models, "Start Realtime" builds a
 * `RealtimeTranscriber` on top of them and starts it, "Stop" stops it. The last
 * stabilized slice text and the most recent log lines are rendered below the
 * controls. Everything this component created is released on unmount.
 */
export default function RealtimeTranscription() {
  const whisperContextRef = useRef<WhisperContext | null>(null);
  const vadContextRef = useRef<WhisperVadContext | null>(null);
  const transcriberRef = useRef<RealtimeTranscriber | null>(null);
  // Tracks mount state so async work that finishes late can clean up after itself.
  const isMountedRef = useRef(true);
  // Synchronous re-entrancy guards; state alone is stale between rapid taps.
  const isInitializingRef = useRef(false);
  const isStartingRef = useRef(false);

  const [logs, setLogs] = useState<string[]>([]);
  const [currentText, setCurrentText] = useState<string>('');
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [isInitializing, setIsInitializing] = useState(false);
  // Mirrors "both contexts are loaded" into state: writing to a ref does not
  // re-render, so the Start button must not be driven by a ref directly.
  const [isReady, setIsReady] = useState(false);
  const [vadPreset, setVadPreset] = useState<keyof typeof VAD_PRESETS>('default');

  /** Appends a timestamped line to the on-screen log, keeping only the newest lines. */
  const log = useCallback((...messages: unknown[]) => {
    if (!isMountedRef.current) return;
    const timestamp = new Date().toLocaleTimeString();
    const line = `${timestamp}: ${messages.join(' ')}`;
    setLogs((prev) => [...prev, line].slice(-MAX_STORED_LOG_LINES));
  }, []);

  useEffect(() => {
    isMountedRef.current = true;
    return () => {
      isMountedRef.current = false;
      const transcriber = transcriberRef.current;
      const vadCtx = vadContextRef.current;
      const whisperCtx = whisperContextRef.current;
      transcriberRef.current = null;
      vadContextRef.current = null;
      whisperContextRef.current = null;
      // Release the transcriber before the contexts it was built on, and await
      // each step so a rejected release is reported instead of left unhandled.
      void (async () => {
        await releaseSafely('transcriber', transcriber);
        await releaseSafely('VAD context', vadCtx);
        await releaseSafely('Whisper context', whisperCtx);
      })();
    };
  }, []);

  /**
   * Loads the Whisper and VAD models. Safe to call again after a partial
   * failure: a context that is already loaded is reused instead of replaced.
   */
  const initialize = async () => {
    if (isInitializingRef.current) return;
    if (whisperContextRef.current && vadContextRef.current) {
      log('Contexts already initialized');
      return;
    }
    isInitializingRef.current = true;
    setIsInitializing(true);
    try {
      log('Initializing contexts...');

      // Initialize Whisper
      if (!whisperContextRef.current) {
        const whisperCtx = await initWhisper({
          filePath: require('../assets/ggml-base.bin'),
        });
        if (!isMountedRef.current) {
          // Unmounted while loading: the cleanup effect never saw this context.
          await releaseSafely('Whisper context', whisperCtx);
          return;
        }
        whisperContextRef.current = whisperCtx;
        log('Whisper initialized');
      }

      // Initialize VAD
      if (!vadContextRef.current) {
        const vadCtx = await initWhisperVad({
          filePath: require('../assets/ggml-silero-v6.2.0.bin'),
          useGpu: true,
          nThreads: 4,
        });
        if (!isMountedRef.current) {
          await releaseSafely('VAD context', vadCtx);
          return;
        }
        vadContextRef.current = vadCtx;
        log('VAD initialized');
      }

      setIsReady(true);
    } catch (error) {
      log('Error initializing:', error);
    } finally {
      isInitializingRef.current = false;
      if (isMountedRef.current) setIsInitializing(false);
    }
  };

  /**
   * Builds a new `RealtimeTranscriber` using the currently selected VAD preset
   * and starts it. Any transcriber left over from a previous run is released
   * first so it is not orphaned.
   */
  const startTranscription = async () => {
    const whisperContext = whisperContextRef.current;
    const vadContext = vadContextRef.current;
    if (!whisperContext || !vadContext) {
      log('Contexts not initialized');
      return;
    }
    if (isStartingRef.current) return;
    isStartingRef.current = true;
    try {
      // NOTE(review): assumes RealtimeTranscriber.release() frees only what the
      // transcriber owns and leaves the injected Whisper/VAD contexts usable —
      // confirm against the whisper.rn version in use.
      const previous = transcriberRef.current;
      transcriberRef.current = null;
      await releaseSafely('previous transcriber', previous);

      const audioStream = new AudioPcmStreamAdapter();
      const vadWrapper = new RingBufferVad(vadContext, {
        vadOptions: VAD_PRESETS[vadPreset],
        vadPreset,
        logger: (msg) => console.log(msg),
      });
      const transcriber = new RealtimeTranscriber(
        {
          whisperContext,
          vadContext: vadWrapper,
          audioStream,
          fs: RNFS,
        },
        {
          logger: (msg) => log(msg),
          audioSliceSec: 30,
          audioMinSec: 0.5,
          maxSlicesInMemory: 3,
          transcribeOptions: {
            language: 'en',
            maxLen: 1,
          },
          audioOutputPath: `${RNFS.DocumentDirectoryPath}/realtime.wav`,
        },
        {
          onTranscribe: (event: RealtimeTranscribeEvent) => {
            if (event.data?.result) {
              log(`Transcribed: "${event.data.result.substring(0, 50)}..."`);
            }
          },
          onVad: (event: RealtimeVadEvent) => {
            // Silence events are skipped to keep the log readable.
            if (event.type !== 'silence') {
              log(`VAD: ${event.type}`);
            }
          },
          onError: (error) => log('Error:', error),
          onStatusChange: (isActive) => setIsTranscribing(isActive),
          onSliceTranscriptionStabilized: (text) => setCurrentText(text),
        }
      );
      // Store before start() so a failed or interrupted start is still released later.
      transcriberRef.current = transcriber;
      await transcriber.start();
      log('Realtime transcription started');
    } catch (error) {
      log('Error starting transcription:', error);
    } finally {
      isStartingRef.current = false;
    }
  };

  /** Stops the active transcriber, if any. The instance is kept for later release. */
  const stopTranscription = async () => {
    const transcriber = transcriberRef.current;
    if (!transcriber) return;
    try {
      await transcriber.stop();
      log('Transcription stopped');
    } catch (error) {
      log('Error stopping:', error);
    }
  };

  /** Advances to the next key of `VAD_PRESETS`, wrapping around at the end. */
  const cycleVadPreset = () => {
    const presets = Object.keys(VAD_PRESETS) as Array<keyof typeof VAD_PRESETS>;
    const currentIndex = presets.indexOf(vadPreset);
    const nextPreset = presets[(currentIndex + 1) % presets.length];
    if (nextPreset === undefined) return;
    setVadPreset(nextPreset);
    log(`Changed VAD preset to: ${nextPreset}`);
  };

  return (
    <ScrollView style={{ padding: 20 }}>
      <Button
        title="Initialize"
        onPress={initialize}
        disabled={isInitializing || isReady}
      />
      <View style={{ marginTop: 10 }}>
        <Text>VAD Preset: {vadPreset}</Text>
        <Button title="Change VAD Preset" onPress={cycleVadPreset} />
      </View>
      <View style={{ marginTop: 10 }}>
        <Button
          title={isTranscribing ? 'Stop' : 'Start Realtime'}
          onPress={isTranscribing ? stopTranscription : startTranscription}
          disabled={!isReady}
        />
      </View>
      {/* Ternary instead of `&&` so an empty string is never emitted as a bare child. */}
      {currentText ? (
        <View style={{ marginTop: 20, padding: 10, backgroundColor: '#e8f5e8' }}>
          <Text style={{ fontWeight: 'bold' }}>Current Transcription:</Text>
          <Text>{currentText}</Text>
        </View>
      ) : null}
      <View style={{ marginTop: 20 }}>
        <Text style={{ fontWeight: 'bold' }}>Logs:</Text>
        {logs.slice(-VISIBLE_LOG_LINES).map((line, i) => (
          <Text key={i} style={{ fontSize: 12 }}>{line}</Text>
        ))}
      </View>
    </ScrollView>
  );
}