Skip to main content

Overview

The useTextToSpeech hook manages a text-to-speech (TTS) model instance for converting text to natural-sounding speech audio. It supports both one-shot synthesis and streaming audio generation.

Import

import { useTextToSpeech } from 'react-native-executorch';

Hook Signature

const tts = useTextToSpeech({ model, voice, preventLoad }: TextToSpeechProps): TextToSpeechType

Parameters

model
KokoroConfig
required
Model configuration
voice
VoiceConfig
required
Voice configuration
preventLoad
boolean
default: false
If true, prevents automatic model loading when the hook mounts

Return Value

State Properties

isReady
boolean
Indicates whether the TTS model is loaded and ready to synthesize speech.
isGenerating
boolean
Indicates whether the model is currently generating audio.
downloadProgress
number
Download progress as a value between 0 and 1.
error
RnExecutorchError | null
Contains error details if the model fails to load or encounters an error.

Methods

forward
function
Synthesizes speech from text in a single pass.
forward(input: TextToSpeechInput): Promise<Float32Array>
Returns a promise that resolves to a Float32Array of audio samples.
stream
function
Streams generated audio incrementally.
stream(input: TextToSpeechStreamingInput): Promise<void>
streamStop
function
Stops the currently active audio generation stream.
streamStop(): void

Usage Examples

Basic Text-to-Speech

import { useTextToSpeech } from 'react-native-executorch';
import { useEffect, useState } from 'react';
import { Audio } from 'expo-av';

/**
 * Minimal one-shot text-to-speech screen.
 *
 * Loads a Kokoro model through `useTextToSpeech`, synthesizes the text typed
 * by the user with `tts.forward`, and plays the result through expo-av.
 * The sound created for each utterance is unloaded when it is replaced by a
 * newer one or when the component unmounts, so native player resources are
 * not leaked.
 */
function TextToSpeechDemo() {
  const [text, setText] = useState('');
  const [sound, setSound] = useState<Audio.Sound | null>(null);
  
  const tts = useTextToSpeech({
    model: {
      type: 'kokoro',
      durationPredictorSource: 'https://example.com/duration.pte',
      synthesizerSource: 'https://example.com/synthesizer.pte',
    },
    voice: {
      lang: 'en-us',
      voiceSource: require('./voices/voice-en-us.bin'),
      extra: {
        taggerSource: require('./kokoro/tagger.bin'),
        lexiconSource: require('./kokoro/lexicon.bin'),
      },
    },
  });
  
  // Release the current sound whenever it is replaced or the screen unmounts.
  useEffect(() => {
    if (!sound) return undefined;
    
    return () => {
      // Effect cleanups cannot await, so run the unload fire-and-forget and
      // report a failure instead of leaving an unhandled rejection.
      void sound.unloadAsync().catch((error) => {
        console.warn('Failed to unload sound:', error);
      });
    };
  }, [sound]);
  
  /** Synthesizes the current text in a single pass and starts playback. */
  const speak = async () => {
    // Nothing to do until the model has loaded and there is text to read.
    if (!tts.isReady || !text) return;
    
    try {
      // Resolves to a Float32Array of audio samples.
      const audioData = await tts.forward({ text });
      
      // Convert to playable audio
      const { sound: newSound } = await Audio.Sound.createAsync({
        uri: convertToWav(audioData),
      });
      
      // Storing the new sound triggers the cleanup of the previous one.
      setSound(newSound);
      await newSound.playAsync();
    } catch (error) {
      console.error('TTS failed:', error);
    }
  };
  
  return (
    <View>
      <Text>Status: {tts.isReady ? 'Ready' : 'Loading...'}</Text>
      
      <TextInput
        value={text}
        onChangeText={setText}
        placeholder="Enter text to speak..."
        multiline
      />
      
      {/* Disabled while loading or generating so requests cannot overlap. */}
      <Button
        title="Speak"
        onPress={speak}
        disabled={!tts.isReady || tts.isGenerating}
      />
    </View>
  );
}

/**
 * Placeholder for the step that turns synthesized samples into a URI the
 * audio player can load.
 *
 * A real implementation would encode `audioData` as a WAV file using whatever
 * audio library the app relies on; this stub ignores its input.
 *
 * @param audioData - Audio samples produced by the TTS model.
 * @returns Always the empty string in this stub.
 */
function convertToWav(audioData: Float32Array): string {
  // Stub result: no encoding is performed here.
  const placeholderUri = '';
  return placeholderUri;
}

Streaming TTS for Real-time Playback

import { useTextToSpeech } from 'react-native-executorch';
import { useState } from 'react';
import { AudioPlayer } from 'react-native-audio-streaming';

/**
 * Streaming text-to-speech screen.
 *
 * Uses `tts.stream` to receive audio incrementally and forwards each chunk to
 * a single `AudioPlayer` instance that lives for the lifetime of the
 * component. A new stream cannot be started while one is still generating,
 * so two streams never write into the same player at once.
 */
function StreamingTTS() {
  const [text, setText] = useState('');
  // Lazy initializer: the player is constructed once, not on every render.
  const [audioPlayer] = useState(() => new AudioPlayer());
  
  const tts = useTextToSpeech({
    model: {
      type: 'kokoro',
      durationPredictorSource: require('./models/duration.pte'),
      synthesizerSource: require('./models/synthesizer.pte'),
    },
    voice: {
      lang: 'en-us',
      voiceSource: require('./voices/voice.bin'),
      extra: {
        taggerSource: require('./kokoro/tagger.bin'),
        lexiconSource: require('./kokoro/lexicon.bin'),
      },
    },
  });
  
  /** Starts a stream for the current text and pipes its chunks to the player. */
  const speakStreaming = async () => {
    // Also bail out while a stream is running, so a second press cannot
    // start an overlapping stream on the shared player.
    if (!tts.isReady || tts.isGenerating || !text) return;
    
    try {
      await tts.stream({
        text,
        speed: 1.0,
        onBegin: async () => {
          console.log('Starting audio stream...');
          await audioPlayer.start();
        },
        onNext: async (audioChunk) => {
          // Feed chunk to audio player
          await audioPlayer.writeAudio(audioChunk);
        },
        onEnd: async () => {
          console.log('Stream complete');
          await audioPlayer.finish();
        },
      });
    } catch (error) {
      console.error('Streaming TTS failed:', error);
    }
  };
  
  return (
    <View>
      <TextInput value={text} onChangeText={setText} />
      {/* Same disabled rule as the basic example: not ready or already generating. */}
      <Button
        title="Speak (Streaming)"
        onPress={speakStreaming}
        disabled={!tts.isReady || tts.isGenerating}
      />
    </View>
  );
}

Variable Speech Speed

import { useTextToSpeech } from 'react-native-executorch';
import { useState } from 'react';
import Slider from '@react-native-community/slider';

/**
 * Text-to-speech screen with a user-adjustable speaking speed.
 *
 * The slider value (0.5 to 2.0 in steps of 0.1) is passed as `speed` to
 * `tts.forward`, and the synthesized samples are handed to `playAudio`.
 */
function VariableSpeedTTS() {
  const [text, setText] = useState('');
  const [speed, setSpeed] = useState(1.0);

  const tts = useTextToSpeech({
    model: {
      type: 'kokoro',
      durationPredictorSource: 'https://example.com/duration.pte',
      synthesizerSource: 'https://example.com/synthesizer.pte',
    },
    voice: {
      lang: 'en-gb',
      voiceSource: 'https://example.com/voice-en-gb.bin',
    },
  });

  /** Synthesizes the current text at the selected speed and plays it. */
  async function handleSpeak() {
    // Nothing to do until the model has loaded and there is text to read.
    if (!tts.isReady) return;
    if (!text) return;

    try {
      const samples = await tts.forward({ text, speed });
      // Play audio
      playAudio(samples);
    } catch (error) {
      console.error('TTS failed:', error);
    }
  }

  return (
    <View>
      <TextInput onChangeText={setText} value={text} />

      <Text>Speed: {speed.toFixed(2)}x</Text>
      <Slider
        minimumValue={0.5}
        maximumValue={2.0}
        step={0.1}
        value={speed}
        onValueChange={setSpeed}
      />

      <Button onPress={handleSpeak} title="Speak" />
    </View>
  );
}

/**
 * Placeholder playback function used by the variable-speed example.
 *
 * The body is intentionally empty: the app is expected to supply its own
 * playback here.
 *
 * @param data - Audio samples to play; currently unused.
 */
function playAudio(data: Float32Array) {
  // Implementation: hand `data` to the app's audio playback library.
}

Read-Along Text Highlighter

import { useTextToSpeech } from 'react-native-executorch';
import { useState } from 'react';

/**
 * Read-along screen: speaks the text one word at a time and highlights the
 * word currently being spoken.
 *
 * `highlightRange` holds the [start, end) character offsets of the active
 * word within `text`, or `null` when nothing is being read. It also acts as
 * the "reading in progress" flag that locks the input and the button.
 */
function ReadAlongApp() {
  const [text, setText] = useState('The quick brown fox jumps over the lazy dog.');
  const [highlightRange, setHighlightRange] = useState<[number, number] | null>(null);
  
  const tts = useTextToSpeech({
    model: {
      type: 'kokoro',
      durationPredictorSource: require('./models/duration.pte'),
      synthesizerSource: require('./models/synthesizer.pte'),
    },
    voice: {
      lang: 'en-us',
      voiceSource: require('./voices/voice.bin'),
    },
  });
  
  /** Speaks every word of `text` in order, moving the highlight along. */
  const readAloud = async () => {
    if (!tts.isReady || !text) return;
    
    // Match runs of non-whitespace so repeated spaces, tabs and newlines never
    // produce empty "words"; offsets come straight from the match position.
    const wordPattern = /\S+/g;
    
    try {
      for (
        let match = wordPattern.exec(text);
        match !== null;
        match = wordPattern.exec(text)
      ) {
        const word = match[0];
        const start = match.index;
        
        setHighlightRange([start, start + word.length]);
        
        // Speak word
        const audioData = await tts.forward({ text: word });
        await playAndWait(audioData);
      }
    } catch (error) {
      console.error('Read-along failed:', error);
    } finally {
      // Always clear the highlight, even when synthesis or playback fails.
      setHighlightRange(null);
    }
  };
  
  /** Renders the text with the active word, if any, on a yellow background. */
  const renderText = () => {
    if (!highlightRange) return <Text>{text}</Text>;
    
    const [start, end] = highlightRange;
    const before = text.substring(0, start);
    const highlight = text.substring(start, end);
    const after = text.substring(end);
    
    return (
      <Text>
        {before}
        <Text style={{ backgroundColor: 'yellow' }}>{highlight}</Text>
        {after}
      </Text>
    );
  };
  
  // A non-null highlight means a read-along is in progress.
  const isReading = highlightRange !== null;
  
  return (
    <View>
      {/* Locked while reading so the highlight offsets stay valid for the text. */}
      <TextInput value={text} onChangeText={setText} editable={!isReading} />
      {/* Disabled while reading so two read loops cannot run at once. */}
      <Button
        title="Read Aloud"
        onPress={readAloud}
        disabled={!tts.isReady || isReading}
      />
      <View style={{ padding: 20 }}>
        {renderText()}
      </View>
    </View>
  );
}

/**
 * Placeholder for "play these samples and wait until playback has finished".
 *
 * No audio is played: the returned promise resolves after a fixed 500 ms
 * timer, and `audio` is not inspected.
 *
 * @param audio - Audio samples that a real implementation would play.
 * @returns A promise that resolves once the placeholder delay has elapsed.
 */
function playAndWait(audio: Float32Array): Promise<void> {
  const placeholderDelayMs = 500; // Placeholder
  return new Promise<void>((done) => {
    // Play audio and resolve when done
    setTimeout(() => done(), placeholderDelayMs);
  });
}

Notes

The model and voice assets automatically load when the hook mounts unless preventLoad is set to true.
For real-time playback, use the stream method with the onNext callback to feed audio chunks to your audio player.
Adjust the speed parameter to change the speaking rate: slower for emphasis, faster for quick narration.

Supported Languages

Currently supported:
  • 'en-us': American English
  • 'en-gb': British English

Performance Considerations

  • Text Length: Longer text takes more time to synthesize
  • Streaming: Use streaming for immediate playback start
  • Speed Parameter: Higher speeds generate audio faster but may affect quality

See Also

Build docs developers (and LLMs) love