Use the stream() method to get an iterator of audio chunks:
# Create a client, request a streamed text-to-speech response, and play it.
from elevenlabs import stream
from elevenlabs.client import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)

audio_stream = client.text_to_speech.stream(
    text="This is a test of real-time streaming.",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
)

# Play the stream directly
stream(audio_stream)
from elevenlabs import stream

# Option 1: Use the stream() helper to play audio as it arrives
# (`client` is the ElevenLabs client created in the first example.)
audio_stream = client.text_to_speech.stream(
    text="Play audio in real-time.",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
)
stream(audio_stream)
# Request a specific output format for the streamed audio.
# (`client` and `stream` come from the earlier examples.)
audio_stream = client.text_to_speech.stream(
    text="Streaming in different formats.",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_22050_32",  # Lower quality for faster streaming
)
stream(audio_stream)
Lower sample rates and bitrates reduce bandwidth and improve streaming speed but decrease audio quality.
# Play a stream while it arrives, then write the full audio to disk.
# `stream` is imported here as well so the snippet lists every helper it calls.
from elevenlabs import stream
from elevenlabs.play import save

audio_stream = client.text_to_speech.stream(
    text="Stream and save simultaneously.",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
)

# The stream() function returns the complete audio
complete_audio = stream(audio_stream)

# Save the complete audio
save(complete_audio, "streamed_output.mp3")
# Stream audio together with character-level timing information.
import base64

audio_stream = client.text_to_speech.stream_with_timestamps(
    text="The first move is what sets everything in motion.",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
    output_format="mp3_44100_128",
)

for chunk in audio_stream:
    # Each chunk contains audio and character alignment data.
    # NOTE(review): field names follow the SDK's streaming response model
    # (base64 audio in `audio_base_64`, optional `alignment` with a
    # `characters` list) -- confirm against the installed SDK version.
    audio_bytes = base64.b64decode(chunk.audio_base_64)
    alignment = chunk.alignment
    if alignment is None:
        # Not every chunk is guaranteed to carry alignment data.
        continue
    print(f"Audio chunk with {len(alignment.characters)} alignment points")
For long streams, process chunks in fixed-size batches instead of accumulating the entire stream in memory:
# Handle the stream in fixed-size batches so the buffer never grows beyond
# `max_buffer_size` chunks. `audio_stream` and `process_chunks` are expected
# to be provided by the surrounding code.
chunks_buffer = []
max_buffer_size = 100

for chunk in audio_stream:
    chunks_buffer.append(chunk)
    if len(chunks_buffer) >= max_buffer_size:
        # Process or save buffered chunks
        process_chunks(chunks_buffer)
        chunks_buffer = []

# Flush the final partial batch; without this, any chunks left over after the
# last full batch would be silently dropped.
if chunks_buffer:
    process_chunks(chunks_buffer)