Skip to main content
The SpeechRecognition library supports multiple languages through various recognition engines. Each engine has different language capabilities and uses different language code formats.

Quick Start

Most recognition engines accept a language parameter with BCP-47 language tags:
import speech_recognition as sr

r = sr.Recognizer()

with sr.Microphone() as source:
    print("Parlez maintenant!")  # "Speak now!" in French
    audio = r.listen(source)

# Recognize French speech
text = r.recognize_google(audio, language="fr-FR")
print(f"Vous avez dit: {text}")

Language Codes

Language codes follow the BCP-47 standard: language-REGION.
Common Language Codes:
| Language | Code | Example |
|---|---|---|
| English (US) | en-US | Default for most engines |
| English (UK) | en-GB | British English |
| Spanish (Spain) | es-ES | European Spanish |
| Spanish (Mexico) | es-MX | Latin American Spanish |
| French (France) | fr-FR | Standard French |
| French (Canada) | fr-CA | Canadian French |
| German | de-DE | Standard German |
| Italian | it-IT | Standard Italian |
| Portuguese (Brazil) | pt-BR | Brazilian Portuguese |
| Portuguese (Portugal) | pt-PT | European Portuguese |
| Russian | ru-RU | Standard Russian |
| Japanese | ja-JP | Standard Japanese |
| Chinese (Mandarin) | zh-CN | Simplified Chinese |
| Chinese (Traditional) | zh-TW | Traditional Chinese |
| Korean | ko-KR | Standard Korean |
| Arabic | ar-SA | Saudi Arabian Arabic |
| Hindi | hi-IN | Indian Hindi |
| Dutch | nl-NL | Standard Dutch |
Language support varies by recognition engine. Check the specific engine documentation for the complete list of supported languages.

Google Speech Recognition

Google Speech Recognition supports the most languages:
import speech_recognition as sr

r = sr.Recognizer()

with sr.Microphone() as source:
    audio = r.listen(source)

# English (default)
text_en = r.recognize_google(audio)  # Defaults to en-US

# Spanish
text_es = r.recognize_google(audio, language="es-ES")

# French
text_fr = r.recognize_google(audio, language="fr-FR")

# German
text_de = r.recognize_google(audio, language="de-DE")

# Japanese
text_ja = r.recognize_google(audio, language="ja-JP")

# Chinese (Mandarin)
text_zh = r.recognize_google(audio, language="zh-CN")

Example with Different Languages

import speech_recognition as sr
from os import path

r = sr.Recognizer()

# English audio
with sr.AudioFile("english.wav") as source:
    audio = r.record(source)
print("English:", r.recognize_google(audio, language="en-US"))

# French audio  
with sr.AudioFile("french.aiff") as source:
    audio = r.record(source)
print("French:", r.recognize_google(audio, language="fr-FR"))

# Chinese audio
with sr.AudioFile("chinese.flac") as source:
    audio = r.record(source)
print("Chinese:", r.recognize_google(audio, language="zh-CN"))
Google Speech Recognition supports over 100 languages. See the full list on StackOverflow.

Microsoft Azure Speech

Azure Speech supports 90+ languages with BCP-47 tags:
import speech_recognition as sr

r = sr.Recognizer()
AZURE_KEY = "your-azure-speech-api-key"

with sr.Microphone() as source:
    audio = r.listen(source)

# Spanish (Spain)
text_es = r.recognize_azure(
    audio,
    key=AZURE_KEY,
    language="es-ES"
)

# German
text_de = r.recognize_azure(
    audio,
    key=AZURE_KEY,
    language="de-DE"
)

# Arabic
text_ar = r.recognize_azure(
    audio,
    key=AZURE_KEY,
    language="ar-SA"
)

Azure Location Parameter

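Azure also takes a location parameter that identifies the Azure region of your Speech resource. The examples above omit it and rely on the library default (westus); pass it explicitly when your resource lives in a different region: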
text = r.recognize_azure(
    audio,
    key=AZURE_KEY,
    language="en-US",
    location="westus"  # Azure region
)

Microsoft Bing Speech

Bing Speech supports 30+ languages:
import speech_recognition as sr

r = sr.Recognizer()
BING_KEY = "your-bing-api-key"

with sr.Microphone() as source:
    audio = r.listen(source)

# Italian
text_it = r.recognize_bing(audio, key=BING_KEY, language="it-IT")

# Russian
text_ru = r.recognize_bing(audio, key=BING_KEY, language="ru-RU")

# Portuguese (Brazil)
text_pt = r.recognize_bing(audio, key=BING_KEY, language="pt-BR")

IBM Speech to Text

IBM supports 20+ languages with RFC5646 tags:
import speech_recognition as sr

r = sr.Recognizer()
IBM_KEY = "your-ibm-api-key"

with sr.Microphone() as source:
    audio = r.listen(source)

# US English (default)
text_en = r.recognize_ibm(audio, key=IBM_KEY, language="en-US")

# UK English
text_en_gb = r.recognize_ibm(audio, key=IBM_KEY, language="en-GB")

# Japanese
text_ja = r.recognize_ibm(audio, key=IBM_KEY, language="ja-JP")

# Korean
text_ko = r.recognize_ibm(audio, key=IBM_KEY, language="ko-KR")
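IBM model names have the form LANGUAGE_BroadbandModel (for example, en-US_BroadbandModel). Pass only the language tag to recognize_ibm, as in the examples above. See the IBM documentation for details.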

Whisper (Local)

Whisper supports 99 languages using language names:
import speech_recognition as sr

r = sr.Recognizer()

with sr.Microphone() as source:
    audio = r.listen(source)

# English (using language name, not code)
text_en = r.recognize_whisper(audio, language="english")

# Spanish
text_es = r.recognize_whisper(audio, language="spanish")

# French  
text_fr = r.recognize_whisper(audio, language="french")

# German
text_de = r.recognize_whisper(audio, language="german")

# Japanese
text_ja = r.recognize_whisper(audio, language="japanese")

# Auto-detect language (slower but accurate)
text_auto = r.recognize_whisper(audio)  # No language parameter

Whisper Language Names

Whisper uses language names (“english”, “spanish”), not BCP-47 codes (“en-US”, “es-ES”).
Common Whisper Languages:
  • "english" - English
  • "spanish" - Spanish
  • "french" - French
  • "german" - German
  • "italian" - Italian
  • "portuguese" - Portuguese
  • "russian" - Russian
  • "japanese" - Japanese
  • "chinese" - Chinese
  • "korean" - Korean
  • "arabic" - Arabic
  • "hindi" - Hindi

Whisper API (OpenAI)

OpenAI’s Whisper API uses ISO 639-1 codes:
import speech_recognition as sr
import os

r = sr.Recognizer()
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

with sr.Microphone() as source:
    audio = r.listen(source)

# English (ISO 639-1 code)
text_en = r.recognize_openai(audio, language="en")

# Spanish
text_es = r.recognize_openai(audio, language="es")

# French
text_fr = r.recognize_openai(audio, language="fr")

# Auto-detect (no language parameter)
text_auto = r.recognize_openai(audio)

Multi-Language Recognition

Try multiple languages to find the best match:
import speech_recognition as sr

r = sr.Recognizer()

with sr.Microphone() as source:
    print("Speak in any language...")
    audio = r.listen(source)

# Try multiple languages
languages = [
    ("en-US", "English"),
    ("es-ES", "Spanish"),
    ("fr-FR", "French"),
    ("de-DE", "German"),
]

for code, name in languages:
    try:
        text = r.recognize_google(audio, language=code)
        print(f"{name}: {text}")
    except sr.UnknownValueError:
        print(f"{name}: [unintelligible]")
    except sr.RequestError as e:
        print(f"{name}: [error - {e}]")

Language-Specific Optimizations

Adjust for Language Characteristics

Different languages may need different settings:
import speech_recognition as sr

r = sr.Recognizer()

# NOTE: the two groups below are alternative presets -- keep only the one
# that matches your target language. Executed top to bottom as written, the
# second pair of assignments overwrites the first.

# For tonal languages (Chinese, Vietnamese)
r.pause_threshold = 0.6  # Shorter pauses
r.phrase_threshold = 0.2  # Shorter minimum phrase

# For languages with longer words (German, Finnish)
r.pause_threshold = 1.0  # Longer pauses
r.phrase_threshold = 0.4  # Longer minimum phrase

Example: Multi-Language Voice Assistant

import speech_recognition as sr

class MultiLanguageAssistant:
    """Microphone-driven recognizer whose language can be switched between calls."""

    def __init__(self):
        self.r = sr.Recognizer()
        self.m = sr.Microphone()
        # Language code handed to recognize_google until set_language() changes it.
        self.current_language = "en-US"

    def set_language(self, language_code):
        """Store ``language_code`` as the active language and announce the change."""
        self.current_language = language_code
        print(f"Language set to {language_code}")

    def listen_and_recognize(self):
        """Record one phrase from the microphone and transcribe it.

        Returns:
            The recognized text, or None when the speech could not be
            understood or the recognition request failed (request errors
            are printed before returning).
        """
        with self.m as mic:
            # Recalibrate against background noise before every capture.
            self.r.adjust_for_ambient_noise(mic)
            print(f"Listening in {self.current_language}...")
            captured = self.r.listen(mic)

        result = None
        try:
            result = self.r.recognize_google(
                captured,
                language=self.current_language,
            )
        except sr.UnknownValueError:
            # Speech was captured but not understood; fall through with None.
            pass
        except sr.RequestError as e:
            print(f"Error: {e}")
        return result

# Usage
assistant = MultiLanguageAssistant()

# English mode
assistant.set_language("en-US")
print("You said:", assistant.listen_and_recognize())

# Spanish mode
assistant.set_language("es-ES")  
print("Dijiste:", assistant.listen_and_recognize())

# French mode
assistant.set_language("fr-FR")
print("Vous avez dit:", assistant.listen_and_recognize())

Engine Language Support Comparison

| Engine | Languages | Code Format | Notes |
|---|---|---|---|
| Google | 100+ | BCP-47 (en-US) | Most comprehensive |
| Azure | 90+ | BCP-47 (en-US) | Requires location |
| Bing | 30+ | BCP-47 (en-US) | Legacy API |
| IBM | 20+ | RFC5646 (en-US) | Good accuracy |
| Whisper | 99 | Language names (“english”) | Local processing |
| Whisper API | 99 | ISO 639-1 (en) | Cloud-based |
| Sphinx | Limited | Language-specific models | Offline only |
| Houndify | English only | N/A | English-only |

Troubleshooting

If the wrong language is being recognized:
  1. Explicitly set the language parameter:
text = r.recognize_google(audio, language="es-ES")
  2. Ensure you’re using the correct code format for your engine
  3. Try a different regional variant:
# Try both variants
text1 = r.recognize_google(audio, language="es-ES")  # Spain
text2 = r.recognize_google(audio, language="es-MX")  # Mexico
If recognition quality is poor for non-English languages:
  1. Use a higher-quality microphone
  2. Adjust for ambient noise:
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source, duration=2)
    audio = r.listen(source)
  3. Try a different recognition engine
  4. Use Whisper for better multilingual support:
text = r.recognize_whisper(audio, language="spanish")
Engines differ in how the language is specified, even when the parameter name is the same:
# Google - uses 'language'
r.recognize_google(audio, language="fr-FR")

# Whisper - uses 'language' with names
r.recognize_whisper(audio, language="french")

# Sphinx - uses language-specific models
r.recognize_sphinx(audio)  # Model determines language
If you get an unsupported language error:
  1. Check the engine’s documentation for supported languages
  2. Verify the language code format is correct
  3. Try a different engine with better language support
  4. Use Whisper for the widest language coverage

Complete Multi-Language Example

#!/usr/bin/env python3
import speech_recognition as sr

def recognize_multi_language(audio_file, languages):
    """
    Transcribe one audio file once per candidate language, printing each result.

    The file is read a single time; the same recording is then submitted to
    Google recognition for every entry in ``languages``. Failures for one
    language are printed and do not stop the remaining attempts.

    Args:
        audio_file: Path to audio file
        languages: Dict of {language_code: language_name}
    """
    recognizer = sr.Recognizer()

    with sr.AudioFile(audio_file) as clip:
        recording = recognizer.record(clip)

    print(f"\nRecognizing: {audio_file}\n")

    for code, name in languages.items():
        try:
            text = recognizer.recognize_google(recording, language=code)
        except sr.UnknownValueError:
            print(f"{name} ({code}): [unintelligible]")
        except sr.RequestError as e:
            print(f"{name} ({code}): [error - {e}]")
        else:
            print(f"{name} ({code}): {text}")

# Test with multiple languages
languages = {
    "en-US": "English (US)",
    "en-GB": "English (UK)",
    "es-ES": "Spanish",
    "fr-FR": "French",
    "de-DE": "German",
    "ja-JP": "Japanese",
    "zh-CN": "Chinese",
}

recognize_multi_language("audio.wav", languages)

API Reference

Language Parameters by Engine

# Google Speech Recognition
r.recognize_google(audio, language="en-US")

# Azure Speech
r.recognize_azure(audio, key=KEY, language="en-US", location="westus")

# Bing Speech  
r.recognize_bing(audio, key=KEY, language="en-US")

# IBM Speech to Text
r.recognize_ibm(audio, key=KEY, language="en-US")

# Whisper (local)
r.recognize_whisper(audio, language="english")

# Whisper API (OpenAI)
r.recognize_openai(audio, language="en")

# Wit.ai (language set in app settings)
r.recognize_wit(audio, key=KEY)

See Also