## Overview
LiteLLM provides standardized exception types that mirror OpenAI’s exception structure while adding provider-specific information and retry tracking.
## Base Exception Hierarchy
All LiteLLM exceptions inherit from their OpenAI counterparts:
```python
from litellm import (
    AuthenticationError,
    BadRequestError,
    NotFoundError,
    RateLimitError,
    Timeout,
    APIError,
    ServiceUnavailableError,
    InternalServerError
)
```
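Because each litellm exception subclasses its OpenAI counterpart, handlers written against the OpenAI SDK's exception types keep working unchanged. A minimal sketch (the `openai` package is already a litellm dependency):

```python
import openai
import litellm

try:
    litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        api_key="invalid-key"
    )
except openai.AuthenticationError as e:
    # litellm.AuthenticationError subclasses openai.AuthenticationError,
    # so an OpenAI-style handler catches it too
    print(f"Caught via OpenAI type: {type(e).__name__}")
```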
## Exception Types
### AuthenticationError
Raised when authentication fails (status code: 401).
**Attributes**
- `message`: the error message, prefixed with `litellm`
- `llm_provider`: the provider that raised the error (e.g., `"openai"`, `"anthropic"`)
- `model`: the model that was being called
- `num_retries`: the number of retries attempted
- `max_retries`: the maximum retries configured
**Example**
```python
import litellm
from litellm import AuthenticationError

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        api_key="invalid-key"
    )
except AuthenticationError as e:
    print(f"Status: {e.status_code}")     # 401
    print(f"Provider: {e.llm_provider}")  # openai
    print(f"Model: {e.model}")            # gpt-4
    print(f"Message: {e.message}")
```
### BadRequestError
Raised for invalid requests (status code: 400).
**Attributes**
Same as `AuthenticationError`, plus:
- `body`: the response body containing error details
**Example**
```python
import litellm
from litellm import BadRequestError

try:
    response = litellm.completion(
        model="gpt-4",
        messages="invalid message format"  # should be a list of dicts
    )
except BadRequestError as e:
    print(f"Error: {e.message}")
    print(f"Body: {e.body}")
```
### NotFoundError
Raised when the requested resource/model is not found (status code: 404).
**Example**
```python
import litellm
from litellm import NotFoundError

try:
    response = litellm.completion(
        model="gpt-nonexistent",
        messages=[{"role": "user", "content": "Hello"}]
    )
except NotFoundError as e:
    print(f"Model not found: {e.model}")
    print(f"Provider: {e.llm_provider}")
```
### RateLimitError
Raised when rate limits are exceeded (status code: 429).
**Attributes**
Includes all base attributes, plus provider-specific rate limit headers on the response.
**Example**
```python
import litellm
from litellm import RateLimitError
import time

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except RateLimitError as e:
    print(f"Rate limit hit: {e.message}")
    print(f"Provider: {e.llm_provider}")
    # Check the retry-after header if available
    if hasattr(e.response, 'headers'):
        retry_after = e.response.headers.get('retry-after')
        if retry_after:
            print(f"Retry after: {retry_after} seconds")
            time.sleep(int(retry_after))
```
### Timeout
Raised when a request times out (status code: 408 or provider-specific).
**Example**
```python
import litellm
from litellm import Timeout

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        timeout=0.001  # deliberately short timeout
    )
except Timeout as e:
    print(f"Request timed out: {e.message}")
    print(f"Retries attempted: {e.num_retries}")
```
### ContextWindowExceededError
Subclass of BadRequestError. Raised when the input exceeds the model’s context window.
**Example**
```python
import litellm
from litellm import ContextWindowExceededError

try:
    # Messages long enough to exceed the model's context window
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "...very long text..."}]
    )
except ContextWindowExceededError as e:
    print(f"Context window exceeded for {e.model}")
    print("Try a model with a larger context window")
```
### ContentPolicyViolationError
Subclass of `BadRequestError`. Raised when content violates the provider's safety policies.
**Example**
```python
import litellm
from litellm import ContentPolicyViolationError

try:
    response = litellm.image_generation(
        prompt="Inappropriate content",
        model="dall-e-3"
    )
except ContentPolicyViolationError as e:
    print(f"Content policy violation: {e.message}")
    print(f"Provider: {e.llm_provider}")
```
### ServiceUnavailableError
Raised when the provider’s service is unavailable (status code: 503).
**Example**
```python
import litellm
from litellm import ServiceUnavailableError
import time

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except ServiceUnavailableError as e:
    print(f"Service unavailable: {e.llm_provider}")
    print("Retrying in 5 seconds...")
    time.sleep(5)
```
### InternalServerError
Raised for server-side errors (status code: 500).
**Example**
```python
import litellm
from litellm import InternalServerError

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except InternalServerError as e:
    print(f"Server error from {e.llm_provider}")
    print(f"Retries: {e.num_retries}/{e.max_retries}")
```
### APIError
Generic API error, raised for status codes that do not map to a more specific exception.
**Example**
```python
import litellm
from litellm import APIError

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except APIError as e:
    print(f"API error {e.status_code}: {e.message}")
```
### APIConnectionError
Raised when connection to the API fails.
**Example**
```python
import litellm
from litellm import APIConnectionError

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        api_base="https://invalid-endpoint.com"
    )
except APIConnectionError as e:
    print(f"Connection failed: {e.message}")
```
## Error Handling Patterns
### Basic Error Handling
```python
import litellm
from litellm import (
    AuthenticationError,
    RateLimitError,
    ContextWindowExceededError,
    ServiceUnavailableError
)

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except AuthenticationError:
    print("Check your API key")
except RateLimitError:
    print("Rate limit exceeded, slow down")
except ContextWindowExceededError:
    print("Input too long, try a shorter prompt")
except ServiceUnavailableError:
    print("Service temporarily unavailable")
except Exception as e:
    print(f"Unexpected error: {e}")
```
### Retry with Exponential Backoff
```python
import litellm
from litellm import RateLimitError, ServiceUnavailableError
import time

def completion_with_retry(max_retries=3):
    for attempt in range(max_retries):
        try:
            return litellm.completion(
                model="gpt-4",
                messages=[{"role": "user", "content": "Hello"}]
            )
        except (RateLimitError, ServiceUnavailableError):
            if attempt == max_retries - 1:
                raise
            wait_time = 2 ** attempt  # exponential backoff: 1s, 2s, 4s, ...
            print(f"Retry {attempt + 1}/{max_retries} after {wait_time}s")
            time.sleep(wait_time)
    raise Exception("Max retries exceeded")
```
### Fallback to a Different Model
```python
import litellm
from litellm import RateLimitError, ServiceUnavailableError

def completion_with_fallback(models=("gpt-4", "gpt-3.5-turbo", "claude-2")):
    for i, model in enumerate(models):
        try:
            return litellm.completion(
                model=model,
                messages=[{"role": "user", "content": "Hello"}]
            )
        except (RateLimitError, ServiceUnavailableError) as e:
            print(f"{model} failed: {e.message}")
            if i == len(models) - 1:  # last model
                raise
            print(f"Trying fallback: {models[i + 1]}")
    raise Exception("All models failed")
```
### Handle Context Window Errors
```python
import litellm
from litellm import ContextWindowExceededError

def smart_completion(messages, models=(("gpt-3.5-turbo", 4096), ("gpt-3.5-turbo-16k", 16384))):
    # Try models in order of increasing context window size
    for model, _context_size in models:
        try:
            return litellm.completion(
                model=model,
                messages=messages
            )
        except ContextWindowExceededError:
            if model == models[-1][0]:  # last model
                raise
            print(f"Context too large for {model}, trying a larger model")
    raise Exception("Content exceeds all available context windows")
```
### Logging All Errors
```python
import litellm
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except Exception as e:
    # LiteLLM exceptions carry provider, model, and retry context
    if hasattr(e, 'llm_provider'):
        logger.error(
            f"Error from {e.llm_provider}",
            extra={
                "provider": e.llm_provider,
                "model": getattr(e, 'model', None),
                "status_code": getattr(e, 'status_code', None),
                "num_retries": getattr(e, 'num_retries', None),
            }
        )
    raise
```
### Using Router with Retries
```python
from litellm import Router
from litellm.router import RetryPolicy

router = Router(
    model_list=[...],
    # Automatic retries on failure
    num_retries=3,
    # Per-exception retry counts via the RetryPolicy type
    retry_policy=RetryPolicy(
        RateLimitErrorRetries=5,
        TimeoutErrorRetries=2,
        InternalServerErrorRetries=3,
    ),
    # Fallbacks
    fallbacks=[
        {"gpt-4": ["gpt-3.5-turbo", "claude-2"]}
    ]
)

# Router handles retries and fallbacks automatically
response = router.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}]
)
```
## Common Exception Scenarios
### Scenario 1: API Key Issues
```python
import litellm
from litellm import AuthenticationError, PermissionDeniedError

try:
    response = litellm.completion(model="gpt-4", messages=[...])
except AuthenticationError:
    print("Invalid API key")
except PermissionDeniedError:
    print("API key lacks necessary permissions")
```
### Scenario 2: Rate Limiting
```python
import litellm
from litellm import RateLimitError
import time

try:
    response = litellm.completion(model="gpt-4", messages=[...])
except RateLimitError as e:
    # Check for a retry-after header; default to 60s if absent
    headers = getattr(e.response, 'headers', {})
    retry_after = headers.get('retry-after', 60)
    print(f"Rate limited. Retry after {retry_after}s")
    time.sleep(int(retry_after))
```
### Scenario 3: Model Overloaded
```python
import litellm
from litellm import ServiceUnavailableError, InternalServerError

try:
    response = litellm.completion(model="gpt-4", messages=[...])
except (ServiceUnavailableError, InternalServerError) as e:
    print(f"Provider temporarily unavailable: {e.llm_provider}")
    # Implement exponential backoff or switch to a fallback model
```
## Best Practices
- Always catch specific exceptions rather than a generic `Exception`
- Use `Router` for automatic retries and fallbacks in production
- Implement exponential backoff for rate limit errors
- Log errors with context, including provider, model, and retry count
- Configure fallback models for critical paths
- Respect `retry-after` headers from providers
- Monitor error rates to detect systemic issues
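A minimal sketch combining several of these practices: specific exception handling, respecting `retry-after`, exponential backoff, and a model fallback. The helper name and defaults are illustrative, not a litellm API:

```python
import time
import litellm
from litellm import RateLimitError, ServiceUnavailableError, InternalServerError

def robust_completion(messages, models=("gpt-4", "gpt-3.5-turbo"), max_retries=3):
    """Illustrative helper: retry transient errors, then fall back to the next model."""
    for model in models:
        for attempt in range(max_retries):
            try:
                return litellm.completion(model=model, messages=messages)
            except RateLimitError as e:
                # Prefer the provider's retry-after hint over blind backoff
                headers = getattr(getattr(e, 'response', None), 'headers', None) or {}
                time.sleep(int(headers.get('retry-after', 2 ** attempt)))
            except (ServiceUnavailableError, InternalServerError):
                time.sleep(2 ** attempt)
        # Retries exhausted for this model; try the next one
    raise RuntimeError("All models and retries exhausted")
```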