Overview
Health endpoints provide status information about the proxy and its connected services.
Proxy Health
GET /health
Check if the proxy is running and healthy.
Response
Health status. Values: "healthy", "unhealthy"
Database connection status.
List of healthy model deployments.
List of unhealthy model deployments.
Example
curl http://localhost:4000/health
Response:
{
  "status": "healthy",
  "db": "connected",
  "redis": "connected",
  "litellm_version": "1.50.0",
  "healthy_endpoints": [
    { "model": "gpt-4", "status": "healthy" },
    { "model": "claude-2", "status": "healthy" }
  ],
  "unhealthy_endpoints": []
}
import requests

# Query the proxy's /health endpoint and summarize the response.
response = requests.get("http://localhost:4000/health")
health = response.json()

print(f"Proxy Status: {health['status']}")
print(f"Version: {health['litellm_version']}")
# healthy_endpoints may be absent, so default to an empty list.
print(f"Healthy Models: {len(health.get('healthy_endpoints', []))}")
Readiness Check
GET /health/readiness
Check if the proxy is ready to accept requests.
Response
Readiness status: "ready" or "not_ready"
Whether database is initialized.
Whether router is initialized.
Example
curl http://localhost:4000/health/readiness
Response:
{
  "status": "ready",
  "db_initialized": true,
  "router_initialized": true
}
Liveness Check
GET /health/liveliness
Check if the proxy is alive (basic ping).
Response
Returns 200 OK if the proxy process is running.
Example
curl http://localhost:4000/health/liveliness
Model Health
GET /health/models
Get health status of all configured models.
Response
List of healthy model deployments. Last health check timestamp.
List of unhealthy model deployments.
Example
curl http://localhost:4000/health/models
Response:
{
  "healthy": [
    {
      "model": "gpt-4",
      "api_base": "https://api.openai.com/v1",
      "status": "healthy",
      "last_check": "2024-01-15T10:30:00Z"
    },
    {
      "model": "claude-2",
      "api_base": "https://api.anthropic.com/v1",
      "status": "healthy",
      "last_check": "2024-01-15T10:30:00Z"
    }
  ],
  "unhealthy": []
}
import requests

# Fetch per-model health and print healthy/unhealthy deployments.
response = requests.get("http://localhost:4000/health/models")
health_data = response.json()

print("Healthy Models:")
for model in health_data["healthy"]:
    print(f"  - {model['model']}: {model['status']}")

# Only print the unhealthy section when there is something to report.
if health_data["unhealthy"]:
    print("\nUnhealthy Models:")
    for model in health_data["unhealthy"]:
        print(f"  - {model['model']}: {model['status']}")
Service Health
GET /health/services
Check health of connected services (database, Redis, etc.).
Response
Database connection status. Status: "connected" or "disconnected"
Connection latency in milliseconds.
Example
curl http://localhost:4000/health/services
Response:
{
  "database": {
    "status": "connected",
    "latency_ms": 2.5
  },
  "redis": {
    "status": "connected",
    "latency_ms": 1.2
  },
  "router": {
    "status": "initialized",
    "models": 10
  }
}
Monitoring Integration
Kubernetes Probes
apiVersion: v1
kind: Pod
metadata:
  name: litellm-proxy
spec:
  containers:
    - name: litellm
      image: ghcr.io/berriai/litellm:main-latest
      ports:
        - containerPort: 4000
      # Restart the container when the basic ping endpoint stops answering.
      livenessProbe:
        httpGet:
          path: /health/liveliness
          port: 4000
        initialDelaySeconds: 30
        periodSeconds: 10
      # Withhold traffic until the proxy reports it is ready to serve.
      readinessProbe:
        httpGet:
          path: /health/readiness
          port: 4000
        initialDelaySeconds: 10
        periodSeconds: 5
Docker Health Check
FROM ghcr.io/berriai/litellm:main-latest
# Poll /health every 30s (10s timeout, 5s startup grace); mark the
# container unhealthy after 3 consecutive failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:4000/health || exit 1
Monitoring Script
import requests
import time
import smtplib
from email.message import EmailMessage

PROXY_URL = "http://localhost:4000"
# Placeholder address — replace with your real alert recipient.
ALERT_EMAIL = "admin@example.com"


def check_health():
    """Poll the proxy /health endpoint and raise alerts on problems.

    Returns True when the proxy reports "healthy", False otherwise.
    Network failures are treated as unhealthy and also trigger an alert.
    """
    try:
        response = requests.get(f"{PROXY_URL}/health", timeout=10)
        health = response.json()

        if health["status"] != "healthy":
            send_alert(f"Proxy unhealthy: {health}")
            return False

        # Check for unhealthy models: the proxy can be healthy overall
        # while individual deployments are failing.
        if health.get("unhealthy_endpoints"):
            models = [m["model"] for m in health["unhealthy_endpoints"]]
            send_alert(f"Unhealthy models: {', '.join(models)}")

        return True
    except requests.exceptions.RequestException as e:
        send_alert(f"Health check failed: {e}")
        return False


def send_alert(message):
    """Build an email alert; SMTP delivery is left as a stub to configure."""
    msg = EmailMessage()
    msg["Subject"] = "LiteLLM Proxy Alert"
    msg["From"] = "monitor@example.com"
    msg["To"] = ALERT_EMAIL
    msg.set_content(message)

    # Send email (configure SMTP server)
    # smtp = smtplib.SMTP(...)
    # smtp.send_message(msg)
    print(f"ALERT: {message}")


if __name__ == "__main__":
    # Monitor continuously; the guard keeps the loop from running on import.
    while True:
        check_health()
        time.sleep(60)  # Check every minute
Prometheus Metrics
LiteLLM proxy exposes Prometheus metrics at /metrics:
curl http://localhost:4000/metrics
Metrics include:
Request counts
Request latencies
Error rates
Token usage
Cost tracking
Model health status
import requests
import json


# DataDog
def report_to_datadog():
    """Fetch proxy health and forward it to DataDog (client config required)."""
    health = requests.get("http://localhost:4000/health").json()
    # Send to DataDog (configure DataDog client)
    # datadog.statsd.gauge('litellm.health', 1 if health['status'] == 'healthy' else 0)
    # datadog.statsd.gauge('litellm.healthy_models', len(health.get('healthy_endpoints', [])))


# Grafana Cloud
def report_to_grafana():
    """Fetch proxy health and forward it to Grafana Cloud (client config required)."""
    health = requests.get("http://localhost:4000/health").json()
    # Send to Grafana (configure Grafana client)
    pass


# New Relic
def report_to_newrelic():
    """Fetch proxy health and forward it to New Relic (client config required)."""
    health = requests.get("http://localhost:4000/health").json()
    # Send to New Relic (configure New Relic client)
    pass
Common Health Check Patterns
Basic Health Check
import requests


def is_proxy_healthy():
    """Return True when /health answers 200 with status "healthy"."""
    try:
        response = requests.get("http://localhost:4000/health", timeout=5)
        return response.status_code == 200 and response.json()["status"] == "healthy"
    # Narrowed from a bare `except:` — catch network errors, malformed
    # JSON (ValueError), and a missing "status" key (KeyError) without
    # swallowing KeyboardInterrupt/SystemExit.
    except (requests.exceptions.RequestException, ValueError, KeyError):
        return False


if is_proxy_healthy():
    print("Proxy is healthy")
else:
    print("Proxy is unhealthy")
Detailed Health Check
import requests


def detailed_health_check():
    """Print a human-readable summary of the proxy /health response."""
    response = requests.get("http://localhost:4000/health")
    health = response.json()

    print(f"Status: {health['status']}")
    print(f"Version: {health['litellm_version']}")

    # Service connectivity fields are only present when configured.
    if health.get('db'):
        print(f"Database: {health['db']}")
    if health.get('redis'):
        print(f"Redis: {health['redis']}")

    healthy_count = len(health.get('healthy_endpoints', []))
    unhealthy_count = len(health.get('unhealthy_endpoints', []))

    print("\nModels:")
    print(f"  Healthy: {healthy_count}")
    print(f"  Unhealthy: {unhealthy_count}")

    if unhealthy_count > 0:
        print("\nUnhealthy Models:")
        for model in health['unhealthy_endpoints']:
            print(f"  - {model['model']}: {model.get('error', 'Unknown error')}")


detailed_health_check()