Quick Start with Docker
The fastest way to run LiteLLM Proxy in production.
Pull the Image
docker pull ghcr.io/berriai/litellm:main-latest
Run with Docker
docker run -d \
--name litellm-proxy \
-p 4000:4000 \
-e OPENAI_API_KEY=sk-... \
ghcr.io/berriai/litellm:main-latest
Docker Compose Setup
For production deployments with PostgreSQL and Prometheus.
Create docker-compose.yml
services:
  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    ports:
      - "4000:4000"  # quoted to avoid YAML sexagesimal parsing of port mappings
    volumes:
      # Proxy model/config file mounted read from the host
      - ./config.yaml:/app/config.yaml
    environment:
      # "db" resolves to the postgres service on the Compose network
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
      STORE_MODEL_IN_DB: "True"
    env_file:
      - .env  # provider API keys and master key live here, not in VCS
    depends_on:
      db:
        condition: service_healthy  # wait for postgres before starting the proxy
    healthcheck:
      test:
        - CMD-SHELL
        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s  # grace period for slow first boot (DB migrations)
    command:
      - "--config=/app/config.yaml"

  db:
    image: postgres:16
    restart: always
    container_name: litellm_db
    environment:
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090  # must match DATABASE_URL above
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data  # persist data across container restarts
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

  prometheus:
    image: prom/prometheus
    volumes:
      - prometheus_data:/prometheus
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=15d"  # keep 15 days of metrics
    restart: always

volumes:
  prometheus_data:
    driver: local
  postgres_data:
    name: litellm_postgres_data
Create config.yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY  # resolved from env at startup
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: os.environ/OPENAI_API_KEY
      rpm: 480      # requests-per-minute cap for this deployment
      timeout: 300  # per-request timeout (seconds)
  - model_name: claude-3-5-sonnet
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY

litellm_settings:
  drop_params: true            # silently drop params unsupported by a provider
  success_callback: ["prometheus"]  # emit metrics scraped by the prometheus service
  num_retries: 3
  request_timeout: 600
  telemetry: false

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  store_model_in_db: true
  database_url: os.environ/DATABASE_URL
Create .env File
# Provider API Keys (consumed by the model_list entries in config.yaml)
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
# LiteLLM Settings — master key gates all admin/API access; change before deploying
LITELLM_MASTER_KEY=sk-1234-change-this
DATABASE_URL=postgresql://llmproxy:dbpassword9090@db:5432/litellm
# Optional — only needed when the Redis cache service is enabled
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_PASSWORD=your-redis-password
Never commit .env files to version control. Add .env to your .gitignore.
Create prometheus.yml
global:
  scrape_interval: 15s       # how often to scrape targets
  evaluation_interval: 15s   # how often to evaluate rules

scrape_configs:
  - job_name: 'litellm'
    static_configs:
      # Service name on the Compose network; LiteLLM exposes /metrics on its app port
      - targets: ['litellm:4000']
Start the Stack
docker-compose up -d
Check Logs
docker-compose logs -f litellm
Wait for the message: Uvicorn running on http://0.0.0.0:4000
Verify Health
curl http://localhost:4000/health
Access UI
Open browser to http://localhost:4000/ui
Dockerfile Reference
The LiteLLM Dockerfile uses a multi-stage build process:
# Base images — Wolfi (Chainguard) minimal base for both stages
ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/wolfi-base
ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/wolfi-base
# Builder stage: compiles the wheel and the Admin UI; discarded at runtime
FROM $LITELLM_BUILD_IMAGE AS builder
WORKDIR /app
USER root
# Install build dependencies (compiler + Python headers for native extensions)
RUN apk add --no-cache bash gcc py3-pip python3 python3-dev openssl openssl-dev
RUN python -m pip install build
# Copy source and build
COPY . .
# Build Admin UI (sed strips Windows CRLF line endings before executing)
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && \
chmod +x docker/build_admin_ui.sh && \
./docker/build_admin_ui.sh
# Build Python package
RUN rm -rf dist/* && python -m build
RUN pip install dist/*.whl
# Pre-build wheels for all requirements so the runtime stage can install offline
RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
# Runtime stage: only runtime deps plus the prebuilt wheels
FROM $LITELLM_RUNTIME_IMAGE AS runtime
USER root
WORKDIR /app
# Install runtime dependencies (nodejs/npm for Prisma, libsndfile for audio)
RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile
# Copy built artifacts
COPY --from=builder /app/dist/*.whl .
COPY --from=builder /wheels/ /wheels/
# Install package offline from the local wheel cache, then drop the cache
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && \
rm -f *.whl && rm -rf /wheels
# Generate Prisma client (DB access layer used by the proxy)
RUN prisma generate --schema=./litellm/proxy/schema.prisma
# Setup entrypoint (CRLF-strip + chmod, as for the UI build script)
COPY . .
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp
RUN apk add --no-cache supervisor
COPY docker/supervisord.conf /etc/supervisord.conf
# CMD args are appended to the entrypoint invocation
ENTRYPOINT ["docker/prod_entrypoint.sh"]
CMD ["--port", "4000"]
Production Deployment
With Redis Cache
Add Redis to your docker-compose.yml:
services:
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    command: redis-server --appendonly yes  # AOF persistence so cache survives restarts
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5

volumes:
  redis_data:
    driver: local
Update config.yaml:
litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: redis  # Compose service name
    port: 6379

router_settings:
  redis_host: redis
  redis_port: 6379
Environment-Specific Configs
# docker-compose.prod.yml — production override: pinned stable tag, replicas, resource caps
services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable  # stable tag for production, not main-latest
    restart: always
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G
Kubernetes Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-proxy
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm-proxy
  template:
    metadata:
      labels:
        app: litellm-proxy
    spec:
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:main-stable
          ports:
            - containerPort: 4000
          env:
            # Credentials come from the litellm-secrets Secret, never from the manifest
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: litellm-secrets
                  key: database-url
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secrets
                  key: master-key
          volumeMounts:
            # subPath mounts a single file rather than shadowing /app
            - name: config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 20
            periodSeconds: 5
      volumes:
        - name: config
          configMap:
            name: litellm-config
---
apiVersion: v1
kind: Service
metadata:
  name: litellm-proxy
spec:
  selector:
    app: litellm-proxy
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer
Monitoring & Logs
View Logs
# All services
docker-compose logs -f
# Specific service
docker-compose logs -f litellm
# Last 100 lines
docker-compose logs --tail=100 litellm
Access Prometheus
Open http://localhost:9090 to view Prometheus metrics.
Useful queries:
litellm_requests_total - Total requests
litellm_request_duration_seconds - Request latency
litellm_spend_total - Total spend
Health Checks
# Liveness (is container running)
curl http://localhost:4000/health/liveliness
# Readiness (can accept traffic)
curl http://localhost:4000/health/readiness
# Full health check
curl http://localhost:4000/health
Maintenance
Update to Latest Version
docker-compose pull
docker-compose up -d
Backup Database
docker exec litellm_db pg_dump -U llmproxy litellm > backup.sql
Restore Database
cat backup.sql | docker exec -i litellm_db psql -U llmproxy litellm
Scale Services
# Scale to 3 replicas
docker-compose up -d --scale litellm=3
Troubleshooting
Container Won’t Start
Check logs:
docker-compose logs litellm
Common issues:
- Database not ready: Wait for DB health check
- Port conflict: Change port mapping
- Invalid config: Validate YAML syntax
High Memory Usage
Increase memory limits:
services:
  litellm:
    deploy:
      resources:
        limits:
          memory: 4G  # raise from the 2G default shown in the prod override
Database Connection Issues
Verify DATABASE_URL:
docker exec litellm env | grep DATABASE_URL
Test connection:
docker exec litellm_db psql -U llmproxy -d litellm -c "SELECT 1;"