Overview
LiteLLM’s prompt management system allows you to store, version, and dynamically inject prompts from external prompt management tools. This enables:
- Centralized prompt storage and versioning
- A/B testing different prompt versions
- Dynamic prompt updates without code changes
- Team collaboration on prompt engineering
- Integration with prompt management platforms
Supported Platforms
- Langfuse: Full-featured prompt management with versioning
- Custom: Build your own prompt management integration
Quick Start
Configure Prompt Management
Set up your prompt management integration:
import litellm
from litellm.integrations.langfuse import LangfusePromptManagement
# Initialize Langfuse prompt management
litellm.prompt_management = LangfusePromptManagement(
langfuse_public_key="pk_...",
langfuse_secret_key="sk_...",
langfuse_host="https://cloud.langfuse.com"
)
Use Prompts in Completions
Reference prompts by ID:
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "What is AI?"}],
prompt_id="my-prompt-template",
prompt_variables={
"topic": "artificial intelligence",
"detail_level": "beginner"
}
)
print(response.choices[0].message.content)
Version Your Prompts
Specify prompt versions or labels:
# Use specific version
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Query"}],
prompt_id="my-prompt",
prompt_version=3
)
# Use labeled version
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Query"}],
prompt_id="my-prompt",
prompt_label="production"
)
Creating Custom Prompt Management
Implement the Base Class
from litellm.integrations.prompt_management_base import PromptManagementBase
from litellm.types.prompts.init_prompts import PromptSpec
from typing import Optional, List, Dict, Tuple
class CustomPromptManagement(PromptManagementBase):
@property
def integration_name(self) -> str:
"""Unique identifier for your integration"""
return "my-prompt-manager"
def should_run_prompt_management(
self,
prompt_id: Optional[str],
prompt_spec: Optional[PromptSpec],
dynamic_callback_params,
) -> bool:
"""
Determine if prompt management should be activated.
Returns:
True if prompt_id is provided or prompt_spec exists
"""
return prompt_id is not None or prompt_spec is not None
def _compile_prompt_helper(
self,
prompt_id: Optional[str],
prompt_spec: Optional[PromptSpec],
prompt_variables: Optional[dict],
dynamic_callback_params,
prompt_label: Optional[str] = None,
prompt_version: Optional[int] = None,
) -> dict:
"""
Fetch and compile prompt from your backend.
Returns:
Dict with keys:
- prompt_id: str
- prompt_template: List[Message]
- prompt_template_model: Optional[str]
- prompt_template_optional_params: Optional[Dict]
- completed_messages: Optional[List[Message]]
"""
# Fetch prompt from your backend
prompt_data = self._fetch_prompt(
prompt_id=prompt_id,
version=prompt_version,
label=prompt_label
)
# Replace variables
compiled_messages = self._replace_variables(
template=prompt_data["messages"],
variables=prompt_variables or {}
)
return {
"prompt_id": prompt_id,
"prompt_template": compiled_messages,
"prompt_template_model": prompt_data.get("model"),
"prompt_template_optional_params": prompt_data.get("params"),
"completed_messages": None # Will be merged with client messages
}
async def async_compile_prompt_helper(
self,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params,
prompt_spec: Optional[PromptSpec] = None,
prompt_label: Optional[str] = None,
prompt_version: Optional[int] = None,
) -> dict:
"""
Async version of _compile_prompt_helper.
"""
# Async fetch from backend
prompt_data = await self._async_fetch_prompt(
prompt_id=prompt_id,
version=prompt_version,
label=prompt_label
)
compiled_messages = self._replace_variables(
template=prompt_data["messages"],
variables=prompt_variables or {}
)
return {
"prompt_id": prompt_id,
"prompt_template": compiled_messages,
"prompt_template_model": prompt_data.get("model"),
"prompt_template_optional_params": prompt_data.get("params"),
"completed_messages": None
}
def _fetch_prompt(self, prompt_id, version=None, label=None):
"""Sync fetch from your backend"""
# Implement your API call
import httpx
response = httpx.get(
f"https://your-api.com/prompts/{prompt_id}",
params={"version": version, "label": label}
)
return response.json()
async def _async_fetch_prompt(self, prompt_id, version=None, label=None):
"""Async fetch from your backend"""
import httpx
async with httpx.AsyncClient() as client:
response = await client.get(
f"https://your-api.com/prompts/{prompt_id}",
params={"version": version, "label": label}
)
return response.json()
def _replace_variables(self, template: List[dict], variables: dict) -> List[dict]:
"""Replace {variable} placeholders in template"""
compiled = []
for message in template:
content = message["content"]
# Replace variables
for key, value in variables.items():
content = content.replace(f"{{{key}}}", str(value))
compiled.append({
"role": message["role"],
"content": content
})
return compiled
# Register your prompt management
import litellm
litellm.prompt_management = CustomPromptManagement()
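A quick sanity check of the variable substitution, calling the helper on the class directly (the template content here is purely illustrative):
pm = CustomPromptManagement()
compiled = pm._replace_variables(
    template=[{"role": "user", "content": "Summarize {topic} for a {detail_level} audience."}],
    variables={"topic": "prompt management", "detail_level": "beginner"},
)
# -> [{'role': 'user', 'content': 'Summarize prompt management for a beginner audience.'}]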
Using Your Custom Integration
# Use prompts from your system
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Additional context"}],
prompt_id="customer-support-template",
prompt_variables={
"customer_name": "John Doe",
"issue_type": "billing",
"priority": "high"
}
)
Advanced Features
Override Model from Prompt
# Prompt template can specify the model
response = await litellm.acompletion(
model="gpt-3.5-turbo", # Fallback model
messages=[{"role": "user", "content": "Hello"}],
prompt_id="special-model-prompt", # May override to gpt-4
ignore_prompt_manager_model=False # Allow override (default)
)
# Force use of specified model
response = await litellm.acompletion(
model="gpt-3.5-turbo", # Always use this
messages=[{"role": "user", "content": "Hello"}],
prompt_id="special-model-prompt",
ignore_prompt_manager_model=True # Ignore prompt's model
)
Override Parameters from Prompt
# Prompt template can include parameters (temperature, max_tokens, etc.)
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello"}],
prompt_id="creative-prompt", # May set temperature=0.9
temperature=0.5, # Overridden by prompt
ignore_prompt_manager_optional_params=False # Allow override
)
# Keep your parameters
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello"}],
prompt_id="creative-prompt",
temperature=0.5, # Use this instead
ignore_prompt_manager_optional_params=True # Ignore prompt's params
)
Message Merging
Prompt templates are prepended to your messages:
# Prompt template:
# [
# {"role": "system", "content": "You are a helpful assistant."},
# {"role": "user", "content": "Context: {context}"}
# ]
response = await litellm.acompletion(
model="gpt-4",
messages=[
{"role": "user", "content": "What is the answer?"}
],
prompt_id="template-with-context",
prompt_variables={"context": "Paris is the capital of France."}
)
# Final messages sent to model:
# [
# {"role": "system", "content": "You are a helpful assistant."},
# {"role": "user", "content": "Context: Paris is the capital of France."},
# {"role": "user", "content": "What is the answer?"}
# ]
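If you build this merging yourself in a custom integration, the behavior above amounts to prepending the compiled template to the caller's messages; a minimal sketch (merge_prompt_messages is an illustrative helper, not a LiteLLM API):
from typing import Dict, List

def merge_prompt_messages(
    template_messages: List[Dict[str, str]],
    client_messages: List[Dict[str, str]],
) -> List[Dict[str, str]]:
    # Compiled template messages come first, then whatever the caller sent
    return [*template_messages, *client_messages]

final_messages = merge_prompt_messages(
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Context: Paris is the capital of France."},
    ],
    [{"role": "user", "content": "What is the answer?"}],
)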
Langfuse Integration
Setup Langfuse Prompt Management
import litellm
from litellm.integrations.langfuse import LangfusePromptManagement
# Initialize
litellm.prompt_management = LangfusePromptManagement(
langfuse_public_key="pk_lf_...",
langfuse_secret_key="sk_lf_...",
langfuse_host="https://cloud.langfuse.com" # or your self-hosted URL
)
# Use with versioning
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Query"}],
prompt_id="my-prompt",
prompt_version=2, # Specific version
prompt_variables={"var1": "value1"}
)
# Use with labels
response = await litellm.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Query"}],
prompt_id="my-prompt",
prompt_label="production", # Use labeled version
prompt_variables={"var1": "value1"}
)
Using with LiteLLM Proxy
Configure prompt management in your proxy config (config.yaml):
prompt_management:
provider: langfuse
langfuse_public_key: pk_lf_...
langfuse_secret_key: sk_lf_...
langfuse_host: https://cloud.langfuse.com
Then call the proxy using the OpenAI SDK:
import openai
client = openai.OpenAI(
api_key="proxy-key",
base_url="http://localhost:4000"
)
# Prompt management via extra_body
response = client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": "Hello"}],
extra_body={
"prompt_id": "customer-support",
"prompt_variables": {
"customer_name": "Jane",
"issue": "login problem"
}
}
)
Best Practices
Version Control Your Prompts
Always use versioning for production prompts:
# Good: Explicit version
response = await litellm.acompletion(
model="gpt-4",
messages=messages,
prompt_id="prod-prompt",
prompt_version=5 # Locked version
)
# Or use labels
response = await litellm.acompletion(
model="gpt-4",
messages=messages,
prompt_id="prod-prompt",
prompt_label="stable" # Points to tested version
)
# Risky: Always latest
response = await litellm.acompletion(
model="gpt-4",
messages=messages,
prompt_id="prod-prompt" # Gets latest, may change unexpectedly
)
Validate Prompt Variables
Check that all required variables are provided (a sketch of get_required_variables follows the example):
def validate_variables(prompt_id: str, variables: dict) -> bool:
required_vars = get_required_variables(prompt_id)
return all(var in variables for var in required_vars)
if validate_variables("my-prompt", prompt_variables):
response = await litellm.acompletion(
model="gpt-4",
messages=messages,
prompt_id="my-prompt",
prompt_variables=prompt_variables
)
else:
raise ValueError("Missing required prompt variables")
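get_required_variables above is not a LiteLLM helper; one possible implementation, assuming placeholders use the {variable} syntax, that litellm.prompt_management is the custom integration registered earlier, and reusing its illustrative _fetch_prompt call:
import re

import litellm

def get_required_variables(prompt_id: str) -> set:
    # Fetch the raw (uncompiled) template and collect its {placeholder} names
    prompt_data = litellm.prompt_management._fetch_prompt(prompt_id=prompt_id)
    required = set()
    for message in prompt_data["messages"]:
        required.update(re.findall(r"\{(\w+)\}", message["content"]))
    return required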
A/B Test Prompts
Test different prompt versions:
import random
# Random A/B test
prompt_version = random.choice([1, 2])
response = await litellm.acompletion(
model="gpt-4",
messages=messages,
prompt_id="experiment-prompt",
prompt_version=prompt_version,
metadata={"experiment": f"version_{prompt_version}"} # Track in logs
)
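random.choice gives each request an independent coin flip; if a given user should always see the same variant, a hash-based assignment keeps the split deterministic (a sketch; the user_id argument and even split are assumptions):
import hashlib

def assign_prompt_version(user_id: str, versions=(1, 2)) -> int:
    # Hash the user id into a stable bucket so the same user always sees the same variant
    bucket = int(hashlib.sha256(user_id.encode()).hexdigest(), 16) % len(versions)
    return versions[bucket]

prompt_version = assign_prompt_version("user-123")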
Cache Compiled Prompts
Reduce API calls by caching:
from functools import lru_cache
@lru_cache(maxsize=100)
def get_cached_prompt(prompt_id: str, version: int):
    return litellm.prompt_management._compile_prompt_helper(
        prompt_id=prompt_id,
        prompt_spec=None,
        prompt_version=version,
        prompt_variables={},
        dynamic_callback_params={}
    )
# Use cached version for static prompts
prompt = get_cached_prompt("static-prompt", 3)
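Note that lru_cache never expires entries, so edits to a cached prompt are not picked up until the process restarts; adding a time-bucketed argument is a simple way to approximate a TTL (a sketch reusing the lru_cache import above; the 5-minute window is an arbitrary choice):
import time

@lru_cache(maxsize=100)
def get_cached_prompt_with_ttl(prompt_id: str, version: int, _ttl_bucket: int = 0):
    return litellm.prompt_management._compile_prompt_helper(
        prompt_id=prompt_id,
        prompt_spec=None,
        prompt_version=version,
        prompt_variables={},
        dynamic_callback_params={},
    )

# The bucket changes every 300 seconds, so stale entries are recompiled at most 5 minutes late
prompt = get_cached_prompt_with_ttl("static-prompt", 3, _ttl_bucket=int(time.time() // 300))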
Reference
Source Code
- Base class: litellm/integrations/prompt_management_base.py:22
- Custom prompt management: litellm/proxy/custom_prompt_management.py:10
- Langfuse integration: litellm/integrations/langfuse/langfuse_prompt_management.py
Response Format
Prompt management returns:
{
"prompt_id": str,
"prompt_template": List[AllMessageValues],
"prompt_template_model": Optional[str],
"prompt_template_optional_params": Optional[Dict[str, Any]],
"completed_messages": Optional[List[AllMessageValues]]
}
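The same shape expressed as a TypedDict, if you want type checking in a custom integration (a sketch; the PromptManagementResult name is illustrative, and plain dicts stand in for AllMessageValues):
from typing import Any, Dict, List, Optional, TypedDict

class PromptManagementResult(TypedDict):
    prompt_id: str
    prompt_template: List[dict]  # AllMessageValues in LiteLLM's typing
    prompt_template_model: Optional[str]
    prompt_template_optional_params: Optional[Dict[str, Any]]
    completed_messages: Optional[List[dict]]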