
Overview

LiteLLM’s prompt management system allows you to store, version, and dynamically inject prompts from external prompt management tools. This enables:
  • Centralized prompt storage and versioning
  • A/B testing different prompt versions
  • Dynamic prompt updates without code changes
  • Team collaboration on prompt engineering
  • Integration with prompt management platforms

Supported Platforms

  • Langfuse: Full-featured prompt management with versioning
  • Custom: Build your own prompt management integration

Quick Start

1. Configure Prompt Management

Set up your prompt management integration:
import litellm
from litellm.integrations.langfuse import LangfusePromptManagement

# Initialize Langfuse prompt management
litellm.prompt_management = LangfusePromptManagement(
    langfuse_public_key="pk_...",
    langfuse_secret_key="sk_...",
    langfuse_host="https://cloud.langfuse.com"
)

2. Use Prompts in Completions

Reference prompts by ID:
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "What is AI?"}],
    prompt_id="my-prompt-template",
    prompt_variables={
        "topic": "artificial intelligence",
        "detail_level": "beginner"
    }
)

print(response.choices[0].message.content)
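
The prompt_id refers to a template stored in your prompt management platform, and prompt_variables are substituted into its placeholders before the request is sent. As a rough illustration only (placeholder syntax depends on the platform; Langfuse, for example, uses {{variable}}-style placeholders), the stored template might look like this:

# Hypothetical stored template for "my-prompt-template" -- illustration only;
# the actual placeholder syntax depends on your prompt management platform
stored_template = [
    {
        "role": "system",
        "content": "You are a tutor. Explain {{topic}} at a {{detail_level}} level.",
    }
]

# With the prompt_variables above, the compiled system message becomes:
# "You are a tutor. Explain artificial intelligence at a beginner level."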

3. Version Your Prompts

Specify prompt versions or labels:
# Use specific version
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_version=3
)

# Use labeled version
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_label="production"
)

Creating Custom Prompt Management

Implement the Base Class

from litellm.integrations.prompt_management_base import PromptManagementBase
from litellm.types.prompts.init_prompts import PromptSpec
from typing import Optional, List, Dict, Tuple

class CustomPromptManagement(PromptManagementBase):
    @property
    def integration_name(self) -> str:
        """Unique identifier for your integration"""
        return "my-prompt-manager"
    
    def should_run_prompt_management(
        self,
        prompt_id: Optional[str],
        prompt_spec: Optional[PromptSpec],
        dynamic_callback_params,
    ) -> bool:
        """
        Determine if prompt management should be activated.
        
        Returns:
            True if prompt_id is provided or prompt_spec exists
        """
        return prompt_id is not None or prompt_spec is not None
    
    def _compile_prompt_helper(
        self,
        prompt_id: Optional[str],
        prompt_spec: Optional[PromptSpec],
        prompt_variables: Optional[dict],
        dynamic_callback_params,
        prompt_label: Optional[str] = None,
        prompt_version: Optional[int] = None,
    ) -> dict:
        """
        Fetch and compile prompt from your backend.
        
        Returns:
            Dict with keys:
            - prompt_id: str
            - prompt_template: List[Message]
            - prompt_template_model: Optional[str]
            - prompt_template_optional_params: Optional[Dict]
            - completed_messages: Optional[List[Message]]
        """
        # Fetch prompt from your backend
        prompt_data = self._fetch_prompt(
            prompt_id=prompt_id,
            version=prompt_version,
            label=prompt_label
        )
        
        # Replace variables
        compiled_messages = self._replace_variables(
            template=prompt_data["messages"],
            variables=prompt_variables or {}
        )
        
        return {
            "prompt_id": prompt_id,
            "prompt_template": compiled_messages,
            "prompt_template_model": prompt_data.get("model"),
            "prompt_template_optional_params": prompt_data.get("params"),
            "completed_messages": None  # Will be merged with client messages
        }
    
    async def async_compile_prompt_helper(
        self,
        prompt_id: Optional[str],
        prompt_variables: Optional[dict],
        dynamic_callback_params,
        prompt_spec: Optional[PromptSpec] = None,
        prompt_label: Optional[str] = None,
        prompt_version: Optional[int] = None,
    ) -> dict:
        """
        Async version of _compile_prompt_helper.
        """
        # Async fetch from backend
        prompt_data = await self._async_fetch_prompt(
            prompt_id=prompt_id,
            version=prompt_version,
            label=prompt_label
        )
        
        compiled_messages = self._replace_variables(
            template=prompt_data["messages"],
            variables=prompt_variables or {}
        )
        
        return {
            "prompt_id": prompt_id,
            "prompt_template": compiled_messages,
            "prompt_template_model": prompt_data.get("model"),
            "prompt_template_optional_params": prompt_data.get("params"),
            "completed_messages": None
        }
    
    def _fetch_prompt(self, prompt_id, version=None, label=None):
        """Sync fetch from your backend"""
        # Implement your API call
        import httpx
        
        response = httpx.get(
            f"https://your-api.com/prompts/{prompt_id}",
            params={"version": version, "label": label}
        )
        
        return response.json()
    
    async def _async_fetch_prompt(self, prompt_id, version=None, label=None):
        """Async fetch from your backend"""
        import httpx
        
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"https://your-api.com/prompts/{prompt_id}",
                params={"version": version, "label": label}
            )
            
            return response.json()
    
    def _replace_variables(self, template: List[dict], variables: dict) -> List[dict]:
        """Replace {variable} placeholders in template"""
        compiled = []
        
        for message in template:
            content = message["content"]
            
            # Replace variables
            for key, value in variables.items():
                content = content.replace(f"{{{key}}}", str(value))
            
            compiled.append({
                "role": message["role"],
                "content": content
            })
        
        return compiled

# Register your prompt management
import litellm

litellm.prompt_management = CustomPromptManagement()

Using Your Custom Integration

# Use prompts from your system
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Additional context"}],
    prompt_id="customer-support-template",
    prompt_variables={
        "customer_name": "John Doe",
        "issue_type": "billing",
        "priority": "high"
    }
)

Advanced Features

Override Model from Prompt

# Prompt template can specify the model
response = await litellm.acompletion(
    model="gpt-3.5-turbo",  # Fallback model
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="special-model-prompt",  # May override to gpt-4
    ignore_prompt_manager_model=False  # Allow override (default)
)

# Force use of specified model
response = await litellm.acompletion(
    model="gpt-3.5-turbo",  # Always use this
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="special-model-prompt",
    ignore_prompt_manager_model=True  # Ignore prompt's model
)

Override Parameters from Prompt

# Prompt template can include parameters (temperature, max_tokens, etc.)
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="creative-prompt",  # May set temperature=0.9
    temperature=0.5,  # Overridden by prompt
    ignore_prompt_manager_optional_params=False  # Allow override
)

# Keep your parameters
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="creative-prompt",
    temperature=0.5,  # Use this instead
    ignore_prompt_manager_optional_params=True  # Ignore prompt's params
)

Message Merging

Prompt templates are prepended to your messages:
# Prompt template:
# [
#   {"role": "system", "content": "You are a helpful assistant."},
#   {"role": "user", "content": "Context: {context}"}
# ]

response = await litellm.acompletion(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "What is the answer?"}
    ],
    prompt_id="template-with-context",
    prompt_variables={"context": "Paris is the capital of France."}
)

# Final messages sent to model:
# [
#   {"role": "system", "content": "You are a helpful assistant."},
#   {"role": "user", "content": "Context: Paris is the capital of France."},
#   {"role": "user", "content": "What is the answer?"}
# ]
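
Conceptually the merge is a simple prepend: the compiled template messages come first, followed by the messages you pass to the completion call. A minimal sketch of that ordering (merge_prompt_messages is a hypothetical helper for illustration, not a LiteLLM API):

from typing import List

def merge_prompt_messages(
    template_messages: List[dict], client_messages: List[dict]
) -> List[dict]:
    """Illustrates the merge order: template messages first, then client messages."""
    return [*template_messages, *client_messages]

template = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Context: Paris is the capital of France."},
]
client = [{"role": "user", "content": "What is the answer?"}]

final_messages = merge_prompt_messages(template, client)
# -> system message, compiled context message, then the client's question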

Langfuse Integration

import litellm
from litellm.integrations.langfuse import LangfusePromptManagement

# Initialize
litellm.prompt_management = LangfusePromptManagement(
    langfuse_public_key="pk_lf_...",
    langfuse_secret_key="sk_lf_...",
    langfuse_host="https://cloud.langfuse.com"  # or your self-hosted URL
)

# Use with versioning
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_version=2,  # Specific version
    prompt_variables={"var1": "value1"}
)

# Use with labels
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_label="production",  # Use labeled version
    prompt_variables={"var1": "value1"}
)

Using with LiteLLM Proxy

Configure prompt management in your proxy config:
config.yaml
prompt_management:
  provider: langfuse
  langfuse_public_key: pk_lf_...
  langfuse_secret_key: sk_lf_...
  langfuse_host: https://cloud.langfuse.com
Then use it via the OpenAI SDK:
import openai

client = openai.OpenAI(
    api_key="proxy-key",
    base_url="http://localhost:4000"
)

# Prompt management via extra_body
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={
        "prompt_id": "customer-support",
        "prompt_variables": {
            "customer_name": "Jane",
            "issue": "login problem"
        }
    }
)

Best Practices

Always use versioning for production prompts:
# Good: Explicit version
response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="prod-prompt",
    prompt_version=5  # Locked version
)

# Or use labels
response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="prod-prompt",
    prompt_label="stable"  # Points to tested version
)

# Risky: Always latest
response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="prod-prompt"  # Gets latest, may change unexpectedly
)
Check that all required variables are provided:
def validate_variables(prompt_id: str, variables: dict) -> bool:
    required_vars = get_required_variables(prompt_id)
    return all(var in variables for var in required_vars)

if validate_variables("my-prompt", prompt_variables):
    response = await litellm.acompletion(
        model="gpt-4",
        messages=messages,
        prompt_id="my-prompt",
        prompt_variables=prompt_variables
    )
else:
    raise ValueError("Missing required prompt variables")
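
get_required_variables in the snippet above is left to you. One possible implementation, assuming single-brace {variable} placeholders like the ones _replace_variables in the custom integration handles, is to scan the raw (uncompiled) template messages with a regex. The wrapper above looks prompts up by prompt_id, whereas this sketch works on the template messages directly, so fetch them first (for example with the custom integration's _fetch_prompt) and adapt accordingly:

import re
from typing import List, Set

def get_required_variables(template_messages: List[dict]) -> Set[str]:
    """Collect single-brace {placeholder} names from uncompiled template messages."""
    required: Set[str] = set()
    for message in template_messages:
        required.update(re.findall(r"\{(\w+)\}", message["content"]))
    return required

# Example: a template with two placeholders
template = [{"role": "user", "content": "Summarize {topic} for a {audience} reader."}]
assert get_required_variables(template) == {"topic", "audience"}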
Test different prompt versions:
import random

# Random A/B test
prompt_version = random.choice([1, 2])

response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="experiment-prompt",
    prompt_version=prompt_version,
    metadata={"experiment": f"version_{prompt_version}"}  # Track in logs
)
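
Random assignment per request means the same user can see different prompt versions across calls. If you need sticky experiments, one common alternative is to hash a stable identifier into a bucket (a sketch assuming a user_id is available in your application):

import hashlib

def assign_prompt_version(user_id: str, versions=(1, 2)) -> int:
    """Deterministically map a user to a prompt version (stable across requests)."""
    bucket = int(hashlib.sha256(user_id.encode()).hexdigest(), 16) % len(versions)
    return versions[bucket]

prompt_version = assign_prompt_version("user-123")

response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="experiment-prompt",
    prompt_version=prompt_version,
    metadata={"experiment": f"version_{prompt_version}"}
)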
Reduce API calls by caching:
from functools import lru_cache

@lru_cache(maxsize=100)
def get_cached_prompt(prompt_id: str, version: int):
    return litellm.prompt_management._compile_prompt_helper(
        prompt_id=prompt_id,
        prompt_spec=None,
        prompt_version=version,
        prompt_variables={},
        dynamic_callback_params={}
    )

# Use cached version for static prompts
prompt = get_cached_prompt("static-prompt", 3)
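
Note that lru_cache never expires entries, so a prompt updated in your backend will not be picked up until the process restarts. If that matters, a small time-based cache is one option; here is a sketch using only the standard library (the 5-minute TTL is an arbitrary choice):

import time

_PROMPT_CACHE: dict = {}
_TTL_SECONDS = 300  # arbitrary: re-fetch a prompt at most every 5 minutes

def get_prompt_with_ttl(prompt_id: str, version: int):
    key = (prompt_id, version)
    cached = _PROMPT_CACHE.get(key)
    if cached is not None and time.monotonic() - cached[0] < _TTL_SECONDS:
        return cached[1]
    prompt = litellm.prompt_management._compile_prompt_helper(
        prompt_id=prompt_id,
        prompt_spec=None,
        prompt_version=version,
        prompt_variables={},
        dynamic_callback_params={}
    )
    _PROMPT_CACHE[key] = (time.monotonic(), prompt)
    return prompt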

Reference

Source Code

  • Base class: litellm/integrations/prompt_management_base.py:22
  • Custom prompt management: litellm/proxy/custom_prompt_management.py:10
  • Langfuse integration: litellm/integrations/langfuse/langfuse_prompt_management.py

Response Format

Prompt management returns:
{
    "prompt_id": str,
    "prompt_template": List[AllMessageValues],
    "prompt_template_model": Optional[str],
    "prompt_template_optional_params": Optional[Dict[str, Any]],
    "completed_messages": Optional[List[AllMessageValues]]
}
