Skip to main content
The official OpenAI Python SDK works seamlessly with Codex-LB. Simply point it at Codex-LB’s /v1 endpoint to leverage account pooling and usage tracking.

Endpoint

http://127.0.0.1:2455/v1
The OpenAI SDK uses the standard OpenAI-compatible /v1 endpoint.

Installation

Install the OpenAI Python SDK:
pip install openai

Basic Usage

When API key authentication is disabled (default):
from openai import OpenAI

# Point the standard OpenAI client at Codex-LB's OpenAI-compatible /v1 endpoint.
client = OpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key="dummy",  # any string works when auth is disabled
)

# The request is routed through Codex-LB, which handles account pooling
# and usage tracking transparently.
response = client.chat.completions.create(
    model="gpt-5.3-codex",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)  # the assistant's reply text
The OpenAI SDK requires an api_key parameter. When auth is disabled, any string value works.

Complete Example

Here’s a complete example with error handling:
import os
from openai import OpenAI, OpenAIError

# Initialize client; CODEX_LB_API_KEY is used when set, otherwise "dummy"
# (any string works when auth is disabled).
client = OpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY", "dummy"),
)

def chat(prompt: str, model: str = "gpt-5.3-codex") -> str | None:
    """Send a chat completion request to Codex-LB.

    Args:
        prompt: The user message to send.
        model: Model ID to use (defaults to "gpt-5.3-codex").

    Returns:
        The assistant's reply text, or None if the request failed.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=1000,
        )
        return response.choices[0].message.content

    except OpenAIError as e:
        # Best-effort example: report the error and signal failure via None
        # rather than crashing the script.
        print(f"Error: {e}")
        return None

if __name__ == "__main__":
    result = chat("Write a hello world function in Python")
    if result:
        print(result)

Streaming Responses

Stream responses for real-time output:
from openai import OpenAI
import os

client = OpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY", "dummy"),  # "dummy" works when auth is disabled
)

# stream=True makes create() return an iterator of chunks instead of a
# single completed response.
stream = client.chat.completions.create(
    model="gpt-5.3-codex",
    messages=[{"role": "user", "content": "Write a story about AI"}],
    stream=True,
)

for chunk in stream:
    # Skip chunks without text content (the guard implies some chunks
    # carry no delta text); flush so output appears in real time.
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

print()  # newline at end

Advanced Features

Async Client

Use the async client for concurrent requests:
import asyncio
import os
from openai import AsyncOpenAI

client = AsyncOpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY", "dummy"),
)

async def chat_async(prompt: str) -> str:
    """Send one prompt to Codex-LB and return the assistant's reply text."""
    completion = await client.chat.completions.create(
        model="gpt-5.3-codex",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

async def main():
    """Run three requests concurrently and print each answer in order."""
    questions = [
        "What is Python?",
        "What is JavaScript?",
        "What is Rust?",
    ]
    answers = await asyncio.gather(*(chat_async(q) for q in questions))
    for answer in answers:
        print(answer)
        print("-" * 40)

if __name__ == "__main__":
    asyncio.run(main())

Function Calling

Use function calling (tools) for structured outputs:
# Tool (function) schema advertised to the model; the model may answer
# with a structured call to get_weather instead of plain text.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                # JSON Schema describing the function's arguments.
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. San Francisco"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]  # unit is optional
            }
        }
    }
]

response = client.chat.completions.create(
    model="gpt-5.3-codex",
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools,
    tool_choice="auto",  # let the model decide whether to call a tool
)

# tool_calls is set when the model chose to invoke a function; print the
# first call's name and raw arguments.
if response.choices[0].message.tool_calls:
    tool_call = response.choices[0].message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")

Vision (Image Input)

Send images to vision-capable models:
# Multi-part message content: a text part plus an image_url part, as
# accepted by vision-capable models.
response = client.chat.completions.create(
    model="gpt-4o",  # a vision-capable model
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        # Publicly reachable URL of the image to analyze.
                        "url": "https://example.com/image.jpg"
                    }
                }
            ]
        }
    ]
)
print(response.choices[0].message.content)

Configuration Options

| Parameter | Description | Default |
| --- | --- | --- |
| `base_url` | Codex-LB endpoint | Required |
| `api_key` | API key or any string | Required |
| `timeout` | Request timeout in seconds | 600 |
| `max_retries` | Max retry attempts | 2 |
| `default_headers` | Custom headers for all requests | `{}` |
Example with custom config:
# Client with explicit timeout, retry, and header configuration.
client = OpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY", "dummy"),
    timeout=120.0,  # seconds per request
    max_retries=3,  # automatic retries on transient failures
    default_headers={"X-Custom-Header": "value"},  # sent with every request
)

Verify Configuration

Test your connection:
from openai import OpenAI
import os

client = OpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY", "dummy"),  # "dummy" works when auth is disabled
)

# List available models — confirms the endpoint is reachable and shows
# which model IDs this Codex-LB instance exposes.
models = client.models.list()
print("Available models:")
for model in models.data:
    print(f"  - {model.id}")

# Test a simple completion — confirms end-to-end request routing works.
response = client.chat.completions.create(
    model="gpt-5.3-codex",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(f"\nResponse: {response.choices[0].message.content}")

Troubleshooting

Ensure Codex-LB is running:
curl http://127.0.0.1:2455/v1/models
If using Docker:
docker ps | grep codex-lb
docker logs codex-lb
API key auth is enabled but your key is missing or invalid:
# Check if the environment variable is set
import os
print(os.environ.get("CODEX_LB_API_KEY"))
  1. Verify the key is valid in the dashboard
  2. Ensure you’re reading from the correct environment variable
  3. Check the key hasn’t expired
The requested model isn’t available:
# List available models
models = client.models.list()
for model in models.data:
    print(model.id)
  1. Verify the model ID matches exactly (case-sensitive)
  2. Ensure at least one account supports the model
  3. Check model sync in dashboard settings
Increase the timeout for long-running requests:
client = OpenAI(
    base_url="http://127.0.0.1:2455/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY", "dummy"),
    timeout=300.0,  # 5 minutes
)
You’re hitting rate limits:
  1. Check your API key limits in the dashboard
  2. Implement exponential backoff:
    from openai import OpenAI
    import time
    
    def chat_with_retry(prompt, max_retries=3):
        """Send a chat completion, retrying rate-limit errors with
        exponential backoff (1s, 2s, 4s, ...).
    
        Raises the underlying error if it is not a rate limit, or if all
        max_retries attempts are rate-limited (instead of silently
        returning None).
        """
        for attempt in range(max_retries):
            try:
                return client.chat.completions.create(
                    model="gpt-5.3-codex",
                    messages=[{"role": "user", "content": prompt}],
                )
            except Exception as e:
                # Re-raise non-rate-limit errors immediately, and re-raise
                # after the final rate-limited attempt — don't sleep when
                # no retry is left.
                if "rate_limit" not in str(e).lower() or attempt == max_retries - 1:
                    raise
                wait = 2 ** attempt
                print(f"Rate limited, waiting {wait}s...")
                time.sleep(wait)
    
  3. Add more accounts to the pool

Remote Access

If Codex-LB is running on a different machine:
client = OpenAI(
    base_url="https://your-server.com/v1",
    api_key=os.environ.get("CODEX_LB_API_KEY"),
)
When exposing Codex-LB remotely:
  • Always enable API key authentication
  • Use HTTPS with a reverse proxy (nginx, Caddy)
  • Configure firewall rules to restrict access
  • See Production Deployment

Next Steps

API Keys

Create and manage API keys for authentication

Rate Limiting

Configure rate limits per key or account

Chat Completions API

Full API reference for /v1/chat/completions

Usage Tracking

Monitor SDK usage in the dashboard

Build docs developers (and LLMs) love