Skip to main content
Retrieves a list of all models available through the LLM Gateway, including their capabilities, supported providers, and pricing information.

Endpoint

GET https://api.llmgateway.io/v1/models

Authentication

Requires authentication using Bearer token or x-api-key header. See Authentication.

Query Parameters

include_deactivated
boolean
default:false
Include models that have been deactivated. Example: ?include_deactivated=true
exclude_deprecated
boolean
default:false
Exclude models that have been deprecated. Example: ?exclude_deprecated=true

Response

data
array
Array of model objects. The fields of each model object are shown in the Response Example below.

Examples

List All Models

# List every available model (Bearer-token auth).
curl -H "Authorization: Bearer $LLMGATEWAY_API_KEY" \
  https://api.llmgateway.io/v1/models

Filter Models by Capability

import os
from openai import OpenAI

# LLM Gateway speaks the OpenAI wire protocol, so the stock client works
# once it is pointed at the gateway's base URL.
gateway = OpenAI(
    api_key=os.environ.get("LLMGATEWAY_API_KEY"),
    base_url="https://api.llmgateway.io/v1",
)

catalog = gateway.models.list().data


def show(header, entries):
    """Print a section header followed by one indented model id per entry."""
    print(header)
    for entry in entries:
        print(f"  - {entry.id}")


def accepts_images(entry):
    """True when the model's architecture lists 'image' among its input modalities."""
    return 'image' in entry.architecture['input_modalities']


def can_reason(entry):
    """True when at least one provider of the model supports reasoning."""
    return any(p['reasoning'] for p in entry.providers)


# Models with vision support
show("Models with vision support:", [m for m in catalog if accepts_images(m)])

# Free models
show("\nFree models:", [m for m in catalog if m.free])

# Models with reasoning support
show("\nModels with reasoning:", [m for m in catalog if can_reason(m)])

Compare Pricing

import os
from openai import OpenAI

# Point the stock OpenAI client at the LLM Gateway.
gateway = OpenAI(
    api_key=os.environ.get("LLMGATEWAY_API_KEY"),
    base_url="https://api.llmgateway.io/v1",
)


def combined_rate(entry):
    """Per-token cost of a model: prompt price plus completion price."""
    tariff = entry.pricing
    return float(tariff['prompt']) + float(tariff['completion'])


# Cheapest models first, ranked by total (input + output) per-token price.
cheapest_first = sorted(gateway.models.list().data, key=combined_rate)

print("Top 10 cheapest models:")
for entry in cheapest_first[:10]:
    print(f"{entry.id}: ${combined_rate(entry):.8f} per token")

Check Model Capabilities

import os
from openai import OpenAI

# Point the stock OpenAI client at the LLM Gateway.
gateway = OpenAI(
    api_key=os.environ.get("LLMGATEWAY_API_KEY"),
    base_url="https://api.llmgateway.io/v1",
)

catalog = gateway.models.list().data


def describe(entry):
    """Print a capability summary for one model entry."""
    backends = entry.providers
    print(f"Model: {entry.id}")
    print(f"Context length: {entry.context_length:,} tokens")
    # A capability counts as available if any provider offers it.
    print(f"Streaming: {any(p['streaming'] for p in backends)}")
    print(f"Vision: {any(p['vision'] for p in backends)}")
    print(f"Tools: {any(p['tools'] for p in backends)}")
    print(f"JSON output: {entry.json_output}")
    print(f"Structured outputs: {entry.structured_outputs}")
    print(f"\nSupported parameters:")
    for name in entry.supported_parameters:
        print(f"  - {name}")
    print(f"\nProviders:")
    for backend in backends:
        print(f"  - {backend['providerId']}: {backend['modelName']}")


# Look up a specific model by id; nothing is printed if it is absent.
target = next((m for m in catalog if m.id == 'gpt-4o'), None)
if target:
    describe(target)

Response Example

{
  "data": [
    {
      "id": "gpt-4o",
      "name": "gpt-4o",
      "aliases": ["gpt-4o-latest"],
      "created": 1677858242,
      "description": "gpt-4o provided by openai, azure",
      "family": "openai",
      "architecture": {
        "input_modalities": ["text", "image"],
        "output_modalities": ["text"],
        "tokenizer": "GPT"
      },
      "top_provider": {
        "is_moderated": true
      },
      "providers": [
        {
          "providerId": "openai",
          "modelName": "gpt-4o-2024-08-06",
          "pricing": {
            "prompt": "0.0000025",
            "completion": "0.00001",
            "image": "0"
          },
          "streaming": true,
          "vision": true,
          "cancellation": true,
          "tools": true,
          "parallelToolCalls": true,
          "reasoning": false,
          "stability": "stable"
        }
      ],
      "pricing": {
        "prompt": "0.0000025",
        "completion": "0.00001",
        "image": "0",
        "request": "0",
        "input_cache_read": "0",
        "input_cache_write": "0",
        "web_search": "0",
        "internal_reasoning": "0"
      },
      "context_length": 128000,
      "supported_parameters": [
        "temperature",
        "max_tokens",
        "top_p",
        "frequency_penalty",
        "presence_penalty",
        "response_format",
        "tools",
        "tool_choice"
      ],
      "json_output": true,
      "structured_outputs": true,
      "free": false,
      "stability": "stable"
    }
  ]
}

Notes

  • Models with all providers deactivated are excluded by default
  • Use include_deactivated=true to see all models including deactivated ones
  • Use exclude_deprecated=true to hide deprecated models
  • Pricing values are in USD per single token (dashboards and docs typically display them as cost per million tokens)
  • Context length represents the maximum total tokens (input + output)
  • Some models may be available through multiple providers with different pricing
  • The stability field indicates the maturity level of the model

Build docs developers (and LLMs) love