Skip to main content

Overview

The LLM API provides a unified interface for interacting with various language models. Strix uses LiteLLM under the hood to support multiple providers.

LLM Class

Main class for LLM interactions.

Constructor

from strix.llm import LLM, LLMConfig

llm = LLM(
    config: LLMConfig,
    agent_name: str | None = None
)
config
LLMConfig
required
LLM configuration object
agent_name
str | None
Name of the agent using this LLM
Example:
from strix.llm import LLM, LLMConfig

config = LLMConfig(
    model_name="claude-3-5-sonnet-20241022",
    scan_mode="standard"
)

llm = LLM(config, agent_name="SecurityScanner")

Properties

config
LLMConfig
LLM configuration
agent_name
str | None
Associated agent name
agent_id
str | None
Associated agent ID
system_prompt
str
Loaded system prompt

Methods

generate

async def generate(
    conversation_history: list[dict[str, Any]]
) -> AsyncIterator[LLMResponse]
Generates a streaming response from the LLM.
conversation_history
list[dict[str, Any]]
required
List of message dictionaries with “role” and “content” keys
return
AsyncIterator[LLMResponse]
Async iterator yielding LLMResponse objects
Example:
messages = [
    {"role": "user", "content": "Analyze this HTTP response for security issues"}
]

async for response in llm.generate(messages):
    print(response.content, end="", flush=True)
    
    # Check for tool invocations
    if response.tool_invocations:
        print(f"\nTools to execute: {response.tool_invocations}")

set_agent_identity

def set_agent_identity(
    agent_name: str | None,
    agent_id: str | None
) -> None
Sets the agent identity for telemetry.
agent_name
str | None
Agent name
agent_id
str | None
Agent ID

LLMConfig

Configuration for LLM behavior and model selection.

Constructor

from strix.llm import LLMConfig

config = LLMConfig(
    model_name: str | None = None,
    enable_prompt_caching: bool = True,
    skills: list[str] | None = None,
    timeout: int | None = None,
    scan_mode: str = "deep"
)
model_name
str | None
Model identifier (defaults to STRIX_LLM environment variable)
enable_prompt_caching
bool
default:"True"
Enable prompt caching for supported providers (Anthropic)
skills
list[str] | None
List of skill names to load for this configuration
timeout
int | None
Request timeout in seconds (defaults to 300)
scan_mode
str
default:"'deep'"
Scan mode: “quick”, “standard”, or “deep”
Example:
from strix.llm import LLMConfig

# Basic configuration
config = LLMConfig(
    model_name="claude-3-5-sonnet-20241022",
    scan_mode="standard"
)

# Advanced configuration
advanced_config = LLMConfig(
    model_name="gpt-4o",
    enable_prompt_caching=False,
    skills=["web_security", "api_testing"],
    timeout=600,
    scan_mode="deep"
)

Properties

model_name
str
Model name as configured
litellm_model
str
Model name formatted for LiteLLM
canonical_model
str
Canonical model name for cost calculation
api_key
str | None
API key from environment or config
api_base
str | None
API base URL
enable_prompt_caching
bool
Whether prompt caching is enabled
skills
list[str]
Loaded skills
timeout
int
Request timeout in seconds
scan_mode
str
Current scan mode

LLMResponse

Response from an LLM generation.
@dataclass
class LLMResponse:
    content: str
    tool_invocations: list[dict[str, Any]] | None = None
    thinking_blocks: list[dict[str, Any]] | None = None
content
str
Generated text content
tool_invocations
list[dict[str, Any]] | None
Parsed tool invocations from the response
thinking_blocks
list[dict[str, Any]] | None
Extended thinking blocks (for reasoning models like o1)
Example:
async for response in llm.generate(messages):
    if response.tool_invocations:
        for tool in response.tool_invocations:
            print(f"Tool: {tool['toolName']}")
            print(f"Args: {tool['args']}")
    
    if response.thinking_blocks:
        print("Model reasoning:")
        for block in response.thinking_blocks:
            print(block.get("thinking", ""))

Supported Models

Strix Models

Hosted models with prefix strix/:
config = LLMConfig(model_name="strix/claude-3-5-sonnet-20241022")

Anthropic

# Set environment variable
export STRIX_LLM=anthropic/claude-3-5-sonnet-20241022
export LLM_API_KEY=sk-ant-...

config = LLMConfig()  # Uses environment variables

OpenAI

export STRIX_LLM=gpt-4o
export LLM_API_KEY=sk-...

config = LLMConfig()

Custom Providers

export STRIX_LLM=custom-model
export LLM_API_BASE=https://api.example.com/v1
export LLM_API_KEY=your-key

config = LLMConfig()

Error Handling

LLMRequestFailedError

from strix.llm import LLMRequestFailedError

class LLMRequestFailedError(Exception):
    def __init__(
        self,
        message: str,
        details: str | None = None
    )
Raised when an LLM request fails.
message
str
required
Error message
details
str | None
Additional error details
Example:
from strix.llm import LLM, LLMConfig, LLMRequestFailedError

try:
    llm = LLM(LLMConfig(model_name="gpt-4o"))
    async for response in llm.generate(messages):
        print(response.content)
except LLMRequestFailedError as e:
    print(f"LLM request failed: {e.message}")
    if e.details:
        print(f"Details: {e.details}")

Scan Modes

Scan modes affect the reasoning effort and system prompts:
quick
Scan Mode
Fast scanning with medium reasoning effort. Best for quick assessments.
standard
Scan Mode
Balanced scanning with high reasoning effort. Recommended for most use cases.
deep
Scan Mode
Thorough scanning with high reasoning effort. Best for comprehensive security assessments.
Example:
# Quick scan
quick_config = LLMConfig(
    model_name="claude-3-5-sonnet-20241022",
    scan_mode="quick"
)

# Deep scan
deep_config = LLMConfig(
    model_name="claude-3-5-sonnet-20241022",
    scan_mode="deep"
)

Environment Variables

STRIX_LLM
str
required
Model name (e.g., “claude-3-5-sonnet-20241022”, “gpt-4o”)
LLM_API_KEY
str
required
API key for the provider
LLM_API_BASE
str
default:"None"
Custom API base URL
LLM_TIMEOUT
str
default:"'300'"
Request timeout in seconds
STRIX_REASONING_EFFORT
str
default:"'high'"
Reasoning effort: “low”, “medium”, or “high”
STRIX_LLM_MAX_RETRIES
str
default:"'5'"
Maximum retry attempts for failed requests

Full Example

import asyncio
from strix.llm import LLM, LLMConfig, LLMRequestFailedError

async def main():
    # Configure LLM
    config = LLMConfig(
        model_name="claude-3-5-sonnet-20241022",
        enable_prompt_caching=True,
        timeout=300,
        scan_mode="standard"
    )
    
    llm = LLM(config, agent_name="TestAgent")
    
    # Prepare messages
    messages = [
        {"role": "user", "content": "What are the OWASP Top 10?"}
    ]
    
    try:
        # Stream response
        full_content = ""
        async for response in llm.generate(messages):
            full_content += response.content
            print(response.content, end="", flush=True)
        
        print(f"\n\nFinal response length: {len(full_content)}")
        
    except LLMRequestFailedError as e:
        print(f"Error: {e.message}")

if __name__ == "__main__":
    asyncio.run(main())

Build docs developers (and LLMs) love