Skip to main content
Patterns for using OpenAI, Anthropic, and other LLM providers with hypergraph.

Anthropic Claude

Setup

1. Install the Anthropic SDK

pip install anthropic
2. Initialize the client

import os
from anthropic import Anthropic

client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

Basic Message

from hypergraph import node

@node(output_name="response")
def generate(prompt: str, system: str = "") -> str:
    """Generate a response using Claude Sonnet 4.5."""

    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    )

    return message.content[0].text

Streaming

@node(output_name="response")
def stream_claude(prompt: str, system: str = "") -> str:
    """Stream response from Claude."""

    chunks = []

    with client.messages.stream(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    ) as stream:
        for text in stream.text_stream:
            print(text, end="", flush=True)
            chunks.append(text)

    print()
    return "".join(chunks)
Streaming provides better user experience for long responses. Buffer chunks and return the full text as the node output.

Multi-Turn Conversation

@node(output_name="response")
def chat(messages: list, system: str = "") -> str:
    """Multi-turn chat with Claude."""

    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        system=system,
        messages=messages,  # List of {"role": "user/assistant", "content": "..."}
    )

    return message.content[0].text

Model Options

| Model | Use Case |
| --- | --- |
| `claude-opus-4-5-20251101` | Complex reasoning, analysis, coding |
| `claude-sonnet-4-5-20250929` | Balanced performance and cost |
| `claude-haiku-4-5` | Fast, cost-efficient for simple tasks |

OpenAI GPT

Setup

1. Install the OpenAI SDK

pip install openai
2. Initialize the client

import os
from openai import OpenAI

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

Basic Response (Responses API)

@node(output_name="response")
def generate(prompt: str, instructions: str = "") -> str:
    """Generate a response using GPT-5.2."""

    response = client.responses.create(
        model="gpt-5.2",
        input=prompt,
        instructions=instructions,
    )

    return response.output_text
The examples use the Responses API format. Adjust for the actual OpenAI API you’re using (Chat Completions, Assistants, etc.).

Streaming

@node(output_name="response")
def stream_gpt(prompt: str, instructions: str = "") -> str:
    """Stream response from GPT-5.2."""

    chunks = []

    stream = client.responses.create(
        model="gpt-5.2",
        input=prompt,
        instructions=instructions,
        stream=True,
    )

    for part in stream:
        if part.output_text:
            print(part.output_text, end="", flush=True)
            chunks.append(part.output_text)

    print()
    return "".join(chunks)

Multi-Turn with State

The Responses API supports stateful conversations:
@node(output_name=("response", "response_id"))
def chat_turn(prompt: str, previous_response_id: str | None = None) -> tuple[str, str]:
    """Single turn in a stateful conversation."""

    response = client.responses.create(
        model="gpt-5.2",
        input=prompt,
        previous_response_id=previous_response_id,
        store=True,  # Enable state storage
    )

    return response.output_text, response.id

With Tools

@node(output_name="response")
def generate_with_tools(prompt: str) -> str:
    """Use GPT-5.2 with built-in tools."""

    response = client.responses.create(
        model="gpt-5.2",
        input=prompt,
        tools=[
            {"type": "web_search"},
            {"type": "code_interpreter", "container": {"type": "auto"}},
        ],
    )

    return response.output_text

Model Options

| Model | Use Case |
| --- | --- |
| `gpt-5.2` | Latest, best for coding and agentic tasks |
| `gpt-5-mini` | Faster, cost-efficient |
| `o3` | Reasoning model for complex problems |

RAG Pattern

Combine retrieval with LLM generation:
from hypergraph import Graph, node, AsyncRunner

@node(output_name="embedding")
async def embed(query: str) -> list[float]:
    """Embed query for retrieval."""
    response = client.embeddings.create(
        model="text-embedding-3-large",
        input=query,
    )
    return response.data[0].embedding

@node(output_name="docs")
async def retrieve(embedding: list[float]) -> list[str]:
    """Search vector database."""
    results = await vector_db.search(embedding, k=5)
    return [doc["content"] for doc in results]

@node(output_name="answer")
def generate(docs: list[str], query: str) -> str:
    """Generate answer using retrieved context."""

    context = "\n\n---\n\n".join(docs)

    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2048,
        system=f"Answer based on this context:\n\n{context}",
        messages=[{"role": "user", "content": query}],
    )

    return message.content[0].text


rag_pipeline = Graph([embed, retrieve, generate])
RAG pipelines benefit from hypergraph’s automatic parallelization — embed and retrieve run as soon as their inputs are available.

Structured Outputs

With Anthropic

from pydantic import BaseModel

class Analysis(BaseModel):
    """Structured result extracted from free text by the `analyze` node."""
    sentiment: str       # sentiment label chosen by the model
    confidence: float    # model-reported confidence score
    topics: list[str]    # topics identified in the text

@node(output_name="analysis")
def analyze(text: str) -> Analysis:
    """Extract structured data from text."""

    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[{
            "role": "user",
            "content": f"Analyze this text and return JSON:\n\n{text}",
        }],
    )

    # Parse the JSON response
    import json
    data = json.loads(message.content[0].text)
    return Analysis(**data)

With OpenAI

@node(output_name="analysis")
def analyze(text: str) -> dict:
    """Extract structured data using GPT-5.2."""

    response = client.responses.create(
        model="gpt-5.2",
        input=f"Analyze this text:\n\n{text}",
        text={"format": {"type": "json_object"}},
    )

    import json
    return json.loads(response.output_text)
Use Pydantic models for validation and type safety. They integrate seamlessly with hypergraph’s type checking.

Error Handling

from anthropic import APIError, RateLimitError

@node(output_name="response")
def safe_generate(prompt: str, max_retries: int = 3) -> str:
    """Generate with retry logic."""

    for attempt in range(max_retries):
        try:
            message = client.messages.create(
                model="claude-sonnet-4-5-20250929",
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            return message.content[0].text

        except RateLimitError:
            if attempt < max_retries - 1:
                import time
                time.sleep(2 ** attempt)  # Exponential backoff
                continue
            raise

        except APIError as e:
            raise RuntimeError(f"API error: {e}")
Always implement retry logic with exponential backoff for production LLM integrations. Rate limits and transient errors are common.

Dependency Injection with .bind()

Best practice: Use .bind() to provide shared LLM clients at the graph level instead of global variables or function defaults.
from anthropic import Anthropic

# Define nodes with client as a parameter
@node(output_name="embedding")
def embed(query: str, client: Anthropic) -> list[float]:
    """Embed query using the provided client."""
    # Use client for API calls
    return [0.1, 0.2, 0.3]  # Simplified

@node(output_name="answer")
def generate(docs: list[str], query: str, client: Anthropic) -> str:
    """Generate answer using the provided client."""
    context = "\n\n---\n\n".join(docs)

    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2048,
        system=f"Answer based on this context:\n\n{context}",
        messages=[{"role": "user", "content": query}],
    )

    return message.content[0].text

# Create client once and bind it to the graph
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
rag_pipeline = Graph([embed, retrieve, generate]).bind(client=client)

Why use .bind() instead of function defaults?

1. **Shared state** — bound values are intentionally shared across runs (no deep-copy)
2. **Non-copyable objects** — many clients use thread locks internally and can't be deep-copied
3. **Testability** — easy to swap in mock clients for testing
4. **Lifecycle control** — you manage when the client is created and destroyed
@node(output_name="answer")
def generate(query: str, client: Anthropic = Anthropic()) -> str:
    # This might raise: GraphConfigError: cannot deep-copy default value
    ...

Testing LLM Nodes

With .bind(), testing is straightforward:
from unittest.mock import MagicMock

def test_generate():
    """Verify `generate` returns the mocked Claude reply text."""
    # Build a fake client whose messages.create returns a canned reply.
    fake_client = MagicMock()
    canned_reply = MagicMock()
    canned_reply.content = [MagicMock(text="Test response")]
    fake_client.messages.create.return_value = canned_reply

    # Exercise the undecorated function directly.
    answer = generate.func(
        docs=["doc1", "doc2"],
        query="test query",
        client=fake_client,
    )
    assert answer == "Test response"

    # Exercise the full graph with the fake client bound in.
    graph = Graph([generate]).bind(client=fake_client)
    runner = SyncRunner()
    outputs = runner.run(graph, {"docs": ["doc1"], "query": "test"})
    assert outputs["answer"] == "Test response"
Mock clients at the boundary. Test your business logic without hitting real LLM APIs.

See Also

Multi-Turn RAG

Conversational RAG example

Test Without Framework

Test nodes with .func()

Build docs developers (and LLMs) love