Skip to main content
Testing ensures your agents behave correctly and reliably. This guide covers unit testing, integration testing, and evaluation strategies.

Testing Setup

GAIA uses pytest for testing:
# Run all tests
cd apps/api && uv run pytest

# Run specific test file
uv run pytest tests/test_agents.py

# Run with coverage
uv run pytest --cov=app/agents

# Run specific test
uv run pytest tests/test_agents.py::test_create_todo

Project Structure

apps/api/
├── app/
│   └── agents/
│       ├── core/
│       ├── tools/
│       └── prompts/
└── tests/
    ├── conftest.py
    ├── test_agents.py
    ├── test_tools.py
    └── test_prompts.py

Unit Testing Tools

Basic Tool Test

import pytest
from langchain_core.runnables.config import RunnableConfig
from app.agents.tools.weather_tool import get_weather

@pytest.mark.asyncio
async def test_get_weather():
    """Verify the weather tool responds with data for a known location."""
    run_config = RunnableConfig(
        configurable={"user_id": "test-user-123"}
    )

    weather = await get_weather(run_config, location="London,UK")

    # The tool may return either a structured dict or a plain-text summary.
    assert isinstance(weather, (dict, str))
    assert "London" in str(weather)

Tool with Mocked Service

import pytest
from unittest.mock import AsyncMock, patch
from app.agents.tools.todo_tool import create_todo
from app.models.todo_models import Priority

@pytest.mark.asyncio
async def test_create_todo_success():
    """Create a todo through the tool while the service layer is mocked."""
    run_config = RunnableConfig(
        configurable={"user_id": "test-user"}
    )

    stub_todo = {
        "id": "todo-123",
        "title": "Test Task",
        "completed": False,
    }

    # Patch the service call so no database or network is touched.
    service_mock = AsyncMock(return_value={"todo": stub_todo})
    with patch("app.agents.tools.todo_tool.create_todo_service", new=service_mock):
        result = await create_todo(
            run_config,
            title="Test Task",
            description="Test description",
            priority="high",
        )

    assert result["success"] is True
    assert result["todo"]["title"] == "Test Task"

@pytest.mark.asyncio
async def test_create_todo_no_auth():
    """An unauthenticated config must yield an error, not a todo."""
    anonymous_config = RunnableConfig(configurable={})

    result = await create_todo(anonymous_config, title="Test Task")

    # The tool reports the failure in an "error" field mentioning auth.
    assert "error" in result
    assert "authentication" in result["error"].lower()

Integration Testing Agents

Test Agent Execution

import pytest
from datetime import datetime
from app.agents.core.agent import call_agent_silent
from app.models.message_models import MessageRequestWithHistory

@pytest.mark.asyncio
async def test_agent_creates_todo():
    """The agent should turn natural language into a create_todo call."""
    request = MessageRequestWithHistory(
        message="Remind me to buy groceries tomorrow at 5pm",
        messages=[],
        fileData=[],
        fileIds=[],
    )

    acting_user = {
        "user_id": "test-user-123",
        "email": "[email protected]",
        "name": "Test User",
    }

    response, tool_data = await call_agent_silent(
        request=request,
        conversation_id="test-conv-123",
        user=acting_user,
        user_time=datetime.now(),
    )

    # The reply should acknowledge the task details...
    assert "groceries" in response.lower()
    assert "tomorrow" in response.lower()

    # ...and the todo tool must actually have been invoked.
    assert "create_todo" in tool_data

Test Streaming Agent

import pytest
from app.agents.core.agent import call_agent

@pytest.mark.asyncio
async def test_agent_streaming():
    """The streaming agent should yield at least one SSE-formatted chunk."""
    request = MessageRequestWithHistory(
        message="What's the weather like?",
        messages=[],
    )

    acting_user = {"user_id": "test", "name": "Test"}

    stream = await call_agent(
        request=request,
        conversation_id="test-conv",
        user=acting_user,
        user_time=datetime.now(),
    )

    received = [chunk async for chunk in stream]

    # Something arrived, and the stream uses SSE "data:" framing.
    assert len(received) > 0
    assert any("data:" in chunk for chunk in received)

Testing Prompts

Test Prompt Formatting

import pytest
from app.agents.prompts.my_agent_prompts import build_my_agent_prompt

def test_prompt_includes_context():
    """Every piece of dynamic context must appear in the rendered prompt."""
    rendered = build_my_agent_prompt(
        current_datetime="2026-02-19T10:00:00",
        user_timezone="America/New_York",
        memories=["User prefers morning meetings"],
        user_name="Alice",
    )

    # Each injected value should survive into the final prompt text.
    for expected in ("2026-02-19", "America/New_York", "morning meetings", "Alice"):
        assert expected in rendered

Test Prompt Behavior

@pytest.mark.asyncio
async def test_prompt_tone_matching():
    """A casual message should receive a casual, concise reply."""
    casual_request = MessageRequestWithHistory(
        message="hey can u help me out?",
        messages=[],
    )

    reply, _ = await call_agent_silent(
        request=casual_request,
        conversation_id="test",
        user={"user_id": "test", "name": "Test"},
        user_time=datetime.now(),
    )

    # Heuristics: a formal reply would be title-cased and long-winded.
    assert not reply.istitle()  # Not formal
    assert len(reply.split()) < 50  # Concise

Testing Graph Nodes

Test Pre-Model Hook

import pytest
from app.agents.core.nodes.trim_messages_node import trim_messages_node
from app.agents.core.state import State
from langchain_core.messages import HumanMessage, AIMessage

@pytest.mark.asyncio
async def test_trim_messages_node():
    """Trimming must drop old messages while keeping the most recent one."""
    long_history = State(
        messages=[HumanMessage(content=f"Message {i}") for i in range(100)]
    )

    run_config = RunnableConfig(configurable={})

    trimmed = await trim_messages_node(long_history, run_config)

    # Fewer messages survive, and the newest message is still last.
    assert len(trimmed["messages"]) < 100
    assert "Message 99" in trimmed["messages"][-1].content

Test End-Graph Hook

@pytest.mark.asyncio
async def test_follow_up_actions_node():
    """The end-graph hook should attach follow-up suggestions to the state."""
    from app.agents.core.nodes.follow_up_actions_node import (
        follow_up_actions_node
    )

    conversation = State(
        messages=[
            HumanMessage(content="I need to plan a trip"),
            AIMessage(content="I can help with that"),
        ]
    )

    run_config = RunnableConfig(configurable={})

    result = await follow_up_actions_node(conversation, run_config)

    # The node's output must carry the suggestions key.
    assert "follow_up_actions" in result

Fixtures

Create reusable test fixtures in conftest.py:
import pytest
from datetime import datetime
from langchain_core.runnables.config import RunnableConfig

@pytest.fixture
def test_user():
    """A canned user record shared across tests."""
    user_record = {
        "user_id": "test-user-123",
        "email": "[email protected]",
        "name": "Test User",
    }
    return user_record

@pytest.fixture
def test_config(test_user):
    """A RunnableConfig carrying the test user's identity and clock."""
    configurable = {
        "user_id": test_user["user_id"],
        "user_name": test_user["name"],
        "user_time": datetime.now(),
    }
    return RunnableConfig(configurable=configurable)

@pytest.fixture
def mock_request():
    """A minimal message request with no history or attachments."""
    from app.models.message_models import MessageRequestWithHistory

    request = MessageRequestWithHistory(
        message="Test message",
        messages=[],
        fileData=[],
        fileIds=[],
    )
    return request

Mocking External Services

Mock Database Calls

import pytest
from unittest.mock import AsyncMock, patch

@pytest.mark.asyncio
async def test_tool_with_database():
    """Test tool behavior with the database service layer mocked out.

    Fix: the original body referenced an undefined name ``config``,
    which raised ``NameError`` at runtime. Like the other tool tests
    in this guide, the test now builds its own RunnableConfig.
    """
    config = RunnableConfig(configurable={"user_id": "test-user"})

    with patch(
        "app.services.todos.todo_service.get_all_todos_service",
        new=AsyncMock(return_value={
            "todos": [{"id": "1", "title": "Test"}]
        }),
    ):
        # NOTE: my_tool_that_uses_db is a placeholder — substitute your
        # own tool that calls get_all_todos_service.
        result = await my_tool_that_uses_db(config, param="test")

    assert result["success"] is True

Mock LLM Calls

from unittest.mock import MagicMock

@pytest.fixture
def mock_llm():
    """An LLM stand-in whose async invoke returns a fixed message."""
    stub = MagicMock()
    # Only ainvoke needs async behavior; everything else stays a MagicMock.
    stub.ainvoke = AsyncMock(
        return_value=AIMessage(content="Mocked response")
    )
    return stub

@pytest.mark.asyncio
async def test_agent_with_mock_llm(mock_llm):
    """Exercise agent logic against the mocked LLM instead of a real model."""
    llm_patch = patch("app.agents.llm.client.init_llm", return_value=mock_llm)
    with llm_patch:
        # Test agent logic
        pass

Performance Testing

import pytest
import time

@pytest.mark.asyncio
async def test_agent_response_time():
    """Test agent responds within acceptable time.

    Uses ``time.perf_counter()`` instead of ``time.time()``: the wall
    clock can jump (NTP adjustments, DST), whereas ``perf_counter`` is
    the monotonic, high-resolution timer intended for measuring
    durations.
    """
    request = MessageRequestWithHistory(
        message="Quick question",
        messages=[],
    )

    start = time.perf_counter()
    response, _ = await call_agent_silent(
        request=request,
        conversation_id="test",
        user={"user_id": "test", "name": "Test"},
        user_time=datetime.now(),
    )
    duration = time.perf_counter() - start

    assert duration < 5.0  # Response within 5 seconds

Test Coverage

Run tests with coverage reporting:
# Generate coverage report
cd apps/api
uv run pytest --cov=app/agents --cov-report=html

# View report
open htmlcov/index.html
Testing Best Practices:
  • Test both success and failure paths
  • Mock external dependencies (databases, APIs)
  • Use fixtures for common test data
  • Test edge cases and invalid inputs
  • Verify tool calls and responses
  • Check error handling and logging
  • Run tests in CI/CD pipeline
  • Maintain >80% code coverage

Continuous Integration

Tests run automatically on PR:
# .github/workflows/test.yml
name: Test Suite

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          cd apps/api
          pip install uv
          uv sync

      - name: Run tests
        run: |
          cd apps/api
          uv run pytest --cov=app --cov-report=xml

      - name: Upload coverage
        uses: codecov/codecov-action@v3

Next Steps

Contributing

Learn how to contribute your changes

Code Style

Follow GAIA’s code style guidelines

Build docs developers (and LLMs) love