Testing
GAIA uses pytest for testing with async support and comprehensive fixtures.
Testing Stack
- pytest: Testing framework
- pytest-asyncio: Async test support
- pytest-check: Soft assertions
- nest-asyncio: Nested event loop support
- pytest fixtures: Reusable test components
Test Organization
apps/api/tests/
├── __init__.py
├── conftest.py # Global fixtures
├── composio_tools/
│ ├── __init__.py
│ ├── conftest.py # Tool-specific fixtures
│ ├── config_utils.py # Configuration helpers
│ ├── test_gmail.py # Gmail tool tests
│ ├── test_calendar.py # Calendar tool tests
│ ├── test_google_docs.py # Google Docs tests
│ ├── test_google_sheets.py # Google Sheets tests
│ ├── test_linear.py # Linear integration tests
│ └── test_linkedin.py # LinkedIn tests
└── unit/
├── test_services.py # Service layer tests
├── test_models.py # Model validation tests
└── test_utils.py # Utility function tests
Running Tests
Basic Commands
# Run all tests
cd apps/api
uv run pytest
# Run specific test file
uv run pytest tests/composio_tools/test_gmail.py
# Run with verbose output
uv run pytest tests/composio_tools/test_gmail.py -v
# Run specific test
uv run pytest tests/composio_tools/test_gmail.py::TestGmailReadOperations::test_get_unread_count
# Run with output capture disabled (for interactive tests)
uv run pytest tests/composio_tools/test_gmail.py -s
With Custom Options
# Run with user ID
pytest tests/composio_tools/test_gmail.py -v --user-id USER_ID
# Skip destructive tests
pytest tests/composio_tools/ --skip-destructive
# Auto-confirm prompts
pytest tests/composio_tools/ --yes
Pytest Configuration
Custom CLI Options
def pytest_addoption(parser):
    """Register GAIA-specific command-line flags with pytest.

    Flags:
        --user-id: Composio user to authenticate as.
        --skip-destructive: bypass tests that create/modify/delete events.
        --yes: auto-approve every interactive confirmation prompt.
    """
    # (flag, kwargs) pairs, registered in a fixed order so --help stays stable.
    option_specs = (
        ("--user-id", {
            "action": "store",
            "default": None,
            "help": "User ID for Composio authentication",
        }),
        ("--skip-destructive", {
            "action": "store_true",
            "default": False,
            "help": "Skip tests that create/modify/delete events",
        }),
        ("--yes", {
            "action": "store_true",
            "default": False,
            "help": "Automatically confirm all interactive prompts",
        }),
    )
    for flag, kwargs in option_specs:
        parser.addoption(flag, **kwargs)
Fixtures
Session-Scoped Fixtures
import asyncio
import pytest
import nest_asyncio
from app.core.lazy_loader import providers
# Apply nest_asyncio for nested event loops
nest_asyncio.apply()
@pytest.fixture(scope="session")
def event_loop():
    """Provide one asyncio event loop shared by the whole test session.

    Overrides pytest-asyncio's default function-scoped loop so that
    session-scoped async fixtures can reuse the same loop.
    """
    session_loop = asyncio.new_event_loop()
    yield session_loop
    session_loop.close()
@pytest.fixture(scope="session")
def user_id(request) -> str:
    """Resolve the Composio user ID, preferring the --user-id CLI flag.

    Falls back to config/env (EVAL_USER_ID); fails the whole session when
    neither source supplies an ID.
    """
    from_cli = request.config.getoption("--user-id")
    if from_cli:
        return from_cli

    # Fall back to config/env
    from_config = get_user_id()
    if from_config:
        return from_config
    pytest.fail("No user ID provided. Set EVAL_USER_ID or use --user-id flag.")
@pytest.fixture(scope="session")
def composio_client(user_id: str):
    """Initialize Composio providers once per session and return the client.

    Drives the async provider initialization synchronously on the current
    event loop (nest_asyncio, applied at module import, permits this even
    inside an already-running loop), then pulls the Composio service out
    of the provider registry.

    Fails the session when the service is missing (e.g. COMPOSIO_KEY unset).
    """
    from app.agents.evals.initialization import init_eval_providers

    # Run async initialization
    loop = asyncio.get_event_loop()
    loop.run_until_complete(init_eval_providers())

    # Get composio service from providers
    composio_service = providers.get("composio_service")
    if not composio_service:
        # pytest.fail raises Failed, so the original `return None` after it
        # was unreachable dead code and has been removed.
        pytest.fail("Composio service not available. Check COMPOSIO_KEY.")
    return composio_service.composio
Function-Scoped Fixtures
import json
from typing import Generator, Dict, Any
from datetime import datetime
@pytest.fixture(scope="function")
def confirm_action(request):
    """
    Provide a callable that asks the user to confirm a destructive action.

    Needs pytest's '-s' flag so stdin is readable; with --yes every prompt
    is auto-approved without asking.
    """
    def _confirm(message: str) -> None:
        # Non-interactive mode: treat every prompt as approved.
        if request.config.getoption("--yes", default=False):
            return
        prompt = f"\n[CONFIRMATION REQUIRED] {message}\nProceed? (y/N): "
        try:
            answer = input(prompt)
        except OSError:
            pytest.fail(
                "Cannot read input. Run pytest with '-s' to enable interactive confirmation."
            )
        if answer.lower() not in ("y", "yes"):
            pytest.skip("Skipped by user")

    return _confirm
@pytest.fixture(scope="class")
def test_email(composio_client, user_id) -> Generator[Dict[str, Any], None, None]:
    """
    Yield ids/subject of a freshly created Gmail test draft; delete it afterwards.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    subject = f"[PYTEST] Test Email {stamp}"

    created = execute_tool(
        composio_client,
        "GMAIL_CREATE_EMAIL_DRAFT",
        {
            "to": "[email protected]",
            "subject": subject,
            "body": f"This is a test draft created by pytest at {stamp}.",
        },
        user_id,
    )
    if not created.get("successful"):
        pytest.skip(f"Could not create test draft: {created.get('error')}")

    payload = parse_data(created)
    draft_id = payload.get("id") or payload.get("draft_id")

    yield {
        # The message id may be nested under "message"; fall back to draft id.
        "message_id": payload.get("message", {}).get("id") or draft_id,
        "draft_id": draft_id,
        "subject": subject,
    }

    # Teardown: best-effort deletion of the draft.
    if draft_id:
        try:
            execute_tool(
                composio_client,
                "GMAIL_DELETE_DRAFT",
                {"draft_id": draft_id},
                user_id,
            )
        except Exception:
            pass  # Best effort cleanup
Test Helpers
Tool Execution Helper
def execute_tool(
    composio_client,
    tool_name: str,
    payload: Dict[str, Any],
    user_id: str
) -> Dict[str, Any]:
    """
    Run one Composio tool through the LangChain adapter and normalize its output.

    Args:
        composio_client: Unused; retained so existing call sites keep working.
        tool_name: Slug of the tool to run (e.g. "GMAIL_CREATE_EMAIL_DRAFT").
        payload: Arguments forwarded to the tool.
        user_id: User the tool executes on behalf of.

    Returns:
        Dict containing at least 'successful' and 'data' keys.
    """
    from app.services.composio.composio_service import get_composio_service

    # The service hands back LangChain-compatible tools with hooks applied.
    tool = get_composio_service().get_tool(tool_name, user_id=user_id)
    if not tool:
        raise ValueError(f"Tool {tool_name} not found")

    try:
        outcome = tool.invoke(payload)

        # String responses are assumed to be JSON; wrap raw text otherwise.
        if isinstance(outcome, str):
            try:
                outcome = json.loads(outcome)
            except json.JSONDecodeError:
                outcome = {"successful": True, "data": outcome}

        # The 'data' field itself may arrive JSON-encoded; decode when possible.
        if isinstance(outcome, dict) and isinstance(outcome.get("data"), str):
            try:
                outcome["data"] = json.loads(outcome["data"])
            except (json.JSONDecodeError, TypeError):
                pass

        return outcome
    except Exception as exc:
        return {"successful": False, "error": str(exc), "data": None}
Data Parsing Helper
def parse_data(result: Dict[str, Any]) -> Dict[str, Any]:
    """Extract result['data'] as a dict, decoding JSON strings when possible.

    Non-dict payloads (lists, scalars, undecodable strings) collapse to {}.
    """
    payload = result.get("data", {})
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except Exception:
            pass  # leave the raw string; it collapses to {} below
    if isinstance(payload, dict):
        return payload
    return {}
Writing Tests
Basic Test Structure
import pytest
from pytest_check import check
class TestGmailReadOperations:
    """Read-only Gmail tool tests (no mailbox mutation)."""

    def test_get_unread_count(self, composio_client, user_id):
        """GET_UNREAD_COUNT should report a non-negative integer for INBOX."""
        outcome = execute_tool(
            composio_client,
            "GMAIL_GET_UNREAD_COUNT",
            {"label_id": "INBOX"},
            user_id,
        )
        assert outcome.get("successful"), f"API call failed: {outcome.get('error')}"

        payload = parse_data(outcome)
        # Soft assertions: report all structural problems, not just the first.
        with check:
            assert "unreadCount" in payload, "Should have 'unreadCount' field"
            assert isinstance(payload.get("unreadCount"), int), "unreadCount should be int"
            assert payload.get("unreadCount") >= 0, "unreadCount should be non-negative"
Tests with Setup/Teardown
class TestGmailMessageOperations:
    """Gmail message-mutation tests that operate on the shared test draft."""

    def test_mark_as_read(self, composio_client, user_id, test_email):
        """MARK_AS_READ succeeds against the test email."""
        outcome = execute_tool(
            composio_client,
            "GMAIL_MARK_AS_READ",
            {"message_ids": [test_email["message_id"]]},
            user_id,
        )
        assert outcome.get("successful"), f"API call failed: {outcome.get('error')}"

    def test_star_email(self, composio_client, user_id, test_email):
        """STAR_EMAIL stars the test email and reports the action taken."""
        outcome = execute_tool(
            composio_client,
            "GMAIL_STAR_EMAIL",
            {
                "message_ids": [test_email["message_id"]],
                "unstar": False,
            },
            user_id,
        )
        assert outcome.get("successful"), f"API call failed: {outcome.get('error')}"
        payload = parse_data(outcome)
        assert payload.get("action") == "starred", "Should report action as 'starred'"
Async Tests
import pytest
@pytest.mark.asyncio
async def test_user_service():
    """User service lookup returns the expected test user."""
    from app.services.user_service import get_user_by_id

    # Test user retrieval
    fetched = await get_user_by_id("test-user-id")
    assert fetched is not None
    assert fetched["email"] == "[email protected]"
Parametrized Tests
@pytest.mark.asyncio
@pytest.mark.parametrize("status,expected_count", [
    ("active", 5),
    ("completed", 3),
    ("cancelled", 0),
])
async def test_count_reminders_by_status(status, expected_count):
    """Test reminder counting by status.

    Fixed: the asyncio marker was missing — without it pytest-asyncio
    (in strict mode) never awaits the coroutine, so the test is either
    skipped or passes vacuously.
    """
    from app.services.reminder_service import count_reminders

    count = await count_reminders(user_id="test-user", status=status)
    assert count == expected_count
Tests with Mocking
from unittest.mock import AsyncMock, patch
@pytest.mark.asyncio
async def test_send_email_notification():
    """Notification sending delegates exactly one message to the SMTP client."""
    from app.services.notification_service import send_email_notification

    with patch("app.services.notification_service.smtp_client") as smtp_mock:
        smtp_mock.send = AsyncMock(return_value={"success": True})

        outcome = await send_email_notification(
            to="[email protected]",
            subject="Test",
            body="Test message"
        )

        assert outcome["success"]
        smtp_mock.send.assert_called_once()
Testing Best Practices
1. Test Naming
# Good: Descriptive test names — each states the behavior being verified.
def test_get_unread_count_returns_integer():
    pass

def test_mark_as_read_updates_email_status():
    pass

# Bad: Vague test names — failures give no clue what broke.
def test_gmail():
    pass

def test_1():
    pass
2. Arrange-Act-Assert Pattern
@pytest.mark.asyncio
async def test_create_reminder():
    """Reminder creation returns the stored reminder with pending status.

    Fixed: the original was a plain `def` containing `await`, which is a
    SyntaxError; async tests need `async def` plus the asyncio marker.
    """
    # Arrange
    user_id = "test-user-id"
    reminder_data = {
        "title": "Test Reminder",
        "remind_at": datetime.now() + timedelta(hours=1)
    }

    # Act
    reminder = await create_reminder(user_id, reminder_data)

    # Assert
    assert reminder["title"] == "Test Reminder"
    assert reminder["user_id"] == user_id
    assert reminder["status"] == "pending"
3. Use Soft Assertions
from pytest_check import check
@pytest.mark.asyncio
async def test_user_response_structure():
    """User payload exposes the core identity fields.

    Fixed: `await` requires an `async def` — the original sync `def`
    would not even compile.
    """
    user = await get_user("test-id")
    # Soft assertions: report every missing field, not just the first.
    with check:
        assert "user_id" in user
        assert "email" in user
        assert "name" in user
        assert "created_at" in user
4. Test Error Cases
import pytest
from fastapi import HTTPException
@pytest.mark.asyncio
async def test_get_user_not_found():
    """Looking up a non-existent user raises an HTTP 404."""
    with pytest.raises(HTTPException) as exc_info:
        await get_user_by_id("nonexistent-id")

    raised = exc_info.value
    assert raised.status_code == 404
    assert "not found" in raised.detail.lower()
5. Clean Up Resources
@pytest.fixture
def temp_file():
    """Yield the path of a temp file pre-filled with test data; remove it after.

    Fixed: uses tempfile.mkstemp instead of a hard-coded /tmp/test_file.txt,
    so parallel test runs cannot collide and the example works on platforms
    without /tmp. Imports are local because this doc snippet did not import os.
    """
    import os
    import tempfile

    # Setup: securely create a unique file and write the fixture data.
    fd, file_path = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "w") as f:
        f.write("test data")

    yield file_path

    # Teardown
    if os.path.exists(file_path):
        os.remove(file_path)
6. Skip Tests Conditionally
import pytest
import os
@pytest.mark.skipif(
    os.getenv("CI") == "true",
    reason="Skipping in CI environment"
)
def test_local_only_feature():
    # Runs only on developer machines; CI runners export CI=true and skip it.
    pass

@pytest.fixture
def skip_destructive(request):
    """Skip destructive tests if flag is set."""
    # Reads the --skip-destructive CLI option registered in conftest.py.
    return request.config.getoption("--skip-destructive")

def test_delete_all_data(skip_destructive):
    # Consult the fixture before doing anything irreversible.
    if skip_destructive:
        pytest.skip("Skipping destructive test")
    # Destructive test code
    pass
Coverage Reporting
# Install pytest-cov
uv add --dev pytest-cov
# Run with coverage
pytest --cov=app --cov-report=html
# View coverage report
open htmlcov/index.html
Continuous Integration
GitHub Actions Example
name: Backend Tests
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
services:
  postgres:
    image: postgres:15
    env:
      POSTGRES_PASSWORD: postgres
    ports:
      - 5432:5432
    options: >-
      --health-cmd pg_isready
      --health-interval 10s
      --health-timeout 5s
      --health-retries 5
  mongodb:
    image: mongo:7
    ports:
      - 27017:27017
    options: >-
      --health-cmd "mongosh --eval 'db.runCommand({ping:1})'"
      --health-interval 10s
      --health-timeout 5s
      --health-retries 5
  redis:
    image: redis:7
    ports:
      - 6379:6379
    options: >-
      --health-cmd "redis-cli ping"
      --health-interval 10s
      --health-timeout 5s
      --health-retries 5
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
cd apps/api
pip install uv
uv sync
- name: Run tests
env:
POSTGRES_URL: postgresql://postgres:postgres@localhost:5432/test
MONGO_DB: mongodb://localhost:27017/test
REDIS_URL: redis://localhost:6379
run: |
cd apps/api
uv run pytest --cov=app --cov-report=xml
- name: Upload coverage
uses: codecov/codecov-action@v3