Skip to main content

Testing Overview

JARVIS uses pytest for backend testing with extensive coverage of the capture pipeline, agent orchestration, and API endpoints.

Test Structure

backend/tests/
├── test_config.py              # Configuration tests
├── test_pipeline.py            # Core pipeline tests
├── test_capture.py             # Image capture tests
├── test_agents.py              # Agent swarm tests
├── test_identification.py      # Face detection tests
├── test_synthesis.py           # Report synthesis tests
├── test_integration.py         # End-to-end tests
└── test_health.py              # Health check tests

Running Tests

Run All Tests

cd backend
pytest

Run Specific Test File

pytest tests/test_pipeline.py

Run Specific Test Function

pytest tests/test_pipeline.py::test_pipeline_one_face_creates_person

Run with Coverage

pytest --cov=. --cov-report=html
View coverage report: open htmlcov/index.html

Run with Verbose Output

pytest -v

Pytest Configuration

The project uses pyproject.toml for pytest configuration:
pyproject.toml
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "--cov=. --cov-report=term-missing --cov-config=pyproject.toml"
asyncio_mode = "auto"

[tool.coverage.run]
source = ["."]
omit = [
  "tests/*",
  ".venv/*",
  "__pycache__/*",
]

[tool.coverage.report]
fail_under = 70
show_missing = true
exclude_lines = [
  "pragma: no cover",
  "if __name__ == .__main__.",
]
The project maintains a minimum coverage threshold of 70%.

Writing Tests

Test Fixtures

Common fixtures for testing:
tests/conftest.py
import pytest
from PIL import Image
import io

@pytest.fixture
def sample_image() -> bytes:
    """Create a minimal valid JPEG image."""
    img = Image.new("RGB", (100, 100), color=(128, 128, 128))
    buf = io.BytesIO()
    img.save(buf, format="JPEG")
    return buf.getvalue()

@pytest.fixture
def mock_settings():
    """Mock settings for testing."""
    from config import Settings
    return Settings(
        environment="test",
        log_level="ERROR",
    )

Testing Async Functions

JARVIS uses pytest-asyncio for async tests:
import pytest

@pytest.mark.asyncio
async def test_pipeline_creates_person():
    """Test that pipeline creates a person record."""
    pipeline = CapturePipeline(...)
    result = await pipeline.process(
        capture_id="test_001",
        data=sample_image,
        content_type="image/jpeg",
    )
    assert result.success is True
    assert len(result.persons_created) == 1

Test Doubles (Mocks)

Use test doubles to avoid external dependencies:
class FakeDetector:
    """Test double for face detection."""
    
    def __init__(self, faces: list[DetectedFace] | None = None):
        self._faces = faces or []
    
    @property
    def configured(self) -> bool:
        return True
    
    async def detect_faces(self, request: FaceDetectionRequest) -> FaceDetectionResult:
        return FaceDetectionResult(
            faces=self._faces,
            frame_width=100,
            frame_height=100,
            success=True,
        )

Example Tests

Configuration Tests

tests/test_config.py
from config import Settings

def test_settings_defaults():
    s = Settings()
    assert s.app_name == "JARVIS API"
    assert s.environment == "development"
    assert s.log_level == "INFO"
    assert s.api_port == 8000

def test_settings_service_flags_all_unconfigured():
    s = Settings()
    flags = s.service_flags()
    
    assert isinstance(flags, dict)
    for key, value in flags.items():
        assert value is False, f"{key} should be False without env vars"

Pipeline Tests

tests/test_pipeline.py
import pytest
from pipeline import CapturePipeline
from identification.models import DetectedFace, BoundingBox

@pytest.mark.asyncio
async def test_pipeline_no_faces_detected():
    """Pipeline completes successfully even when no faces are found."""
    detector = FakeDetector(faces=[])
    embedder = ArcFaceEmbedder()
    db = InMemoryDatabaseGateway()
    pipeline = CapturePipeline(detector=detector, embedder=embedder, db=db)
    
    result = await pipeline.process(
        capture_id="cap_test001",
        data=sample_image,
        content_type="image/jpeg",
    )
    
    assert result.success is True
    assert result.total_frames == 1
    assert result.faces_detected == 0
    assert result.persons_created == []

@pytest.mark.asyncio
async def test_pipeline_one_face_creates_person():
    """Pipeline detects one face and creates a person record."""
    face = DetectedFace(
        bbox=BoundingBox(x=0.1, y=0.2, width=0.3, height=0.4),
        confidence=0.95,
    )
    detector = FakeDetector(faces=[face])
    embedder = ArcFaceEmbedder()
    db = InMemoryDatabaseGateway()
    pipeline = CapturePipeline(detector=detector, embedder=embedder, db=db)
    
    result = await pipeline.process(
        capture_id="cap_test002",
        data=sample_image,
        content_type="image/jpeg",
    )
    
    assert result.success is True
    assert result.faces_detected == 1
    assert len(result.persons_created) == 1
    
    # Verify person is in the database
    person = await db.get_person(result.persons_created[0])
    assert person is not None
    assert person["confidence"] == 0.95
    assert len(person["embedding"]) == 512

API Endpoint Tests

tests/test_health.py
import pytest
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

def test_health_endpoint():
    response = client.get("/health")
    assert response.status_code == 200
    data = response.json()
    assert data["status"] == "ok"
    assert "services" in data

def test_health_endpoint_returns_service_flags():
    response = client.get("/health")
    data = response.json()
    services = data["services"]
    
    assert "convex" in services
    assert "mongodb" in services
    assert "openai" in services
    assert isinstance(services["convex"], bool)

Integration Tests

Integration tests verify end-to-end workflows:
tests/test_integration.py
import pytest
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

@pytest.mark.integration
def test_capture_to_identification_flow(sample_image):
    """Test full capture → identification workflow."""
    # Upload image
    response = client.post(
        "/api/capture",
        files={"file": ("test.jpg", sample_image, "image/jpeg")},
    )
    assert response.status_code == 200
    data = response.json()
    capture_id = data["capture_id"]
    
    # Verify capture was created
    # (In real tests, you'd poll or use webhooks)
    assert capture_id is not None

Test Markers

Use pytest markers to categorize tests:
import pytest

@pytest.mark.unit
def test_config_defaults():
    """Unit test: configuration defaults."""
    pass

@pytest.mark.integration
async def test_full_pipeline():
    """Integration test: end-to-end pipeline."""
    pass

@pytest.mark.slow
async def test_browser_agent():
    """Slow test: requires browser automation."""
    pass

@pytest.mark.skip(reason="Requires PimEyes account")
async def test_pimeyes_search():
    """Always skipped: requires a PimEyes account (use skipif to skip conditionally)."""
    pass
Run specific markers:
# Run only unit tests
pytest -m unit

# Run everything except slow tests
pytest -m "not slow"

# Run integration tests
pytest -m integration

Mocking External Services

Mock OpenAI API

from unittest.mock import patch, Mock, AsyncMock

@pytest.mark.asyncio
@patch("openai.AsyncOpenAI")
async def test_vision_extraction(mock_openai):
    mock_client = Mock()
    mock_openai.return_value = mock_client
    
    # Configure mock response — use AsyncMock so the awaited call works
    mock_client.chat.completions.create = AsyncMock(return_value={
        "choices": [{
            "message": {
                "content": '{"name": "John Doe"}'
            }
        }]
    })
    
    # Test your code
    result = await extract_identity(sample_image)
    assert result["name"] == "John Doe"

Mock Browser Use

@patch("browser_use.Agent")
async def test_linkedin_agent(mock_agent):
    mock_instance = Mock()
    mock_agent.return_value = mock_instance
    # AsyncMock so the awaited browser call returns the fake result
    mock_instance.run = AsyncMock(return_value={"name": "Jane Smith"})
    
    agent = LinkedInAgent("Jane Smith", {})
    result = await agent.run()
    
    assert result["name"] == "Jane Smith"

Performance Testing

Benchmark Tests

import time
import pytest

@pytest.mark.benchmark
def test_face_detection_performance(benchmark, sample_image):
    """Benchmark face detection speed."""
    detector = MediaPipeFaceDetector()
    
    def run():
        return detector.detect_faces(sample_image)
    
    result = benchmark(run)
    assert result.success is True

@pytest.mark.asyncio
async def test_pipeline_under_load():
    """Test pipeline handles concurrent requests."""
    import asyncio
    
    async def process_one():
        return await pipeline.process("test", sample_image, "image/jpeg")
    
    # Process 10 images concurrently
    results = await asyncio.gather(*[process_one() for _ in range(10)])
    
    assert all(r.success for r in results)

Continuous Integration

Example GitHub Actions workflow:
.github/workflows/test.yml
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      
      - name: Install dependencies
        run: |
          pip install uv
          uv pip install -e ".[dev]"
      
      - name: Run tests
        run: pytest --cov --cov-report=xml
      
      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.xml

Best Practices

Use clear, descriptive test names that explain what is being tested:
# Good
def test_pipeline_creates_person_record_when_face_detected():
    pass

# Bad
def test_pipeline():
    pass
Each test should verify a single behavior:
# Good
def test_detector_returns_face_when_present():
    pass

def test_detector_returns_empty_when_no_face():
    pass

# Bad
def test_detector():
    # Tests multiple scenarios in one function
    pass
Extract common setup code into fixtures:
@pytest.fixture
def pipeline():
    return CapturePipeline(
        detector=FakeDetector(),
        embedder=ArcFaceEmbedder(),
        db=InMemoryDatabaseGateway(),
    )

async def test_something(pipeline):
    result = await pipeline.process(...)
    assert result.success
Never make real API calls in tests. Use mocks:
@patch("requests.get")
def test_api_call(mock_get):
    mock_get.return_value.json.return_value = {"status": "ok"}
    result = call_external_api()
    assert result["status"] == "ok"

Next Steps

Observability

Set up Laminar tracing for production debugging

Performance

Learn about performance optimization

Contributing

Guidelines for contributing code

CI/CD

Set up continuous integration

Build docs developers (and LLMs) love