Skip to main content
PostHog uses a comprehensive testing strategy combining Python pytest, JavaScript Jest, and end-to-end Playwright tests.

Test Types

Backend Tests

pytest for Django and business logic

Frontend Tests

Jest for React components and Kea logic

E2E Tests

Playwright for full user flows

Backend Testing (pytest)

Running Tests

# Run all tests
pytest

# Run with coverage
pytest --cov=posthog --cov-report=html

Test Configuration

Tests are configured in pytest.ini:
[pytest]
pythonpath = . common
env =
    DEBUG=1
    TEST=1
DJANGO_SETTINGS_MODULE = posthog.settings
addopts = -p no:warnings --reuse-db --ignore=posthog/user_scripts

markers =
    ee
    clickhouse_only
    skip_on_multitenancy
    async_migrations
    requires_secrets

asyncio_mode = auto
Key options:
  • --reuse-db: Reuse database between test runs (faster)
  • -p no:warnings: Suppress warnings
  • --ignore: Skip certain directories

Writing Backend Tests

Step 1: Test Structure

Organize tests in tests/ directories within products:
products/feature_flags/
  backend/
    tests/
      test_models.py
      test_logic.py
      test_api.py
      test_tasks.py
      conftest.py

Step 2: Basic Test Example

# products/feature_flags/backend/tests/test_logic.py
import pytest
from products.feature_flags.backend import logic
from products.feature_flags.backend.models import FeatureFlag

class TestFeatureFlagLogic:
    def test_create_feature_flag(self, team):
        """Test creating a feature flag."""
        flag = logic.create_feature_flag(
            team_id=team.id,
            key="test-flag",
            enabled=True,
        )
        
        assert flag.key == "test-flag"
        assert flag.enabled is True
        assert flag.team_id == team.id
    
    def test_duplicate_key_raises_error(self, team):
        """Test that duplicate keys are rejected."""
        logic.create_feature_flag(team_id=team.id, key="test-flag", enabled=True)
        
        with pytest.raises(ValueError, match="already exists"):
            logic.create_feature_flag(team_id=team.id, key="test-flag", enabled=False)

Step 3: Using Fixtures

# products/feature_flags/backend/tests/conftest.py
import pytest
from posthog.models import Team, Organization

@pytest.fixture
def organization():
    """Create test organization."""
    return Organization.objects.create(name="Test Org")

@pytest.fixture
def team(organization):
    """Create test team."""
    return Team.objects.create(
        organization=organization,
        name="Test Team",
    )

@pytest.fixture
def feature_flag(team):
    """Create test feature flag."""
    from products.feature_flags.backend.models import FeatureFlag
    return FeatureFlag.objects.create(
        team=team,
        key="test-flag",
        enabled=True,
    )

Parameterized Tests

Use @pytest.mark.parametrize for testing multiple inputs:
import pytest

class TestFeatureFlagEvaluation:
    @pytest.mark.parametrize(
        "flag_value,user_property,expected",
        [
            ("true", {"email": "[email protected]"}, True),
            ("false", {"email": "[email protected]"}, False),
            ("variant-a", {"plan": "enterprise"}, "variant-a"),
            ("variant-b", {"plan": "free"}, "variant-b"),
        ],
    )
    def test_flag_evaluation(
        self,
        team,
        flag_value,
        user_property,
        expected,
    ):
        """Test flag evaluation with different conditions."""
        result = evaluate_flag(team.id, flag_value, user_property)
        assert result == expected
Prefer parameterized tests over multiple assertions in a single test. This makes test failures clearer and exercises the system more thoroughly.

API Tests

Test Django REST Framework views:
from rest_framework.test import APIClient
import pytest

class TestFeatureFlagAPI:
    @pytest.fixture
    def api_client(self):
        return APIClient()
    
    def test_create_feature_flag_via_api(self, api_client, team, user):
        """Test creating feature flag via API."""
        api_client.force_authenticate(user=user)
        
        response = api_client.post(
            f"/api/projects/{team.id}/feature_flags/",
            {
                "key": "new-flag",
                "name": "New Flag",
                "enabled": True,
            },
            format="json",
        )
        
        assert response.status_code == 201
        assert response.json()["key"] == "new-flag"
    
    def test_list_feature_flags_filters_by_team(self, api_client, team, user):
        """Test that users only see their team's flags."""
        api_client.force_authenticate(user=user)
        
        # Create flags for this team
        FeatureFlag.objects.create(team=team, key="team-flag")
        
        # Create flag for different team
        other_team = Team.objects.create(organization=team.organization)
        FeatureFlag.objects.create(team=other_team, key="other-flag")
        
        response = api_client.get(f"/api/projects/{team.id}/feature_flags/")
        
        assert response.status_code == 200
        assert len(response.json()["results"]) == 1
        assert response.json()["results"][0]["key"] == "team-flag"

Testing with ClickHouse

Tests that query ClickHouse:
import pytest
from posthog.client import sync_execute

@pytest.mark.clickhouse_only
class TestEventQueries:
    def test_event_aggregation(self, team):
        """Test event count aggregation."""
        # Insert test events
        sync_execute(
            """
            INSERT INTO events (uuid, event, team_id, distinct_id, timestamp)
            VALUES (%(uuid)s, %(event)s, %(team_id)s, %(distinct_id)s, %(timestamp)s)
            """,
            {
                "uuid": uuid4(),
                "event": "$pageview",
                "team_id": team.id,
                "distinct_id": "user_123",
                "timestamp": datetime.now(),
            },
        )
        
        # Query events
        result = sync_execute(
            "SELECT count() FROM events WHERE team_id = %(team_id)s",
            {"team_id": team.id},
        )
        
        assert result[0][0] == 1

Frontend Testing (Jest)

Running Frontend Tests

# Run all frontend tests
pnpm --filter=@posthog/frontend test

# Run in watch mode
pnpm --filter=@posthog/frontend test:watch

Writing Frontend Tests

Step 1: Component Tests

// products/feature_flags/frontend/components/FeatureFlagForm.test.tsx
import { render, screen, fireEvent } from '@testing-library/react'
import { FeatureFlagForm } from './FeatureFlagForm'

describe('FeatureFlagForm', () => {
    it('renders form fields', () => {
        render(<FeatureFlagForm />)
        
        expect(screen.getByLabelText('Key')).toBeInTheDocument()
        expect(screen.getByLabelText('Name')).toBeInTheDocument()
        expect(screen.getByRole('button', { name: 'Save' })).toBeInTheDocument()
    })
    
    it('validates required fields', async () => {
        render(<FeatureFlagForm />)
        
        fireEvent.click(screen.getByRole('button', { name: 'Save' }))
        
        expect(await screen.findByText('Key is required')).toBeInTheDocument()
    })
})

Step 2: Kea Logic Tests

// products/feature_flags/frontend/logics/featureFlagsLogic.test.ts
import { expectLogic } from 'kea-test-utils'
import { featureFlagsLogic } from './featureFlagsLogic'

describe('featureFlagsLogic', () => {
    it('loads feature flags', async () => {
        await expectLogic(featureFlagsLogic)
            .toDispatchActions(['loadFeatureFlags', 'loadFeatureFlagsSuccess'])
            .toMatchValues({
                featureFlags: expect.arrayContaining([
                    expect.objectContaining({ key: 'test-flag' }),
                ]),
            })
    })
})

Jest Best Practices

Prefer a single top-level describe block per file. Organize tests with nested describe blocks only when it significantly improves clarity.
// ✅ Good
describe('FeatureFlagForm', () => {
    it('renders correctly', () => { /* ... */ })
    it('validates input', () => { /* ... */ })
    it('submits form', () => { /* ... */ })
})

// ❌ Avoid excessive nesting
describe('FeatureFlagForm', () => {
    describe('rendering', () => {
        describe('when loaded', () => {
            it('shows form', () => { /* ... */ })
        })
    })
})

End-to-End Testing (Playwright)

Running E2E Tests

# Run all E2E tests
pnpm --filter=@posthog/playwright test

# Run in headed mode (see browser)
pnpm --filter=@posthog/playwright test --headed

# Run specific test file
pnpm --filter=@posthog/playwright test feature-flags.spec.ts

# Debug mode
pnpm --filter=@posthog/playwright test --debug

Writing E2E Tests

// playwright/tests/feature-flags.spec.ts
import { test, expect } from '@playwright/test'

test.describe('Feature Flags', () => {
    test('create and enable feature flag', async ({ page }) => {
        await page.goto('/feature_flags')
        
        // Click new flag button
        await page.click('text=New feature flag')
        
        // Fill form
        await page.fill('[name="key"]', 'test-flag')
        await page.fill('[name="name"]', 'Test Flag')
        
        // Save
        await page.click('text=Save')
        
        // Verify created
        await expect(page.locator('text=test-flag')).toBeVisible()
    })
})

Selective Testing with Turbo

PostHog uses Turborepo for selective test execution based on changed files:
# Run tests only for changed products
pnpm turbo run backend:test

# See what would run without executing
pnpm turbo run backend:test --dry-run=json

# Force run all tests (ignore cache)
pnpm turbo run backend:test --force
How it works:
  1. Turbo tracks file changes since last test run
  2. Each product defines inputs in package.json:
    {
      "name": "@posthog/products-feature_flags",
      "scripts": {
        "backend:test": "pytest products/feature_flags/backend/tests"
      }
    }
    
  3. Only products with changed inputs are tested
  4. Contract files (facades) determine downstream dependencies
When you change a product’s facade contracts, downstream products that depend on it will also be retested.

Testing Best Practices

General Principles

  • No doc comments in Python tests - Test names should be self-explanatory
  • Prefer parameterized tests - Use @pytest.mark.parametrize for multiple cases
  • One assertion per concept - But use parameterized tests instead of multiple tests
  • Test behavior, not implementation - Focus on what, not how
  • Keep tests isolated - No shared state between tests

Code Examples

class TestFeatureFlagEvaluation:
    @pytest.mark.parametrize(
        "enabled,expected",
        [(True, "variant-a"), (False, None)],
    )
    def test_returns_variant_when_enabled(
        self,
        team,
        enabled,
        expected,
    ):
        result = evaluate_flag(team.id, enabled)
        assert result == expected

Security Testing

Always test team isolation:
def test_users_cannot_access_other_teams_flags(self, api_client, user, team):
    """Verify users can't access flags from other teams."""
    other_team = Team.objects.create(organization=team.organization)
    other_flag = FeatureFlag.objects.create(team=other_team, key="other-flag")
    
    api_client.force_authenticate(user=user)
    response = api_client.get(f"/api/projects/{team.id}/feature_flags/{other_flag.id}/")
    
    assert response.status_code == 404

Continuous Integration

Tests run automatically on every pull request:
  • Backend tests: pytest across all changed products
  • Frontend tests: Jest for changed components
  • E2E tests: Playwright for critical user flows
  • Type checking: TypeScript and mypy (baseline only)
  • Linting: Ruff (Python) and oxlint (TypeScript)
Do not run mypy for type checks locally - it takes too long. Rely on the baseline and LSP for type hints.

Next Steps

Contributing

Learn how to contribute to PostHog

Architecture

Understand the system architecture

Build docs developers (and LLMs) love