Skip to main content
Load data from any REST API using dlt’s declarative configuration. The REST API source handles pagination, authentication, and nested resources automatically.

Quick Start

Here’s a complete example loading GitHub issues and comments:
import dlt
from dlt.sources.rest_api import rest_api_source

# Client settings: the GitHub repository base URL plus bearer authentication,
# with the token read from dlt secrets.
github_client = {
    "base_url": "https://api.github.com/repos/dlt-hub/dlt/",
    "auth": {"type": "bearer", "token": dlt.secrets["github_token"]},
}

# Parent resource: open issues, 100 per page.
issues_resource = {
    "name": "issues",
    "endpoint": {
        "path": "issues",
        "params": {"state": "open", "per_page": 100},
    },
}

# Child resource: {resources.issues.number} references the parent issue, and
# the parent's "number" field is included in the child table.
comments_resource = {
    "name": "comments",
    "endpoint": {"path": "issues/{resources.issues.number}/comments"},
    "include_from_parent": ["number"],
}

source = rest_api_source(
    {
        "client": github_client,
        "resources": [issues_resource, comments_resource],
    }
)

# Load into a local DuckDB destination under the "github_data" dataset.
pipeline = dlt.pipeline(
    pipeline_name="github_api",
    destination="duckdb",
    dataset_name="github_data",
)

load_info = pipeline.run(source)
print(load_info)

Configuration

Using RESTAPIConfig

For better type hints and IDE support, use the RESTAPIConfig type:
from typing import Optional

from dlt.sources.rest_api import RESTAPIConfig, rest_api_resources

@dlt.source
def github_source(access_token: Optional[str] = dlt.secrets.value):
    """Declare the GitHub issues resources for the dlt-hub/dlt repository.

    Args:
        access_token: Bearer token for the GitHub API, injected from dlt
            secrets when not passed explicitly. It is annotated as
            ``Optional[str]`` so that dlt treats the secret as optional:
            without the annotation a missing secret is reported as a
            configuration error before this function runs, and the
            no-token fallback below could never be reached.

    Yields:
        The resources that ``rest_api_resources`` creates from the config.
    """
    config: RESTAPIConfig = {
        "client": {
            "base_url": "https://api.github.com/repos/dlt-hub/dlt/",
            # No token: pass None rather than a bearer block with an empty token.
            "auth": (
                {
                    "type": "bearer",
                    "token": access_token,
                }
                if access_token
                else None
            ),
        },
        # Settings shared by the resources listed below.
        "resource_defaults": {
            "primary_key": "id",
            "write_disposition": "merge",
            "endpoint": {
                "params": {
                    "per_page": 100,
                },
            },
        },
        "resources": [
            {
                "name": "issues",
                "endpoint": {
                    "path": "issues",
                    "params": {
                        "sort": "updated",
                        "direction": "desc",
                        "state": "open",
                    },
                },
            },
        ],
    }

    yield from rest_api_resources(config)

Authentication

# Bearer-token authentication; the token is read from dlt secrets instead of
# being written into the config.
bearer_auth = {"type": "bearer", "token": dlt.secrets["api_token"]}

config = {
    "client": {
        "base_url": "https://api.example.com/",
        "auth": bearer_auth,
    },
}

Pagination

The REST API source automatically detects and handles pagination:
# Automatic pagination detection: the client sets no "paginator" key,
# so the paginator is inferred.
pokeapi_client = {"base_url": "https://pokeapi.co/api/v2/"}

source = rest_api_source(
    {
        "client": pokeapi_client,
        "resources": ["pokemon", "berry"],
    }
)
Or configure it explicitly:
# Explicit paginator settings, used instead of automatic detection.
json_link_paginator = {"type": "json_link", "next_url_path": "paging.next"}

config = {
    "client": {
        "base_url": "https://api.example.com/",
        "paginator": json_link_paginator,
    },
}

Resource Relationships

Load nested resources by referencing parent resource fields:
# Parent resource; its endpoint is given as a plain string.
issues_resource = {"name": "issues", "endpoint": "issues"}

issue_comments_resource = {
    "name": "issue_comments",
    # Use {resources.issues.number} to reference the parent resource.
    "endpoint": {"path": "issues/{resources.issues.number}/comments"},
    # Parent fields to include in the child table.
    "include_from_parent": ["id", "number"],
}

config = {"resources": [issues_resource, issue_comments_resource]}

Incremental Loading

Combine the REST API source with incremental loading:
from dlt.common.pendulum import pendulum

# Initial cursor value: 30 days before pendulum.today(), as an ISO 8601 string.
initial_cursor_value = pendulum.today().subtract(days=30).to_iso8601_string()

issues_endpoint = {
    "path": "issues",
    # The "since" query parameter is filled through the
    # {incremental.start_value} placeholder.
    "params": {"since": "{incremental.start_value}"},
    "incremental": {
        "cursor_path": "updated_at",
        "initial_value": initial_cursor_value,
    },
}

config: RESTAPIConfig = {
    "client": {"base_url": "https://api.github.com/repos/dlt-hub/dlt/"},
    "resources": [{"name": "issues", "endpoint": issues_endpoint}],
}

Testing Connection

Verify your API configuration before running the pipeline:
from dlt.sources.rest_api import check_connection

source = rest_api_source(config)

# Test the "issues" endpoint only; the call returns a success flag together
# with an error message.
can_connect, error_msg = check_connection(source, "issues")

if can_connect:
    print("Connection successful!")
else:
    print(f"Connection failed: {error_msg}")

Complete Example: Pokemon API

import dlt
from dlt.sources.rest_api import rest_api_source

def load_pokemon():
    """Load the pokemon, berry and location resources into DuckDB and print the load info."""
    # Declarative source config: "limit": 1000 is set once in
    # resource_defaults for the three resources.
    pokeapi_config = {
        "client": {"base_url": "https://pokeapi.co/api/v2/"},
        "resource_defaults": {"endpoint": {"params": {"limit": 1000}}},
        "resources": ["pokemon", "berry", "location"],
    }

    pipeline = dlt.pipeline(
        pipeline_name="pokemon_api",
        destination="duckdb",
        dataset_name="pokemon_data",
    )

    pokemon_source = rest_api_source(pokeapi_config)
    print(pipeline.run(pokemon_source))

# Run the load only when this file is executed as a script, not on import.
if __name__ == "__main__":
    load_pokemon()

Next Steps

Incremental Loading

Add incremental loading to track changes

Schema Evolution

Handle schema changes automatically

Build docs developers (and LLMs) love