The LeRobotDataset class is the main interface for loading and using robot learning datasets in LeRobot.
Loading a Dataset
From Hugging Face Hub
from lerobot.datasets.lerobot_dataset import LeRobotDataset
# Load entire dataset
dataset = LeRobotDataset("lerobot/pusht")
print(f"Loaded {len(dataset)} frames from {dataset.num_episodes} episodes")
From Local Directory
from pathlib import Path
# Load from local directory
dataset = LeRobotDataset(
"my-dataset",
root="/path/to/datasets"
)
# Or use default cache location (HF_LEROBOT_HOME)
import os
os.environ["HF_LEROBOT_HOME"] = "/path/to/cache"
dataset = LeRobotDataset("lerobot/pusht") # Downloads to HF_LEROBOT_HOME
Loading Specific Episodes
# Load only specific episodes (faster)
dataset = LeRobotDataset(
"lerobot/aloha_mobile_cabinet",
episodes=[0, 1, 5, 10] # Only load these episodes
)
print(f"Loaded {dataset.num_episodes} episodes") # 4
Accessing Data
Single Samples
# Get a single frame
sample = dataset[0]
# Sample is a dictionary with all features
print(sample.keys())
# dict_keys(['observation.state', 'observation.images.top',
# 'action', 'timestamp', 'frame_index', 'episode_index',
# 'index', 'task_index', 'task'])
# Access specific modalities
state = sample["observation.state"] # torch.Tensor [14]
image = sample["observation.images.top"] # torch.Tensor [3, 480, 640]
action = sample["action"] # torch.Tensor [14]
task = sample["task"] # str: "Push the T-shaped block"
Using with DataLoader
import torch
from torch.utils.data import DataLoader
dataset = LeRobotDataset("lerobot/pusht")
dataloader = DataLoader(
dataset,
batch_size=32,
shuffle=True,
num_workers=4,
pin_memory=True
)
for batch in dataloader:
states = batch["observation.state"] # [32, 14]
images = batch["observation.images.top"] # [32, 3, 480, 640]
actions = batch["action"] # [32, 14]
# Your training code here
loss = model(states, images, actions)
loss.backward()
Delta Timestamps
Load temporal sequences for history or future frames:
# Load 2 past frames, current, and 2 future frames for actions
delta_timestamps = {
"action": [-0.1, -0.05, 0, 0.05, 0.1], # 5 frames at 20Hz (0.05s apart)
"observation.state": [-0.1, -0.05, 0], # 3 past + current
}
dataset = LeRobotDataset(
"lerobot/pusht",
delta_timestamps=delta_timestamps
)
sample = dataset[100]
print(sample["action"].shape) # [5, 14] - sequence of 5 actions
print(sample["observation.state"].shape) # [3, 14] - sequence of 3 states
# Check which frames are padded (at episode boundaries)
print(sample["action_is_pad"]) # [False, False, False, False, True]
With FPS-based Indices
from lerobot.datasets.factory import resolve_delta_timestamps
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
# Load metadata first
meta = LeRobotDatasetMetadata("lerobot/pusht")
# Define delta in terms of frame indices
class PolicyConfig:
    """Minimal stand-in for a policy config exposing frame-delta indices.

    `resolve_delta_timestamps` reads these attributes and converts the
    frame offsets into timestamps using the dataset's FPS.
    """

    observation_delta_indices = [-2, -1, 0]  # 2 past frames + current (3 frames)
    action_delta_indices = [0, 1, 2, 3]  # current + 3 future
policy_cfg = PolicyConfig()
# Convert to timestamps based on dataset FPS
delta_timestamps = resolve_delta_timestamps(policy_cfg, meta)
print(delta_timestamps)
# {'observation.state': [-0.066, -0.033, 0],
# 'action': [0, 0.033, 0.066, 0.1]}
dataset = LeRobotDataset(
"lerobot/pusht",
delta_timestamps=delta_timestamps
)
Image Transforms
from torchvision.transforms import v2 as transforms
# Define transforms
image_transforms = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomRotation(degrees=15),
transforms.ColorJitter(brightness=0.2, contrast=0.2),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
])
dataset = LeRobotDataset(
"lerobot/aloha_mobile_cabinet",
image_transforms=image_transforms
)
sample = dataset[0]
print(sample["observation.images.top"].shape) # [3, 224, 224]
Using Built-in Transform Pipeline
from lerobot.datasets.transforms import ImageTransforms
from lerobot.configs.train import ImageTransformsConfig
# Configure transforms
transform_cfg = ImageTransformsConfig(
enable=True,
max_num_transforms=3,
random_order=False,
brightness=0.2,
contrast=0.2,
saturation=0.2,
hue=0.05,
sharpness=0.2,
)
image_transforms = ImageTransforms(transform_cfg)
dataset = LeRobotDataset(
"lerobot/pusht",
image_transforms=image_transforms
)
Video Backend Selection
# Use torchcodec (faster, default if available)
dataset = LeRobotDataset(
"lerobot/aloha_mobile_cabinet",
video_backend="torchcodec"
)
# Use PyAV (more compatible)
dataset = LeRobotDataset(
"lerobot/aloha_mobile_cabinet",
video_backend="pyav"
)
# Use video_reader (requires custom build)
dataset = LeRobotDataset(
"lerobot/aloha_mobile_cabinet",
video_backend="video_reader"
)
Dataset Properties
dataset = LeRobotDataset("lerobot/pusht")
# Basic info
print(f"Repository ID: {dataset.repo_id}")
print(f"Root directory: {dataset.root}")
print(f"FPS: {dataset.fps}")
print(f"Number of episodes: {dataset.num_episodes}")
print(f"Number of frames: {dataset.num_frames}")
# Features
print(f"Features: {list(dataset.features.keys())}")
print(f"Feature shapes: {dataset.meta.shapes}")
print(f"Feature names: {dataset.meta.names}")
# Visual modalities
print(f"Camera keys: {dataset.meta.camera_keys}")
print(f"Video keys: {dataset.meta.video_keys}")
print(f"Image keys: {dataset.meta.image_keys}")
# Metadata
print(f"Robot type: {dataset.meta.robot_type}")
print(f"Total tasks: {dataset.meta.total_tasks}")
Working with Episodes
# Access episode metadata
episodes = dataset.meta.episodes
for i in range(dataset.num_episodes):
ep = episodes[i]
print(f"Episode {i}:")
print(f" Length: {ep['length']} frames")
print(f" Duration: {ep['length'] / dataset.fps:.2f}s")
print(f" Tasks: {ep['tasks']}")
print(f" Dataset indices: {ep['dataset_from_index']}-{ep['dataset_to_index']}")
# Get all frames from a specific episode
ep_idx = 5
ep = episodes[ep_idx]
from_idx = ep["dataset_from_index"]
to_idx = ep["dataset_to_index"]
episode_frames = [dataset[i] for i in range(from_idx, to_idx)]
print(f"Episode {ep_idx} has {len(episode_frames)} frames")
Statistics and Normalization
import torch
# Access pre-computed statistics
state_stats = dataset.meta.stats["observation.state"]
action_stats = dataset.meta.stats["action"]
print(f"State mean: {state_stats['mean']}")
print(f"State std: {state_stats['std']}")
print(f"State min: {state_stats['min']}")
print(f"State max: {state_stats['max']}")
# Manual normalization
def normalize(x, stats):
    """Normalize `x` using pre-computed dataset statistics.

    Args:
        x: Tensor to normalize (broadcastable with the stats entries).
        stats: Mapping with "mean" and "std" entries (lists, arrays, or
            tensors, as stored in `dataset.meta.stats`).

    Returns:
        Tensor of the same shape as `x`, normalized as `(x - mean) / std`.
    """
    # as_tensor avoids an extra copy (and a UserWarning) when the stats
    # are already torch tensors, unlike torch.tensor which always copies.
    mean = torch.as_tensor(stats["mean"])
    std = torch.as_tensor(stats["std"])
    return (x - mean) / std
def denormalize(x_norm, stats):
    """Invert `normalize`, mapping a normalized tensor back to raw units.

    Args:
        x_norm: Normalized tensor (broadcastable with the stats entries).
        stats: Mapping with "mean" and "std" entries (lists, arrays, or
            tensors, as stored in `dataset.meta.stats`).

    Returns:
        Tensor of the same shape as `x_norm`, computed as `x_norm * std + mean`.
    """
    # as_tensor avoids an extra copy (and a UserWarning) when the stats
    # are already torch tensors, unlike torch.tensor which always copies.
    mean = torch.as_tensor(stats["mean"])
    std = torch.as_tensor(stats["std"])
    return x_norm * std + mean
sample = dataset[0]
normalized_state = normalize(sample["observation.state"], state_stats)
original_state = denormalize(normalized_state, state_stats)
Advanced Options
Download Control
# Skip video download (use for state-only training)
dataset = LeRobotDataset(
"lerobot/aloha_mobile_cabinet",
download_videos=False
)
# Force cache refresh
dataset = LeRobotDataset(
"lerobot/pusht",
force_cache_sync=True
)
# Use specific revision/branch
dataset = LeRobotDataset(
"lerobot/pusht",
revision="v3.0" # or a git branch/tag
)
Tolerance Settings
# Adjust timestamp tolerance for video frame extraction
dataset = LeRobotDataset(
"lerobot/pusht",
tolerance_s=1e-3 # Default is 1e-4 (0.1ms)
)
Example: Training Loop
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
# Load dataset with temporal context
delta_timestamps = {
"observation.state": [-0.1, -0.05, 0],
"action": [0, 0.033, 0.066, 0.1], # predict 4 future actions
}
dataset = LeRobotDataset(
"lerobot/pusht",
delta_timestamps=delta_timestamps
)
dataloader = DataLoader(
dataset,
batch_size=32,
shuffle=True,
num_workers=4
)
model = YourPolicy()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()
for epoch in range(100):
for batch in dataloader:
# Get observations
state = batch["observation.state"] # [B, 3, state_dim]
image = batch["observation.images.top"] # [B, 3, H, W]
# Get action labels
action = batch["action"] # [B, 4, action_dim]
action_is_pad = batch["action_is_pad"] # [B, 4]
# Forward pass
pred_action = model(state, image)
# Compute loss (ignore padded actions)
loss = criterion(pred_action[~action_is_pad], action[~action_is_pad])
# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"Epoch {epoch}, Loss: {loss.item():.4f}")
See Also
- Datasets Overview - Dataset format and structure
- Subtasks - Temporal sequences and subtasks
- Dataset Tools - Manipulating datasets