Skip to main content

Data Structure

The F1 Stats Archive organizes data in a hierarchical directory structure:
├── 2024/
│   ├── events.json
│   ├── bahrain-grand-prix/
│   │   ├── event_info.json
│   │   ├── results.json
│   │   ├── quali_results.json
│   │   ├── laptimes.json
│   │   ├── pitstops.json
│   │   ├── driverPoints.json
│   │   └── teamPoints.json
│   ├── saudi-arabian-grand-prix/
│   └── ...
├── 2025/
└── ...

Reading JSON Data with Python

Basic File Reading

Here’s how to read race results from the archive:
import json
from pathlib import Path

# Read race results.
# JSON text is UTF-8; pass the encoding explicitly so names containing
# non-ASCII characters decode correctly regardless of the platform locale.
results_file = Path("2024/bahrain-grand-prix/results.json")
with open(results_file, "r", encoding="utf-8") as f:
    results_data = json.load(f)

# Access race information; the "Races" list may be empty, so check it
# before indexing the first entry.
races = results_data["MRData"]["RaceTable"]["Races"]
if races:
    race = races[0]
    print(f"Race: {race['raceName']}")
    print(f"Circuit: {race['Circuit']['circuitName']}")

Traversing the Directory Structure

Use Python’s pathlib module to programmatically explore the archive:
from pathlib import Path
import json

def get_all_seasons():
    """Return the sorted names of all season directories.

    A season directory is any directory directly inside the current
    working directory whose name consists only of digits.
    """
    season_names = []
    for entry in Path(".").iterdir():
        # Skip plain files and directories that are not named like a year.
        if not entry.is_dir():
            continue
        if entry.name.isdigit():
            season_names.append(entry.name)
    season_names.sort()
    return season_names

def get_races_for_season(year):
    """Get all races for a specific season.

    Args:
        year: Season identifier (e.g. ``2024``). It is converted with
            ``str`` and used as the season directory path.

    Returns:
        The ``MRData -> RaceTable -> Races`` list read from the season's
        ``events.json``.

    Raises:
        FileNotFoundError: If the season's ``events.json`` does not exist.
        KeyError: If the file lacks the ``MRData``/``RaceTable``/``Races``
            keys.
    """
    events_file = Path(str(year)) / "events.json"

    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can mangle non-ASCII race or circuit names.
    with open(events_file, "r", encoding="utf-8") as f:
        events_data = json.load(f)

    return events_data["MRData"]["RaceTable"]["Races"]

def get_race_folder_name(race_name):
    """Build the folder name for a race: lowercase, spaces turned into hyphens."""
    lowered = race_name.lower()
    # Splitting on a literal space keeps empty pieces, so every single
    # space maps to exactly one hyphen.
    words = lowered.split(" ")
    return "-".join(words)
Race folder names are created by converting the race name to lowercase and replacing spaces with hyphens (for example, “Bahrain Grand Prix” becomes “bahrain-grand-prix”).

Common Query Examples

Find All Races in 2024

import json
from pathlib import Path

def get_2024_races():
    """Print round, name, date and circuit for every race of the 2024 season.

    Reads ``2024/events.json`` relative to the current working directory
    and prints one four-line entry (the last line blank) per race.

    Raises:
        FileNotFoundError: If ``2024/events.json`` does not exist.
        KeyError: If the file or a race entry lacks an expected key.
    """
    events_file = Path("2024/events.json")

    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can mangle non-ASCII race or circuit names.
    with open(events_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    races = data["MRData"]["RaceTable"]["Races"]

    for race in races:
        print(f"Round {race['round']}: {race['raceName']}")
        print(f"  Date: {race['date']}")
        print(f"  Circuit: {race['Circuit']['circuitName']}")
        print()

# Example usage: print every race listed in 2024/events.json
get_2024_races()

Get Fastest Lap Times for a Race

import json
from pathlib import Path

def _lap_time_to_seconds(lap_time):
    """Convert a lap-time string such as "1:32.123" or "59.900" to seconds.

    Colon-separated fields are folded base-60, so "m:ss.fff" and
    "h:mm:ss.fff" are both handled. A value that cannot be parsed maps to
    infinity so it never beats a valid time.
    """
    try:
        seconds = 0.0
        for field in lap_time.split(":"):
            seconds = seconds * 60 + float(field)
    except (AttributeError, ValueError):
        return float("inf")
    return seconds


def get_fastest_laps(year, race_folder):
    """Print each driver's fastest lap time from a race.

    Args:
        year: Season directory (e.g. ``2024``).
        race_folder: Race folder name inside the season directory
            (e.g. ``"bahrain-grand-prix"``).

    Returns:
        None. Output is printed, sorted by driver id. Nothing is printed
        when the file contains no races.

    Raises:
        FileNotFoundError: If ``laptimes.json`` does not exist.
        KeyError: If the file or a timing entry lacks an expected key.
    """
    laptimes_file = Path(f"{year}/{race_folder}/laptimes.json")

    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can mangle non-ASCII text.
    with open(laptimes_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    races = data["MRData"]["RaceTable"]["Races"]
    if not races:
        return

    race = races[0]
    print(f"Race: {race['raceName']}")
    print("\nFastest laps per driver:")

    # Map driver id -> (seconds, original string). Lap times are strings,
    # and comparing them lexicographically is wrong ("1:35.000" sorts
    # before "59.900"), so compare the parsed seconds and keep the original
    # text for display.
    driver_fastest = {}

    for lap in race.get("Laps", []):
        for timing in lap.get("Timings", []):
            driver_id = timing["driverId"]
            lap_time = timing["time"]
            seconds = _lap_time_to_seconds(lap_time)

            best = driver_fastest.get(driver_id)
            if best is None or seconds < best[0]:
                driver_fastest[driver_id] = (seconds, lap_time)

    for driver_id in sorted(driver_fastest):
        print(f"  {driver_id}: {driver_fastest[driver_id][1]}")

# Example usage: print each driver's fastest lap from 2024/bahrain-grand-prix/laptimes.json
get_fastest_laps(2024, "bahrain-grand-prix")

Query Race Results

import json
from pathlib import Path

def get_race_podium(year, race_folder):
    """Print the top 3 finishers from a race.

    Args:
        year: Season directory (e.g. ``2024``).
        race_folder: Race folder name inside the season directory
            (e.g. ``"bahrain-grand-prix"``).

    Returns:
        None. The first three entries of the race's ``Results`` list are
        printed in file order. Nothing is printed when the file contains
        no races.

    Raises:
        FileNotFoundError: If ``results.json`` does not exist.
        KeyError: If the file or a result entry lacks an expected key.
    """
    results_file = Path(f"{year}/{race_folder}/results.json")

    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can mangle non-ASCII driver or team names.
    with open(results_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    races = data["MRData"]["RaceTable"]["Races"]
    if not races:
        return

    # A race entry without "Results" yields just the header line.
    results = races[0].get("Results", [])

    print(f"Podium for {races[0]['raceName']}:\n")
    for result in results[:3]:
        position = result["position"]
        driver = result["Driver"]
        constructor = result["Constructor"]

        print(f"{position}. {driver['givenName']} {driver['familyName']}")
        print(f"   Team: {constructor['name']}")
        print()

# Example usage: print the top 3 finishers from 2024/bahrain-grand-prix/results.json
get_race_podium(2024, "bahrain-grand-prix")

Analyze Driver Points Progression

import json
from pathlib import Path

def get_driver_points_after_race(year, race_folder):
    """Print the driver championship standings after a specific race.

    Args:
        year: Season directory (e.g. ``2024``).
        race_folder: Race folder name inside the season directory
            (e.g. ``"bahrain-grand-prix"``).

    Returns:
        None. Every entry of the first standings list is printed in file
        order. Nothing is printed when the file has no standings lists.

    Raises:
        FileNotFoundError: If ``driverPoints.json`` does not exist.
        KeyError: If the file or a standing entry lacks an expected key.
    """
    points_file = Path(f"{year}/{race_folder}/driverPoints.json")

    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can mangle non-ASCII driver names.
    with open(points_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    standings_list = data["MRData"]["StandingsTable"]["StandingsLists"]
    if not standings_list:
        return

    standings = standings_list[0]
    print(f"Driver Standings after Round {standings['round']}:\n")

    for standing in standings["DriverStandings"]:
        position = standing["position"]
        driver = standing["Driver"]
        points = standing["points"]
        wins = standing["wins"]

        print(f"{position}. {driver['givenName']} {driver['familyName']}")
        print(f"   Points: {points} | Wins: {wins}")
        print()

# Example usage: print the driver standings from 2024/bahrain-grand-prix/driverPoints.json
get_driver_points_after_race(2024, "bahrain-grand-prix")

Working with Multiple Races

from pathlib import Path
import json

def analyze_season(year):
    """Print the winner and date of every race in a season.

    Args:
        year: Season identifier (e.g. ``2024``). It is converted with
            ``str`` and used as the season directory path.

    Returns:
        None. A header is printed, followed by one entry per race folder
        (in sorted folder order) that has a ``results.json`` with at least
        one result. Folders without usable results are skipped silently.

    Raises:
        FileNotFoundError: If the season directory does not exist.
        KeyError: If a results file lacks an expected key.
    """
    season_dir = Path(str(year))

    # Every subdirectory counts as a race folder, whether or not it
    # contains results, so the total may exceed the number of races listed.
    race_folders = [d for d in season_dir.iterdir() if d.is_dir()]

    print(f"Season {year} Analysis")
    print(f"Total races: {len(race_folders)}\n")

    for race_folder in sorted(race_folders):
        results_file = race_folder / "results.json"
        if not results_file.exists():
            continue

        # Decode explicitly as UTF-8: the platform default encoding is
        # locale-dependent and can mangle non-ASCII driver names.
        with open(results_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        races = data["MRData"]["RaceTable"]["Races"]
        if not races:
            continue

        race = races[0]
        # "Results" may be missing or present but empty; both mean there
        # is no winner to report, so skip instead of indexing into it.
        results = race.get("Results")
        if not results:
            continue

        driver = results[0]["Driver"]

        print(f"{race['raceName']}")
        print(f"  Winner: {driver['givenName']} {driver['familyName']}")
        print(f"  Date: {race['date']}")
        print()

# Example usage: print the winner and date of each race found under the 2024/ directory
analyze_season(2024)
Not all races have complete data available. Always check if files exist and handle missing data gracefully.

Data Format Notes

  • All timestamps are in ISO 8601 format
  • Times are typically in UTC
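  • Lap times are formatted as strings (e.g., “1:32.123”); convert them to a numeric value such as seconds before comparing, because string comparison does not order times correctly (“59.900” sorts after “1:32.123”)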
  • The data follows the Ergast API JSON schema
  • Some historical races may have incomplete data

Build docs developers (and LLMs) love