Data Visualization Guide

Visualize Formula 1 data with beautiful, interactive charts. This guide covers everything from simple plots to complex dashboards.

Setup

Install required visualization libraries:

pip install matplotlib seaborn plotly pandas numpy dash

Basic Plotting with Matplotlib

Driver Championship Progression

Track how championship standings evolve throughout a season:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Global look-and-feel for the figures in this example
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Read the three tables the chart is built from
driver_standings = pd.read_csv('data/driver_standings.csv')
races = pd.read_csv('data/races.csv')
drivers = pd.read_csv('data/drivers.csv')

# Keep only the standings rows that belong to races of the chosen season
year = 2024
races_2024 = races[races['year'] == year]
in_season = driver_standings['raceId'].isin(races_2024['raceId'])
standings_2024 = driver_standings[in_season]

# Attach the round number / race name and the driver identity columns,
# then order rows by round and standing position
race_columns = races_2024[['raceId', 'round', 'name']]
driver_columns = drivers[['driverId', 'forename', 'surname', 'code']]
championship_data = standings_2024.merge(race_columns, on='raceId')
championship_data = championship_data.merge(driver_columns, on='driverId')
championship_data = championship_data.sort_values(['round', 'position'])

# The five drivers with the highest points value reached during the season
peak_points = championship_data.groupby('driverId')['points'].max()
top_drivers = peak_points.nlargest(5).index

plt.figure(figsize=(14, 7))

# One line per driver, labelled with the driver's code
for top_driver_id in top_drivers:
    is_this_driver = championship_data['driverId'] == top_driver_id
    progression = championship_data[is_this_driver]
    legend_label = f"{progression['code'].iloc[0]}"
    plt.plot(
        progression['round'],
        progression['points'],
        marker='o',
        linewidth=2,
        label=legend_label,
    )

# Axis labels, title, legend and output
plt.xlabel('Race Round', fontsize=12)
plt.ylabel('Championship Points', fontsize=12)
plt.title(f'{year} F1 World Championship Progression', fontsize=14, fontweight='bold')
plt.legend(title='Driver', fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('championship_progression.png', dpi=300)
plt.show()

Constructor Performance Heatmap

Visualize constructor performance across seasons:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Read the tables the heatmap is built from
constructor_standings = pd.read_csv('data/constructor_standings.csv')
races = pd.read_csv('data/races.csv')
constructors = pd.read_csv('data/constructors.csv')

# Order every standings row chronologically, then keep the last value seen
# for each (season, constructor) pair, i.e. the end-of-season standing
race_columns = races[['raceId', 'year', 'round']]
team_columns = constructors[['constructorId', 'name']]
chronological = (
    constructor_standings
    .merge(race_columns, on='raceId')
    .merge(team_columns, on='constructorId')
    .sort_values(['year', 'round'])
)
per_season = chronological.groupby(['year', 'constructorId', 'name'])
final_standings = per_season.last().reset_index()

# Seasons from 2014 onwards, limited to the six teams with the most wins
recent_years = final_standings[final_standings['year'] >= 2014]
wins_by_team = recent_years.groupby('name')['wins'].sum()
top_teams = wins_by_team.nlargest(6).index

# Rows are teams, columns are seasons, cells are championship positions
is_top_team = recent_years['name'].isin(top_teams)
heatmap_data = recent_years[is_top_team].pivot(
    index='name', columns='year', values='position'
)

# Draw the annotated heatmap
heatmap_options = {
    'annot': True,
    'fmt': 'g',
    'cmap': 'RdYlGn_r',
    'cbar_kws': {'label': 'Championship Position'},
    'linewidths': 0.5,
}
plt.figure(figsize=(14, 8))
sns.heatmap(heatmap_data, **heatmap_options)

plt.title(
    'Constructor Championship Positions (2014-2024)',
    fontsize=14,
    fontweight='bold',
    pad=20,
)
plt.xlabel('Season', fontsize=12)
plt.ylabel('Constructor', fontsize=12)
plt.tight_layout()
plt.savefig('constructor_heatmap.png', dpi=300)
plt.show()

Interactive Plots with Plotly

Lap Time Evolution

Create an interactive lap time analysis:

import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Load data
lap_times = pd.read_csv('data/lap_times.csv')
races = pd.read_csv('data/races.csv')
drivers = pd.read_csv('data/drivers.csv')

# Get a specific race (e.g., Monaco 2024).
# na=False keeps the mask boolean even if a race name is missing.
monaco_races = races[
    (races['year'] == 2024) &
    (races['name'].str.contains('Monaco', na=False))
]
if monaco_races.empty:
    # Fail with a readable message instead of an IndexError from .iloc[0]
    raise ValueError("No 2024 race with 'Monaco' in its name found in data/races.csv")
monaco_2024 = monaco_races.iloc[0]['raceId']

# Get lap times for this race
race_laps = lap_times[lap_times['raceId'] == monaco_2024]

# Merge with driver info
race_laps = race_laps.merge(
    drivers[['driverId', 'code', 'forename', 'surname']],
    on='driverId'
)

# Convert milliseconds to seconds
race_laps['seconds'] = race_laps['milliseconds'] / 1000

# Get top 5 finishers.
# Rows are sorted by lap first so 'last' really is each driver's final lap.
# Drivers are then ranked by laps completed before position, so a car that
# retired early while running near the front is not mistaken for a finisher.
# NOTE(review): this is derived from lap data only; it does not reflect
# post-race penalties or disqualifications.
finish_order = (
    race_laps
    .sort_values('lap')
    .groupby('driverId')
    .agg(laps_completed=('lap', 'max'), last_position=('position', 'last'))
    .sort_values(['laps_completed', 'last_position'], ascending=[False, True])
)
top_5_drivers = finish_order.head(5).index
plot_data = race_laps[race_laps['driverId'].isin(top_5_drivers)]

# Create interactive plot: one trace per driver, in finishing order
fig = go.Figure()

for driver_id in top_5_drivers:
    # Sort by lap so the line is drawn in race order
    driver_laps = plot_data[plot_data['driverId'] == driver_id].sort_values('lap')
    driver_name = f"{driver_laps.iloc[0]['code']}"

    fig.add_trace(go.Scatter(
        x=driver_laps['lap'],
        y=driver_laps['seconds'],
        mode='lines+markers',
        name=driver_name,
        hovertemplate='<b>%{text}</b><br>' +
                      'Lap: %{x}<br>' +
                      'Time: %{y:.3f}s<br>' +
                      '<extra></extra>',
        # %{text} in the hover template needs one entry per point
        text=[driver_name] * len(driver_laps)
    ))

fig.update_layout(
    title='Monaco GP 2024 - Lap Time Evolution (Top 5)',
    xaxis_title='Lap Number',
    yaxis_title='Lap Time (seconds)',
    hovermode='closest',
    height=600,
    template='plotly_dark'
)

fig.write_html('lap_times_monaco.html')
fig.show()

Pit Stop Strategy Comparison

Visualize different pit stop strategies:

import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Load data
pit_stops = pd.read_csv('data/pit_stops.csv')
results = pd.read_csv('data/results.csv')
drivers = pd.read_csv('data/drivers.csv')
races = pd.read_csv('data/races.csv')

# Get a specific race
race_id = 1100  # Example race
race_info = races[races['raceId'] == race_id].iloc[0]

# Get pit stops for this race
race_pit_stops = pit_stops[pit_stops['raceId'] == race_id]

# Merge with driver info and results, ordered by finishing position
pit_data = (
    race_pit_stops
    .merge(drivers[['driverId', 'code']], on='driverId')
    .merge(results[['raceId', 'driverId', 'position']],
           on=['raceId', 'driverId'])
    .sort_values('position')
)

# Get top 10 finishers (only drivers with at least one recorded stop appear)
top_10 = pit_data[pit_data['position'] <= 10]

# One row per plotted driver, still in finishing order. The y-axis labels
# are taken from these rows rather than assumed to be P1..Pn, because a
# top-10 finisher without a recorded pit stop has no row here.
plotted_drivers = top_10.drop_duplicates(subset='driverId')

# Create strategy visualization
fig = go.Figure()
tick_labels = []

for idx, (driver_id, driver_code, final_pos) in enumerate(zip(
    plotted_drivers['driverId'],
    plotted_drivers['code'],
    plotted_drivers['position'],
)):
    driver_stops = top_10[top_10['driverId'] == driver_id]
    # int() avoids labels such as "P1.0" when the column was read as float
    position_label = f"P{int(final_pos)}"
    tick_labels.append(position_label)

    fig.add_trace(go.Scatter(
        x=driver_stops['lap'],
        # All of one driver's stops sit on the same horizontal row
        y=[idx] * len(driver_stops),
        mode='markers',
        marker=dict(size=15, symbol='diamond'),
        name=f"{position_label} - {driver_code}",
        hovertemplate='<b>%{text}</b><br>' +
                      'Pit Stop Lap: %{x}<br>' +
                      'Duration: %{customdata:.2f}s<br>' +
                      '<extra></extra>',
        text=[driver_code] * len(driver_stops),
        # NOTE(review): the ':.2f' hover format assumes 'duration' is numeric
        # seconds - confirm against the dataset
        customdata=driver_stops['duration']
    ))

fig.update_layout(
    title=f'{race_info["name"]} {race_info["year"]} - Pit Stop Strategies',
    xaxis_title='Lap Number',
    yaxis_title='Driver (by finishing position)',
    yaxis=dict(
        tickmode='array',
        tickvals=list(range(len(tick_labels))),
        ticktext=tick_labels
    ),
    height=600,
    showlegend=True,
    hovermode='closest'
)

fig.write_html('pit_stop_strategy.html')
fig.show()

3D Constructor Performance Visualization

Create a 3D scatter plot of constructor performance:

import pandas as pd
import plotly.express as px

# Load data
results = pd.read_csv('data/results.csv')
races = pd.read_csv('data/races.csv')
constructors = pd.read_csv('data/constructors.csv')

# Calculate constructor stats by year.
# 'races' counts distinct raceIds: results holds one row per car, so a plain
# row count would count entries and understate the win rate.
constructor_stats = (
    results
    .merge(races[['raceId', 'year']], on='raceId')
    .merge(constructors[['constructorId', 'name']], on='constructorId')
    .groupby(['year', 'constructorId', 'name'])
    .agg(
        points=('points', 'sum'),
        wins=('position', lambda x: (x == 1).sum()),
        races=('raceId', 'nunique'),
    )
    .reset_index()
)

# Filter to recent years and teams that scored points.
# .copy() makes this an independent frame so the column added below does not
# trigger pandas' SettingWithCopyWarning.
recent_stats = constructor_stats[
    (constructor_stats['year'] >= 2014) &
    (constructor_stats['points'] > 0)
].copy()

# Calculate win rate as a percentage of races entered
recent_stats['win_rate'] = (recent_stats['wins'] / recent_stats['races'] * 100).round(2)

# Create 3D scatter: season x points x win rate, one colour per constructor
fig = px.scatter_3d(
    recent_stats,
    x='year',
    y='points',
    z='win_rate',
    color='name',
    size='wins',
    hover_data=['wins', 'races'],
    title='Constructor Performance Evolution (2014-2024)',
    labels={
        'year': 'Season',
        'points': 'Total Points',
        'win_rate': 'Win Rate (%)',
        'name': 'Constructor'
    },
    height=700
)

fig.update_traces(marker=dict(line=dict(width=1, color='white')))
fig.write_html('constructor_3d.html')
fig.show()

Plotly Tips:

Use fig.write_html() to save interactive plots
Set template='plotly_dark' for dark mode
Add hovermode='closest' for better tooltips
Export to static images with fig.write_image('plot.png') (requires kaleido)

Advanced Visualizations

Race Results Sankey Diagram

Show how grid positions translate to finishing positions:

import pandas as pd
import plotly.graph_objects as go

# Load data
results = pd.read_csv('data/results.csv')
races = pd.read_csv('data/races.csv')

# Get a specific race
race_id = 1100
race_results = results[results['raceId'] == race_id].copy()

# Filter out DNFs and limit to cars that started and finished in the top 10.
# The lower bound on grid matters: only 'Grid 1'..'Grid 10' nodes exist, so a
# grid value below 1 (presumably a pit-lane start - verify against the
# dataset) has no node to attach to.
race_results = race_results[
    (race_results['position'].notna()) &
    (race_results['grid'].between(1, 10)) &
    (race_results['positionOrder'] <= 10)
]

# 1-based grid slot and finishing position of every remaining car
grid_slots = race_results['grid'].astype(int).tolist()
finish_slots = race_results['positionOrder'].astype(int).tolist()

# Create labels: indices 0-9 are 'Grid 1'..'Grid 10', 10-19 are 'P1'..'P10'
labels = [f'Grid {i}' for i in range(1, 11)] + [f'P{i}' for i in range(1, 11)]

# Create source and target nodes.
# Sankey links refer to nodes by zero-based index into `labels`, so the
# 1-based slots are shifted down by one; finish nodes start at index 10.
source = [grid - 1 for grid in grid_slots]
target = [10 + (finish - 1) for finish in finish_slots]
value = [1] * len(race_results)

# Create colors based on position change (compared on the 1-based values)
colors = []
for grid, finish in zip(grid_slots, finish_slots):
    if finish < grid:
        colors.append('rgba(0, 255, 0, 0.4)')  # Green for position gain
    elif finish > grid:
        colors.append('rgba(255, 0, 0, 0.4)')  # Red for position loss
    else:
        colors.append('rgba(128, 128, 128, 0.4)')  # Gray for no change

# Create Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color='black', width=0.5),
        label=labels,
        color='blue'
    ),
    link=dict(
        source=source,
        target=target,
        value=value,
        color=colors
    )
)])

fig.update_layout(
    title='Grid Position to Finish Position Flow',
    font_size=12,
    height=600
)

fig.write_html('race_flow.html')
fig.show()

Circuit Win Distribution Map

Create a world map of circuit locations, sized and colored by the number of races each circuit has hosted:

import pandas as pd
import plotly.express as px

# Read the source tables
circuits = pd.read_csv('data/circuits.csv')
races = pd.read_csv('data/races.csv')
results = pd.read_csv('data/results.csv')

# Per circuit: how many races it hosted and the first/last season it appeared
race_counts = races.groupby('circuitId', as_index=False).agg(
    total_races=('raceId', 'count'),
    first_race=('year', 'min'),
    last_race=('year', 'max'),
)

# Attach those counts to the circuit coordinates and names
circuit_data = circuits.merge(race_counts, on='circuitId')

# Hover shows country and race history; raw coordinates are hidden
hover_columns = {
    'country': True,
    'total_races': True,
    'first_race': True,
    'last_race': True,
    'lat': False,
    'lng': False,
}

# Bubble map: marker size and colour both encode the number of races
fig = px.scatter_geo(
    circuit_data,
    lat='lat',
    lon='lng',
    hover_name='name',
    hover_data=hover_columns,
    size='total_races',
    color='total_races',
    color_continuous_scale='Reds',
    title='Formula 1 Circuits Worldwide',
    projection='natural earth',
)

# Light land and coastline colours so the markers stand out
geo_style = dict(
    showland=True,
    landcolor='rgb(243, 243, 243)',
    coastlinecolor='rgb(204, 204, 204)',
)
fig.update_layout(height=600, geo=geo_style)

fig.write_html('circuit_map.html')
fig.show()

Dashboard with Plotly Dash

Create an interactive dashboard:

import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd

# Read the source tables
drivers = pd.read_csv('data/drivers.csv')
results = pd.read_csv('data/results.csv')
races = pd.read_csv('data/races.csv')

# One row per race result, with the season / race name and driver identity
race_columns = races[['raceId', 'year', 'name']]
driver_columns = drivers[['driverId', 'forename', 'surname', 'code']]
full_data = results.merge(race_columns, on='raceId').merge(driver_columns, on='driverId')

# Create the Dash application
app = dash.Dash(__name__)

# Dropdown entries: only drivers that have at least one result row
drivers_with_results = drivers[drivers['driverId'].isin(full_data['driverId'].unique())]
driver_options = [
    {'label': f"{row['forename']} {row['surname']}", 'value': row['driverId']}
    for _, row in drivers_with_results.iterrows()
]

# Bounds of the year slider come from the data itself
first_season = full_data['year'].min()
latest_season = full_data['year'].max()

# Left-hand control: driver picker
driver_selector = html.Div(
    [
        html.Label('Select Driver:'),
        dcc.Dropdown(
            id='driver-dropdown',
            options=driver_options,
            value=1,  # Default to Lewis Hamilton
            clearable=False,
        ),
    ],
    style={'width': '48%', 'display': 'inline-block'},
)

# Right-hand control: season range picker
year_selector = html.Div(
    [
        html.Label('Select Year Range:'),
        dcc.RangeSlider(
            id='year-slider',
            min=first_season,
            max=latest_season,
            value=[2014, latest_season],
            marks={str(year): str(year) for year in range(2010, 2025, 5)},
            step=1,
        ),
    ],
    style={'width': '48%', 'float': 'right', 'display': 'inline-block'},
)

# The two charts filled in by the callback
charts = html.Div([
    dcc.Graph(id='points-over-time'),
    dcc.Graph(id='position-distribution'),
])

# Page layout: title, the two controls side by side, then the charts
app.layout = html.Div([
    html.H1('Formula 1 Dashboard', style={'textAlign': 'center'}),
    driver_selector,
    year_selector,
    charts,
])

# Callbacks
@app.callback(
    [Output('points-over-time', 'figure'),
     Output('position-distribution', 'figure')],
    [Input('driver-dropdown', 'value'),
     Input('year-slider', 'value')]
)
def update_graphs(driver_id, year_range):
    """Rebuild both charts for the selected driver and season range.

    Args:
        driver_id: Value of the driver dropdown.
        year_range: Two-item sequence from the year slider; both ends are
            inclusive.

    Returns:
        A pair of figures: points per season (line) and the distribution
        of finishing positions (bar).
    """
    first_year = year_range[0]
    last_year = year_range[1]

    # Rows for this driver inside the inclusive season window
    is_driver = full_data['driverId'] == driver_id
    in_window = full_data['year'].between(first_year, last_year)
    selection = full_data[is_driver & in_window]

    # Total points scored in each season
    season_totals = selection.groupby('year', as_index=False)['points'].sum()
    points_figure = px.line(
        season_totals,
        x='year',
        y='points',
        title='Points per Season',
        markers=True,
    )

    # How often each finishing position occurred, ordered by position
    finish_tally = selection['positionOrder'].value_counts().sort_index()
    distribution_figure = px.bar(
        x=finish_tally.index,
        y=finish_tally.values,
        title='Finishing Position Distribution',
        labels={'x': 'Position', 'y': 'Count'},
    )

    return points_figure, distribution_figure

if __name__ == '__main__':
    # Start the development server. app.run() is the current entry point;
    # the older app.run_server() alias is no longer available in Dash 3.
    app.run(debug=True)

Run the dashboard:

python dashboard.py

Then open http://127.0.0.1:8050 in your browser.

Seaborn Statistical Plots

Driver Age vs Performance

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Read the source tables
drivers = pd.read_csv('data/drivers.csv')
results = pd.read_csv('data/results.csv')
races = pd.read_csv('data/races.csv')

# One row per result, carrying the race date and the driver's date of birth
race_columns = races[['raceId', 'year', 'date']]
driver_columns = drivers[['driverId', 'dob', 'forename', 'surname']]
full_data = results.merge(race_columns, on='raceId').merge(driver_columns, on='driverId')

# Parse both dates and express the driver's age on race day in years
full_data['race_date'] = pd.to_datetime(full_data['date'])
full_data['birth_date'] = pd.to_datetime(full_data['dob'])
time_since_birth = full_data['race_date'] - full_data['birth_date']
full_data['age'] = time_since_birth.dt.days / 365.25

# Race wins from the 2000 season onwards
is_win = full_data['position'] == 1
is_recent = full_data['year'] >= 2000
wins = full_data[is_win & is_recent].copy()

# Scatter of age against points, with season driving both colour and size
scatter_options = {
    'hue': 'year',
    'size': 'year',
    'sizes': (50, 200),
    'alpha': 0.7,
    'palette': 'viridis',
}
plt.figure(figsize=(12, 6))
sns.scatterplot(data=wins, x='age', y='points', **scatter_options)

plt.title('Driver Age at Race Win (2000-Present)', fontsize=14, fontweight='bold')
plt.xlabel('Age (years)', fontsize=12)
plt.ylabel('Points Scored', fontsize=12)
# Legend sits outside the axes; bbox_inches='tight' below keeps it in the file
plt.legend(title='Season', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig('age_vs_performance.png', dpi=300, bbox_inches='tight')
plt.show()

Exporting Visualizations

High-Resolution Images

import matplotlib.pyplot as plt

# Build your figure on a 16x9 inch canvas
plt.figure(figsize=(16, 9))
# ... create plot ...

# Every export trims surrounding whitespace
export_options = {'bbox_inches': 'tight'}

# Raster output: 300 dpi PNG on a white background
plt.savefig('plot.png', dpi=300, facecolor='white', **export_options)

# Vector output (SVG/PDF) for publications
plt.savefig('plot.svg', format='svg', **export_options)
plt.savefig('plot.pdf', format='pdf', **export_options)

Interactive HTML

import plotly.graph_objects as go

# Start from an empty figure and add your traces to it
fig = go.Figure()
# ... add traces ...

# Keep the mode bar visible but hide the Plotly logo
html_config = {'displayModeBar': True, 'displaylogo': False}

# Write a standalone page; loading plotly.js from the CDN keeps the file small
fig.write_html(
    'interactive_plot.html',
    include_plotlyjs='cdn',
    config=html_config,
)

Next Steps

Learn data analysis patterns for preparing visualization data
Set up integrations with BI tools like Tableau
Explore querying techniques for complex visualizations

Get Started

Data Access

Data Schema

Guides

Data Visualization

Data Visualization Guide

Setup

Basic Plotting with Matplotlib

Driver Championship Progression

Constructor Performance Heatmap

Interactive Plots with Plotly

Lap Time Evolution

Pit Stop Strategy Comparison

3D Constructor Performance Visualization

Advanced Visualizations

Race Results Sankey Diagram

Circuit Win Distribution Map

Dashboard with Plotly Dash

Seaborn Statistical Plots

Driver Age vs Performance

Exporting Visualizations

High-Resolution Images

Interactive HTML

Next Steps

Build docs developers (and LLMs) love

Get Started

Data Access

Data Schema

Guides

​Data Visualization Guide

​Setup

​Basic Plotting with Matplotlib

​Driver Championship Progression

​Constructor Performance Heatmap

​Interactive Plots with Plotly

​Lap Time Evolution

​Pit Stop Strategy Comparison

​3D Constructor Performance Visualization

​Advanced Visualizations

​Race Results Sankey Diagram

​Circuit Win Distribution Map

​Dashboard with Plotly Dash

​Seaborn Statistical Plots

​Driver Age vs Performance

​Exporting Visualizations

​High-Resolution Images

​Interactive HTML

​Next Steps

Build docs developers (and LLMs) love

Data Visualization Guide

Setup

Basic Plotting with Matplotlib

Driver Championship Progression

Constructor Performance Heatmap

Interactive Plots with Plotly

Lap Time Evolution

Pit Stop Strategy Comparison

3D Constructor Performance Visualization

Advanced Visualizations

Race Results Sankey Diagram

Circuit Win Distribution Map

Dashboard with Plotly Dash

Seaborn Statistical Plots

Driver Age vs Performance

Exporting Visualizations

High-Resolution Images

Interactive HTML

Next Steps