The events.py script is the foundation of the data collection pipeline. It fetches the race calendar for each season and creates the directory structure for storing race data.
The main function iterates through seasons and processes each race:
def main():
    """Fetch the F1 race calendar per season and build the on-disk data tree.

    For each year in [start_year, end_year], this:
      1. Creates <base_dir>/<year>/.
      2. Fetches the season calendar from the Jolpica/Ergast API and saves
         the raw response as <year>/events.json.
      3. Creates one subdirectory per race (slugified race name) and saves
         that race's payload as <race>/event_info.json.

    Relies on module-level helpers defined elsewhere in this file:
    create_directory, fetch_with_rate_limit, slugify.
    """
    base_dir = "."
    start_year = 2026
    end_year = 2026

    for year in range(start_year, end_year + 1):
        print(f"Processing year {year}...")

        # Create the per-year directory.
        year_dir = os.path.join(base_dir, str(year))
        create_directory(year_dir)

        # Fetch the season calendar (fetch_with_rate_limit handles API throttling).
        url = f"https://api.jolpi.ca/ergast/f1/{year}/races/"
        data = fetch_with_rate_limit(url)

        # Save the full season payload. Explicit UTF-8 so the JSON files are
        # portable regardless of the platform's default locale encoding.
        # (Was f"events.json" — an f-string with no placeholders.)
        with open(os.path.join(year_dir, "events.json"), "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

        # Walk the Ergast envelope defensively: missing keys yield an empty
        # race list rather than a KeyError (replaces a nested `in` chain).
        races = data.get("MRData", {}).get("RaceTable", {}).get("Races", [])
        for race in races:
            race_name = race["raceName"]
            race_slug = slugify(race_name)

            # One directory per race, named by its slug.
            race_dir = os.path.join(year_dir, race_slug)
            create_directory(race_dir)

            # Save this race's payload alongside its future session data.
            with open(os.path.join(race_dir, "event_info.json"), "w", encoding="utf-8") as f:
                json.dump(race, f, indent=2)

            print(f"Processed: {year} - {race_name}")