Discover tasks from the dataset and query completed runs from logs.
discover_tasks()
Query available benchmark tasks from the dataset/ directory.
Function signature
from cooperbench import discover_tasks
tasks = discover_tasks(
subset: str | None = None,
repo_filter: str | None = None,
task_filter: int | None = None,
features_filter: list[int] | None = None,
) -> list[dict]
Parameters
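subset
str | None
default:"None"
Use a predefined task subset (e.g., "lite"). Subsets are defined in dataset/subsets/.
repo_filter
str | None
default:"None"
Filter by repository name (e.g., "llama_index_task").
task_filter
int | None
default:"None"
Filter by specific task ID.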
features_filter
list[int] | None
default:"None"
Filter to a specific feature pair (e.g., [1, 2]).
Returns
List of task dictionaries with the following structure:
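repo (str): Repository name (e.g., "llama_index_task")
task_id (int): Task ID
features (list[int]): Feature pair (e.g., [1, 2])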
Usage examples
List all tasks
from cooperbench import discover_tasks
tasks = discover_tasks()
print(f"Found {len(tasks)} tasks")
for task in tasks[:5]:
print(f"{task['repo']}/task{task['task_id']}: features {task['features']}")
Found 450 tasks
llama_index_task/task1: features [1, 2]
llama_index_task/task1: features [1, 3]
llama_index_task/task1: features [2, 3]
llama_index_task/task2: features [1, 2]
django_task/task1: features [1, 2]
Filter by subset
# Get tasks from the lite subset
lite_tasks = discover_tasks(subset="lite")
print(f"Lite subset has {len(lite_tasks)} tasks")
Filter by repository
# Get all tasks from a specific repository
llama_tasks = discover_tasks(repo_filter="llama_index_task")
for task in llama_tasks:
print(f"Task {task['task_id']}: features {task['features']}")
Filter by task ID
# Get all feature pairs for a specific task
task1_pairs = discover_tasks(
repo_filter="llama_index_task",
task_filter=1,
)
print(f"Task 1 has {len(task1_pairs)} feature pairs:")
for pair in task1_pairs:
print(f" {pair['features']}")
Filter by feature pair
# Find all tasks with features [1, 2]
tasks_with_f1_f2 = discover_tasks(features_filter=[1, 2])
for task in tasks_with_f1_f2:
print(f"{task['repo']}/task{task['task_id']}")
Combine filters
# Get a very specific task
specific = discover_tasks(
repo_filter="llama_index_task",
task_filter=1,
features_filter=[1, 2],
)
if specific:
task = specific[0]
print(f"Found: {task['repo']}/task{task['task_id']} with features {task['features']}")
discover_runs()
Query completed runs from the logs/ directory.
Function signature
from cooperbench import discover_runs
runs = discover_runs(
run_name: str,
subset: str | None = None,
repo_filter: str | None = None,
task_filter: int | None = None,
features_filter: list[int] | None = None,
) -> list[dict]
Parameters
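run_name
str
required
Name of the run to query (corresponds to the run name used in run()).
subset
str | None
default:"None"
Filter to a specific subset (e.g., "lite").
repo_filter
str | None
default:"None"
Filter by repository name.
task_filter
int | None
default:"None"
Filter by specific task ID.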
features_filter
list[int] | None
default:"None"
Filter to a specific feature pair.
Returns
List of run dictionaries with the following structure:
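repo (str): Repository name
task_id (int): Task ID
features (list[int]): Feature pair (e.g., [1, 2])
log_dir (str): Path to the run’s log directory
setting (str): Execution mode ("coop" or "solo")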
Usage examples
List all completed runs
from cooperbench import discover_runs
runs = discover_runs(run_name="my_experiment")
print(f"Found {len(runs)} completed runs")
for run in runs[:5]:
print(f"{run['repo']}/task{run['task_id']}: features {run['features']} ({run['setting']} mode)")
Check which tasks are complete
from cooperbench import discover_tasks, discover_runs
# Compare available vs completed
all_tasks = discover_tasks(subset="lite")
completed = discover_runs(run_name="my_experiment", subset="lite")
print(f"Progress: {len(completed)}/{len(all_tasks)} tasks completed")
Find runs to evaluate
from pathlib import Path
from cooperbench import discover_runs
# Find runs that haven't been evaluated yet
runs = discover_runs(run_name="my_experiment")
unevaluated = [
run for run in runs
if not (Path(run["log_dir"]) / "eval.json").exists()
]
print(f"{len(unevaluated)} runs need evaluation")
Filter by repository and setting
# Find all cooperative runs from a specific repo
coop_runs = discover_runs(
run_name="my_experiment",
repo_filter="llama_index_task",
)
coop_runs = [r for r in coop_runs if r["setting"] == "coop"]
print(f"Found {len(coop_runs)} cooperative runs")
Access run results
import json
from pathlib import Path
from cooperbench import discover_runs
runs = discover_runs(run_name="my_experiment")
for run in runs:
result_path = Path(run["log_dir"]) / "result.json"
if result_path.exists():
with open(result_path) as f:
result = json.load(f)
print(f"{run['repo']}/task{run['task_id']}: ${result.get('total_cost', 0):.2f}")
Working with subsets
Subsets are predefined collections of tasks stored in dataset/subsets/. They’re useful for quick testing and benchmarking.
{
"name": "lite",
"description": "Lightweight subset for quick testing",
"tasks": [
{
"repo": "llama_index_task",
"task_id": 1,
"pairs": [[1, 2], [1, 3]]
},
{
"repo": "django_task",
"task_id": 5
}
]
}
- If "pairs" is specified for a task, only those feature pairs are included
- If "pairs" is omitted, all pairwise feature combinations are used
Load subset programmatically
from cooperbench.runner.tasks import load_subset
subset_data = load_subset("lite")
print(f"Tasks: {subset_data['tasks']}")
print(f"Specific pairs: {subset_data['pairs']}")