GEPAResult — API Reference

Overview

GEPAResult is an immutable snapshot of optimization results returned by optimize_anything() and optimize(). It contains the best candidate, all explored candidates, validation scores, Pareto frontier information, and optimization metadata.

Class Definition

@dataclass(frozen=True)
class GEPAResult(Generic[RolloutOutput, DataId]):
    """Immutable snapshot returned by optimization functions."""

Key Properties

best_candidate

@property
def best_candidate(self) -> str | dict[str, str]
The optimized parameter(s). Returns a plain str when seed_candidate was a string, otherwise returns the full dict[str, str] parameter mapping.
result = optimize_anything(
    seed_candidate="Initial prompt",
    evaluator=evaluate,
    # ...
)
print(result.best_candidate)  # str

result = optimize_anything(
    seed_candidate={"system": "You are helpful", "user": "Answer:"},
    evaluator=evaluate,
    # ...
)
print(result.best_candidate)  # dict[str, str]

best_idx

@property
def best_idx(self) -> int
Index of the highest-scoring candidate in the candidates list.

num_candidates

@property
def num_candidates(self) -> int
Total number of candidates explored during optimization.

num_val_instances

@property
def num_val_instances(self) -> int
Number of validation instances used to score candidates.

best_refiner_prompt

@property
def best_refiner_prompt(self) -> str | None
The refiner prompt from the best candidate, or None if the refiner was not enabled.

Core Data Fields

candidates
list[dict[str, str]]
All candidates explored during optimization. Each candidate is a dict mapping component names to text values.
parents
list[list[ProgramIdx | None]]
Lineage information. parents[i] is a list of parent indices for candidate i. Enables tracing the evolutionary history.
val_aggregate_scores
list[float]
Per-candidate average validation score (higher is better). val_aggregate_scores[i] is the aggregate score for candidates[i].
val_subscores
list[dict[DataId, float]]
Per-candidate, per-validation-instance scores. val_subscores[i][data_id] is the score for candidate i on validation instance data_id.
per_val_instance_best_candidates
dict[DataId, set[ProgramIdx]]
Pareto frontier. For each validation example, the set of candidate indices achieving the best score on that example.
discovery_eval_counts
list[int]
Number of evaluations when each candidate was discovered. discovery_eval_counts[i] is the evaluation count when candidates[i] was first explored.

Optional Fields

val_aggregate_subscores
list[dict[str, float]] | None
Default: None
Per-candidate, per-objective aggregate scores. Only populated when using multi-objective optimization with named objectives in side_info.
per_objective_best_candidates
dict[str, set[ProgramIdx]] | None
Default: None
Per-objective Pareto frontier. Maps objective name to the set of candidate indices that are best for that objective.
objective_pareto_front
dict[str, float] | None
Default: None
Best score achieved for each objective metric.
best_outputs_valset
dict[DataId, list[tuple[ProgramIdx, RolloutOutput]]] | None
Default: None
Best outputs for each validation instance. Only populated when track_best_outputs=True. Maps validation ID to list of (candidate_idx, output) tuples.

Metadata Fields

total_metric_calls
int | None
Default: None
Total number of metric/evaluation calls made during optimization.
num_full_val_evals
int | None
Default: None
Number of full validation evaluations performed.
run_dir
str | None
Default: None
Directory where optimization state was saved.
seed
int | None
Default: None
Random seed used for the optimization run.

Serialization

to_dict()

def to_dict(self) -> dict[str, Any]
Convert the result to a JSON-serializable dictionary.
result = optimize_anything(...)
result_dict = result.to_dict()

# Save to JSON
import json
with open("result.json", "w") as f:
    json.dump(result_dict, f)

from_dict()

@staticmethod
def from_dict(d: dict[str, Any]) -> GEPAResult[RolloutOutput, DataId]
Restore a GEPAResult from a dictionary (supports version migration).
import json
from gepa.core.result import GEPAResult

with open("result.json", "r") as f:
    result_dict = json.load(f)

result = GEPAResult.from_dict(result_dict)
print(result.best_candidate)

from_state()

@staticmethod
def from_state(
    state: GEPAState[RolloutOutput, DataId],
    run_dir: str | None = None,
    seed: int | None = None,
    str_candidate_key: str | None = None,
) -> GEPAResult[RolloutOutput, DataId]
Build a GEPAResult from a GEPAState (internal use).

Example Usage

Basic Access

import gepa.optimize_anything as oa

result = oa.optimize_anything(
    seed_candidate="Initial prompt",
    evaluator=evaluate,
    dataset=train_data,
    objective="Improve accuracy",
    config=oa.GEPAConfig(
        engine=oa.EngineConfig(max_metric_calls=100)
    ),
)

# Access best candidate
print(f"Best candidate: {result.best_candidate}")
print(f"Best score: {result.val_aggregate_scores[result.best_idx]}")
print(f"Total candidates explored: {result.num_candidates}")
print(f"Total evaluations: {result.total_metric_calls}")

Analyzing Pareto Frontier

# Get candidates on the Pareto frontier for a specific validation instance
val_id = list(result.per_val_instance_best_candidates.keys())[0]
pareto_candidates = result.per_val_instance_best_candidates[val_id]

print(f"Candidates on Pareto frontier for {val_id}:")
for idx in pareto_candidates:
    candidate = result.candidates[idx]
    score = result.val_subscores[idx][val_id]
    print(f"  Candidate {idx}: score={score}")
    print(f"    {candidate}")

Tracing Candidate Evolution

# Trace the lineage of the best candidate
def trace_lineage(result, idx):
    lineage = [idx]
    while result.parents[idx] and result.parents[idx][0] is not None:
        idx = result.parents[idx][0]
        lineage.append(idx)
    return list(reversed(lineage))

lineage = trace_lineage(result, result.best_idx)
print(f"Best candidate evolution: {lineage}")

for i, idx in enumerate(lineage):
    print(f"\nGeneration {i} (candidate {idx}):")
    print(f"  Score: {result.val_aggregate_scores[idx]}")
    print(f"  Discovered at eval: {result.discovery_eval_counts[idx]}")
    print(f"  Candidate: {result.candidates[idx]}")

Multi-Objective Analysis

# When using multi-objective optimization
if result.val_aggregate_subscores is not None:
    best_idx = result.best_idx
    objective_scores = result.val_aggregate_subscores[best_idx]
    
    print("Best candidate objective scores:")
    for objective, score in objective_scores.items():
        print(f"  {objective}: {score}")

if result.per_objective_best_candidates is not None:
    print("\nBest candidates per objective:")
    for objective, candidate_indices in result.per_objective_best_candidates.items():
        print(f"  {objective}: candidates {candidate_indices}")

Saving and Loading Results

import json
from gepa.core.result import GEPAResult

# Save result
result = oa.optimize_anything(...)
with open("optimization_result.json", "w") as f:
    json.dump(result.to_dict(), f, indent=2)

# Load result later
with open("optimization_result.json", "r") as f:
    result_dict = json.load(f)

loaded_result = GEPAResult.from_dict(result_dict)
print(loaded_result.best_candidate)

See Also

Build docs developers (and LLMs) love