Overview

The optimizers module provides factory functions for creating PyTorch optimizers, learning rate schedulers, and loss functions based on configuration dictionaries. It also includes a custom Focal Loss implementation for handling class imbalance.

Classes

FocalLoss

Focal Loss implementation for imbalanced multi-class classification. Reduces loss contribution from easy examples and focuses on hard examples.
class FocalLoss(nn.Module):
    def __init__(
        self,
        alpha: torch.Tensor | None = None,
        gamma: float = 2.0,
        reduction: str = "mean"
    )
alpha (torch.Tensor, default: None)
  Class weights tensor of shape (num_classes,). Used to handle class imbalance.
gamma (float, default: 2.0)
  Focusing parameter. Higher values down-weight easy examples more. Typical range: [0, 5].
reduction (str, default: "mean")
  Specifies the reduction to apply: "none", "mean", or "sum".
Formula:

FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t)

where p_t = exp(-CE_loss) is the model's predicted probability for the true class. With gamma = 2, an easy example with p_t = 0.9 keeps only (1 - 0.9)^2 = 1% of its cross-entropy loss, while a hard example with p_t = 0.1 keeps 81% (see the forward-pass sketch after the list below).
When to use:
  • Highly imbalanced datasets
  • When many examples are easy to classify
  • When you want to focus training on hard examples
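The forward pass follows directly from the formula above. Below is a minimal sketch built on PyTorch's cross-entropy; it illustrates the math and is not necessarily this module's exact implementation:

import torch
import torch.nn.functional as F

def focal_loss_forward(logits, targets, alpha=None, gamma=2.0, reduction="mean"):
    # Per-example cross-entropy; weight=alpha applies the class weights.
    ce = F.cross_entropy(logits, targets, weight=alpha, reduction="none")
    pt = torch.exp(-ce)                 # probability assigned to the true class
    loss = (1.0 - pt) ** gamma * ce     # (1 - pt)^gamma down-weights easy examples
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss                         # reduction == "none"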

Example

import torch
import torch.nn as nn
from training.optimizers import FocalLoss
from training.dataset import compute_class_weights

# Compute class weights (train_labels: integer class labels for the training set)
class_weights = compute_class_weights(train_labels, num_classes=9)

# Create focal loss
criterion = FocalLoss(
    alpha=class_weights,
    gamma=2.0,
    reduction="mean"
)

# Use in training
outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

Functions

create_optimizer

Creates a PyTorch optimizer from training configuration.
def create_optimizer(model: nn.Module, config: dict) -> torch.optim.Optimizer
model (nn.Module, required)
  PyTorch model whose parameters will be optimized
config (dict, required)
  Training configuration dictionary with keys:
  • optimizer: Optimizer name ("Adam", "AdamW", "SGD with Momentum", "RMSprop")
  • learning_rate: Learning rate (default: 0.001)
  • l2_decay: Whether to enable L2 regularization (default: False)
  • l2_lambda: L2 regularization coefficient (default: 0.0001)

Returns: Configured optimizer instance

Supported Optimizers:
  • Adam: Adaptive learning rate with momentum. Best for general-purpose use; the default choice.
  • AdamW: Adam with decoupled weight decay. Better generalization than Adam.
  • SGD with Momentum: Classic SGD with momentum=0.9. Best for large-batch training and simple models.
  • RMSprop: Adaptive learning rate. Best for RNNs and online learning.
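Internally, a factory like this typically maps the optimizer name to a torch.optim constructor and folds l2_decay/l2_lambda into PyTorch's weight_decay argument. The following is a sketch under those assumptions, not necessarily the module's exact code:

import torch
import torch.nn as nn

def create_optimizer_sketch(model: nn.Module, config: dict) -> torch.optim.Optimizer:
    lr = config.get("learning_rate", 0.001)
    # L2 regularization is expressed through the weight_decay argument.
    weight_decay = config.get("l2_lambda", 0.0001) if config.get("l2_decay", False) else 0.0
    name = config.get("optimizer", "Adam")
    if name == "Adam":
        return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == "AdamW":
        return torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == "SGD with Momentum":
        return torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    if name == "RMSprop":
        return torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
    raise ValueError(f"Unknown optimizer: {name}")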

Example

from training.optimizers import create_optimizer

# Basic Adam optimizer
config = {
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "l2_decay": False
}
optimizer = create_optimizer(model, config)

# AdamW with weight decay
config = {
    "optimizer": "AdamW",
    "learning_rate": 0.0001,
    "l2_decay": True,
    "l2_lambda": 0.01
}
optimizer = create_optimizer(model, config)

# SGD with momentum and L2 regularization
config = {
    "optimizer": "SGD with Momentum",
    "learning_rate": 0.01,
    "l2_decay": True,
    "l2_lambda": 0.0001
}
optimizer = create_optimizer(model, config)

create_scheduler

Creates a learning rate scheduler from training configuration.
def create_scheduler(
    optimizer: torch.optim.Optimizer,
    config: dict,
    steps_per_epoch: int,
) -> torch.optim.lr_scheduler.LRScheduler | None
optimizer (torch.optim.Optimizer, required)
  Optimizer instance to schedule
config (dict, required)
  Training configuration dictionary with keys:
  • lr_strategy: Scheduler name ("Constant", "ReduceLROnPlateau", "Cosine Annealing", "Step Decay", "Exponential Decay")
  • epochs: Total number of training epochs
steps_per_epoch (int, required)
  Number of training steps per epoch (length of the train DataLoader)

Returns: Configured scheduler instance, or None when using a constant learning rate

Supported Schedulers:
  • Constant: No scheduling (returns None).
  • ReduceLROnPlateau: Reduces LR when the monitored metric plateaus (factor=0.5, patience=5, min_lr=1e-6).
  • Cosine Annealing: Cosine decay to a minimum (T_max=epochs, eta_min=1e-6).
  • Step Decay: Reduces LR at fixed intervals (step_size=epochs/3, gamma=0.1).
  • Exponential Decay: Exponential decay (gamma=0.95).

Example

from training.optimizers import create_optimizer, create_scheduler

optimizer = create_optimizer(model, training_config)

# Constant learning rate
config = {"lr_strategy": "Constant"}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Returns None

# ReduceLROnPlateau - reduces LR when validation loss plateaus
config = {
    "lr_strategy": "ReduceLROnPlateau",
    "epochs": 100
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Call with: scheduler.step(val_loss)

# Cosine Annealing - smooth decay
config = {
    "lr_strategy": "Cosine Annealing",
    "epochs": 100
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Call with: scheduler.step()

# Step Decay - drops LR at fixed intervals
config = {
    "lr_strategy": "Step Decay",
    "epochs": 90
}
scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
# Reduces LR by 0.1x every 30 epochs
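Which step() call a scheduler needs depends on its type: ReduceLROnPlateau consumes a validation metric, while the others step once per epoch. A minimal loop sketch, where train_one_epoch and evaluate are hypothetical placeholders for your own training and validation code:

from torch.optim.lr_scheduler import ReduceLROnPlateau

for epoch in range(config["epochs"]):
    train_one_epoch(model, train_loader, optimizer, criterion)   # placeholder
    val_loss = evaluate(model, val_loader, criterion)            # placeholder
    if scheduler is not None:                                    # None means constant LR
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(val_loss)   # plateau scheduler needs the monitored metric
        else:
            scheduler.step()           # epoch-based schedulers step unconditionally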

create_criterion

Creates a loss function from training configuration.
def create_criterion(
    config: dict,
    class_weights: torch.Tensor | None = None,
    device: torch.device | None = None,
) -> nn.Module
config (dict, required)
  Training configuration dictionary with keys:
  • class_weights: Loss type ("None", "Auto Class Weights", "Focal Loss")
class_weights (torch.Tensor, default: None)
  Optional class weights tensor from compute_class_weights()
device (torch.device, default: None)
  Device to move class_weights to

Returns: Loss function (nn.CrossEntropyLoss or FocalLoss)

Loss Functions:
  • "None": CrossEntropyLoss(). For balanced datasets.
  • "Auto Class Weights": CrossEntropyLoss(weight=class_weights). For imbalanced datasets.
  • "Focal Loss": FocalLoss(alpha=class_weights). For highly imbalanced datasets with many hard examples.
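The list above suggests a simple dispatch on the class_weights setting. A sketch under that assumption, not the module's exact code:

import torch
import torch.nn as nn
from training.optimizers import FocalLoss

def create_criterion_sketch(config, class_weights=None, device=None):
    mode = config.get("class_weights", "None")
    if class_weights is not None and device is not None:
        class_weights = class_weights.to(device)   # move weights to the training device
    if mode == "Auto Class Weights":
        return nn.CrossEntropyLoss(weight=class_weights)
    if mode == "Focal Loss":
        return FocalLoss(alpha=class_weights)
    return nn.CrossEntropyLoss()   # "None": plain cross-entropy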

Example

from training.optimizers import create_criterion
from training.dataset import compute_class_weights

# Standard cross entropy
config = {"class_weights": "None"}
criterion = create_criterion(config)

# Weighted cross entropy for imbalanced data
class_weights = compute_class_weights(train_labels, num_classes=9)
config = {"class_weights": "Auto Class Weights"}
criterion = create_criterion(config, class_weights, device)

# Focal loss for highly imbalanced data
class_weights = compute_class_weights(train_labels, num_classes=9)
config = {"class_weights": "Focal Loss"}
criterion = create_criterion(config, class_weights, device)

Complete Training Setup

import torch
import torch.nn as nn
from training.dataset import create_dataloaders
from training.optimizers import create_optimizer, create_scheduler, create_criterion
from training.engine import TrainingEngine

# Configuration
training_config = {
    "optimizer": "AdamW",
    "learning_rate": 0.0001,
    "l2_decay": True,
    "l2_lambda": 0.01,
    "lr_strategy": "Cosine Annealing",
    "epochs": 100,
    "batch_size": 32,
    "class_weights": "Focal Loss"
}

dataset_config = {
    "dataset_path": "dataset",
    "preprocessing": {"target_size": (224, 224)},
    "augmentation": {"preset": "Moderate"}
}

model_config = {
    "architecture": "ResNet50",
    "num_classes": 9,
    "pretrained": True
}

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = build_model(model_config).to(device)  # build_model: your model factory (defined outside this module)

# Create dataloaders
dataloaders, class_names, class_weights = create_dataloaders(
    dataset_config,
    training_config
)

# Create optimizer, scheduler, and loss
optimizer = create_optimizer(model, training_config)

scheduler = create_scheduler(
    optimizer,
    training_config,
    steps_per_epoch=len(dataloaders["train"])
)

criterion = create_criterion(
    training_config,
    class_weights,
    device
)

# Create training engine
engine = TrainingEngine(
    model=model,
    train_loader=dataloaders["train"],
    val_loader=dataloaders["val"],
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    scheduler=scheduler,
    early_stopping_patience=10
)

# Train
results = engine.fit(epochs=training_config["epochs"])
print(f"Training completed in {results['duration']}")
print(f"Best validation loss: {results['best_val_loss']:.4f}")

Learning Rate Schedule Visualization

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from training.optimizers import create_scheduler

model = nn.Linear(10, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Test different schedulers
schedulers = [
    ("Cosine Annealing", "Cosine Annealing"),
    ("Step Decay", "Step Decay"),
    ("Exponential Decay", "Exponential Decay")
]

plt.figure(figsize=(12, 4))

for idx, (name, strategy) in enumerate(schedulers):
    config = {"lr_strategy": strategy, "epochs": 100}
    scheduler = create_scheduler(optimizer, config, steps_per_epoch=100)
    
    lrs = []
    for epoch in range(100):
        lrs.append(optimizer.param_groups[0]["lr"])
        if scheduler:
            scheduler.step()
    
    plt.subplot(1, 3, idx + 1)
    plt.plot(lrs)
    plt.title(name)
    plt.xlabel("Epoch")
    plt.ylabel("Learning Rate")
    plt.grid(True)
    
    # Reset optimizer
    for param_group in optimizer.param_groups:
        param_group["lr"] = 0.001

plt.tight_layout()
plt.savefig("lr_schedules.png")
