QualiVision uses a centralized configuration system defined in src/config/config.py. This page documents all available configuration dictionaries and their parameters.

Model Configurations

DOVER_CONFIG

Configuration for the DOVER++ model.
from src.config.config import DOVER_CONFIG

# Access configuration
model_name = DOVER_CONFIG["model_name"]
video_res = DOVER_CONFIG["video_resolution"]

Parameters

model_name (string, default "DOVER++"): Name of the DOVER model variant.
video_resolution (tuple[int, int], default (640, 640)): Input video resolution as (width, height) in pixels.
num_frames (int, default 64): Number of frames to sample from each video.
text_encoder (string, default "BAAI/bge-large-en-v1.5"): HuggingFace model identifier for the text encoder.
dover_dim (int, default 1024): Dimensionality of DOVER feature embeddings.
text_dim (int, default 1024): Dimensionality of text embeddings.
hidden_dim (int, default 512): Hidden layer dimensionality for the fusion layers.
pretrained_weights (string): URL for downloading pretrained DOVER++ weights.
batch_size (int, default 4): Number of videos per batch during training.
learning_rate (float, default 1e-4): Initial learning rate for optimization.
epochs (int, default 5): Total number of training epochs.
gradient_accumulation_steps (int, default 8): Number of steps to accumulate gradients before updating weights.
effective_batch_size (int, default 32): Effective batch size (batch_size × gradient_accumulation_steps).
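
The effective batch size follows directly from the per-step batch size and the number of accumulation steps; a minimal sanity check against the defaults listed above:

from src.config.config import DOVER_CONFIG

# 4 videos per step x 8 accumulation steps = an effective batch of 32
effective = DOVER_CONFIG["batch_size"] * DOVER_CONFIG["gradient_accumulation_steps"]
assert effective == DOVER_CONFIG["effective_batch_size"]  # 32 with the defaults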

Training Configuration

Global training parameters that apply to all models.
from src.config.config import TRAINING_CONFIG

# Example: Configure training
device = TRAINING_CONFIG["device"]
mixed_precision = TRAINING_CONFIG["mixed_precision"]

Parameters

device (string, default "cuda"): Device to use for training ("cuda" or "cpu").
mixed_precision (bool, default True): Enable automatic mixed precision (AMP) training for faster computation.
gradient_clipping (float, default 1.0): Maximum gradient norm for gradient clipping.
warmup_steps (int, default 100): Number of warmup steps for the learning rate scheduler.
logging_steps (int, default 50): Log training metrics every N steps.
save_steps (int, default 500): Save a model checkpoint every N steps.
eval_steps (int, default 500): Run evaluation every N steps.
max_grad_norm (float, default 1.0): Maximum gradient norm for clipping (same as gradient_clipping).
weight_decay (float, default 1e-2): Weight decay (L2 regularization) coefficient.
adam_epsilon (float, default 1e-8): Epsilon parameter for the Adam optimizer.
adam_betas (tuple[float, float], default (0.9, 0.999)): Beta parameters for the Adam optimizer (beta1, beta2).
scheduler (string, default "cosine"): Learning rate scheduler type ("cosine", "linear", etc.).
num_workers (int, default 4): Number of worker processes for data loading.
pin_memory (bool, default True): Pin memory in the DataLoader for faster CPU-to-GPU transfer.
persistent_workers (bool, default True): Keep worker processes alive between epochs.

Loss Configuration

Configuration for the custom loss function combining smooth L1, ranking, and scale-aware components.
from src.config.config import LOSS_CONFIG

# Example: Configure loss weights
loss_weights = LOSS_CONFIG["loss_weights"]
alpha = loss_weights["alpha"]  # 0.7
beta = loss_weights["beta"]    # 0.3
gamma = loss_weights["gamma"]  # 0.1

Parameters

smooth_l1_beta (float, default 0.1): Beta parameter for the smooth L1 loss (transition point between L1 and L2).
ranking_margin (float, default 0.2): Margin for the pairwise ranking loss.
scale_weights (dict): Weights for different quality ranges:
  • low_quality: 1.5 (for MOS < 2.5)
  • high_quality: 1.5 (for MOS > 4.0)
  • normal: 1.0 (for 2.5 ≤ MOS ≤ 4.0)
loss_weights (dict, default {"alpha": 0.7, "beta": 0.3, "gamma": 0.1}): Weights for combining the loss components:
  • alpha: 0.7 (smooth L1 loss weight)
  • beta: 0.3 (ranking loss weight)
  • gamma: 0.1 (scale-aware weight)
adaptive_weighting (bool, default True): Enable adaptive loss weighting during training.
adaptation_rate (float, default 0.1): Rate at which loss weights adapt (only used if adaptive_weighting is True).

Dataset Configuration

Configuration for dataset loading and preprocessing.
from src.config.config import DATASET_CONFIG

# Example: Access dataset columns
mos_columns = DATASET_CONFIG["mos_columns"]
text_column = DATASET_CONFIG["text_column"]
video_column = DATASET_CONFIG["video_column"]

Parameters

mos_columns (list[str]): List of MOS (Mean Opinion Score) column names in the dataset.
text_column (string, default "Prompt"): Name of the column containing text prompts.
video_column (string, default "video_name"): Name of the column containing video file names.
train_split (float, default 0.8): Proportion of data to use for training (0.0 to 1.0).
val_split (float, default 0.2): Proportion of data to use for validation (0.0 to 1.0).
seed (int, default 42): Random seed for reproducible train/val splits.
max_text_length (int, default 512): Maximum length of text prompts in tokens.
video_extensions (list[str], default [".mp4", ".avi", ".mov", ".mkv"]): Supported video file extensions.

Evaluation Configuration

Configuration for model evaluation and inference.
from src.config.config import EVAL_CONFIG

# Example: Run evaluation
metrics = EVAL_CONFIG["metrics"]
batch_size = EVAL_CONFIG["batch_size"]

Parameters

metrics (list[str], default ["spearman", "pearson"]): Correlation metrics to compute during evaluation.
batch_size (int, default 1): Batch size for evaluation (typically 1 for video quality assessment).
num_workers (int, default 0): Number of worker processes for evaluation data loading.
save_predictions (bool, default True): Save prediction results to file.
output_format (list[str], default ["csv", "xlsx"]): Output formats for saving predictions.
generate_report (bool, default True): Generate a detailed evaluation report.

GPU Configuration

Configuration for GPU memory management and optimization.
from src.config.config import GPU_CONFIG

# Example: Configure GPU settings
memory_fraction = GPU_CONFIG["memory_fraction"]
mixed_precision = GPU_CONFIG["mixed_precision"]

Parameters

memory_fraction (float, default 0.9): Fraction of GPU memory to allocate (0.0 to 1.0).
allow_growth (bool, default True): Allow GPU memory to grow dynamically as needed.
mixed_precision (bool, default True): Enable mixed precision (FP16/FP32) for faster computation.
gradient_checkpointing (bool, default False): Enable gradient checkpointing to save memory (disabled for V-JEPA2).
dataloader_pin_memory (bool, default True): Pin memory in the DataLoader for faster data transfer.
cleanup_frequency (int, default 50): Clean the GPU memory cache every N batches.

Usage Examples

Basic Configuration Access

from src.config.config import (
    DOVER_CONFIG,
    VJEPA_CONFIG,
    TRAINING_CONFIG,
    LOSS_CONFIG,
    DATASET_CONFIG,
    EVAL_CONFIG,
    GPU_CONFIG
)

# Access DOVER configuration
model_name = DOVER_CONFIG["model_name"]
video_resolution = DOVER_CONFIG["video_resolution"]
batch_size = DOVER_CONFIG["batch_size"]

# Access training configuration
device = TRAINING_CONFIG["device"]
learning_rate = TRAINING_CONFIG["learning_rate"]
num_workers = TRAINING_CONFIG["num_workers"]

Custom Configuration Override

import copy
from src.config.config import VJEPA_CONFIG, TRAINING_CONFIG

# Create a custom configuration
custom_config = copy.deepcopy(VJEPA_CONFIG)
custom_config["batch_size"] = 8
custom_config["learning_rate"] = 1e-4
custom_config["epochs"] = 20

# Override training settings
custom_training = copy.deepcopy(TRAINING_CONFIG)
custom_training["gradient_clipping"] = 0.5
custom_training["warmup_steps"] = 200

Using Discriminative Learning Rates

from src.config.config import VJEPA_CONFIG

# Get discriminative learning rate multipliers
base_lr = VJEPA_CONFIG["learning_rate"]  # 2e-4
discriminative_lr = VJEPA_CONFIG["discriminative_lr"]

# Calculate actual learning rates for each component
text_lr = base_lr * discriminative_lr["text"]    # 2e-5 (10%)
video_lr = base_lr * discriminative_lr["video"]  # 1e-4 (50%)
head_lr = base_lr * discriminative_lr["head"]    # 4e-4 (200%)

print(f"Text encoder LR: {text_lr}")
print(f"Video encoder LR: {video_lr}")
print(f"Prediction head LR: {head_lr}")

Configuring the Loss Function

from src.config.config import LOSS_CONFIG

# Extract loss configuration
smooth_l1_beta = LOSS_CONFIG["smooth_l1_beta"]
ranking_margin = LOSS_CONFIG["ranking_margin"]
scale_weights = LOSS_CONFIG["scale_weights"]
loss_weights = LOSS_CONFIG["loss_weights"]

# Use in loss computation (assuming smooth_l1_loss, ranking_loss, and
# scale_aware_loss have already been computed for the current batch)
alpha = loss_weights["alpha"]  # 0.7 for smooth L1
beta = loss_weights["beta"]    # 0.3 for ranking
gamma = loss_weights["gamma"]  # 0.1 for scale-aware

total_loss = (
    alpha * smooth_l1_loss +
    beta * ranking_loss +
    gamma * scale_aware_loss
)

Using the Dataset Configuration

from src.config.config import DATASET_CONFIG

# Load dataset configuration
mos_columns = DATASET_CONFIG["mos_columns"]
text_column = DATASET_CONFIG["text_column"]
video_column = DATASET_CONFIG["video_column"]
train_split = DATASET_CONFIG["train_split"]
seed = DATASET_CONFIG["seed"]

# Use in dataset loading
import pandas as pd

df = pd.read_csv("data/annotations.csv")
text_prompts = df[text_column]
video_files = df[video_column]
mos_scores = df[mos_columns]

Path Constants

In addition to the configuration dictionaries, the config module defines useful path constants:
from src.config.config import (
    PROJECT_ROOT,
    DATA_DIR,
    MODELS_DIR,
    NOTEBOOKS_DIR,
    DOCS_DIR,
    TRAIN_DATA_PATH,
    VAL_DATA_PATH,
    TEST_DATA_PATH
)

# Example usage
print(f"Project root: {PROJECT_ROOT}")
print(f"Training data: {TRAIN_DATA_PATH}")
print(f"Models directory: {MODELS_DIR}")
