dvc.api.metrics_show()

Description

Retrieves metrics tracked in a DVC repository. Without arguments, this function retrieves all metrics from all tracked metrics files for the current working tree. Metrics are typically stored in JSON, YAML, or other structured formats and tracked in dvc.yaml. This function provides programmatic access to these metrics for analysis, comparison, and reporting.

Signature

dvc.api.metrics_show(
    *targets: str,
    repo: Optional[str] = None,
    rev: Optional[str] = None,
    config: Optional[dict] = None,
) -> dict

Parameters

*targets
str
default: () — when no targets are given, all tracked metrics files are used
Names of the metric files to retrieve metrics from (positional arguments).
  • If no targets are provided, all metric files tracked in dvc.yaml will be used
  • Targets don’t necessarily have to be defined in dvc.yaml
  • Can specify multiple files
# Single target
metrics = dvc.api.metrics_show("metrics.json")

# Multiple targets
metrics = dvc.api.metrics_show("train_metrics.json", "eval_metrics.json")

# No targets (all metrics)
metrics = dvc.api.metrics_show()
repo
str
default:"None"
Location of the DVC repository.
  • Defaults to the current project (found by walking up from current working directory)
  • Can be a URL or a file system path
  • Both HTTP and SSH protocols are supported for online Git repos
repo="https://github.com/iterative/example-get-started"
repo="[email protected]:user/project.git"
repo="/path/to/local/repo"
rev
str
default:"None"
Name of the Git revision to retrieve metrics from.
  • Defaults to None (current working tree)
  • Can be a branch name, tag name, commit hash, or DVC experiment name
  • If repo is not a Git repo, this option is ignored
rev="main"
rev="v1.0.0"
rev="abc123"
rev="exp-random-forest"
config
dict
default:"None"
Config dictionary to be passed through to the DVC project.
config={"cache": {"dir": "/tmp/cache"}}

Returns

metrics
dict
A dictionary containing the metrics. The structure depends on the metrics files:
  • Single file: Returns the metrics directly
  • Multiple files with unique keys: Merges metrics from all files
  • Multiple files with conflicting keys: Prefixes keys with filename:key
# Example return value
{
    "accuracy": 0.9249,
    "precision": 0.9156,
    "recall": 0.9342,
    "f1_score": 0.9248
}

Examples

Basic Usage - All Metrics

import dvc.api
import json

# Get all metrics from current working tree
metrics = dvc.api.metrics_show()
print(json.dumps(metrics, indent=2))
{
  "accuracy": 0.9249974999612706,
  "precision": 0.9156,
  "recall": 0.9342,
  "f1_score": 0.9248,
  "roc_auc": 0.9460213440787918
}

From Specific Git Revision

import dvc.api

# Get metrics from main branch
metrics_main = dvc.api.metrics_show(rev="main")

# Get metrics from experiment
metrics_exp = dvc.api.metrics_show(rev="exp-random-forest")

print(f"Main accuracy: {metrics_main['accuracy']:.4f}")
print(f"Experiment accuracy: {metrics_exp['accuracy']:.4f}")

if metrics_exp['accuracy'] > metrics_main['accuracy']:
    print("Experiment improved accuracy!")

Specific Metrics Files

import dvc.api

# Get metrics from specific file
metrics = dvc.api.metrics_show("evaluation.json")
print(f"ROC AUC: {metrics['roc_auc']}")

# Get metrics from multiple files
metrics = dvc.api.metrics_show(
    "train_metrics.json",
    "eval_metrics.json"
)

Remote Repository

import dvc.api

metrics = dvc.api.metrics_show(
    repo="https://github.com/iterative/example-get-started"
)
print(f"Average precision: {metrics['avg_prec']:.4f}")
print(f"ROC AUC: {metrics['roc_auc']:.4f}")

Compare Metrics Across Branches

import dvc.api
import pandas as pd

branches = ["main", "experiment-1", "experiment-2", "experiment-3"]
results = []

for branch in branches:
    metrics = dvc.api.metrics_show(rev=branch)
    results.append({
        "branch": branch,
        "accuracy": metrics.get("accuracy"),
        "f1_score": metrics.get("f1_score")
    })

df = pd.DataFrame(results)
print(df.to_string())
print(f"\nBest accuracy: {df['accuracy'].max():.4f}")

Compare Against Baseline

import dvc.api

# Get baseline metrics
baseline = dvc.api.metrics_show(rev="v1.0.0")

# Get current metrics
current = dvc.api.metrics_show()

# Compare
for metric in ["accuracy", "precision", "recall"]:
    baseline_val = baseline.get(metric, 0)
    current_val = current.get(metric, 0)
    improvement = current_val - baseline_val
    
    print(f"{metric}:")
    print(f"  Baseline: {baseline_val:.4f}")
    print(f"  Current:  {current_val:.4f}")
    print(f"  Change:   {improvement:+.4f}")

Track Model Performance Over Time

import dvc.api
from datetime import datetime

# Get metrics from different tags (releases)
tags = ["v1.0.0", "v1.1.0", "v1.2.0", "v2.0.0"]
performance_history = []

for tag in tags:
    try:
        metrics = dvc.api.metrics_show(rev=tag)
        performance_history.append({
            "version": tag,
            "accuracy": metrics.get("accuracy"),
            "f1_score": metrics.get("f1_score")
        })
    except Exception as e:
        print(f"Could not get metrics for {tag}: {e}")

# Plot or analyze the history
for entry in performance_history:
    print(f"{entry['version']}: accuracy={entry['accuracy']:.4f}")

Automated Performance Check

import dvc.api
import sys

# Get metrics from current experiment
current_metrics = dvc.api.metrics_show()

# Get metrics from production
prod_metrics = dvc.api.metrics_show(rev="production")

# Define threshold
MIN_ACCURACY = 0.90
MIN_IMPROVEMENT = 0.02

current_acc = current_metrics.get("accuracy", 0)
prod_acc = prod_metrics.get("accuracy", 0)

if current_acc < MIN_ACCURACY:
    print(f"ERROR: Accuracy {current_acc:.4f} below threshold {MIN_ACCURACY}")
    sys.exit(1)

if current_acc < prod_acc + MIN_IMPROVEMENT:
    print("WARNING: Insufficient improvement over production")
    print(f"Current: {current_acc:.4f}, Production: {prod_acc:.4f}")
    sys.exit(1)

print("SUCCESS: Model meets quality criteria")
print(f"Accuracy: {current_acc:.4f} (production: {prod_acc:.4f})")

Export Metrics to CSV

import dvc.api
import csv

experiments = [
    "exp-baseline",
    "exp-feature-eng",
    "exp-hyperparams",
    "exp-ensemble"
]

with open("metrics_comparison.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Experiment", "Accuracy", "Precision", "Recall", "F1"])
    
    for exp in experiments:
        try:
            metrics = dvc.api.metrics_show(rev=exp)
            writer.writerow([
                exp,
                metrics.get("accuracy"),
                metrics.get("precision"),
                metrics.get("recall"),
                metrics.get("f1_score")
            ])
        except Exception as e:
            print(f"Skipping {exp}: {e}")

print("Metrics exported to metrics_comparison.csv")

Handle Multiple Metrics Files

import dvc.api

# When files have conflicting keys, they're prefixed with filename
metrics = dvc.api.metrics_show(
    "train_metrics.json",
    "validation_metrics.json"
)

# Access metrics from specific files
train_loss = metrics.get("train_metrics.json:loss")
val_loss = metrics.get("validation_metrics.json:loss")

print(f"Training loss: {train_loss}")
print(f"Validation loss: {val_loss}")

Error Handling

import dvc.api

try:
    metrics = dvc.api.metrics_show(
        "metrics.json",
        repo="https://github.com/user/repo",
        rev="main"
    )
    print(f"Accuracy: {metrics.get('accuracy')}")
except FileNotFoundError:
    print("Metrics file not found")
except KeyError as e:
    print(f"Metric key not found: {e}")
except Exception as e:
    print(f"Error retrieving metrics: {e}")

Use Cases

Model Evaluation

Retrieve and compare model performance metrics across experiments.

CI/CD Integration

Automate quality checks based on metrics in deployment pipelines.

Performance Tracking

Track model performance over time across different versions.

Experiment Analysis

Analyze and compare metrics from multiple experiments.

Metrics File Formats

{
  "accuracy": 0.9249,
  "precision": 0.9156,
  "recall": 0.9342,
  "f1_score": 0.9248,
  "confusion_matrix": [[45, 5], [3, 47]]
}
metrics = dvc.api.metrics_show("metrics.json")
print(metrics["accuracy"])  # 0.9249

Return Value Structure

When retrieving metrics from a single file, the structure matches the file content:
metrics = dvc.api.metrics_show("metrics.json")
# {
#   "accuracy": 0.9249,
#   "precision": 0.9156,
#   "recall": 0.9342
# }
When metric keys are unique across files, they’re merged:
metrics = dvc.api.metrics_show("train_metrics.json", "eval_metrics.json")
# {
#   "train_loss": 0.23,     # from train_metrics.json
#   "eval_accuracy": 0.92   # from eval_metrics.json
# }
When keys conflict, they’re prefixed with the filename:
metrics = dvc.api.metrics_show("train.json", "val.json")
# {
#   "train.json:loss": 0.23,
#   "val.json:loss": 0.31
# }

Configuring Metrics in dvc.yaml

stages:
  train:
    cmd: python train.py
    metrics:
      - metrics/train.json:
          cache: false
  
  evaluate:
    cmd: python evaluate.py
    metrics:
      - metrics/eval.json:
          cache: false
# Retrieve all configured metrics
metrics = dvc.api.metrics_show()

Best Practices

Store metrics in JSON or YAML for easy access:
# Good - Structured format
import json
metrics = {
    "accuracy": 0.92,
    "loss": 0.23,
    "f1_score": 0.89
}
with open("metrics.json", "w") as f:
    json.dump(metrics, f)
Always compare new metrics against established baselines:
import dvc.api

baseline = dvc.api.metrics_show(rev="baseline")
current = dvc.api.metrics_show()

for metric, value in current.items():
    baseline_val = baseline.get(metric, 0)
    print(f"{metric}: {value:.4f} (baseline: {baseline_val:.4f})")
Track comprehensive metrics, not just accuracy:
metrics = {
    "accuracy": 0.92,
    "precision": 0.89,
    "recall": 0.94,
    "f1_score": 0.91,
    "roc_auc": 0.95,
    "training_time": 142.5,
    "inference_time_ms": 23.4
}
Use .get() with defaults:
import dvc.api

metrics = dvc.api.metrics_show()

accuracy = metrics.get("accuracy", 0.0)
precision = metrics.get("precision", 0.0)

if accuracy > 0:
    print(f"Accuracy: {accuracy:.4f}")
else:
    print("Accuracy metric not available")

Integration Examples

MLflow Integration

import dvc.api
import mlflow

# Log DVC metrics to MLflow
metrics = dvc.api.metrics_show()

with mlflow.start_run():
    for key, value in metrics.items():
        if isinstance(value, (int, float)):
            mlflow.log_metric(key, value)

Weights & Biases Integration

import dvc.api
import wandb

wandb.init(project="my-project")

# Log DVC metrics to W&B
metrics = dvc.api.metrics_show()
wandb.log(metrics)

Slack Notification

import dvc.api
import requests

SLACK_WEBHOOK_URL = "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"

metrics = dvc.api.metrics_show()
baseline = dvc.api.metrics_show(rev="production")

if metrics["accuracy"] > baseline["accuracy"]:
    message = f"🎉 New model improved accuracy to {metrics['accuracy']:.4f}!"
    # Send to Slack webhook
    requests.post(SLACK_WEBHOOK_URL, json={"text": message})

params_show()

Retrieve parameter values

exp_show()

Show experiments with metrics

read()

Read any tracked file

Build docs developers (and LLMs) love