Skip to main content

Overview

Tags provide a flexible way to organize and categorize your Metaflow runs. Use tags to mark production deployments, track experiments, or group related runs.

What are Tags?

Tags are user-defined labels attached to runs. They help you:
  • Identify production runs
  • Group related experiments
  • Mark successful deployments
  • Track A/B tests
  • Organize by team or project

Adding Tags

Add Tags via CLI

Add tags when running a flow:
# Single tag
python myflow.py run --tag experiment

# Multiple tags: repeat --tag once per tag
python myflow.py run --tag production --tag v1.2.3

# Tags with spaces: quote the value so the shell passes it as one argument
python myflow.py run --tag "model training"

Add Tags Programmatically

Add tags to runs using the Client API:
from metaflow import Flow

# Look up the flow, then take its most recent run
flow = Flow('MyFlow')
run = flow.latest_run

# Attach one tag
run.add_tag('production')

# Attach several tags in a single call
release_tags = ['deployed', 'verified', 'v2.0']
run.add_tags(release_tags)

Add Tags in Flow Code

Use the `current` singleton object to tag runs from within a flow:
from metaflow import FlowSpec, step, current

class TaggedFlow(FlowSpec):
    """Example flow that tags its own run while it executes."""

    @step
    def start(self):
        """Tag the run at startup and store the metric that `end` reads."""
        # Add tags during execution
        current.run.add_tag('automated')
        current.run.add_tag(f"date-{current.run.created_at.date()}")
        # Placeholder result: `end` reads self.accuracy, so it has to be
        # assigned in an earlier step. Replace with a real evaluation.
        self.accuracy = 0.97
        self.next(self.end)

    @step
    def end(self):
        """Add a result-based tag when the stored accuracy exceeds 0.95."""
        # Add tag based on results
        if self.accuracy > 0.95:
            current.run.add_tag('high-accuracy')

Tag Validation Rules

Metaflow enforces validation rules for tags:

Size Limits

  • Maximum user tags per run: 50
  • Maximum tag length: 500 characters

Character Requirements

  • Tags must be UTF-8 encodable/decodable
  • Tags cannot be empty strings
  • Tags can contain spaces and special characters
from metaflow.tagging_util import validate_tag, validate_tags

# Valid tags
for good_tag in ("production", "version-1.2.3", "my experiment"):
    validate_tag(good_tag)  # OK

# Invalid tags
validate_tag("")  # Raises MetaflowTaggingError
validate_tag("x" * 501)  # Raises MetaflowTaggingError (too long)

Finding Tagged Runs

Query by Tags

Find runs with specific tags:
from metaflow import Flow

flow = Flow('MyFlow')

# Find all production runs
production_runs = [run for run in flow
                   if 'production' in run.tags]

# Find runs with multiple tags
validated_prod = [run for run in flow
                  if 'production' in run.tags
                  and 'validated' in run.tags]

# Find latest run with tag: take the first match in iteration order.
# next() with a default leaves latest_prod as None when no run carries
# the tag, instead of leaving the name undefined.
latest_prod = next(
    (run for run in flow if 'production' in run.tags),
    None,
)

List All Tags

Get all tags from a run:
run = Flow('MyFlow').latest_run
tags = run.tags
print(f"Tags: {tags}")

# Tags are a set, so they can be iterated one at a time
for tag in tags:
    print(f"  - {tag}")

Removing Tags

Remove tags from runs:
from metaflow import Flow

run = Flow('MyFlow')['1234']

# Remove a single tag
run.remove_tag('experimental')

# Remove multiple tags
run.remove_tags(['test', 'debug'])

# Remove all user tags. Use run.user_tags rather than run.tags: run.tags
# also contains system tags, and removing a system tag is an error.
run.remove_tags(run.user_tags)
You can only modify tags on runs that haven’t been finalized by the metadata service.

Common Tagging Patterns

Environment Tags

Tag runs by deployment environment:
import os
from metaflow import FlowSpec, step, current

class DeploymentFlow(FlowSpec):
    """Example flow that tags its run with the deployment environment."""

    @step
    def start(self):
        """Tag the run from the ENVIRONMENT variable (falls back to 'dev')."""
        env = os.getenv('ENVIRONMENT', 'dev')

        # Collect the tags first, then apply them in order
        tags = [f"env:{env}"]
        if env == 'production':
            tags.append('requires-approval')
        for tag in tags:
            current.run.add_tag(tag)

        self.next(self.end)

    @step
    def end(self):
        """Terminal step; nothing to do."""
        pass

Version Tags

Tag runs with version information:
from metaflow import FlowSpec, step, Parameter, current

class VersionedFlow(FlowSpec):
    """Example flow that tags its run with the `version` parameter."""

    version = Parameter('version', default='1.0.0')

    @step
    def start(self):
        """Add a full-version tag and a major-version tag to the run."""
        version_tags = (
            f"version:{self.version}",
            # Text before the first '.' is treated as the major version
            f"major:{self.version.split('.')[0]}",
        )
        for tag in version_tags:
            current.run.add_tag(tag)
        self.next(self.end)

    @step
    def end(self):
        """Terminal step; nothing to do."""
        pass

Experiment Tags

Organize ML experiments:
from metaflow import FlowSpec, step, Parameter, current

class ExperimentFlow(FlowSpec):
    """Example flow that tags an ML experiment run and its outcome."""

    # required=True: without a value the parameter would be None and the
    # run would be tagged "experiment:None".
    experiment_id = Parameter('experiment-id', required=True)

    @step
    def start(self):
        """Tag the run with the experiment id and two fixed category tags."""
        # Tag experiment
        current.run.add_tag(f"experiment:{self.experiment_id}")
        current.run.add_tag('ml-experiment')
        current.run.add_tag('hyperparameter-tuning')
        self.next(self.train)

    @step
    def train(self):
        """Store the evaluation result as `self.accuracy` for the end step."""
        # Train model and evaluate. train_and_evaluate is not defined in
        # this snippet; supply your own function that returns the accuracy.
        self.accuracy = train_and_evaluate()
        self.next(self.end)

    @step
    def end(self):
        """Add performance tags when accuracy exceeds 0.95."""
        # Tag based on performance
        if self.accuracy > 0.95:
            current.run.add_tag('best-model')
            current.run.add_tag('candidate-for-production')

Team and Project Tags

Organize by ownership:
from metaflow import FlowSpec, step, current
import os

class TeamFlow(FlowSpec):
    """Example flow that tags its run with team, project and user."""

    @step
    def start(self):
        """Add ownership tags to the run."""
        # Tag by team and project
        current.run.add_tag('team:data-science')
        current.run.add_tag('project:recommendation-engine')
        # Use current.username rather than os.environ.get('USER'): the
        # environment lookup returns None when USER is unset, which would
        # produce the tag "user:None".
        current.run.add_tag(f"user:{current.username}")
        self.next(self.end)

    @step
    def end(self):
        """Terminal step; nothing to do."""
        pass

Time-based Tags

Tag runs by time period:
from metaflow import FlowSpec, step, current
from datetime import datetime

class TimeTaggedFlow(FlowSpec):
    """Example flow that tags its run with the current time period."""

    @step
    def start(self):
        """Add year, month, ISO week and quarter tags based on datetime.now()."""
        now = datetime.now()

        # Build every time-based tag from the same timestamp, then apply
        # them in order
        period_tags = [
            f"year:{now.year}",
            f"month:{now.year}-{now.month:02d}",
            f"week:{now.isocalendar().week}",
            f"quarter:Q{(now.month-1)//3 + 1}",
        ]
        for tag in period_tags:
            current.run.add_tag(tag)

        self.next(self.end)

    @step
    def end(self):
        """Terminal step; nothing to do."""
        pass

Advanced Tag Queries

Filter by Tag Patterns

Find runs matching tag patterns:
from metaflow import Flow
import re

flow = Flow('MyFlow')

# Compiled once; the same pattern is tested against every tag of every run
v2_pattern = re.compile(r'version:2\.\d+\.\d+')


def _any_tag(run, predicate):
    """Return True if predicate holds for at least one tag of run."""
    return any(predicate(tag) for tag in run.tags)


# Find all version 2.x runs
v2_runs = [run for run in flow if _any_tag(run, v2_pattern.match)]

# Find experiment runs from specific month
month_runs = [
    run for run in flow
    if _any_tag(run, lambda tag: tag.startswith('month:2024-03'))
]

# Find runs by team
team_runs = [run for run in flow if 'team:data-science' in run.tags]

Count Runs by Tag

Analyze tag distribution:
from metaflow import Flow
from collections import Counter

flow = Flow('MyFlow')

# Flatten the tags of every run into one list, then count occurrences
all_tags = [tag for run in flow for tag in run.tags]
tag_counts = Counter(all_tags)

print("Top 10 tags:")
for tag, count in tag_counts.most_common(10):
    print(f"  {tag}: {count}")

Compare Tagged Runs

Compare metrics across tagged runs:
from metaflow import Flow
import pandas as pd

flow = Flow('MLFlow')

# Collect metrics from production runs
prod_metrics = []
for run in flow:
    if 'production' not in run.tags:
        continue

    # Best effort: a run without an 'end' step or without an accuracy
    # artifact is reported and skipped. Catching Exception (not a bare
    # except) lets KeyboardInterrupt and SystemExit propagate.
    try:
        end_task = run['end'].task
        accuracy = end_task.data.accuracy
    except Exception as exc:
        print(f"Skipping run {run.id}: {exc!r}")
        continue

    # maxsplit=1 keeps the whole value even if the version contains ':'
    version = next(
        (t.split(':', 1)[1] for t in run.tags if t.startswith('version:')),
        None,
    )
    prod_metrics.append({
        'run_id': run.id,
        'accuracy': accuracy,
        'version': version,
        'deployed_at': run.created_at,
    })

df = pd.DataFrame(prod_metrics)
print(df)

Tag Management Best Practices

Establish tag naming conventions:
# Good: Consistent prefix pattern
current.run.add_tag('env:production')
current.run.add_tag('version:1.2.3')
current.run.add_tag('team:ml-platform')

# Avoid: Inconsistent naming
current.run.add_tag('PRODUCTION')
current.run.add_tag('v1.2.3')
current.run.add_tag('ML Team')
Maintain a tag dictionary:
# tags.py

# Descriptions for tags that are used verbatim
_EXACT_TAGS = {
    'production': 'Deployed to production environment',
    'validated': 'Passed validation checks',
    'rollback': 'Used for rollback',
}

# Descriptions for tag families; '*' stands for the value after the prefix
_PREFIXED_TAGS = {
    'env:*': 'Deployment environment (dev, staging, prod)',
    'version:*': 'Semantic version number',
    'experiment:*': 'Experiment identifier',
}

# Single lookup table mapping each tag (or tag family) to its meaning
TAG_DEFINITIONS = {**_EXACT_TAGS, **_PREFIXED_TAGS}
Remove old or obsolete tags:
from metaflow import Flow
from datetime import datetime, timedelta

flow = Flow('MyFlow')
old_threshold = datetime.now() - timedelta(days=30)

# Temporary tags that should not linger on old runs
stale_tags = ['temp', 'testing', 'debug']

for run in flow:
    # Runs created at or after the threshold are left untouched
    if run.created_at >= old_threshold:
        continue
    run.remove_tags(stale_tags)
Use tags to make runs easy to find:
# Add multiple relevant tags, applied in the order listed
relevant_tags = (
    'ml-training',
    'resnet50',
    'imagenet',
    'gpu-enabled',
    f"author:{current.username}",
)
for tag in relevant_tags:
    current.run.add_tag(tag)

System Tags vs User Tags

Metaflow distinguishes between system and user tags:
  • System tags: Automatically added by Metaflow (e.g., user:…, runtime:…, python_version:…, metaflow_version:…)
  • User tags: Manually added by users (limited to 50)
The 50-tag limit applies only to user tags.

Next Steps

Versioning

Learn about version tracking in Metaflow

Best Practices

Follow recommended tagging patterns

Build docs developers (and LLMs) love