ResourceSettings
The ResourceSettings class allows you to specify CPU, memory, GPU, and other resource requirements for pipeline steps.

Signature

ResourceSettings(
    cpu_count: Optional[PositiveInt] = None,
    gpu_count: Optional[NonNegativeInt] = None,
    memory: Optional[str] = None,
    node_selectors: Optional[Dict[str, str]] = None,
    affinity: Optional[Dict[str, Any]] = None,
    tolerations: Optional[List[Dict[str, Any]]] = None,
    # Additional deployer/serverless settings
    min_replicas: Optional[int] = None,
    max_replicas: Optional[int] = None,
    autoscaling_metric: Optional[str] = None,
    autoscaling_target: Optional[float] = None,
    max_concurrency: Optional[int] = None,
)

Parameters

cpu_count
int
Number of CPUs to allocate. Must be a positive integer.
gpu_count
int
Number of GPUs to allocate. Must be zero or positive.
memory
str
Amount of memory to allocate. Format: number followed by unit (e.g., "4GB", "512MB", "2GiB").
node_selectors
Dict[str, str]
Kubernetes node selector constraints.
affinity
Dict[str, Any]
Kubernetes affinity rules.
tolerations
List[Dict[str, Any]]
Kubernetes tolerations for node taints.
min_replicas
int
Minimum number of replicas (for deployments). Set to 0 to allow scaling to zero.
max_replicas
int
Maximum number of replicas (for deployments). None means no specific limit.
autoscaling_metric
str
Metric to use for autoscaling (e.g., "cpu", "concurrency", "rps").
autoscaling_target
float
Target value for the autoscaling metric (e.g., 75.0 for 75% CPU).
max_concurrency
int
Maximum number of concurrent requests per instance.

Examples

Basic CPU and Memory

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=4,
            memory="8GB"
        )
    }
)
def data_processing() -> None:
    """Example step configured to run with 4 CPUs and 8 GB of memory."""
    # This step will run with 4 CPUs and 8GB memory
    pass

GPU-Accelerated Step

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=8,
            gpu_count=2,
            memory="32GB"
        )
    }
)
def train_deep_learning_model() -> None:
    """Example training step requesting 8 CPUs, 2 GPUs, and 32 GB of memory."""
    # This step will run with 2 GPUs
    import torch
    # Use CUDA when the orchestrated environment exposes a GPU, else fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Training code...

Memory Units

from zenml import step
from zenml.config import ResourceSettings

# Different memory unit formats
@step(
    settings={
        "resources": ResourceSettings(
            memory="512MB"  # Megabytes (decimal unit, powers of 1000)
        )
    }
)
def small_step() -> None:
    """Example step allocating 512 MB (decimal megabytes) of memory."""
    pass

@step(
    settings={
        "resources": ResourceSettings(
            memory="4GiB"  # Gibibytes (binary unit, powers of 1024)
        )
    }
)
def medium_step() -> None:
    """Example step allocating 4 GiB (binary gibibytes) of memory."""
    pass

@step(
    settings={
        "resources": ResourceSettings(
            memory="16GB"  # Gigabytes (decimal unit, powers of 1000)
        )
    }
)
def large_step() -> None:
    """Example step allocating 16 GB (decimal gigabytes) of memory."""
    pass

Kubernetes Node Selectors

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=16,
            memory="64GB",
            # Kubernetes node selectors pin the pod to matching node labels.
            node_selectors={
                "node.kubernetes.io/instance-type": "n1-highmem-16",
                "cloud.google.com/gke-nodepool": "high-memory-pool"
            }
        )
    }
)
def memory_intensive_step() -> None:
    """Example step pinned to high-memory GKE nodes via node selectors."""
    # Runs on specific node types
    pass

GPU Node Selection

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            gpu_count=4,
            # Restrict scheduling to nodes labeled with the V100 accelerator.
            node_selectors={
                "accelerator": "nvidia-tesla-v100"
            }
        )
    }
)
def gpu_training() -> None:
    """Example step requesting 4 GPUs on nodes labeled with V100 accelerators."""
    # Runs on nodes with V100 GPUs
    pass

Different Resources for Different Steps

from zenml import pipeline, step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=2,
            memory="4GB"
        )
    }
)
def load_data() -> dict:
    """Lightweight ingestion step (2 CPUs, 4 GB) returning a sample payload."""
    return {"data": [1, 2, 3]}

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=8,
            gpu_count=1,
            memory="16GB"
        )
    }
)
def train_model(data: dict) -> None:
    """Training step (8 CPUs, 1 GPU, 16 GB) consuming the loaded data."""
    # Heavy computation
    pass

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=4,
            memory="8GB"
        )
    }
)
def evaluate_model() -> None:
    """Evaluation step with mid-sized resources (4 CPUs, 8 GB)."""
    # Medium computation
    pass

@pipeline
def ml_pipeline():
    """Pipeline wiring steps that each declare their own ResourceSettings."""
    data = load_data()
    train_model(data)
    evaluate_model()

Deployment Settings

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=2,
            memory="4GB",
            # Autoscaling configuration
            min_replicas=2,  # Always keep at least 2 instances
            max_replicas=10,  # Scale up to 10 instances
            autoscaling_metric="cpu",
            autoscaling_target=75.0,  # Target 75% CPU utilization
            max_concurrency=50  # Max 50 concurrent requests per instance
        )
    }
)
def model_serving_step() -> None:
    """Serving step autoscaled between 2 and 10 replicas at 75% target CPU."""
    # Deployed with autoscaling
    pass

Serverless Configuration

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=4,
            memory="8GB",
            min_replicas=0,  # Can scale to zero when idle
            max_replicas=100,  # Scale up to 100 for bursts
            autoscaling_metric="concurrency",
            autoscaling_target=10.0,  # Target 10 concurrent requests
            max_concurrency=20  # Max 20 concurrent per instance
        )
    }
)
def serverless_inference() -> None:
    """Serverless-style step that scales 0-100 replicas on request concurrency."""
    # Scales to zero when not in use
    pass

Kubernetes Tolerations

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            gpu_count=2,
            # Kubernetes tolerations let the pod schedule onto tainted nodes.
            tolerations=[
                {
                    "key": "nvidia.com/gpu",
                    "operator": "Exists",
                    "effect": "NoSchedule"
                }
            ]
        )
    }
)
def gpu_step_with_tolerations() -> None:
    """GPU step tolerating the nvidia.com/gpu NoSchedule taint."""
    # Can run on tainted GPU nodes
    pass

Affinity Rules

from zenml import step
from zenml.config import ResourceSettings

@step(
    settings={
        "resources": ResourceSettings(
            cpu_count=16,
            # Raw Kubernetes node-affinity spec: hard requirement on instance type.
            affinity={
                "nodeAffinity": {
                    "requiredDuringSchedulingIgnoredDuringExecution": {
                        "nodeSelectorTerms": [{
                            "matchExpressions": [{
                                "key": "node.kubernetes.io/instance-type",
                                "operator": "In",
                                "values": ["n1-standard-16", "n1-highmem-16"]
                            }]
                        }]
                    }
                }
            }
        )
    }
)
def step_with_affinity() -> None:
    """Step restricted via node affinity to n1-standard-16 / n1-highmem-16 nodes."""
    # Runs on specific node types with affinity
    pass

Dynamic Resource Configuration

from zenml import step, pipeline
from zenml.config import ResourceSettings

@step
def configurable_step() -> None:
    """Step with no inline resources; configured later via with_options."""
    pass

@pipeline
def dynamic_pipeline(use_gpu: bool = False):
    """Pipeline that selects a step resource profile at composition time.

    When ``use_gpu`` is True, the step gets a GPU-enabled profile
    (8 CPUs, 1 GPU, 16 GB); otherwise a CPU-only profile (4 CPUs, 8 GB).
    """
    # Pick the resource profile matching the requested hardware.
    if use_gpu:
        step_resources = ResourceSettings(cpu_count=8, gpu_count=1, memory="16GB")
    else:
        step_resources = ResourceSettings(cpu_count=4, memory="8GB")

    configurable_step.with_options(settings={"resources": step_resources})()

# Run the pipeline with the GPU resource profile (8 CPUs, 1 GPU, 16 GB)
dynamic_pipeline(use_gpu=True)

# Run the pipeline with the CPU-only resource profile (4 CPUs, 8 GB)
dynamic_pipeline(use_gpu=False)

Memory Units

Supported memory unit formats:
  • Decimal units: KB, MB, GB, TB, PB (powers of 1000)
  • Binary units: KiB, MiB, GiB, TiB, PiB (powers of 1024)
Examples:
  • "512MB" = 512,000,000 bytes
  • "512MiB" = 536,870,912 bytes
  • "4GB" = 4,000,000,000 bytes
  • "4GiB" = 4,294,967,296 bytes

Use Cases

  1. GPU training - Allocate GPUs for deep learning
  2. Memory-intensive processing - Handle large datasets
  3. Parallel processing - Use multiple CPUs
  4. Node selection - Run on specific hardware
  5. Autoscaling deployments - Configure scaling behavior
  6. Cost optimization - Right-size resources
  7. Serverless workloads - Scale to zero when idle

Important Notes

  • Resource settings are respected by orchestrators that support resource allocation (Kubernetes, cloud providers)
  • Local orchestrators may ignore resource settings
  • Actual resource availability depends on your infrastructure
  • GPU count of 0 means no GPU allocation (different from None)
  • For deployments, combine with autoscaling settings for optimal performance

@step

Configure step resources

@pipeline

Learn about pipelines

Build docs developers (and LLMs) love