Skip to main content
Task definitions are the atomic units of execution in Flyte. The schema is defined in flyteidl/core/tasks.proto.

TaskTemplate

TaskTemplate is the top-level message for a task definition. It is registered with FlyteAdmin via CreateTask and referenced by workflow nodes.
// TaskTemplate is the top-level message for a task definition. It is
// registered with FlyteAdmin via CreateTask and referenced by workflow nodes.
//
// container, k8s_pod, and sql share the `target` oneof, so at most one of
// them is set on any given template.
//
// NOTE(review): field numbers 10-15 do not appear in this listing; confirm
// against tasks.proto before assigning any of them to a new field.
message TaskTemplate {
  Identifier id = 1;               // globally unique identifier
  string type = 2;                 // task type plugin identifier
  TaskMetadata metadata = 3;       // caching, retries, timeout, tags
  TypedInterface interface = 4;    // strongly typed inputs and outputs
  google.protobuf.Struct custom = 5; // plugin-specific configuration

  // Execution target; setting one member clears the others.
  oneof target {
    Container container = 6;       // standard container execution
    K8sPod k8s_pod = 17;           // full Kubernetes pod spec
    Sql sql = 18;                  // SQL workload
  }

  int32 task_type_version = 7;     // plugin API version
  SecurityContext security_context = 8; // security attributes
  ExtendedResources extended_resources = 9; // GPU accelerator and shared memory config
  map<string, string> config = 16; // additional propeller hints
}

TaskMetadata

// TaskMetadata holds the per-task execution settings referenced from
// TaskTemplate.metadata: caching, retries, timeout, and tags, plus the
// runtime (SDK) information and pod-level metadata listed below.
//
// NOTE(review): field numbers 3 and 10 do not appear in this listing —
// presumably removed or omitted; confirm against tasks.proto and do not
// reuse them for new fields.
message TaskMetadata {
  bool discoverable = 1;             // enable output caching
  RuntimeMetadata runtime = 2;       // SDK runtime information
  google.protobuf.Duration timeout = 4;  // total task timeout (including retries)
  RetryStrategy retries = 5;         // number of retries
  string discovery_version = 6;      // cache key version string
  string deprecated_error_message = 7; // deprecation notice
  bool interruptible = 8;            // allow spot interruption
  bool cache_serializable = 9;       // serialize concurrent cache lookups
  map<string, string> tags = 11;     // arbitrary labels
  string pod_template_name = 12;     // base Kubernetes PodTemplate name
  repeated string cache_ignore_input_vars = 13; // inputs excluded from cache key
  bool is_eager = 14;                // eager execution mode
  // Wrapper type rather than a plain bool, so "unset" can be told apart from
  // an explicit false.
  google.protobuf.BoolValue generates_deck = 15; // produces a Flyte Deck
  K8sObjectMetadata metadata = 16;   // labels/annotations for the pod
}

RuntimeMetadata

// RuntimeMetadata identifies the SDK type and version used to author a task.
// It is populated automatically by the SDK when serializing tasks and helps
// with debugging.
message RuntimeMetadata {
  // Kind of runtime that produced the task definition.
  enum RuntimeType {
    OTHER = 0;      // zero value; the default when `type` is not set
    FLYTE_SDK = 1;  // task was authored with a Flyte SDK
  }
  RuntimeType type = 1;  // SDK type
  string version = 2;    // SDK version (e.g. "1.16.4")
  string flavor = 3;     // language flavor (e.g. "python")
}
RuntimeMetadata is populated automatically by the SDK when it serializes a task. It records the SDK type and version used to author the task, which helps with debugging.

Resources

Resources defines CPU, memory, GPU, and storage requests/limits for a container.
// Resources defines CPU, memory, GPU, and storage requests/limits for a
// container. Each entry pairs a ResourceName with a quantity string.
message Resources {
  // Names of the resources that can be requested or limited.
  enum ResourceName {
    UNKNOWN = 0;  // zero value; the default when `name` is not set
    CPU = 1;
    GPU = 2;
    MEMORY = 3;
    STORAGE = 4;
    EPHEMERAL_STORAGE = 5;
  }

  // A single (resource, quantity) pair.
  message ResourceEntry {
    ResourceName name = 1;  // which resource this entry applies to
    string value = 2;  // valid Kubernetes quantity (e.g. "100m", "2Gi")
  }

  repeated ResourceEntry requests = 1;  // requested resources
  repeated ResourceEntry limits = 2;    // maximum resource limits
}

Container

The Container message defines a standard Docker container execution target.
// Container defines a standard Docker container execution target. It is one
// of the members of the TaskTemplate `target` oneof.
//
// NOTE(review): field numbers 6 and 8 do not appear in this listing —
// presumably removed or omitted; confirm against tasks.proto and do not
// reuse them for new fields.
message Container {
  string image = 1;            // container image URI
  repeated string command = 2; // entrypoint override
  repeated string args = 3;    // arguments to the command
  Resources resources = 4;     // CPU/memory/GPU resource spec
  repeated KeyValuePair env = 5; // environment variables
  repeated ContainerPort ports = 7; // exposed ports (K8s only)
  DataLoadingConfig data_config = 9; // CoPilot data loading config
  Architecture architecture = 10;    // AMD64, ARM64, etc.
}

ExtendedResources

ExtendedResources covers non-standard resources that Resources does not capture, such as GPU accelerator selection and shared memory:
// ExtendedResources holds resource settings that the Resources message does
// not capture. It is referenced from TaskTemplate.extended_resources.
message ExtendedResources {
  GPUAccelerator gpu_accelerator = 1;  // GPU device and partitioning
  SharedMemory shared_memory = 2;      // shared-memory volume settings
}

// GPUAccelerator selects a GPU device and, optionally, how it is partitioned.
message GPUAccelerator {
  string device = 1;           // e.g. "nvidia-tesla-a100"
  // Partitioning choice; the two members are mutually exclusive, so setting
  // one clears the other.
  oneof partition_size_value {
    bool unpartitioned = 2;    // full GPU, no MIG partition
    string partition_size = 3; // MIG partition size, e.g. "1g.5gb"
  }
}

// SharedMemory describes a shared-memory volume: where it is mounted in the
// container, the volume's name, and an optional size limit.
message SharedMemory {
  string mount_path = 1;  // path in container
  string mount_name = 2;  // volume name
  string size_limit = 3;  // size limit (optional)
}

K8sPod

K8sPod is the execution target to use when a task needs full control over the Kubernetes pod specification:
// K8sPod is an execution target that carries a full Kubernetes pod spec. It
// is one of the members of the TaskTemplate `target` oneof.
message K8sPod {
  K8sObjectMetadata metadata = 1;  // labels and annotations
  // Untyped Struct, so the pod spec's shape is not checked by this schema.
  google.protobuf.Struct pod_spec = 2; // JSON-marshalled v1.PodSpec
  DataLoadingConfig data_config = 3;   // CoPilot configuration
  string primary_container_name = 4;   // name of the main container
}

Complete container task example

The following JSON represents a fully serialized Python container task:
{
  "id": {
    "resourceType": "TASK",
    "project": "flytesnacks",
    "domain": "development",
    "name": "my_module.process_data",
    "version": "abc123"
  },
  "type": "python-task",
  "metadata": {
    "runtime": {
      "type": "FLYTE_SDK",
      "version": "1.16.4",
      "flavor": "python"
    },
    "timeout": "3600s",
    "retries": { "retries": 2 },
    "discoverable": true,
    "discoveryVersion": "1.0",
    "interruptible": true,
    "tags": {
      "team": "ml-platform"
    }
  },
  "interface": {
    "inputs": {
      "variables": {
        "dataset_path": { "type": { "blob": { "dimensionality": "SINGLE" } } },
        "batch_size": { "type": { "simple": "INTEGER" } }
      }
    },
    "outputs": {
      "variables": {
        "metrics": { "type": { "simple": "STRUCT" } }
      }
    }
  },
  "container": {
    "image": "my-registry.example.com/ml-pipeline:v1.2.3",
    "command": [],
    "args": [
      "pyflyte-execute",
      "--inputs", "{{.input}}",
      "--output-prefix", "{{.outputPrefix}}",
      "--raw-output-data-prefix", "{{.rawOutputDataPrefix}}",
      "--resolver", "flytekit.core.python_auto_container.default_task_resolver",
      "--",
      "task-module", "my_module",
      "task-name", "process_data"
    ],
    "resources": {
      "requests": [
        { "name": "CPU", "value": "2" },
        { "name": "MEMORY", "value": "4Gi" }
      ],
      "limits": [
        { "name": "CPU", "value": "4" },
        { "name": "MEMORY", "value": "8Gi" },
        { "name": "GPU", "value": "1" }
      ]
    },
    "env": [
      { "key": "LOG_LEVEL", "value": "INFO" },
      { "key": "BATCH_SIZE_OVERRIDE", "value": "" }
    ]
  },
  "extendedResources": {
    "gpuAccelerator": {
      "device": "nvidia-tesla-a100",
      "unpartitioned": true
    }
  }
}

Defining resources in Python

In flytekit, resources are declared on the @task decorator and automatically serialized to the protobuf Resources message:
from flytekit import task, Resources

@task(
    # Caching settings.
    cache=True,
    cache_version="1.0",
    # Retry and timeout policy.
    retries=2,
    timeout=3600,  # seconds
    interruptible=True,
    # Resource requests and limits; flytekit serializes these to the
    # protobuf Resources message.
    requests=Resources(
        cpu="2",
        mem="4Gi",
    ),
    limits=Resources(
        cpu="4",
        mem="8Gi",
        gpu="1",
    ),
)
def process_data(dataset_path: str, batch_size: int) -> dict:
    # Body intentionally elided: the example only illustrates the decorator.
    ...

Build docs developers (and LLMs) love