The log_metadata() function allows you to attach metadata to various ZenML entities including steps, pipeline runs, artifacts, and model versions.
Signature
def log_metadata(
    metadata: Dict[str, MetadataType],
    # Step and run parameters
    step_id: Optional[UUID] = None,
    step_name: Optional[str] = None,
    run_id_name_or_prefix: Optional[Union[UUID, str]] = None,
    # Artifact parameters
    artifact_version_id: Optional[UUID] = None,
    artifact_name: Optional[str] = None,
    artifact_version: Optional[str] = None,
    infer_artifact: bool = False,
    # Model parameters
    model_version_id: Optional[UUID] = None,
    model_name: Optional[str] = None,
    model_version: Optional[Union[ModelStages, int, str]] = None,
    infer_model: bool = False,
) -> None
Parameters
metadata
Dict[str, MetadataType]
required
Dictionary of metadata key-value pairs to log. Values can be strings, numbers, booleans, lists, or dicts.
step_id: The ID of a specific step to attach metadata to.
step_name: The name of a step (requires run_id_name_or_prefix).
run_id_name_or_prefix: The ID, name, or prefix of a pipeline run.
artifact_version_id: The ID of a specific artifact version.
artifact_name: The name of an artifact (requires artifact_version for external calls).
artifact_version: The version of the artifact.
infer_artifact: Whether to infer the artifact from the step context (must be called inside a step).
model_version_id: The ID of a specific model version.
model_name: The name of a model (requires model_version).
model_version
Union[ModelStages, int, str]
The version or stage of the model.
Whether to infer the model from the step context (must be called inside a step with a configured model).
Examples
from zenml import step, log_metadata


@step
def training_step() -> None:
    """Train a (stub) model and log its metrics to the current step.

    Calling log_metadata with no target arguments inside a running step
    attaches the metadata to that step.
    """
    # Train model
    accuracy = 0.95
    loss = 0.05
    # Log to the current step
    log_metadata({
        "accuracy": accuracy,
        "loss": loss,
        "framework": "tensorflow",
        "epochs": 100,
    })
from zenml import step, log_metadata
import pandas as pd


@step
def create_dataset() -> pd.DataFrame:
    """Build a toy dataset and attach size metadata to its output artifact.

    Returns:
        A single-column DataFrame with 1000 rows.
    """
    df = pd.DataFrame({"value": range(1000)})
    # Log metadata to the output artifact; infer_artifact=True targets
    # this step's output rather than the step itself.
    log_metadata(
        metadata={
            "num_rows": len(df),
            "num_columns": len(df.columns),
            "memory_usage": df.memory_usage(deep=True).sum(),
        },
        infer_artifact=True,
    )
    return df
from zenml import step, log_metadata
from typing import Tuple, Annotated
import pandas as pd


@step
def split_data() -> Tuple[
    Annotated[pd.DataFrame, "train"],
    Annotated[pd.DataFrame, "test"],
]:
    """Produce train/test splits and tag each output artifact separately.

    Returns:
        A (train, test) pair of DataFrames, named via Annotated so that
        artifact_name can address each one individually.
    """
    train = pd.DataFrame({"x": range(800)})
    test = pd.DataFrame({"x": range(200)})
    # Log metadata to specific outputs: artifact_name selects which of
    # the step's named outputs receives the metadata.
    log_metadata(
        metadata={"split": "train", "size": len(train)},
        artifact_name="train",
        infer_artifact=True,
    )
    log_metadata(
        metadata={"split": "test", "size": len(test)},
        artifact_name="test",
        infer_artifact=True,
    )
    return train, test
from zenml import step, log_metadata, Model


@step(model=Model(name="iris_classifier", version="1.0"))
def evaluate_model() -> float:
    """Evaluate the step's configured model and log results to its version.

    infer_model=True routes the metadata to the Model configured on the
    step decorator rather than to the step or an artifact.

    Returns:
        The (stub) test accuracy.
    """
    accuracy = 0.96
    # Log metadata to the model version
    log_metadata(
        metadata={
            "test_accuracy": accuracy,
            "test_samples": 150,
            "confusion_matrix": [[50, 0, 0], [0, 48, 2], [0, 1, 49]],
        },
        infer_model=True,
    )
    return accuracy
from zenml import log_metadata

# Log metadata to a previously created artifact. Outside a step context,
# the artifact is addressed explicitly by name + version.
log_metadata(
    metadata={
        "reviewed": True,
        "reviewer": "data_scientist_1",
        "quality_score": 9.5,
    },
    artifact_name="training_data",
    artifact_version="5",
)
from zenml import step, log_metadata


@step
def monitor_step() -> None:
    """Attach run-level metadata to an explicitly named pipeline run.

    Passing run_id_name_or_prefix targets the whole run instead of the
    currently executing step.
    """
    # Log metadata to the entire pipeline run
    log_metadata(
        metadata={
            "environment": "production",
            "triggered_by": "scheduler",
            "priority": "high",
        },
        run_id_name_or_prefix="my_pipeline-2024_01_15-12_30_45",
    )
from zenml import log_metadata

# Log metadata to a specific step in a specific run: step_name alone is
# ambiguous, so it is combined with run_id_name_or_prefix.
log_metadata(
    metadata={
        "retry_count": 3,
        "error_resolved": True,
    },
    step_name="data_loader",
    run_id_name_or_prefix="my_pipeline-2024_01_15",
)
from zenml import step, log_metadata


@step
def analysis_step() -> None:
    """Demonstrate the range of value types accepted as metadata."""
    log_metadata({
        # Simple types
        "accuracy": 0.95,
        "model_type": "RandomForest",
        "is_production": True,
        # Lists
        "feature_names": ["age", "income", "education"],
        "class_labels": [0, 1, 2],
        # Nested structures
        "hyperparameters": {
            "n_estimators": 100,
            "max_depth": 10,
            "min_samples_split": 2,
        },
        # Arrays/matrices (as lists)
        "confusion_matrix": [
            [45, 2, 1],
            [3, 48, 2],
            [1, 1, 47],
        ],
    })
from zenml import log_metadata
from zenml.enums import ModelStages

# Log to production model: passing a ModelStages value as model_version
# selects the model version currently in that stage.
log_metadata(
    metadata={
        "deployed_at": "2024-01-15T10:30:00Z",
        "deployment_target": "kubernetes-cluster-1",
        "replicas": 3,
    },
    model_name="sentiment_analyzer",
    model_version=ModelStages.PRODUCTION,
)
from zenml import step, log_metadata


@step
def comprehensive_step() -> dict:
    """Log several independent metadata entries from a single step.

    Returns:
        The results dict whose score is also logged as metadata.
    """
    results = {"score": 0.92}
    # Log multiple metadata entries — each call is recorded separately.
    log_metadata({"preprocessing": "completed"})
    log_metadata({"training": "completed"})
    log_metadata({"validation": "completed"})
    log_metadata({"final_score": results["score"]})
    return results
The following Python types are supported as metadata values:
Primitives : str, int, float, bool
Collections : list, dict
None : None values are supported
Nested : Dictionaries and lists can be nested
Use Cases
Track experiment parameters - Log hyperparameters, configurations, and settings
Record performance metrics - Store accuracy, loss, and other evaluation metrics
Document data statistics - Save dataset sizes, distributions, and characteristics
Audit and compliance - Track who processed data, when, and under what conditions
Quality assurance - Mark artifacts as reviewed, validated, or approved
Deployment tracking - Record deployment information and environment details
log_artifact_metadata Deprecated artifact metadata function
log_model_metadata Deprecated model metadata function
get_step_context Access step context
bulk_log_metadata Log metadata to multiple entities