The ArtifactConfig class is used to define various properties for step output artifacts.
Signature
ArtifactConfig(
    name: Optional[str] = None,
    version: Optional[Union[str, int]] = None,
    tags: Optional[List[str]] = None,
    run_metadata: Optional[Dict[str, MetadataType]] = None,
    artifact_type: Optional[ArtifactType] = None,
)
Parameters
name: The name of the artifact. Can be a static string like "my_artifact" or a dynamic string with placeholders like "data_{date}_{time}_{custom}". For custom placeholders, provide values in the substitutions argument of the step decorator or with_options().
version: The version of the artifact.
tags: Tags to associate with the artifact.
run_metadata: Metadata to add to the artifact.
artifact_type: Optional type of the artifact. If not given, the type specified by the materializer is used. Available types include: DATA, MODEL, SERVICE, SCHEMA, etc.
Examples
Basic Artifact Configuration
from typing import Annotated
from zenml import step, ArtifactConfig
import pandas as pd
@step
def load_data() -> Annotated[
    pd.DataFrame,
    ArtifactConfig(name="raw_data", tags=["input", "v1"]),
]:
    """Read ``data.csv`` and return it as the ``raw_data`` output artifact.

    The ``ArtifactConfig`` in the return annotation names the output
    ``raw_data`` and tags it with ``input`` and ``v1``.
    """
    return pd.read_csv("data.csv")
Multiple Outputs with Configuration
from typing import Annotated, Tuple
from zenml import step, ArtifactConfig
from zenml.enums import ArtifactType
import pandas as pd
@step
def split_data(df: pd.DataFrame) -> Tuple[
    Annotated[
        pd.DataFrame,
        ArtifactConfig(
            name="train_dataset",
            tags=["training"],
            artifact_type=ArtifactType.DATA,
        ),
    ],
    Annotated[
        pd.DataFrame,
        ArtifactConfig(
            name="test_dataset",
            tags=["testing"],
            artifact_type=ArtifactType.DATA,
        ),
    ],
]:
    """Split ``df`` into a train and a test DataFrame.

    Each element of the returned tuple carries its own ``ArtifactConfig``,
    so the two outputs are stored as ``train_dataset`` and ``test_dataset``.

    Args:
        df: The full dataset to split.

    Returns:
        A ``(train, test)`` tuple: a random 80% sample of ``df`` and the
        rows of ``df`` whose index labels were not sampled.
    """
    train = df.sample(frac=0.8)
    # Everything not drawn into the training sample becomes the test set.
    test = df.drop(train.index)
    return train, test
Dynamic Artifact Names
from typing import Annotated
from zenml import step, ArtifactConfig
import pandas as pd
@step(substitutions={"region": "us-west"})
def process_regional_data() -> Annotated[
    pd.DataFrame,
    ArtifactConfig(name="data_{date}_{region}"),
]:
    """Return a small DataFrame stored under a dynamically named artifact.

    The name template must not contain spaces around the placeholders;
    any extra whitespace would become part of the artifact name.
    """
    # {date} is filled in by ZenML at run time and {region} comes from the
    # step's substitutions, giving a name like "data_2024_01_15_us-west".
    return pd.DataFrame({"col": [1, 2, 3]})
Artifact with Metadata
from typing import Annotated
from zenml import step, ArtifactConfig
import pandas as pd
@step
def create_dataset() -> Annotated[
    pd.DataFrame,
    ArtifactConfig(
        name="processed_dataset",
        tags=["processed", "v2"],
        run_metadata={
            "source": "external_api",
            "num_rows": 10000,
            "quality_score": 0.95,
        },
    ),
]:
    """Return a 10,000-row DataFrame as the ``processed_dataset`` artifact.

    The ``run_metadata`` dictionary in the ``ArtifactConfig`` attaches
    static key/value metadata to the output artifact.
    """
    return pd.DataFrame({"feature": range(10000)})
Model Artifact
from typing import Annotated, Any
from zenml import step, ArtifactConfig
from zenml.enums import ArtifactType
@step
def train_model(X_train, y_train) -> Annotated[
    Any,
    ArtifactConfig(
        name="trained_model",
        artifact_type=ArtifactType.MODEL,
        tags=["sklearn", "random-forest"],
        run_metadata={
            "accuracy": 0.92,
            "n_estimators": 100,
        },
    ),
]:
    """Fit a random forest and return it as the ``trained_model`` artifact.

    Args:
        X_train: Training features passed to ``model.fit``.
        y_train: Training targets passed to ``model.fit``.

    Returns:
        The fitted ``RandomForestClassifier``, stored with artifact type
        ``ArtifactType.MODEL``.
    """
    # Imported inside the step, as in the original example.
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    return model
Versioned Artifacts
from typing import Annotated
from zenml import step, ArtifactConfig
import pandas as pd
@step
def create_versioned_data() -> Annotated[
    pd.DataFrame,
    ArtifactConfig(
        name="experiment_data",
        version="1.2.0",
        tags=["experiment"],
    ),
]:
    """Return a small DataFrame as version ``1.2.0`` of ``experiment_data``.

    The explicit ``version`` in the ``ArtifactConfig`` sets the artifact
    version for this output.
    """
    return pd.DataFrame({"data": [1, 2, 3]})
Notes
ArtifactConfig is typically used with Python’s Annotated type hint to attach configuration to step outputs.
The {date} and {time} placeholders are automatically available and will be replaced with the current date and time.
For custom placeholders, provide substitution values using the substitutions parameter in the @step decorator or when calling step.with_options().
If you don’t specify an artifact name, ZenML will use the output parameter name from the step function.
@step: Learn about creating steps.
ExternalArtifact: Use external artifacts as inputs.
save_artifact: Manually save artifacts.
log_metadata: Log artifact metadata.