The register_artifact() function allows you to register existing data stored in the artifact store as a ZenML artifact without uploading it again.
Signature
def register_artifact (
folder_or_file_uri : str ,
name : str ,
version : Optional[Union[ int , str ]] = None ,
artifact_type : Optional[ArtifactType] = None ,
tags : Optional[List[ str ]] = None ,
has_custom_name : bool = True ,
artifact_metadata : Dict[ str , MetadataType] = {},
) -> ArtifactVersionResponse
Parameters
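folder_or_file_uri (str, required): The full URI within the artifact store to the folder or file. Must be within the artifact store bounds.
name (str, required): The name of the artifact.
version (Optional[Union[int, str]], default: None): The version of the artifact. If not provided, a new auto-incremented version will be used.
artifact_type (Optional[ArtifactType], default: None): The artifact type. If not given, defaults to DATA.
tags (Optional[List[str]], default: None): Tags to associate with the artifact.
has_custom_name (bool, default: True): Whether the artifact name is custom and should be listed in the dashboard “Artifacts” tab.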
artifact_metadata
Dict[str, MetadataType]
default: "{}"
Metadata dictionary to attach to the artifact version.
Returns
The registered artifact version response object.
Examples
Register a File
# Example: register a single file that already lives in the active
# artifact store, without uploading it again.
from zenml import register_artifact
from zenml import Client

client = Client()
artifact_store = client.active_stack.artifact_store

# Get the full path to your file in the artifact store
file_uri = f"{artifact_store.path}/my_data/dataset.csv"

artifact = register_artifact(
    folder_or_file_uri=file_uri,
    name="external_dataset",
)
print(f"Registered artifact: {artifact.id}")
Register a Directory
# Example: register a whole directory under an explicit version name.
from zenml import register_artifact
from zenml import Client

client = Client()
artifact_store = client.active_stack.artifact_store

# Register an entire directory
folder_uri = f"{artifact_store.path}/model_checkpoints/run_123"

artifact = register_artifact(
    folder_or_file_uri=folder_uri,
    name="model_checkpoints",
    version="run_123",
)
# Example: register with an explicit artifact type, tags, and metadata.
from zenml import register_artifact
from zenml.enums import ArtifactType
from zenml import Client

client = Client()
artifact_store = client.active_stack.artifact_store

data_uri = f"{artifact_store.path}/external/production_data.parquet"

artifact = register_artifact(
    folder_or_file_uri=data_uri,
    name="production_dataset",
    artifact_type=ArtifactType.DATA,
    tags=["production", "validated", "2024-01"],
    # Metadata is attached to the registered artifact version.
    artifact_metadata={
        "source": "data_warehouse",
        "extracted_at": "2024-01-15T10:00:00Z",
        "row_count": 1000000,
        "quality_score": 0.98,
    },
)
Register Model Weights
# Example: register pre-trained model weights as a MODEL artifact.
from zenml import register_artifact
from zenml.enums import ArtifactType
from zenml import Client

client = Client()
artifact_store = client.active_stack.artifact_store

# Register pre-trained model weights
weights_uri = f"{artifact_store.path}/pretrained/bert_weights"

artifact = register_artifact(
    folder_or_file_uri=weights_uri,
    name="bert_base_weights",
    version="pretrained",
    artifact_type=ArtifactType.MODEL,
    tags=["bert", "pretrained", "base"],
    artifact_metadata={
        "model_type": "BERT",
        "parameters": "110M",
        "training_data": "BooksCorpus + Wikipedia",
    },
)
Register in a Step
# Example: register existing data from inside a pipeline step.
from zenml import step, register_artifact, Client


@step
def register_external_data() -> None:
    """Register a CSV already in the artifact store, if it exists.

    Prints the registered artifact's name on success, or a
    "File not found" message when the URI does not exist.
    """
    client = Client()
    artifact_store = client.active_stack.artifact_store

    # Path to existing data in artifact store
    data_uri = f"{artifact_store.path}/imports/daily_data.csv"

    # Check if file exists
    if artifact_store.exists(data_uri):
        artifact = register_artifact(
            folder_or_file_uri=data_uri,
            name="daily_import",
            tags=["daily", "imported"],
        )
        print(f"Registered: {artifact.name}")
    else:
        print(f"File not found: {data_uri}")
Register Multiple Files
# Example: register several files as versions of one artifact name.
from zenml import register_artifact, Client

client = Client()
artifact_store = client.active_stack.artifact_store
base_path = f"{artifact_store.path}/batch_imports"

# Register multiple datasets
for i in range(1, 6):
    file_uri = f"{base_path}/batch_{i}.parquet"

    # Skip batches that are not present in the artifact store.
    if artifact_store.exists(file_uri):
        artifact = register_artifact(
            folder_or_file_uri=file_uri,
            name="batch_data",
            version=f"batch_{i}",
            tags=["batch", f"batch_{i}"],
        )
        print(f"Registered batch {i}")
Register and Link to Model
# Example: register existing data inside a step and link it to a model.
from zenml import step, register_artifact, link_artifact_to_model
from zenml import Client, Model


@step(model=Model(name="my_model", version="1.0"))
def register_and_link() -> None:
    """Register an existing test set and link the artifact to a model."""
    client = Client()
    artifact_store = client.active_stack.artifact_store

    # Register artifact
    data_uri = f"{artifact_store.path}/validation/test_set.csv"
    artifact = register_artifact(
        folder_or_file_uri=data_uri,
        name="test_dataset",
    )

    # Link to model
    link_artifact_to_model(artifact)
Version Management
# Example: use the current date as both the version and a tag.
from zenml import register_artifact, Client
from datetime import datetime

client = Client()
artifact_store = client.active_stack.artifact_store

# Register with date-based versioning
# The format must contain no spaces: the value is reused in the file
# name, the version, and a tag.
today = datetime.now().strftime("%Y-%m-%d")
data_uri = f"{artifact_store.path}/daily/data_{today}.csv"

artifact = register_artifact(
    folder_or_file_uri=data_uri,
    name="daily_snapshot",
    version=today,
    tags=["daily", today],
)
Register Cloud Storage Data
# Example: register data that already sits in a cloud-backed artifact store.
from zenml import register_artifact
from zenml import Client

# For S3-based artifact store
client = Client()
artifact_store = client.active_stack.artifact_store

# Register data already in S3
s3_uri = f"{artifact_store.path}/data-lake/users/demographics.parquet"

artifact = register_artifact(
    folder_or_file_uri=s3_uri,
    name="user_demographics",
    tags=["users", "demographics", "pii"],
    artifact_metadata={
        "source": "data_lake",
        "contains_pii": True,
        "anonymized": False,
    },
)
Use Cases
Import existing data that was uploaded directly to the artifact store
Register manual uploads from data engineering teams
Track external datasets stored in your artifact store
Version control data that was created outside ZenML
Integrate legacy data into ZenML workflows
Register pre-trained models or checkpoints
Catalog data lake contents in ZenML
Important Notes
The URI must be within the artifact store bounds (start with the artifact store path)
The data is not moved or copied - only metadata is registered in ZenML
The artifact uses the PreexistingDataMaterializer which returns a pathlib.Path when loaded
Changes to the underlying files are not tracked by ZenML
The URI is validated to ensure it points to the artifact store
Differences from save_artifact
| Feature | register_artifact() | save_artifact() |
| --- | --- | --- |
| Data location | Already in artifact store | Uploaded by ZenML |
| Use case | Register existing data | Save new data |
| Materializer | PreexistingDataMaterializer | Auto-selected |
| Load behavior | Returns Path object | Returns original type |
| Metadata extraction | Manual only | Automatic + manual |
Error Handling
# Example: handle the errors this page associates with registration.
from zenml import register_artifact, Client

client = Client()
artifact_store = client.active_stack.artifact_store
data_uri = f"{artifact_store.path}/external/data.csv"

try:
    artifact = register_artifact(
        folder_or_file_uri=data_uri,
        name="external_data",
    )
except FileNotFoundError:
    print("URI is outside artifact store bounds")
except RuntimeError as e:
    print(f"URI already registered: {e}")
else:
    # Only reached when registration raised nothing.
    print("Registration successful")
save_artifact: Upload and save new artifacts
load_artifact: Load registered artifacts
link_artifact_to_model: Link artifacts to models