Skip to main content

Description

Returns the URL to the storage location of a data file or directory tracked in a DVC repository. This function provides direct access to the remote storage URL without downloading the file.
This function does not check for the actual existence of the file in remote storage. It only returns the expected URL based on DVC metadata.
For Git repositories, HEAD is used unless a rev argument is supplied. The default remote is tried unless a remote argument is supplied.

Signature

dvc.api.get_url(
    path: str,
    repo: Optional[str] = None,
    rev: Optional[str] = None,
    remote: Optional[str] = None,
    config: Optional[dict[str, Any]] = None,
    remote_config: Optional[dict[str, Any]] = None,
) -> str

Parameters

path
str
required
Location and filename of the target, relative to the root of the repository.
path="data/train.csv"
path="models/weights.h5"
path="features/"
repo
str
default:"None"
Location of the DVC project or Git repository. Defaults to the current DVC project (found by walking up from the current working directory). Can be:
  • A URL to a Git repository
  • A local file system path
  • Both HTTP and SSH protocols are supported for online Git repos
repo="https://github.com/iterative/example-get-started"
repo="git@github.com:user/project.git"
repo="/path/to/local/repo"
rev
str
default:"None"
Any Git revision such as a branch or tag name, a commit hash, or a DVC experiment name.
  • Defaults to HEAD for Git repositories
  • If repo is not a Git repo, this option is ignored
rev="main"
rev="v1.0.0"
rev="abc123def"
rev="exp-tuned-model"
remote
str
default:"None"
Name of the DVC remote used to form the returned URL string.
  • Defaults to the default remote of the repository
  • For local projects, the cache is tried before the default remote
remote="myremote"
remote="s3-storage"
remote="azure-backup"
config
dict
default:"None"
Config dictionary to be passed to the DVC repository.
config={"cache": {"type": "symlink"}}
remote_config
dict
default:"None"
Remote config dictionary to be passed to the DVC repository.
remote_config={
    "url": "s3://mybucket/cache",
    "region": "us-west-2"
}

Returns

url
str
The URL to the file or directory in remote storage. The format depends on the remote type:
  • S3: s3://bucket-name/path/to/file
  • GCS: gs://bucket-name/path/to/file
  • Azure: azure://container/path/to/file
  • HTTP: https://server.com/path/to/file
  • SSH: ssh://user@server/path/to/file
  • Local: file:///path/to/file

Raises

OutputNotFoundError
exception
Raised when the file is not tracked by DVC.
NoRemoteError
exception
Raised when no remote is configured or specified.
NoRemoteInExternalRepoError
exception
Raised when accessing an external repository with no remote configured.

Examples

Basic Usage

import dvc.api

url = dvc.api.get_url(
    'data/model.pkl',
    repo='https://github.com/iterative/example-get-started'
)
print(url)
# Output: s3://dvc-public/remote/example-get-started/a3/04afb96060aad90176268345e10355

Get URL from Specific Remote

import dvc.api

# Specify which remote to use
url = dvc.api.get_url(
    'data/features.csv',
    remote='s3-backup'
)
print(url)

Access Different Versions

import dvc.api

# Get URL from production branch
prod_url = dvc.api.get_url(
    'models/classifier.pkl',
    rev='production'
)

# Get URL from development branch
dev_url = dvc.api.get_url(
    'models/classifier.pkl',
    rev='development'
)

print(f"Production model: {prod_url}")
print(f"Development model: {dev_url}")

Download with External Tool

import dvc.api
import requests

# Get URL and download with requests
url = dvc.api.get_url('data/dataset.csv')

if url.startswith('https://'):
    response = requests.get(url)
    with open('local_dataset.csv', 'wb') as f:
        f.write(response.content)

Use with AWS CLI

import dvc.api
import subprocess

# Get S3 URL
url = dvc.api.get_url(
    'data/large_file.bin',
    repo='https://github.com/user/ml-project'
)

if url.startswith('s3://'):
    # Download using AWS CLI
    subprocess.run(['aws', 's3', 'cp', url, 'local_file.bin'])

Check Multiple Files

import dvc.api

files = ['data/train.csv', 'data/test.csv', 'data/validation.csv']

for file_path in files:
    try:
        url = dvc.api.get_url(file_path)
        print(f"{file_path}: {url}")
    except Exception as e:
        print(f"{file_path}: Error - {e}")

Get Directory URL

import dvc.api

# Get URL for an entire directory
dir_url = dvc.api.get_url(
    'data/images/',
    repo='https://github.com/user/vision-project'
)
print(f"Directory URL: {dir_url}")

From Specific Tag

import dvc.api

# Get URL from a tagged release
url = dvc.api.get_url(
    'models/model-v2.pkl',
    rev='v2.0.0',
    repo='https://github.com/company/ml-models'
)
print(f"Release v2.0.0 model URL: {url}")

Error Handling

import dvc.api
from dvc.exceptions import OutputNotFoundError
from dvc.config import NoRemoteError

try:
    url = dvc.api.get_url('data/file.csv')
    print(f"URL: {url}")
except OutputNotFoundError:
    print("File is not tracked by DVC")
except NoRemoteError:
    print("No remote configured")
except Exception as e:
    print(f"Error: {e}")

Custom Remote Configuration

import dvc.api

url = dvc.api.get_url(
    'data/dataset.parquet',
    remote='custom-s3',
    remote_config={
        'url': 's3://my-custom-bucket/dvc-cache',
        'region': 'eu-west-1'
    }
)
print(url)

Generate Presigned URL (S3)

import dvc.api
import boto3
from urllib.parse import urlparse

# Get S3 URL from DVC
url = dvc.api.get_url('data/private_data.csv')

# Parse S3 URL
parsed = urlparse(url)
bucket = parsed.netloc
key = parsed.path.lstrip('/')

# Generate presigned URL
s3_client = boto3.client('s3')
presigned_url = s3_client.generate_presigned_url(
    'get_object',
    Params={'Bucket': bucket, 'Key': key},
    ExpiresIn=3600
)

print(f"Presigned URL (expires in 1 hour): {presigned_url}")

Use Cases

Direct Downloads

Download files using external tools like wget, curl, or AWS CLI.

URL Sharing

Share direct URLs to data with team members or services.

Integration

Integrate DVC-tracked data with other tools and services.

Presigned URLs

Generate temporary access URLs for private cloud storage.

Remote Storage URL Formats

url = dvc.api.get_url('data.csv', remote='s3')
# Returns: s3://bucket-name/path/to/hash

Important Notes

File Existence: This function does NOT verify that the file actually exists in remote storage. It only constructs and returns the expected URL based on DVC metadata.
Cache Priority: For local projects, DVC will try to get the URL from the local cache before checking the default remote.
Direct Access: The returned URL can often be used directly with cloud provider tools (aws, gsutil, az) or HTTP clients for downloading.

Best Practices

Since get_url() doesn’t check if the file exists, verify before using:
import dvc.api
import requests

url = dvc.api.get_url('data.csv')

# Verify file exists (for HTTP URLs)
if url.startswith('http'):
    response = requests.head(url)
    if response.status_code == 200:
        print("File exists")
    else:
        print("File not found in storage")
For very large files, getting the URL allows you to use optimized download tools:
import dvc.api
import subprocess

url = dvc.api.get_url('large_dataset.tar.gz')

# Use aria2c for parallel downloads
subprocess.run(['aria2c', '-x', '16', '-s', '16', url])
Parse URLs appropriately based on storage type:
import dvc.api
from urllib.parse import urlparse

url = dvc.api.get_url('data.csv')
parsed = urlparse(url)

if parsed.scheme == 's3':
    print(f"S3 bucket: {parsed.netloc}")
    print(f"S3 key: {parsed.path}")
elif parsed.scheme == 'gs':
    print(f"GCS bucket: {parsed.netloc}")
elif parsed.scheme in ['http', 'https']:
    print(f"HTTP URL: {url}")
Remember that accessing the URL may require authentication:
import dvc.api

# Get URL (may be private)
url = dvc.api.get_url('private_data.csv', remote='s3')

# Ensure AWS credentials are configured
# export AWS_ACCESS_KEY_ID=...
# export AWS_SECRET_ACCESS_KEY=...

# Or use boto3 session
import boto3
session = boto3.Session(profile_name='myprofile')

Comparison with Other Functions

| Function | Purpose | Downloads Data |
| --- | --- | --- |
| get_url() | Get remote URL | No |
| read() | Read file contents | Yes (in memory) |
| open() | Stream file contents | Yes (streaming) |
import dvc.api

# get_url() - Just returns the URL
url = dvc.api.get_url('data.csv')
print(url)  # s3://bucket/path/hash

# read() - Downloads and returns contents
data = dvc.api.read('data.csv')
print(len(data))  # Size in bytes

# open() - Downloads and streams
with dvc.api.open('data.csv') as f:
    first_line = f.readline()

read()

Read complete file contents

open()

Stream file with context manager

DVCFileSystem

File system interface

Build docs developers (and LLMs) love