Skip to main content

Overview

SyftBoxConfig is a configuration class that stores essential information about a SyftBox installation, including the location of the SyftBox folder and the user’s email address.

Class Definition

from syft_datasets import SyftBoxConfig
from pathlib import Path

config = SyftBoxConfig(
    syftbox_folder=Path("/path/to/syftbox"),
    email="user@example.com"
)

Fields

syftbox_folder (Path, required)
Path to the SyftBox folder on the local filesystem. This is the root directory where all SyftBox data is stored.

email (str, required)
Email address associated with the SyftBox datasite. This serves as the unique identifier for the datasite owner.

Properties

private_dir

Returns the path to the private data directory.
private_path = config.private_dir
# Returns: syftbox_folder/private
Returns: Path — the path to the private directory within the SyftBox folder.

public_dir

Returns the path to the public data directory for the configured email.
public_path = config.public_dir
# Returns: syftbox_folder/user@example.com/public
Returns: Path — the path to the public directory for the user’s datasite.

Usage Examples

Basic Configuration

from syft_datasets import SyftBoxConfig
from pathlib import Path

# Create configuration
config = SyftBoxConfig(
    syftbox_folder=Path.home() / "SyftBox",
    email="user@example.com"
)

print(f"SyftBox root: {config.syftbox_folder}")
print(f"User email: {config.email}")
print(f"Private directory: {config.private_dir}")
print(f"Public directory: {config.public_dir}")

Using with SyftDatasetManager

from syft_datasets import SyftBoxConfig, SyftDatasetManager
from pathlib import Path

# Create config
config = SyftBoxConfig(
    syftbox_folder=Path.home() / "SyftBox",
    email="user@example.com"
)

# Use config to initialize manager
manager = SyftDatasetManager.from_config(config)

# Access configuration from manager
print(f"Manager email: {manager.syftbox_config.email}")
print(f"Public dir: {manager.syftbox_config.public_dir}")

Accessing Directories

# Get private data directory
private_dir = config.private_dir
if not private_dir.exists():
    private_dir.mkdir(parents=True, exist_ok=True)

# Get public data directory
public_dir = config.public_dir
if not public_dir.exists():
    public_dir.mkdir(parents=True, exist_ok=True)

# List files in public directory
for item in public_dir.iterdir():
    print(f"Public item: {item.name}")

Configuration from Environment

import os
from pathlib import Path
from syft_datasets import SyftBoxConfig

# Load from environment variables
config = SyftBoxConfig(
    syftbox_folder=Path(os.getenv("SYFTBOX_PATH", Path.home() / "SyftBox")),
    email=os.getenv("SYFTBOX_EMAIL", "user@example.com")
)

Directory Structure

When you create a SyftBoxConfig, it expects and works with the following directory structure:
syftbox_folder/
├── private/                          # Private data (config.private_dir)
│   └── syft_datasets/               # Private dataset metadata
│       └── dataset_name/
│           ├── private_metadata.yaml
│           └── data files...
└── user@example.com/                 # User's datasite
    └── public/                       # Public data (config.public_dir)
        └── syft_datasets/           # Public datasets
            └── dataset_name/
                ├── dataset.yaml
                ├── README.md
                └── mock data files...

Integration with Dataset Manager

The SyftBoxConfig is used internally by SyftDatasetManager to:
  1. Locate datasets: Determine where to store and find dataset files
  2. Manage permissions: Identify the owner when setting access controls
  3. Resolve paths: Convert between SyftBoxURLs and local filesystem paths
  4. Multi-datasite support: Access datasets from multiple datasites
from syft_datasets import SyftDatasetManager, SyftBoxConfig
from pathlib import Path

# Manual initialization.
# Build the folder from Path.home(): pathlib never expands "~", so
# Path("~/SyftBox") would refer to a literal "~" directory relative to the
# current working directory instead of the user's home directory.
config = SyftBoxConfig(
    syftbox_folder=Path.home() / "SyftBox",
    email="user@example.com"
)

manager = SyftDatasetManager.from_config(config)

# The manager uses config to determine paths
dataset = manager.create(
    name="my_data",
    mock_path="./mock",
    private_path="./private"
)

# Dataset locations are determined by config
# (<home> below stands for the absolute path returned by Path.home())
print(f"Mock data: {dataset.mock_dir}")
# Output: <home>/SyftBox/user@example.com/public/syft_datasets/my_data

print(f"Private data: {dataset.private_dir}")
# Output: <home>/SyftBox/private/syft_datasets/my_data

Validation

SyftBoxConfig uses Pydantic for validation:
from syft_datasets import SyftBoxConfig
from pathlib import Path

# Valid configuration
config = SyftBoxConfig(
    syftbox_folder=Path("/valid/path"),
    email="user@example.com"
)

# Invalid - missing required fields
try:
    config = SyftBoxConfig()
except ValueError as e:
    print(f"Error: {e}")

# Invalid - wrong types
try:
    config = SyftBoxConfig(
        syftbox_folder="not a Path object",  # Should be Path
        email=123  # Should be str
    )
except ValueError as e:
    print(f"Error: {e}")

Best Practices

Always use pathlib.Path objects for syftbox_folder rather than strings:
# Good
config = SyftBoxConfig(
    syftbox_folder=Path.home() / "SyftBox",
    email="user@example.com"
)

# Avoid
config = SyftBoxConfig(
    syftbox_folder="~/SyftBox",  # String path
    email="user@example.com"
)
Ensure the email is a valid email address to avoid issues with datasite identification:
import re

def is_valid_email(email: str) -> bool:
    """Return True if *email* looks like a syntactically valid address.

    This is a pragmatic shape check (``local@domain.tld``), not full
    RFC 5322 validation.

    Args:
        email: Candidate email address.

    Returns:
        True when the whole string matches the expected shape, else False.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    return re.fullmatch(pattern, email) is not None

email = "user@example.com"
if is_valid_email(email):
    config = SyftBoxConfig(
        syftbox_folder=Path.home() / "SyftBox",
        email=email
    )
Verify that the SyftBox folder exists before creating the config:
syftbox_path = Path.home() / "SyftBox"

if not syftbox_path.exists():
    syftbox_path.mkdir(parents=True, exist_ok=True)
    print(f"Created SyftBox directory at {syftbox_path}")

config = SyftBoxConfig(
    syftbox_folder=syftbox_path,
    email="user@example.com"
)
Create the config once and reuse it across your application:
# config.py
from pathlib import Path
from syft_datasets import SyftBoxConfig

APP_CONFIG = SyftBoxConfig(
    syftbox_folder=Path.home() / "SyftBox",
    email="user@example.com"
)

# main.py
from config import APP_CONFIG
from syft_datasets import SyftDatasetManager

manager = SyftDatasetManager.from_config(APP_CONFIG)

Build docs developers (and LLMs) love