class DatasiteOwnerSyncer(BaseModelCallbackMixin):
    """Responsible for downloading files and checking permissions."""

    def sync(self, peer_emails: list[str], recompute_hashes: bool = True):
        """Pull proposed file changes from each peer and handle any that arrive."""
        for peer in peer_emails:
            message = self.pull_and_process_next_proposed_filechange(peer)
            if not message:
                continue
            self.handle_proposed_filechange_events_message(peer, message)

    def check_write_permission(self, sender_email: str, path: str) -> bool:
        """Return True if *sender_email* has write access to *path*."""
        # Refresh permission state before the check so stale rules are not used.
        self.perm_context._reload()
        permissions = self.perm_context.open(path)
        return permissions.has_write_access(sender_email)

    def handle_proposed_filechange_events_message(
        self, sender_email: str, proposed_events_message: ProposedFileChangesMessage
    ):
        """Keep only the proposed changes the sender is allowed to write, then process them."""
        allowed_changes = []
        for proposed in proposed_events_message.proposed_file_changes:
            # Drop any change targeting a path the sender cannot write to.
            if self.check_write_permission(sender_email, str(proposed.path_in_datasite)):
                allowed_changes.append(proposed)
        if allowed_changes:
            # Process and accept allowed changes
            # NOTE(review): the literal `...` argument looks like a placeholder —
            # confirm the intended arguments for process_proposed_events_message.
            self.event_cache.process_proposed_events_message(...)
Following Principle 7 (manual-review-first), the default is manual approval:
from syft_job import get_client

client = get_client("/path/to/syftbox", "[email protected]")

# View pending jobs
for job in client.jobs:
    if job.status == "inbox":
        print(f"Job from {job.submitted_by}: {job.name}")
        # Review job contents
        print(open(job.location / "run.sh").read())
        # Approve if safe
        job.approve()
Automatic approval when jobs match specific criteria:
from syft_client.job_auto_approval import create_approval_policy

# Auto-approve jobs matching an exact script from specific users
policy = create_approval_policy(
    required_scripts={"run.sh": "#!/bin/bash\necho 'hello'"},
    allowed_users=["[email protected]"],
    auto_approve=True,
)
# Policy runs automatically in the background
See job auto-approval in syft_client/job_auto_approval.py
Following Principle 2 (file-permission-first, job-policy second), no other permission system exists: all access control is expressed through file permissions or job policies.
Following Principle 11 (MapReduce-first), all interactions are viewed through the MapReduce lens:

- Map phase: submit the same job to multiple data owners.
- Reduce phase: aggregate the results locally.
# Map: Submit to multiple data owners
data_owners = ["[email protected]", "[email protected]", "[email protected]"]
jobs = []
for owner in data_owners:
    job_dir = client.submit_python_job(
        user=owner,
        code_path="count_customers.py",
        job_name=f"Count for {owner}",
    )
    jobs.append((owner, job_dir))

# Wait for results...

# Reduce: Aggregate results
total = 0
for owner, job_dir in jobs:
    result = pd.read_csv(job_dir / "outputs" / "count.csv")
    total += result["count"].sum()
print(f"Total customers across all owners: {total}")