Skip to main content

Overview

The folding_input module defines the data structures for AlphaFold 3 inputs, including protein chains, RNA chains, DNA chains, ligands, and templates. It handles JSON serialization/deserialization and mmCIF conversion.

Input Class

Main dataclass representing an AlphaFold 3 prediction input.
@dataclasses.dataclass(frozen=True, slots=True, kw_only=True)
class Input:
    name: str
    chains: Sequence[ProteinChain | RnaChain | DnaChain | Ligand]
    rng_seeds: Sequence[int]
    bonded_atom_pairs: Sequence[tuple[BondAtomId, BondAtomId]] | None = None
    user_ccd: str | None = None
name
str
required
The name of the target structure.
chains
Sequence[ProteinChain | RnaChain | DnaChain | Ligand]
required
List of molecular chains (protein, RNA, DNA, or ligands).
rng_seeds
Sequence[int]
required
Random number generator seeds, one for each model execution. Must have at least one seed.
bonded_atom_pairs
Sequence[tuple[BondAtomId, BondAtomId]] | None
List of bonded atom pairs. Each atom is defined by a (chain_id, res_id, atom_name) tuple. Residue IDs are 1-indexed.
user_ccd
str | None
User-defined chemical component dictionary in CIF format for custom ligands.

Properties

protein_chains
Sequence[ProteinChain]
Filtered list of protein chains only.
rna_chains
Sequence[RnaChain]
Filtered list of RNA chains only.
dna_chains
Sequence[DnaChain]
Filtered list of DNA chains only.
ligands
Sequence[Ligand]
Filtered list of ligands only.

Class Methods

from_json

@classmethod
def from_json(
    cls, 
    json_str: str, 
    json_path: pathlib.Path | None = None
) -> Self
Loads Input from AlphaFold JSON string. Supports both alphafold3 and alphafoldserver dialects.
json_str
str
required
JSON string in AlphaFold 3 format.
json_path
pathlib.Path | None
Path to JSON file for resolving relative paths.

from_mmcif

@classmethod
def from_mmcif(
    cls, 
    mmcif_str: str, 
    ccd: chemical_components.Ccd
) -> Self
Loads Input from mmCIF string. Note: RNG seeds are randomly sampled since mmCIFs don’t store them.
mmcif_str
str
required
mmCIF formatted structure string.
ccd
chemical_components.Ccd
required
Chemical components dictionary.

from_alphafoldserver_fold_job

@classmethod
def from_alphafoldserver_fold_job(
    cls, 
    fold_job: Mapping[str, Any]
) -> Self
Constructs Input from AlphaFoldServer fold job format.

Instance Methods

to_json

def to_json(self) -> str
Converts Input to AlphaFold JSON string format with proper formatting.

to_structure

def to_structure(
    self, 
    ccd: chemical_components.Ccd
) -> structure.Structure
Converts Input to a Structure object. Note: RNG seeds are not preserved.

sanitised_name

def sanitised_name(self) -> str
Returns a sanitised version of the name that is safe to use as a filename (spaces are replaced and special characters are removed).

fill_missing_fields

def fill_missing_fields(self) -> Self
Fills missing MSA and template fields with default empty values.

ProteinChain Class

Represents a protein chain input.
class ProteinChain:
    def __init__(
        self,
        *,
        id: str,
        sequence: str,
        ptms: Sequence[tuple[str, int]],
        description: str | None = None,
        paired_msa: str | None = None,
        unpaired_msa: str | None = None,
        templates: Sequence[Template] | None = None,
    )
id
str
required
Unique protein chain identifier (must be an uppercase letter).
sequence
str
required
Amino acid sequence (single-letter codes, only letters).
ptms
Sequence[tuple[str, int]]
required
Post-translational modifications as list of (modification_type, residue_index). Indices are 1-based.
description
str | None
Optional textual description of the protein chain.
paired_msa
str | None
Paired A3M-formatted MSA. If None, must be filled by data pipeline. Empty string means custom MSA with no sequences.
unpaired_msa
str | None
Unpaired A3M-formatted MSA. If None, must be filled by data pipeline. Empty string means custom MSA with no sequences.
templates
Sequence[Template] | None
List of structural templates (max 20). If None, must be filled by data pipeline. Empty list means no templates.

Properties

sequence
str
Single-letter sequence taking modifications into account (uses ‘X’ for unknown residues).
ptms
Sequence[tuple[str, int]]
Post-translational modifications.
paired_msa
str | None
Paired MSA in A3M format.
unpaired_msa
str | None
Unpaired MSA in A3M format.
templates
Sequence[Template] | None
Structural templates.

Methods

to_ccd_sequence

def to_ccd_sequence(self) -> Sequence[str]
Converts to sequence of CCD (Chemical Component Dictionary) codes.

to_dict

def to_dict(
    self, 
    seq_id: str | Sequence[str] | None = None
) -> Mapping[str, Mapping[str, Any]]
Converts ProteinChain to AlphaFold JSON dict.

RnaChain Class

Represents an RNA chain input.
class RnaChain:
    def __init__(
        self,
        *,
        id: str,
        sequence: str,
        modifications: Sequence[tuple[str, int]],
        description: str | None = None,
        unpaired_msa: str | None = None,
    )
id
str
required
Unique RNA chain identifier (must be an uppercase letter).
sequence
str
required
RNA sequence (single-letter codes, only letters).
modifications
Sequence[tuple[str, int]]
required
RNA modifications as list of (modification_type, residue_index). Indices are 1-based.
description
str | None
Optional textual description.
unpaired_msa
str | None
Unpaired A3M-formatted MSA. If None, must be filled by data pipeline.

Properties

sequence
str
Single-letter sequence taking modifications into account (uses ‘N’ for unknown residues).
modifications
Sequence[tuple[str, int]]
RNA modifications.
unpaired_msa
str | None
Unpaired MSA in A3M format.

DnaChain Class

Represents a single-strand DNA chain input.
class DnaChain:
    def __init__(
        self,
        *,
        id: str,
        sequence: str,
        modifications: Sequence[tuple[str, int]],
        description: str | None = None,
    )
id
str
required
Unique DNA chain identifier (must be an uppercase letter).
sequence
str
required
DNA sequence (single-letter codes, only letters).
modifications
Sequence[tuple[str, int]]
required
DNA modifications as list of (modification_type, residue_index). Indices are 1-based.
description
str | None
Optional textual description.

Properties

sequence
str
Single-letter sequence taking modifications into account (uses ‘N’ for unknown residues).

Ligand Class

Represents a ligand (small molecule) input.
@dataclasses.dataclass(frozen=True, slots=True, kw_only=True)
class Ligand:
    id: str
    ccd_ids: Sequence[str] | None = None
    smiles: str | None = None
    description: str | None = None
id
str
required
Unique ligand “chain” identifier.
ccd_ids
Sequence[str] | None
Chemical Component Dictionary IDs. Exactly one of ccd_ids or smiles must be set.
smiles
str | None
SMILES representation of the ligand. Exactly one of ccd_ids or smiles must be set.
description
str | None
Optional textual description.

Template Class

Represents a structural template for protein chains.
class Template:
    def __init__(
        self, 
        *, 
        mmcif: str, 
        query_to_template_map: Mapping[int, int]
    )
mmcif
str
required
Structural template in mmCIF format (should have only one protein chain).
query_to_template_map
Mapping[int, int]
required
Mapping from query residue index to template residue index.

Properties

mmcif
str
mmCIF string of the template structure.
query_to_template_map
Mapping[int, int]
Query-to-template residue mapping.

Usage Examples

Creating a Simple Protein Input

from alphafold3.common import folding_input

# Create protein chain
protein = folding_input.ProteinChain(
    id='A',
    sequence='MKFLKFSLLTAVLLSVVFAFSSCGDDDDTYPYDVPDYAIEGIFHATIKHNIMYKYSSKT',
    ptms=[],  # No post-translational modifications
    description='Example protein'
)

# Create input
fold_input = folding_input.Input(
    name='my_protein',
    chains=[protein],
    rng_seeds=[42, 123, 456],  # Multiple seeds for sampling
)

Creating a Complex with Ligand

# Protein chain
protein = folding_input.ProteinChain(
    id='A',
    sequence='MKFLKFSLLTAVLLSVVFAFSSCGDDDDTYPYDVPDYA',
    ptms=[('SEP', 7)],  # Phosphoserine at position 7 (a serine in the sequence above)
)

# Ligand by CCD code
ligand = folding_input.Ligand(
    id='B',
    ccd_ids=['ATP'],  # Adenosine triphosphate
    description='ATP substrate'
)

# Create input with bonded atoms
fold_input = folding_input.Input(
    name='protein_ligand_complex',
    chains=[protein, ligand],
    rng_seeds=[42],
    bonded_atom_pairs=[
        (('A', 23, 'SG'), ('B', 1, 'PG')),  # Cys23 SG bonded to ATP PG
    ]
)

Loading from JSON

import pathlib

# Load from file
fold_input = folding_input.Input.from_json(
    json_str=pathlib.Path('input.json').read_text(),
    json_path=pathlib.Path('input.json')
)

# Access chains
print(f"Protein chains: {len(fold_input.protein_chains)}")
print(f"Ligands: {len(fold_input.ligands)}")

Saving to JSON

# Save input to JSON
json_str = fold_input.to_json()
with open('output.json', 'w') as f:
    f.write(json_str)

JSON Format

AlphaFold 3 JSON Dialect

{
  "dialect": "alphafold3",
  "version": 4,
  "name": "my_structure",
  "modelSeeds": [42, 123],
  "sequences": [
    {
      "protein": {
        "id": "A",
        "sequence": "MKFLKFSLLTAVLLSVVFAFSSCGDDDDTYPYDVPDYA",
        "modifications": [
          {"ptmType": "SEP", "ptmPosition": 7}
        ],
        "unpairedMsa": null,
        "pairedMsa": null,
        "templates": null
      }
    },
    {
      "ligand": {
        "id": "B",
        "ccdCodes": ["ATP"]
      }
    }
  ],
  "bondedAtomPairs": [
    [["A", 23, "SG"], ["B", 1, "PG"]]
  ]
}

Constants

JSON_DIALECT: Final[str] = 'alphafold3'
JSON_VERSIONS: Final[tuple[int, ...]] = (1, 2, 3, 4)
JSON_VERSION: Final[int] = 4

ALPHAFOLDSERVER_JSON_DIALECT: Final[str] = 'alphafoldserver'
ALPHAFOLDSERVER_JSON_VERSION: Final[int] = 1

Type Aliases

BondAtomId: TypeAlias = tuple[str, int, str]  # (chain_id, res_id, atom_name)

See Also

Build docs developers (and LLMs) love