Inference Tools#

class atomworks.io.tools.inference.CCDComponent(ccd_code: str, chain_type: atomworks.enums.ChainType | str = 'non-polymer', is_polymer: bool = False, chain_id: str | None = None)[source]#

Bases: LigandComponent

ccd_code: str#
chain_id: str | None = None#
chain_type: ChainType | str = 'non-polymer'#
is_polymer: bool = False#
class atomworks.io.tools.inference.CIFOrPDBFileComponent(path: os.PathLike | _io.StringIO, msa_paths: dict[str, os.PathLike] | None = None, chain_type: atomworks.enums.ChainType | str | None = None, custom_parse_kwargs: dict[str, Any] | None = None)[source]#

Bases: ChemicalComponent

chain_type: ChainType | str | None = None#
custom_parse_kwargs: dict[str, Any] | None = None#
msa_paths: dict[str, PathLike] | None = None#
path: PathLike | StringIO#
class atomworks.io.tools.inference.ChemicalComponent[source]#

Bases: ABC

as_dict() dict[source]#
static from_dict(args_dict: dict) ChemicalComponent[source]#
class atomworks.io.tools.inference.DNA(seq: str | list[str], chain_type: atomworks.enums.ChainType = <ChainType.DNA: 3>, is_polymer: bool | None = None, chain_id: str | None = None, msa_path: os.PathLike | None = None)[source]#

Bases: SequenceComponent

chain_type: ChainType = 3#
class atomworks.io.tools.inference.LigandComponent[source]#

Bases: ChemicalComponent

class atomworks.io.tools.inference.Peptide(seq: str | list[str], chain_type: atomworks.enums.ChainType = <ChainType.POLYPEPTIDE_L: 6>, is_polymer: bool = False, chain_id: str | None = None, msa_path: os.PathLike | None = None)[source]#

Bases: SequenceComponent

chain_type: ChainType = 6#
is_polymer: bool = False#
class atomworks.io.tools.inference.Polymer(seq: str | list[str], chain_type: atomworks.enums.ChainType | None = None, is_polymer: bool = True, chain_id: str | None = None, msa_path: os.PathLike | None = None)[source]#

Bases: SequenceComponent

is_polymer: bool = True#
class atomworks.io.tools.inference.Protein(seq: str | list[str], chain_type: atomworks.enums.ChainType = <ChainType.POLYPEPTIDE_L: 6>, is_polymer: bool | None = None, chain_id: str | None = None, msa_path: os.PathLike | None = None)[source]#

Bases: SequenceComponent

chain_type: ChainType = 6#
class atomworks.io.tools.inference.RNA(seq: str | list[str], chain_type: atomworks.enums.ChainType = <ChainType.RNA: 7>, is_polymer: bool | None = None, chain_id: str | None = None, msa_path: os.PathLike | None = None)[source]#

Bases: SequenceComponent

chain_type: ChainType = 7#
class atomworks.io.tools.inference.SDFComponent(path: os.PathLike | _io.StringIO, chain_type: atomworks.enums.ChainType | str = 'non-polymer', is_polymer: bool = False, chain_id: str | None = None, res_name: str = 'UNL')[source]#

Bases: LigandComponent

chain_id: str | None = None#
chain_type: ChainType | str = 'non-polymer'#
is_polymer: bool = False#
path: PathLike | StringIO#
res_name: str = 'UNL'#
class atomworks.io.tools.inference.SequenceComponent(seq: str | list[str], chain_type: atomworks.enums.ChainType | None = None, is_polymer: bool | None = None, chain_id: str | None = None, msa_path: os.PathLike | None = None)[source]#

Bases: ChemicalComponent

static assert_valid_chain_type(seq: list[str], chain_type: ChainType, allow_other: bool = False) bool[source]#

Asserts that all the CCD codes in the sequence are valid for the given chain type.

Parameters:
  • seq (list[str]) – List of three-letter CCD codes.

  • chain_type (ChainType) – The chain type to check against.

  • allow_other (bool) – If True, allow non-CCD codes (e.g., custom NCAA) to be valid.

Non-CCD codes (e.g., custom NCAAs) are ignored: they are presumed to be valid and are mapped to “other”.

chain_id: str | None = None#
chain_type: ChainType | None = None#
static from_seq(seq: str | list[str], *, chain_type: ChainType | str = None, is_polymer: bool | None = None) SequenceComponent[source]#
static infer_chain_type(seq: str) ChainType[source]#
is_polymer: bool | None = None#
msa_path: PathLike | None = None#
seq: str | list[str]#
class atomworks.io.tools.inference.SmilesComponent(smiles: str, chain_type: atomworks.enums.ChainType | str = 'non-polymer', is_polymer: bool = False, chain_id: str | None = None, res_name: str = 'UNL')[source]#

Bases: LigandComponent

chain_id: str | None = None#
chain_type: ChainType | str = 'non-polymer'#
is_polymer: bool = False#
res_name: str = 'UNL'#
smiles: str#
atomworks.io.tools.inference.assign_res_name_from_atom_array_hash(atom_array: AtomArray, hash_to_id: KeyToIntMapper) AtomArray[source]#

Assigns a residue name to an array based on its hash.

The residue names will be assigned as L:{id} where id is a unique integer assigned to each hash.

Parameters:
  • atom_array (AtomArray) – The atom array (e.g., a ligand) to assign a residue name to.

  • hash_to_id (KeyToIntMapper) – A mapper from atom array hash to integer ID.

atomworks.io.tools.inference.build_msa_paths_by_chain_id_from_component_list(components: list[ChemicalComponent]) dict[str, PathLike][source]#

Build a dictionary of MSA paths by chain ID from a list of ChemicalComponent objects.

The composed dictionary may be encoded as extra metadata in the CIF file, and ultimately loaded into chain_info through parse.

atomworks.io.tools.inference.ccd_code_to_annotated_atom_array(ccd_code: list[str], chain_id: str, *, chain_type: ChainType | str = None, is_polymer: bool | None = None, ccd_mirror_path: PathLike = None) AtomArray[source]#
atomworks.io.tools.inference.components_to_atom_array(components: list[ChemicalComponent | dict], bonds: list[str] | None = None, return_components: bool = False, custom_residues: dict[str, AtomArray | SDFComponent | dict] | None = None) AtomArray | list[ChemicalComponent][source]#

Build an AtomArray from a list of ChemicalComponent objects and supporting details (bonds, custom residues).

Parameters:
  • components (list[ChemicalComponent | dict]) – List of ChemicalComponent objects or dictionaries that can be converted to ChemicalComponent objects using ChemicalComponent.from_dict().

  • bonds (list[str] | None) – List of tuples of atom ids to be bonded. They are added as spoofed struct_conn entries, and leaving groups are removed as appropriate. Bond tuples must be in the format (1-indexed!): (CHAIN_ID / RES_NAME / RES_ID / ATOM_NAME, CHAIN_ID / RES_NAME / RES_ID / ATOM_NAME), e.g., [(“A/THR/4/CG”, “D/L:1/0/O13”), (“A/CYS/5/SG”, “A/CYS/137/SG”)]

  • return_components (bool) – If True, return the components list as well as the AtomArray. Useful for e.g., mapping components to generated chain IDs or inferred chain types.

  • custom_residues (dict[str, AtomArray | SDFComponent | dict] | None) – A dictionary of custom residues to be used as “spoof” CCD entries. Each entry can be given either as an AtomArray directly or as a dictionary specifying the path to a CIF file (which must include atom names).

NOTE: If manually specifying bonds, we recommend visualizing the bond graph with matplotlib to ensure that the bonds are correctly specified.

NOTE: The res_id numbering follows the RCSB convention (1-indexed).

Returns:

The assembled AtomArray, used for visualization or inference. When return_components is True, the components list is returned as well.

Return type:

AtomArray

Raises:
  • ValueError – If there are duplicate chain_ids across input Components

  • ValueError – If there are duplicate chain_ids that correspond to non-identical molecular entities.

atomworks.io.tools.inference.read_chai_fasta(fasta_path: Path) list[ChemicalComponent][source]#
atomworks.io.tools.inference.sdf_to_annotated_atom_array(path: StringIO | PathLike, chain_id: str, *, chain_type: ChainType | str = 'non-polymer', is_polymer: bool = False, res_name: str = 'UNL', backend: Literal['openbabel', 'rdkit'] = 'rdkit') AtomArray[source]#
atomworks.io.tools.inference.sequence_to_annotated_atom_array(seq: list[str], chain_id: str, *, chain_type: ChainType | str = None, is_polymer: bool | None = None, ccd_mirror_path: PathLike = None, custom_residues: dict[str, AtomArray] | None = None, **kwargs) AtomArray[source]#
atomworks.io.tools.inference.smiles_to_annotated_atom_array(smiles: str, chain_id: str, *, chain_type: ChainType | str = 'non-polymer', is_polymer: bool = False, backend: Literal['openbabel', 'rdkit'] = 'rdkit', res_name: str = 'UNL') AtomArray[source]#
atomworks.io.tools.inference.standardize_component_keys(component_dict: dict) dict[source]#

Standardize component dictionary keys for compatibility with AF3’s inference API.

Maps:
  • “sequence” -> “seq”

  • “id” -> “chain_id”