.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "auto_examples/plot_basics.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note :ref:`Go to the end ` to download the full example code. .. rst-class:: sphx-glr-example-title .. _sphx_glr_auto_examples_plot_basics.py: CIFUtils: User Example ====================== This example demonstrates how to use the `atomworks.io` package to load, inspect, and manipulate mmCIF structure files. You'll see how to parse a structure, visualize it, and perform basic analysis. .. GENERATED FROM PYTHON SOURCE LINES 10-45 .. code-block:: Python """ ## 1.1 Loading from mmCIF We start by loading a structure from an mmCIF file using `atomworks.io.parse`. This function supports various file formats and allows you to specify options such as which assembly to build, whether to add missing atoms, and more. """ import io from biotite.database import rcsb from atomworks.io import parse from atomworks.io.utils.testing import get_pdb_path from atomworks.io.utils.visualize import view def get_example_path_or_buffer(pdb_id: str) -> str | io.StringIO: try: # ... if file is locally available return get_pdb_path(pdb_id) except FileNotFoundError: # ... otherwise, fetch the file from RCSB return rcsb.fetch(pdb_id, format="cif") result_dict = parse( filename=get_example_path_or_buffer("6lyz"), build_assembly=["1"], add_missing_atoms=True, remove_waters=True, hydrogen_policy="remove", model=1, ) print("Keys in parsed result:", list(result_dict.keys())) .. rst-class:: sphx-glr-script-out .. code-block:: none Keys in parsed result: ['extra_info', 'metadata', 'chain_info', 'assemblies', 'ligand_info', 'asym_unit'] .. GENERATED FROM PYTHON SOURCE LINES 46-61 .. code-block:: Python """ ## 1.2 Visualizing the asymmetric unit and assembly You can visualize the asymmetric unit or any assembly using the built-in viewer from `atomworks.io`. This is helpful for quickly inspecting the structure and its components. """ asym_unit = result_dict["asym_unit"][0] asym_unit = asym_unit[asym_unit.occupancy > 0] view(asym_unit) assembly = result_dict["assemblies"]["1"][0] assembly = assembly[assembly.occupancy > 0] view(assembly) .. rst-class:: sphx-glr-script-out .. code-block:: none .. GENERATED FROM PYTHON SOURCE LINES 62-73 .. code-block:: Python """ ## 1.3 Inspecting structure metadata The parsed result contains rich metadata, including chain and ligand information, as well as annotation categories. This information is useful for downstream analysis and filtering. """ print("Chain info:", result_dict["chain_info"]) print("Ligand info:", result_dict["ligand_info"]) print("Metadata:", result_dict["metadata"]) print("Annotation categories:", result_dict["asym_unit"][0].get_annotation_categories()) .. rst-class:: sphx-glr-script-out .. code-block:: none Chain info: {'A': {'rcsb_entity': '1', 'chain_type': , 'unprocessed_entity_canonical_sequence': 'KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL', 'unprocessed_entity_non_canonical_sequence': 'KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL', 'is_polymer': True, 'ec_numbers': ['3.2.1.17'], 'res_name': ['LYS', 'VAL', 'PHE', 'GLY', 'ARG', 'CYS', 'GLU', 'LEU', 'ALA', 'ALA', 'ALA', 'MET', 'LYS', 'ARG', 'HIS', 'GLY', 'LEU', 'ASP', 'ASN', 'TYR', 'ARG', 'GLY', 'TYR', 'SER', 'LEU', 'GLY', 'ASN', 'TRP', 'VAL', 'CYS', 'ALA', 'ALA', 'LYS', 'PHE', 'GLU', 'SER', 'ASN', 'PHE', 'ASN', 'THR', 'GLN', 'ALA', 'THR', 'ASN', 'ARG', 'ASN', 'THR', 'ASP', 'GLY', 'SER', 'THR', 'ASP', 'TYR', 'GLY', 'ILE', 'LEU', 'GLN', 'ILE', 'ASN', 'SER', 'ARG', 'TRP', 'TRP', 'CYS', 'ASN', 'ASP', 'GLY', 'ARG', 'THR', 'PRO', 'GLY', 'SER', 'ARG', 'ASN', 'LEU', 'CYS', 'ASN', 'ILE', 'PRO', 'CYS', 'SER', 'ALA', 'LEU', 'LEU', 'SER', 'SER', 'ASP', 'ILE', 'THR', 'ALA', 'SER', 'VAL', 'ASN', 'CYS', 'ALA', 'LYS', 'LYS', 'ILE', 'VAL', 'SER', 'ASP', 'GLY', 'ASN', 'GLY', 'MET', 'ASN', 'ALA', 'TRP', 'VAL', 'ALA', 'TRP', 'ARG', 'ASN', 'ARG', 'CYS', 'LYS', 'GLY', 'THR', 'ASP', 'VAL', 'GLN', 'ALA', 'TRP', 'ILE', 'ARG', 'GLY', 'CYS', 'ARG', 'LEU'], 'res_id': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129'], 'processed_entity_non_canonical_sequence': 'KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL', 'processed_entity_canonical_sequence': 'KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL', 'has_sequence_heterogeneity': False}} Ligand info: {'ligand_of_interest': [], 'has_ligand_of_interest': False} Metadata: {'id': '6lyz', 'method': 'X-RAY_DIFFRACTION', 'deposition_date': '1975-02-01', 'release_date': '1977-04-12', 'resolution': 2.0, 'extra_metadata': None, 'crystallization_details': {'pH': None}} Annotation categories: ['chain_id', 'res_id', 'ins_code', 'res_name', 'hetero', 'atom_name', 'element', 'is_aromatic', 'alt_atom_id', 'occupancy', 'b_factor', 'uses_alt_atom_id', 'is_polymer', 'is_backbone_atom', 'chain_type', 'charge', 'stereo', 'pn_unit_id', 'molecule_id', 'chain_entity', 'pn_unit_entity', 'molecule_entity', 'atomic_number'] .. GENERATED FROM PYTHON SOURCE LINES 74-88 .. code-block:: Python """ ## 1.4 Manipulating AtomArray You can easily extract coordinates for specific atoms or chains, and inspect bond information. This is useful for custom analysis or feature extraction. """ ca = assembly[(assembly.atom_name == "CA") & (assembly.occupancy > 0)] print("Coordinates of all resolved CA atoms:", ca.coord.shape) chain = assembly[assembly.chain_id == "A"] print("Coordinates of chain A (all heavy atoms):", chain.coord.shape) print("Bond array:", assembly.bonds.as_array()) .. rst-class:: sphx-glr-script-out .. code-block:: none Coordinates of all resolved CA atoms: (129, 3) Coordinates of chain A (all heavy atoms): (1001, 3) Bond array: [[ 2 9 1] [ 11 16 1] [ 18 27 1] ... [996 997 1] [997 998 1] [997 999 1]] .. GENERATED FROM PYTHON SOURCE LINES 89-103 .. code-block:: Python """ ## 1.5 Distance computations `biotite.structure` provides convenient functions for distance calculations between atoms or sets of atoms. Here we compute distances between C-alpha atoms. """ import biotite.structure as struc distance = struc.distance(ca.coord[0], ca.coord[1]) print(f"Distance between first two C-alpha atoms: {distance:.2f} Å") distance = struc.distance(ca[0], ca) print(f"Distances between first C-alpha atom and all other C-alpha atoms: {distance}") .. rst-class:: sphx-glr-script-out .. code-block:: none Distance between first two C-alpha atoms: 3.80 Å Distances between first C-alpha atom and all other C-alpha atoms: [ 0. 3.7999492 6.156354 9.661932 12.549521 14.182252 11.581763 10.84263 14.616771 15.219817 13.272157 14.838497 18.069147 17.901442 17.395086 20.602865 19.68099 21.579292 24.697742 24.088991 26.870222 27.57354 24.496967 23.660084 20.183601 20.09242 20.787966 17.43983 15.200652 17.336142 17.041845 13.243525 13.362299 15.886873 14.705508 11.209983 10.677215 8.0477 5.7841344 5.6595535 6.6849732 9.572078 13.11964 16.576756 20.033554 22.90127 26.54439 27.58783 25.418657 22.863508 19.15349 16.59757 13.387169 10.174317 9.402125 13.177384 14.283987 16.653786 19.467123 20.810495 24.450527 24.489876 21.609152 20.13909 21.382933 19.977352 23.561102 24.538763 25.224674 28.811474 29.968405 27.057043 27.373085 24.488192 25.097178 21.882395 22.806473 19.33622 18.493265 16.118 13.694339 13.188874 11.805438 8.704005 7.6456475 5.4546723 8.908026 9.965934 13.512428 14.592234 13.578032 15.495068 18.250061 18.6872 18.320152 21.14647 23.370226 22.798538 23.847483 26.741693 28.212841 29.969715 28.73933 26.48814 23.56441 25.639193 23.927898 21.194921 22.984303 20.622774 21.259388 24.949894 25.389885 23.332045 22.84652 26.0037 28.647352 27.044004 26.817791 23.237877 23.657808 22.825241 19.110613 19.498287 21.972675 21.360188 19.54008 20.864511 20.917368 ] .. GENERATED FROM PYTHON SOURCE LINES 104-119 .. code-block:: Python """ ## 1.6 Efficient neighbor search with CellList For efficient spatial queries, use `CellList` to find atoms within a certain radius. This is useful for contact analysis and neighborhood queries. """ resolved_atom_array = assembly[assembly.occupancy > 0] cell_list = struc.CellList(resolved_atom_array, cell_size=5.0) near_atoms = cell_list.get_atoms(resolved_atom_array[0].coord, radius=4) print(f"Number of atoms within 7 Å of the first atom: {near_atoms.shape[0]}") print(f"Atom indices: {near_atoms}") print(f"Chain IDs: {resolved_atom_array.chain_id[near_atoms]}") print(f"Residue IDs: {resolved_atom_array.res_id[near_atoms]}") print(f"Residue names: {resolved_atom_array.res_name[near_atoms]}") .. rst-class:: sphx-glr-script-out .. code-block:: none Number of atoms within 7 Å of the first atom: 8 Atom indices: [ 4 5 0 1 9 2 3 315] Chain IDs: ['A' 'A' 'A' 'A' 'A' 'A' 'A' 'A'] Residue IDs: [ 1 1 1 1 2 1 1 40] Residue names: ['LYS' 'LYS' 'LYS' 'LYS' 'VAL' 'LYS' 'LYS' 'THR'] .. rst-class:: sphx-glr-timing **Total running time of the script:** (0 minutes 1.283 seconds) .. _sphx_glr_download_auto_examples_plot_basics.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: plot_basics.ipynb ` .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_basics.py ` .. container:: sphx-glr-download sphx-glr-download-zip :download:`Download zipped: plot_basics.zip ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_