# Source code for atomworks.io.common
from __future__ import annotations
import copy
import hashlib
from collections import OrderedDict
from collections.abc import Callable, Iterable, Iterator
from functools import lru_cache, wraps
from typing import Any
import numpy as np
from toolz.curried import compose, reduce
[docs]
def exists(obj: Any) -> bool:
    """Return ``True`` for every value except ``None``.

    Falsy values such as ``0``, ``""`` or ``[]`` still count as existing;
    only the ``None`` singleton does not.
    """
    if obj is None:
        return False
    return True
[docs]
def default(obj: Any, default: Any) -> Any:
    """Return ``obj`` if it exists (is not ``None``), otherwise ``default``.

    Args:
        obj: The preferred value.
        default: The fallback returned when ``obj`` is ``None``.

    Returns:
        ``obj`` itself, or ``default`` when ``obj`` is ``None``.
    """
    if exists(obj):
        return obj
    return default
[docs]
def deduplicate_iterator(iterator: Iterable) -> Iterator:
    """Return an iterator over the unique items of ``iterator`` in first-seen order.

    The input is consumed eagerly, and its items are used as dictionary keys,
    so they must be hashable.
    """
    # Dictionary keys are unique and keep insertion order, which yields the
    # order-preserving de-duplication.
    first_seen = OrderedDict.fromkeys(iterator)
    return iter(first_seen)
[docs]
def to_hashable(element: Any) -> Any:
    """Return ``element`` unchanged if it is an integer or string, else as a tuple.

    Python and NumPy integers and strings are passed through as-is. Anything
    else is handed to ``tuple(...)``, so it must be iterable.
    """
    passthrough_types = (int, str, np.integer, np.str_)
    if isinstance(element, passthrough_types):
        return element
    return tuple(element)
[docs]
def sum_string_arrays(*objs: np.ndarray | str) -> np.ndarray:
"""
Sum a list of string arrays / strings into a single string array by concatenating them and
determining the shortest string length to set as dtype.
"""
return reduce(np.char.add, objs).astype(object).astype(str)
[docs]
def not_isin(element: np.ndarray, array: np.ndarray, **isin_kwargs) -> np.ndarray:
    """Return a boolean mask marking the entries of ``element`` that are NOT in ``array``.

    This is `np.isin` called with ``invert=True``; any extra keyword arguments
    are forwarded to `np.isin`.
    """
    absent_mask = np.isin(element, array, **isin_kwargs, invert=True)
    return absent_mask
[docs]
def listmap(func: Callable, *iterables) -> list:
    """Like `map`, but returns a list instead of an iterator.

    Args:
        func: Callable applied to the items; it receives one argument per iterable.
        *iterables: One or more iterables consumed in lockstep, exactly as `map` does
            (iteration stops at the shortest one).

    Returns:
        A list holding the results of ``func`` in iteration order.
    """
    # Call the builtins directly rather than building a composed function on every call.
    return list(map(func, *iterables))
[docs]
def immutable_lru_cache(maxsize: int = 128, typed: bool = False, deepcopy: bool = True) -> Callable:
"""An immutable version of `lru_cache` for caching functions that return mutable objects."""
copy_func = copy.deepcopy if deepcopy else copy.copy
def decorator(func: Callable) -> Callable:
cached_func = lru_cache(maxsize=maxsize, typed=typed)(func)
@wraps(func)
def wrapper(*args, **kwargs) -> Any:
return copy_func(cached_func(*args, **kwargs))
return wrapper
return decorator
[docs]
class KeyToIntMapper:
    """
    Assigns each distinct key a unique integer, numbered by order of first appearance.

    Handy for turning identifiers such as `chain_id`, `chain_entity`, `molecule_iid`, etc.
    into integers.

    Example:
    ```python
    chain_id_to_int = KeyToIntMapper()
    chain_id_to_int("A") # 0
    chain_id_to_int("C") # 1
    chain_id_to_int("A") # 0
    chain_id_to_int("B") # 2
    ```
    """

    def __init__(self):
        # Integer that the next previously-unseen key will receive.
        self.next_id = 0
        # Every key seen so far, mapped to its assigned integer.
        self.key_to_id = {}

    def __call__(self, value: Any) -> int:
        """Return the integer for `value`, assigning the next free one on first sight."""
        try:
            return self.key_to_id[value]
        except KeyError:
            assigned = self.next_id
            self.key_to_id[value] = assigned
            self.next_id = assigned + 1
            return assigned
[docs]
def md5_hash_string(s: str, length: int = 32) -> str:
    """Generate an MD5 hash of a string and return the first `length` characters.

    Args:
        s: Text to hash; it is encoded as UTF-8 before hashing.
        length: Number of leading hex characters to keep. The full digest has 32
            characters, so larger values return the whole digest.

    Returns:
        The lowercase hexadecimal digest, truncated to `length` characters.
    """
    # `usedforsecurity=False` marks this as a non-cryptographic use, so the call also
    # works on builds that restrict MD5 (e.g. FIPS mode). The digest is unchanged.
    full_hash = hashlib.md5(s.encode("utf-8"), usedforsecurity=False).hexdigest()
    return full_hash[:length]