# Source code for orchestrator.computer.descriptor.quests

from ase import Atoms
import numpy as np
from .descriptor_base import AtomCenteredDescriptor
from orchestrator.utils.data_standard import METADATA_KEY

from typing import Optional, Union, Any, List

from quests.descriptor import get_descriptors, get_descriptors_multicomponent


class QUESTSDescriptor(AtomCenteredDescriptor):
    """
    Leverages the QUESTS library for model agnostic descriptors.

    Besides computing descriptors, :meth:`compute_batch` records the
    descriptor settings (number of neighbors, cutoff, species) in the
    ``info`` dictionary of every ASE ``Atoms`` object it processes.
    """

    def __init__(
        self,
        num_nearest_neighbors: Optional[int] = 32,
        cutoff: Optional[float] = 5.0,
        species: Optional[List[str]] = None,
    ):
        """
        :param num_nearest_neighbors: the number of nearest neighbors
            considered in calculation. Determines the dimensionality of the
            quests descriptor: (2*num_nearest_neighbors)-1
        :type num_nearest_neighbors: int
        :param cutoff: the distance in angstroms considered in calculation
        :type cutoff: float
        :param species: the species list. If provided, all species-species
            interactions are computed and concatenated in order. If not
            provided, the species-agnostic version is used.
        :type species: list[str]
        """
        super().__init__()
        self.OUTPUT_KEY = 'quests_descriptor'
        self.num_nearest_neighbors = num_nearest_neighbors
        self.cutoff = cutoff
        if species is None:
            # sentinel value checked in compute_batch to select the
            # species-agnostic QUESTS routine
            species = ['species-agnostic']
        self.species = species
        self._metadata = {
            'num_nearest_neighbors': num_nearest_neighbors,
            'cutoff': cutoff,
            'species': species,
        }
        # The init args happen to hold the same values as the metadata for
        # this descriptor; keep a separate dict so that editing one can
        # never silently change the other.
        self._init_args = dict(self._metadata)

    def _metadata_snapshot(self) -> dict[str, Any]:
        """
        Return a copy of the descriptor metadata that is safe to hand out.

        List values (i.e. the species list) are copied as well, so that
        in-place edits made through one ``Atoms.info`` entry cannot leak
        into this descriptor or into other structures.

        :returns: a new dictionary with the same keys and values as the
            descriptor metadata
        :rtype: dict
        """
        return {
            key: list(value) if isinstance(value, list) else value
            for key, value in self._metadata.items()
        }

    def compute(
        self,
        atoms: Union[list[Atoms], Atoms],
        **kwargs,
    ) -> np.ndarray:
        """
        Computes the QUESTS descriptors for one configuration of atoms.

        This is a thin wrapper around :meth:`compute_batch`. If a list is
        passed, the whole list is processed by :meth:`compute_batch`, but
        only the result for the FIRST configuration is returned. An empty
        list therefore raises ``IndexError``.

        :param atoms: the atomic structure to compute descriptors for
        :type atoms: ASE.Atoms object
        :param kwargs: currently unused by this method
        :returns: (N,D) array of D-dimensional QUESTS descriptors
            corresponding to the N atoms in the atomic configuration where D
            equals (2*num_nearest_neighbors)-1
        :raises RuntimeError: if ``atoms`` is neither an ``Atoms`` object nor
            a list
        """
        if isinstance(atoms, Atoms):
            return self.compute_batch([atoms])[0]
        if isinstance(atoms, list):
            return self.compute_batch(atoms)[0]
        raise RuntimeError(
            f"Invalid input type '{type(atoms)}' passed to .compute()")

    def compute_batch(
        self,
        list_of_atoms: list[Atoms],
        **kwargs,
    ) -> list[np.ndarray]:
        """
        Computes the QUESTS descriptors for all configurations in the list.

        As a side effect, the descriptor settings are written to the
        ``info`` dictionary of every configuration, both as flat
        ``<OUTPUT_KEY>_<name>`` entries and nested under
        ``info[METADATA_KEY][OUTPUT_KEY]``.

        :param list_of_atoms: atomic structures to compute descriptors
        :type list_of_atoms: list of ASE.Atoms objects
        :param kwargs: currently unused by this method
        :returns: list of (N, D) arrays of D-dimensional QUESTS descriptors
            corresponding to the descriptors of each atomic configuration of
            N atoms, where D equals (2*num_nearest_neighbors)-1
        :rtype: list
        :raises RuntimeError: if a species list was provided at construction
            and a configuration contains a species that is not in it
        """
        check_species = self.species != ['species-agnostic']

        for atoms in list_of_atoms:
            # Each structure gets its own copy of the metadata so that the
            # structures do not share mutable state with each other or with
            # this descriptor.
            metadata = self._metadata_snapshot()

            # NOTE: these are being attached here because ColabFit can't do
            # nested key extraction. e.g. extracting "cut_name" from
            # atoms.info[METADATA_KEY][self.OUTPUT_KEY]['cut_name']
            for k, v in metadata.items():
                atoms.info[f'{self.OUTPUT_KEY}_{k}'] = v

            # setdefault avoids overwriting METADATA_KEY if it already exists
            atoms.info.setdefault(METADATA_KEY, {})[self.OUTPUT_KEY] = metadata

            # the set of symbols is only needed when a species list was given
            if check_species:
                for s in set(atoms.get_chemical_symbols()):
                    if s not in self.species:
                        raise RuntimeError(
                            "Invalid species detected. Expected one of"
                            f" {self._metadata['species']}, but got `{s}`")

        if check_species:
            results = [
                get_descriptors_multicomponent(
                    [atoms],
                    k=self.num_nearest_neighbors,
                    cutoff=self.cutoff,
                    species=self.species,
                ) for atoms in list_of_atoms
            ]
        else:
            # Optionally compute the species-agnostic version
            results = [
                get_descriptors(
                    [atoms],
                    k=self.num_nearest_neighbors,
                    cutoff=self.cutoff,
                ) for atoms in list_of_atoms
            ]

        return results

    def get_colabfit_property_definition(
        self,
        name: Optional[str] = None,
    ) -> dict[str, Any]:
        """
        Builds the ColabFit property definition for the QUESTS descriptor.

        :param name: currently unused by this method
        :type name: str
        :returns: dictionary describing the property id, name, title,
            description, and the ``descriptors``, ``num-nearest-neighbors``,
            ``cutoff`` and ``species`` fields
        :rtype: dict
        """
        # the property name is the output key with dashes, not underscores
        property_name = self.OUTPUT_KEY.replace("_", "-")
        return {
            'property-id':
            'tag:staff@noreply.colabfit.org,2024-12-09:'
            f'property/{property_name}',
            "property-name": property_name,
            "property-title": "QUESTS descriptor",
            # NOTE: adjacent string literals are concatenated verbatim, so
            # every fragment but the last must end with a space
            "property-description":
            "The concatenation of a list of sorted "
            "neighbor distances and average triplet bond lengths",
            "descriptors": {  # example: (N,D) arrays
                "type": "float",
                "has-unit": False,
                "extent": [":", ":"],
                "required": True,
                "description":
                "The per-atom descriptors. N is equal to "
                "the number of atoms and D is (2*num-nearest-neighbors)-1",
            },
            "num-nearest-neighbors": {  # example: 32
                "type": "int",
                "has-unit": False,
                "extent": [],
                "required": True,
                "description":
                "The number of nearest neighbors "
                "included in the calculation. Determines the dimensionality "
                "of the quests descriptor: (2*num-nearest-neighbors)-1",
            },
            "cutoff": {  # example: 5.0
                "type": "float",
                "has-unit": True,
                "extent": [],
                "required": True,
                "description": "The cutoff distance in calculation",
            },
            "species": {  # example: ['C', 'O']
                "type": "string",
                "has-unit": False,
                "extent": [':'],
                "required": True,
                "description": "The chemical species of the descriptor"
            }
        }

    def get_colabfit_property_map(
        self,
        name: Optional[str] = None,
    ) -> dict[str, Any]:
        """
        Builds the ColabFit property map for the QUESTS descriptor.

        Each entry names the ``<OUTPUT_KEY>_<suffix>`` field that holds the
        corresponding property value, together with its units.

        :param name: currently unused by this method
        :type name: str
        :returns: mapping from property field name to a dictionary with the
            ``field`` to read and its ``units``
        :rtype: dict
        """
        return {
            'descriptors': {
                'field': self.OUTPUT_KEY + "_descriptors",
                'units': None
            },
            'num-nearest-neighbors': {
                'field': self.OUTPUT_KEY + "_num_nearest_neighbors",
                'units': None
            },
            'cutoff': {
                'field': self.OUTPUT_KEY + "_cutoff",
                'units': 'Ang'
            },
            'species': {
                'field': self.OUTPUT_KEY + '_species',
                'units': None
            }
        }