Source code for orchestrator.computer.descriptor.kliff

import json
import numpy as np
from ase import Atoms
from .descriptor_base import AtomCenteredDescriptor
from orchestrator.utils.data_standard import METADATA_KEY

from typing import Optional, Union, Any

from kliff.legacy.descriptors import SymmetryFunction, Bispectrum
from kliff.dataset import Configuration


class KLIFFDescriptor(AtomCenteredDescriptor):
    """
    Leverages the KLIFF library and its built-in descriptors.

    The descriptor type chosen at construction selects both the KLIFF
    descriptor class that is instantiated and the ``OUTPUT_KEY`` under which
    metadata is attached to each ``Atoms.info`` dictionary.
    """

    supported_descriptor_types = ['symmetry_function', 'bispectrum']

    def __init__(self, descriptor_type: str, cut_dists: dict[str, float],
                 cut_name: str, hyperparams: Union[str, dict[str, Any]]):
        """
        :param descriptor_type: the type of the descriptors to evaluate. See
            `supported_descriptor_types` for available options.
        :type descriptor_type: str
        :param cut_dists: the cutoff distances for each element pairing. For
            example: `{'Cu-Cu': 3.5}`.
        :type cut_dists: dict
        :param cut_name: Name of the cutoff function, such as `cos`, `P3`,
            and `P7`.
        :type cut_name: str
        :param hyperparams: A dictionary of the hyperparams of the descriptor
            or a string to select the predefined hyperparams.
        :type hyperparams: dict or str
        :raises NotImplementedError: if `descriptor_type` is not one of
            `supported_descriptor_types`.
        """
        super().__init__()

        if descriptor_type == 'symmetry_function':
            fxn_cls = SymmetryFunction
            self.OUTPUT_KEY = 'kliff_descriptor_symmetry_function'
        elif descriptor_type == 'bispectrum':
            fxn_cls = Bispectrum
            self.OUTPUT_KEY = 'kliff_descriptor_bispectrum'
        else:
            # Both fragments must be f-strings, otherwise the list of
            # supported types is emitted as a literal "{...}" placeholder.
            raise NotImplementedError(
                f'The descriptor type {descriptor_type}'
                f' is not in the list {self.supported_descriptor_types}')

        self._metadata = {
            'descriptor_type': descriptor_type,
            'cut_dists': cut_dists,  # will be JSON-ified by write_input
            'cut_name': cut_name,
            'hyperparams': hyperparams,
        }
        # They happen to be the same for this class. NOTE: this is an alias
        # of `_metadata` (same dict object), not a copy.
        self._init_args = self._metadata

        self.descriptor_fxn = fxn_cls(
            cut_dists=cut_dists,
            cut_name=cut_name,
            hyperparams=hyperparams,
        )

    def compute(self, atoms: Union[list[Atoms], Atoms],
                **kwargs) -> np.ndarray:
        """
        Compute the atomic descriptors for a single supercell.

        See `.compute_batch` for details of the computation.

        :param atoms: a single configuration, or a list of configurations.
            When a list is given it is forwarded to `.compute_batch` as-is
            (so every entry is processed), but only the result for the
            first entry is returned.
        :type atoms: ASE.Atoms or list of ASE.Atoms objects
        :param kwargs: accepted for interface compatibility; not used here.
        :returns: the descriptors of the (first) configuration
        :rtype: np.ndarray
        :raises RuntimeError: if `atoms` is neither an `Atoms` object nor a
            list.
        """
        if isinstance(atoms, Atoms):
            return self.compute_batch([atoms])[0]
        elif isinstance(atoms, list):
            return self.compute_batch(atoms)[0]
        else:
            raise RuntimeError(
                f"Invalid input type of '{type(atoms)}' passed to .compute()")

    def compute_batch(self, list_of_atoms: list[Atoms],
                      **kwargs) -> list[np.ndarray]:
        """
        Computes atomic descriptors for all atomic configurations in the
        list.

        As a side effect, the descriptor metadata is written into the
        ``info`` dictionary of every configuration, both as flat
        ``<OUTPUT_KEY>_<name>`` entries and nested under
        ``info[METADATA_KEY][OUTPUT_KEY]``.

        :param list_of_atoms: the list of atomic configurations for which to
            compute the atomic descriptors
        :type list_of_atoms: list of ASE.Atoms objects
        :param kwargs: accepted for interface compatibility; not used here.
        :returns: list of descriptors for each atomic configuration from
            `list_of_atoms`
        :rtype: list
        """
        results = []

        for atoms in list_of_atoms:
            # NOTE: these are being attached here because ColabFit can't do
            # nested key extraction. e.g. extracting "cut_name" from
            # atoms.info[METADATA_KEY][self.OUTPUT_KEY]['cut_name']
            # NOTE(review): a dict-valued `hyperparams` is stored as-is here,
            # while the property definition declares it a string -- confirm
            # whether it should be JSON-ified like `cut_dists`.
            for k, v in self._metadata.items():
                if k == 'cut_dists':  # because it's a dict
                    atoms.info[f'{self.OUTPUT_KEY}_{k}'] = json.dumps(v)
                else:
                    atoms.info[f'{self.OUTPUT_KEY}_{k}'] = v

            # to avoid overwriting if METADATA_KEY already exists
            if METADATA_KEY not in atoms.info:
                atoms.info[METADATA_KEY] = {}

            # The same `_metadata` dict object is shared by every
            # configuration processed by this instance.
            atoms.info[METADATA_KEY][self.OUTPUT_KEY] = self._metadata

            atoms.info['dummy_energy'] = None  # KLIFF Config requires this
            try:
                config = Configuration.from_ase_atoms(
                    atoms, energy_key='dummy_energy')
            finally:
                # Always remove the placeholder so a failed conversion does
                # not leave a stray key on the caller's Atoms object.
                atoms.info.pop('dummy_energy', None)

            # Only the first element of the transform output is kept.
            results.append(self.descriptor_fxn.transform(config)[0])

        return results

    def get_colabfit_property_definition(
            self, name: Optional[str] = None) -> dict[str, Any]:
        """
        Build the ColabFit property definition for this descriptor.

        The property id and name are derived from ``OUTPUT_KEY`` with ``_``
        replaced by ``-``; the title and description depend on the
        descriptor type chosen at construction.

        :param name: accepted for interface compatibility; not used here.
        :type name: str
        :returns: the property definition dictionary
        :rtype: dict
        """
        # kim properties don't support "_", use "-"
        property_name = self.OUTPUT_KEY.replace('_', '-')

        # Pick a title/description matching the descriptor that was built,
        # so bispectrum properties are not labelled as ACSF.
        if self._metadata['descriptor_type'] == 'bispectrum':
            title = "Bispectrum descriptor"
            description = (
                "Bispectrum components of the local atomic neighbor density."
                " This property definition is intended to be used with the "
                "KLIFF implementation of the bispectrum descriptors.")
        else:
            title = "ACSF descriptor"
            description = (
                "Atom Centered "
                "Symmetry Functions, as defined by the original BP-NNP work."
                " This property definition is intended to be used with the "
                "KLIFF implementation of the ACSF descriptors.")

        return {
            'property-id': 'tag:staff@noreply.colabfit.org,2024-12-09:'
                           f'property/{property_name}',
            "property-name": property_name,
            "property-title": title,
            "property-description": description,
            # the fields that make up the descriptor
            "descriptors": {  # example: (N, D) arrays
                "type": "float",
                "has-unit": False,
                "extent": [":", ":"],
                "required": True,
                "description": "The per-atom descriptors.",
            },
            "cut-name": {  # example: "cos"
                "type": "string",
                "has-unit": False,
                "extent": [],
                "required": True,
                "description": "The name of the cutoff function",
            },
            "cut-dists": {  # example: {'Cu-Cu': 4.0} JSON-ified
                "type": "string",
                "has-unit": True,
                "extent": [],
                "required": True,
                "description": "The dictionary of cutoff distances of each "
                               "species-species bond",
            },
            "hyperparams": {  # example: 'set51'
                "type": "string",
                "has-unit": False,
                "extent": [],
                "required": False,
                "description": "String description of additional "
                               "hyperparameters. For KLIFF, this will "
                               "usually be 'set51' "
                               "or 'set30'."
            },
        }

    def get_colabfit_property_map(
        self,
        name: Optional[str] = None
    ) -> dict[str, dict[str, Optional[str]]]:
        """
        Build the ColabFit property map for this descriptor.

        Each property field is mapped to the ``<OUTPUT_KEY>_<suffix>`` key
        that holds its value, together with its units (``None`` when the
        field is unitless).

        :param name: accepted for interface compatibility; not used here.
        :type name: str
        :returns: mapping of property field name to its source field and
            units
        :rtype: dict
        """
        return {
            'descriptors': {
                'field': self.OUTPUT_KEY + "_descriptors",
                'units': None
            },
            'cut-name': {
                'field': self.OUTPUT_KEY + '_cut_name',
                'units': None
            },
            'cut-dists': {
                'field': self.OUTPUT_KEY + '_cut_dists',
                'units': 'Ang'
            },
            'hyperparams': {
                'field': self.OUTPUT_KEY + '_hyperparams',
                'units': None
            },
        }