Source code for orchestrator.simulator.simulator_base

from abc import ABC, abstractmethod
from datetime import datetime
from random import randrange, seed
from ..workflow.factory import workflow_builder
from ..utils.recorder import Recorder
from ..utils.exceptions import UnidentifiedPathError, UnidentifiedStorageError
from ..utils.input_output import ase_glob_read
from ..utils.isinstance import isinstance_no_import


[docs] class Simulator(Recorder, ABC): """ Abstract base class to manage and run simulations (exploration) The simulator class manages the construction and parsing of molecular dynamics calculations using interatomic potentials. The input will typically consist of an initial atomic configuration and calculation parameters (including the potential to use), while the output will include frames or configurations from the simulation as well as information such as the energy of the system, forces on each atom, and/or the stress on the cell, amongst others. :param simulator_args: dictionary of parameters to instantiate the Simulator, such as code_path (executable to use), elements (list of elements present in the simulation), and input_template (the path to an input template to build from) :type simulator_args: dict """
[docs] def __init__(self, simulator_args): """ Abstract base class to manage and run simulations (exploration) :param simulator_args: dictionary of parameters to instantiate the Simulator, such as code_path (executable to use), elements (list of elements present in the simulation), and input_template (the path to an input template to build from) :type simulator_args: dict """ super().__init__() self.simulator_args = simulator_args #: default workflow to use within the Simulator class self.default_wf = workflow_builder.build( 'LOCAL', {'root_directory': './simulator'}, ) # this flag should be set to True externally self.external_setup = False # if external_setup is set to True, then external_func needs to be set # as a function self.external_func = None
[docs] def get_init_configs_from_path(self, config_path, file_ext='.xyz', file_format='extxyz'): """ get the initial configuration for the simulator input from path This function loads the configurations present in the ``config_path`` and all of its sub-directories into a list of ASE Atoms, which is returned. Assumes files are stored in the extended xyz format. This function should only be used if configurations cannot be added to Storage. :param config_path: path of the root directory where configuration files are stored (extended xyz format) :type config_path: str :param file_ext: the file extension. Default is '.xyz' :type file_ext: str :param file_ext: the file format. Default is 'extxyz' :type file_ext: str :returns: dataset as list of Atoms objects :rtype: list """ return ase_glob_read(config_path, file_ext, file_format)
[docs] def run( self, path_type, model_path, input_args, init_config_args, workflow=None, job_details=None, ): """ setup and execute a Simulator calculation Prepare input file and initial configuration. Execute the code (run simulation), returning the ``job_id`` for tracking purposes :param path_type: specifier for the workflow path, to differentiate calculation types :type path_type: str :param model_path: path where the potential file(s) is stored :type model_path: str :param input_args: input arguments to fill out the input template file :type input_args: dict :param init_config_args: dictionary containing information to specify how the configuration should be setup for the run. Key:value pairs are 'make_config': boolean if run() should create the initial configuration [if false, the other keys are not needed], 'config_handle': identifier to retrieve the configuration, 'storage': storage module for configuration options to be retrieved from. Alternatively, set to 'path' if config_handle is a path where configs should be read from, 'random_seed': if selecting the configuration from a set, an int random seed can be specified to enable reproducability :type init_config_args: dict :param workflow: the workflow for managing job submission, if none are supplied, will use the default workflow defined in this class |default| ``None`` :type workflow: Workflow :param job_details: dict that includes any additional parameters for running the job (passed to :meth:`~orchestrator.workflow.workflow_base.Workflow.submit_job`) |default| ``None`` :type job_details: dict :returns: calculation ID :rtype: int """ module_name = self.__class__.__name__ if workflow is None: workflow = self.default_wf if job_details is None: job_details = {} run_path = workflow.make_path(module_name, path_type) self.load_potential(run_path, model_path) make_config = init_config_args.get('make_config', True) if make_config: self.logger.info(f'{module_name} is creating the configuration') config_handle = init_config_args.get('config_handle') storage = init_config_args.get('storage') random_seed = init_config_args.get('random_seed') if storage == 'path': self.logger.info('Reading configurations from path, consider ' 'using Storage instead') init_configs = self.get_init_configs_from_path(config_handle) elif isinstance_no_import(storage, 'Storage'): self.logger.info( f'Reading configurations from dataset {config_handle} in ' f'database {storage.database_name} from ' f'{storage.__class__.__name__}') init_configs = storage.get_data(config_handle) else: raise UnidentifiedStorageError( f'Simulator cannot use {storage}') if random_seed is not None: self.logger.info( f'Initializing random seed with seed: {random_seed}') seed(random_seed) ind = randrange(0, len(init_configs)) self.logger.info(f'Using random index: {ind}') self.write_initial_config(run_path, init_configs[ind]) input_file_name = job_details.get('input_file_name') self.write_input(run_path, input_args, input_file_name) if self.external_setup: self._external_calculation_setup(run_path) simulator_command = self.get_run_command(job_details) calc_id = workflow.submit_job(simulator_command, run_path, job_details) return calc_id
[docs] def save_configurations( self, path_ids, storage, dataset_handle=None, workflow=None, ): """ save the configurations associated with path_ids to storage :param path_ids: list of ``calc_ids`` or explicit paths associated with simulator jobs. If ``calc_ids`` are supplied, the path is extracted from the :class:`~orchestrator.workflow.workflow_base.JobStatus`. Otherwise it is taken verbatim as the input. :type path_ids: list of int or str :param storage: the storage module where the configurations will be saved. :type storage: Storage :param dataset_handle: the handle to identify where in Storage the configurations should be saved. If ``None``, then the class default (date stamped) is used. |default| ``None`` :type dataset_handle: str :param workflow: the workflow for managing job submission, if none are supplied, will use the default workflow defined in this class. Should be consistent with the workflow supplied for any run calls. |default| ``None`` :type workflow: Workflow :returns: handle of the dataset which includes the new configurations :rtype: str """ if not isinstance(path_ids, list): path_ids = [path_ids] data_paths = [] for path_id in path_ids: if isinstance(path_id, int): self.logger.info( 'Supplied path ID is calc ID, extracting paths') if workflow is None: workflow = self.default_wf calc_path = workflow.get_job_status(path_id).path data_paths.append(calc_path) elif '/' in path_id or '.' in path_id: self.logger.info('Reading explicit paths for parsing output') calc_path = path_id data_paths.append(calc_path) else: raise UnidentifiedPathError((f'Supplied path_id: "{path_id}" ' f'is not in a recognized format')) self.logger.info((f'Saving {len(data_paths)} ' f'{self.__class__.__name__} trajectories')) data = [] for run_path in data_paths: # parsed data is always a single atoms object from the Oracle data.extend(self.parse_for_storage(run_path)) current_date = datetime.today().strftime('%Y-%m-%d') dataset_metadata = { 'description': (f'data generated by {self.__class__.__name__} on ' f'{current_date}') } if dataset_handle is None: dataset_handle = storage.generate_dataset_name( f'{self.__class__.__name__}_dataset', f'{current_date}', check_uniqueness=True, ) # this logic assumes colabfit style naming conventions for dataset IDs # if other storage is implemented, should switch to a more generic # "if dataset exists" logic check if dataset_handle[:3] == 'DS_': # handle is a colabfit ID, dataset exists new_handle = storage.add_data(dataset_handle, data, dataset_metadata) else: # handle is a name, create new dataset new_handle = storage.new_dataset(dataset_handle, data, dataset_metadata) return new_handle
def _external_calculation_setup(self, path): """ utility function to call an attached external function for input setup If self.external_setup is set to ``True``, then this method will be called. The external code which set the setup flag to True should also set the external_func to the desired function. It should take in the path to write output as its only parameter. :param path: location where input files should be written, passed to the attached external_func :type path: str """ if callable(self.external_func): self.external_func(path) else: raise AttributeError('Set external_func to a callable function!')
[docs] @abstractmethod def write_input(self, run_path, input_args, input_file_name): """ generate an input file for running a simulator calculation generate an input file using the ``input_template`` and ``input_args`` for the given structural configuration, written as an external file by :meth:`write_initial_config` :param run_path: root path where simulations will run :type run_path: str :param input_args: additional arguments for the template, model specific :type input_args: dict :param input_file_name: name for the input file :type input_file_name: str """ pass
[docs] @abstractmethod def write_initial_config(self, run_path, atoms): """ generate an input file for the initial structural configuration Codes such as LAMMPS have an input file specifying the calculation and a separate input file specifying the structural configuration. This method generates the latter file. :param run_path: path where the configuration file will be written :type run_path: str :param atoms: the ASE Atoms object :type pos: Atoms """ pass
[docs] @abstractmethod def get_run_command(self, args=None): """ return the command to run a simulator calculation this method formats the run command based on the ``code_path`` internal variable set at instantiation of the Simulator, which the :class:`~orchestrator.workflow.workflow_base.Workflow` will execute in the proper ``run_path``. The args dictionary can be used to pass any necessary extra parameters to the specific implementations. :param args: dictionary for parameters to decorate or enable the run command |default| ``None`` :type args: dict :returns: command to run the simulator :rtype: str """ pass
[docs] @abstractmethod def parse_for_storage(self, run_path): """ process calculation output to extract data in a consistent format Typically, the output of interest from simulators are the calculation cell and atomic coordinates and type. However, additional information could also be extracted as properties in the ASE Atoms object. :param run_path: directory where the simulator output file resides :type run_path: str :returns: list of ASE Atoms of the configurations and any attached properties. Metadata with the configuration source information is attached to the METADATA_KEY in the info dict. :rtype: Atoms list """ pass
[docs] @abstractmethod def load_potential(self, run_path, model_path): """ set up the potential to be used at run_path Make the trained model accessible for simulations, i.e. through loading a KIM potential or ensuring the potential files are present in the requisite folder :param run_path: root path where simulations will run and potential should be loaded/linked :type run_path: str :param model_path: path where the model to load is stored :type model_path: str """ pass