Source code for orchestrator.trainer.trainer_base

from abc import ABC, abstractmethod
from ..utils.recorder import Recorder
from ..workflow.factory import workflow_builder
from typing import Optional, Union
import numpy as np
from ase import Atoms
from ..storage.storage_base import Storage
from ..potential.potential_base import Potential
from ..workflow.workflow_base import Workflow


class Trainer(Recorder, ABC):
    """
    Abstract base class to manage the training of different potentials

    The trainer class is responsible for handling the loading/assignment of
    training data, as well as the actual process of training a potential.
    Concrete trainers must implement every abstract method declared here.
    """

    def __init__(self, **kwargs):
        """
        set variables and initialize the recorder and default workflow

        :param kwargs: accepted so that subclasses can be constructed with
            arbitrary keyword arguments; this base implementation does not
            read them and does not forward them to the recorder
        """
        super().__init__()
        #: default workflow to use within the trainer class
        self.default_wf = workflow_builder.build(
            'LOCAL',
            {'root_directory': './trainer'},
        )

    @abstractmethod
    def checkpoint_trainer(self):
        """
        checkpoint the trainer module into the checkpoint file

        save necessary internal variables into a dict with key
        checkpoint_name and write to the (json) checkpoint file for restart
        capabilities
        """

    @abstractmethod
    def restart_trainer(self):
        """
        restart the trainer module from the checkpoint file

        check if the checkpoint_file has an entry matching the
        checkpoint_name and set internal variables accordingly if so
        """

    @abstractmethod
    def _get_training_data(
        self,
        dataset_handle: str,
        storage: Storage,
    ) -> list[Atoms]:
        """
        Get the training data configurations

        Retrieve the dataset specified by dataset_handle from the passed
        storage module. This dataset can be augmented or otherwise modified
        (i.e. adding weights) as necessary for training.

        :param dataset_handle: the identifier of the dataset to extract from
            the storage module
        :type dataset_handle: str
        :param storage: storage instance where the training data is saved
        :type storage: Storage
        :returns: training dataset
        :rtype: list of ASE Atoms
        """

    @abstractmethod
    def train(
        self,
        path_type: str,
        potential: Potential,
        storage: Storage,
        dataset_list: list,
        workflow: Optional[Workflow] = None,
        eweight: float = 1.0,
        fweight: float = 1.0,
        vweight: float = 1.0,
        per_atom_weights: Union[bool, np.ndarray] = False,
        write_training_script: bool = True,
        upload_to_kimkit: bool = True,
    ) -> list:
        """
        Train the potential based on the specific trainer details

        This is a main method of the trainer class, and uses the parameters
        supplied at instantiation to perform the potential training by
        minimizing a loss function.

        :param path_type: specifier for the workflow path, to differentiate
            training runs
        :type path_type: str
        :param potential: potential to be trained. The actual model itself
            is set as an attribute of the Potential object
        :type potential: Potential
        :param storage: an instance of the storage class
        :type storage: Storage
        :param dataset_list: the list of dataset_handles (e.g. collabfit-IDs)
            within the storage object to use as the dataset.
        :type dataset_list: list
        :param workflow: the workflow for managing path definition and job
            submission, if none are supplied, will use the default workflow
            defined in this class |default| ``None``
        :type workflow: Workflow
        :param eweight: weight of energy data in the loss function
            |default| ``1.0``
        :type eweight: float
        :param fweight: weight of the force data in the loss function
            |default| ``1.0``
        :type fweight: float
        :param vweight: weight of the stress data in the loss function
            |default| ``1.0``
        :type vweight: float
        :param per_atom_weights: True to read from dataset, or numpy array
            |default| ``False``
        :type per_atom_weights: either boolean or np.ndarray
        :param write_training_script: True to write a training script in the
            working trainer directory |default| ``True``
        :type write_training_script: bool
        :param upload_to_kimkit: True to upload to kimkit repository
            |default| ``True``
        :type upload_to_kimkit: bool
        :returns: trained model, loss object
        :rtype: implementation dependent
        """

    @abstractmethod
    def submit_train(
        self,
        path_type: str,
        potential: Potential,
        storage: Storage,
        dataset_list: list,
        workflow: Workflow,
        job_details: dict,
        eweight: float = 1.0,
        fweight: float = 1.0,
        vweight: float = 1.0,
        per_atom_weights: Union[bool, np.ndarray, str] = False,
        upload_to_kimkit: bool = True,
    ) -> int:
        """
        Asynchronously train the potential based on the trainer details

        This is a main method of the trainer class, and uses the parameters
        supplied at instantiation to perform the potential training by
        minimizing a loss function. While :meth:`train` works synchronously,
        this method submits training to a job scheduler.

        :param path_type: specifier for the workflow path, to differentiate
            training runs
        :type path_type: str
        :param potential: potential to be trained. The actual model itself
            is set as an attribute of the Potential object
        :type potential: Potential
        :param storage: an instance of the storage class
        :type storage: Storage
        :param dataset_list: the list of dataset_handles (e.g. collabfit-IDs)
            within the storage object to use as the dataset.
        :type dataset_list: list
        :param workflow: the workflow for managing path definition and job
            submission
        :type workflow: Workflow
        :param job_details: settings for the submitted job; the expected
            keys are not defined in this base class (presumably scheduler
            options consumed by the workflow - confirm against the concrete
            trainer)
        :type job_details: dict
        :param eweight: weight of energy data in the loss function
            |default| ``1.0``
        :type eweight: float
        :param fweight: weight of the force data in the loss function
            |default| ``1.0``
        :type fweight: float
        :param vweight: weight of the stress data in the loss function
            |default| ``1.0``
        :type vweight: float
        :param per_atom_weights: True to read from dataset, or numpy array.
            The annotation also admits a str; its meaning is not defined
            here (presumably a reference to stored weights - confirm against
            the concrete trainer) |default| ``False``
        :type per_atom_weights: boolean, np.ndarray or str
        :param upload_to_kimkit: True to upload to kimkit repository
            |default| ``True``
        :type upload_to_kimkit: bool
        :returns: calculation ID of the submitted job
        :rtype: int
        """

    @abstractmethod
    def _save_model(
        self,
        path_type: str,
        potential: Potential,
        loss=None,
        create_path: bool = True,
        workflow: Optional[Workflow] = None,
    ) -> str:
        """
        Save the model and (optionally) loss data

        Write the model (and loss) data to disk from memory

        :param path_type: specifier for the workflow path, to differentiate
            training runs and where the model will be saved
        :type path_type: str
        :param potential: potential to be saved. This method takes a full
            :class:`~orchestrator.potential.potential_base.Potential` class
            object
        :type potential: Potential
        :param loss: loss object to save, optional. |default| ``None``
        :type loss: implementation dependent
        :param create_path: if the function needs to create a new path, or
            if path_type should be used as the full path |default| ``True``
        :type create_path: boolean
        :param workflow: the workflow for managing path definition, if none
            are supplied, will use the default workflow defined in this
            class |default| ``None``
        :type workflow: Workflow
        :returns: path where the model is saved
        :rtype: str
        """

    @abstractmethod
    def load_from_submitted_training(
        self,
        calc_id: int,
        potential: Potential,
        workflow: Workflow,
    ):
        """
        reload a potential that was trained via a submitted job

        :param calc_id: calculation ID of the submitted training job
            (presumably the value returned by :meth:`submit_train` -
            confirm against the concrete trainer)
        :type calc_id: int
        :param potential: potential object associated with the submitted
            training
        :type potential: Potential
        :param workflow: the workflow that managed the submitted job
        :type workflow: Workflow
        """