Source code for orchestrator.trainer.trainer_base

from abc import ABC, abstractmethod
from ..utils.recorder import Recorder
from ..workflow.factory import workflow_builder
from typing import Optional, Union
import numpy as np
from ase import Atoms
from ..storage.storage_base import Storage
from ..potential.potential_base import Potential
from ..workflow.workflow_base import Workflow



[docs]
class Trainer(Recorder, ABC):
    """
    Abstract base class to manage the training of different potentials

    The trainer class is responsible for handling the loading/assignment of
    training data, as well as the actual process of training a potential
    """


[docs]
    def __init__(self, **kwargs):
        """
        set variables and initialize the recorder and default workflow
        """
        super().__init__()

        #: default workflow to use within the trainer class
        self.default_wf = workflow_builder.build(
            'LOCAL',
            {'root_directory': './trainer'},
        )



[docs]
    @abstractmethod
    def checkpoint_trainer(self):
        """
        checkpoint the trainer module into the checkpoint file

        save necessary internal variables into a dict with key checkpoint_name
        and write to the (json) checkpoint file for restart capabilities
        """
        pass



[docs]
    @abstractmethod
    def restart_trainer(self):
        """
        restart the trainer module from the checkpoint file

        check if the checkpoint_file has an entry matching the checkpoint_name
        and set internal variables accordingly if so
        """
        pass


    @abstractmethod
    def _get_training_data(
        self,
        dataset_handle: str,
        storage: Storage,
    ) -> list[Atoms]:
        """
        Get the training data configurations

        Retrieve the dataset specified by dataset_handle from the passed
        storage module. This dataset can be augmented or otherwise modified
        (i.e. adding weights) as necessary for training.

        :param dataset_handle: the identifier of the dataset to extract from
            the storage module
        :type dataset_handle: str
        :param storage: storage instance where the training data is saved
        :type storage: Storage
        :returns: training dataset
        :rtype: list of ASE Atoms
        """
        pass


[docs]
    @abstractmethod
    def train(
        self,
        path_type: str,
        potential: Potential,
        storage: Storage,
        dataset_list: list,
        workflow: Optional[Workflow] = None,
        eweight: float = 1.0,
        fweight: float = 1.0,
        vweight: float = 1.0,
        per_atom_weights: Union[bool, np.ndarray] = False,
        write_training_script: bool = True,
        upload_to_kimkit=True,
    ) -> list:
        """
        Train the potential based on the specific trainer details

        This is a main method of the trainer class, and uses the parameters
        supplied at instantiation to perform the potential training by
        minimizing a loss function.

        :param path_type: specifier for the workflow path, to differentiate
            training runs
        :type path_type: str
        :param potential: potential to be trained. The actual model itself is
            set as an attribute of the Potential object
        :type potential: Potential
        :param storage: an instance of the storage class
        :type storage: Storage
        :dataset_list: the list of dataset_handles (e.g. collabfit-IDs)
            within the storage object to use as the dataset.
        :type dataset_list: list
        :param workflow: the workflow for managing path definition and job
            submission, if none are supplied, will use the default workflow
            defined in this class |default| ``None``
        :type workflow: Workflow
        :param per_atom_weights: True to read from dataset, or numpy array
            |default| ``False``
        :type per_atom_weights: either boolean or np.ndarray
        :param write_training_script: True to write a training script in the
            working trainer directory |default| ``True``
        :type write_training_script: bool
        :param upload_to_kimkit: True to upload to kimkit repository
        :type upload_to_kimkit: bool
        :returns: trained model, loss object
        :rtype: implementation dependent
        """
        pass



[docs]
    @abstractmethod
    def submit_train(
        self,
        path_type: str,
        potential: Potential,
        storage: Storage,
        dataset_list: list,
        workflow: Workflow,
        job_details: dict,
        eweight: float = 1.0,
        fweight: float = 1.0,
        vweight: float = 1.0,
        per_atom_weights: Union[bool, np.ndarray, str] = False,
        upload_to_kimkit=True,
    ) -> int:
        """
        Asychronously train the potential based on the trainer details

        This is a main method of the trainer class, and uses the parameters
        supplied at instantiation to perform the potential training by
        minimizing a loss function. While :meth:`train` works synchronously,
        this method submits training to a job scheduler.

        :param path_type: specifier for the workflow path, to differentiate
            training runs
        :type path_type: str
        :param potential: potential to be trained. The actual model itself is
            set as an attribute of the Potential object
        :type potential: Potential
        :param storage: an instance of the storage class
        :type storage: Storage
        :dataset_list: the list of dataset_handles (e.g. collabfit-IDs)
            within the storage object to use as the dataset.
        :type dataset_list: list
        :param workflow: the workflow for managing path definition and job
            submission
        :type workflow: Workflow
        :param eweight: weight of energy data in the loss function
        :type eweight: float
        :param fweight: weight of the force data in the loss function
        :type fweight: float
        :param vweight: weight of the stress data in the loss function
        :type vweight: float
        :param per_atom_weights: True to read from dataset, or numpy array
            |default| ``False``
        :type per_atom_weights: either boolean or np.ndarray
        :param upload_to_kimkit: True to upload to kimkit repository
        :type upload_to_kimkit: bool
        :returns: calculation ID of the submitted job
        :rtype: int
        """
        pass


    @abstractmethod
    def _save_model(
        self,
        path_type,
        potential,
        loss=None,
        create_path=True,
        workflow=None,
    ):
        """
        Save the model and (optionally) loss data

        Write the model (and loss) data to disk from memory

        :param path_type: specifier for the workflow path, to differentiate
            training runs and where the model will be saved
        :type path_type: str
        :param potential: potential to be saved. This method takes a full
            :class:`~orchestrator.potential.potential_base.Potential` class
            object
        :type potential: Potential
        :param loss: loss object to save, optional. |default| ``None``
        :type loss: implementation dependent
        :param create_path: if the function needs to create a new path, or if
            path_type should be used as the full path |default| ``True``
        :type create_path: boolean
        :param workflow: the workflow for managing path definition, if none are
            supplied, will use the default workflow defined in this class
            |default| ``None``
        :type workflow: Workflow
        :returns: path where the model is saved
        :rtype: str
        """
        pass


[docs]
    @abstractmethod
    def load_from_submitted_training(
        self,
        calc_id: int,
        potential: Potential,
        workflow: Workflow,
    ):
        """
        reload a potential that was trained via a submitted job
        """
        pass