Source code for pytorchrl.agent.algorithms.base

from abc import ABC, abstractmethod
import torch


class Algorithm(ABC):
    """
    Base class for all algorithms.

    Defines the interface every concrete algorithm has to provide: a factory
    classmethod, a set of read-only hyperparameter properties and the methods
    used to act, compute gradients, apply gradients and update weights or
    parameters. Every member is abstract, so subclasses must override all of
    them before they can be instantiated.
    """

    @classmethod
    @abstractmethod
    def create_factory(cls):
        """Returns a function to create new Algo instances."""
        raise NotImplementedError

    @property
    @abstractmethod
    def gamma(self):
        """Returns discount factor gamma."""
        raise NotImplementedError

    @property
    @abstractmethod
    def start_steps(self):
        """Returns the number of steps to collect with initial random policy."""
        raise NotImplementedError

    @property
    @abstractmethod
    def num_epochs(self):
        """
        Returns the number of times the whole buffer is re-used before data
        collection proceeds.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def update_every(self):
        """
        Returns the number of data samples collected between
        network update stages.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def num_mini_batch(self):
        """
        Returns the number of mini batches per epoch.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def mini_batch_size(self):
        """
        Returns the size of each mini batch.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def test_every(self):
        """Returns the number of network updates between test evaluations."""
        raise NotImplementedError

    @property
    @abstractmethod
    def num_test_episodes(self):
        """
        Returns the number of episodes to complete when testing.
        """
        raise NotImplementedError

    @abstractmethod
    def acting_step(self, obs, rhs, done, deterministic=False, *args):
        """
        Algorithm acting function.

        Parameters
        ----------
        obs: torch.tensor
            Current world observation
        rhs: torch.tensor
            RNN recurrent hidden state (if policy is not a RNN, rhs will contain zeroes).
        done: torch.tensor
            1.0 if current obs is the last one in the episode, else 0.0.
        deterministic: bool
            If True take the mode of the predicted distribution, otherwise
            randomly sample the action from it.
        *args
            Additional positional arguments, implementation specific.

        Returns
        -------
        action: torch.tensor
            Predicted next action.
        clipped_action: torch.tensor
            Predicted next action (clipped to be within action space).
        rhs: torch.tensor
            Policy recurrent hidden state (if policy is not a RNN, rhs will contain zeroes).
        other: dict
            Additional algorithm-specific predictions (e.g. value score and
            action log probability for PPO), which may not be used by other
            algorithms.
        """
        raise NotImplementedError

    @abstractmethod
    def compute_gradients(self, batch, grads_to_cpu=True, *args):
        """
        Compute loss and compute gradients but don't do optimization step,
        return gradients instead.

        Parameters
        ----------
        batch: dict
            Data batch containing all required tensors to compute the
            algorithm loss.
        grads_to_cpu: bool
            If gradient tensor will be sent to another node, need to be in CPU.
        *args
            Additional positional arguments, implementation specific.

        Returns
        -------
        grads: list of tensors
            List of actor_critic gradients.
        info: dict
            Dict containing current algorithm iteration information.
        """
        raise NotImplementedError

    @abstractmethod
    def apply_gradients(self, gradients=None, *args):
        """
        Take an optimization step, previously setting new gradients if provided.

        Parameters
        ----------
        gradients: list of tensors
            List of actor_critic gradients.
        *args
            Additional positional arguments, implementation specific.
        """
        raise NotImplementedError

    @abstractmethod
    def set_weights(self, actor_weights, *args):
        """
        Update actor with the given weights.

        Parameters
        ----------
        actor_weights: dict of tensors
            Dict containing actor_critic weights to be set.
        *args
            Additional positional arguments, implementation specific.
        """
        raise NotImplementedError

    @abstractmethod
    def update_algorithm_parameter(self, parameter_name, new_parameter_value, *args):
        """
        If `parameter_name` is an attribute of the algorithm, change its value
        to `new_parameter_value`.

        Parameters
        ----------
        parameter_name : str
            Attribute name
        new_parameter_value : int or float
            New value for `parameter_name`.
        *args
            Additional positional arguments, implementation specific.
        """
        raise NotImplementedError