# Source code for pytorchrl.agent.actors.distributions.deterministic

import torch
import torch.nn as nn

from pytorchrl.agent.actors.utils import init
from pytorchrl.agent.actors.noise import get_noise
from pytorchrl.agent.actors.feature_extractors.ensemble_layer import EnsembleFC


class Deterministic(nn.Module):
    """
    Deterministic prediction of the mean value mu of a learned action distribution.

    A single linear layer followed by ``tanh`` maps the input features to
    ``mu``. Optionally, noise drawn from ``self.noise`` is added to ``mu``.

    Parameters
    ----------
    num_inputs : int
        Size of input feature maps.
    num_outputs : int
        Number of dims in output space.
    noise : str
        Type of noise that is added to the predicted mu. Forwarded to
        ``get_noise`` to select the noise generator.
    """

    def __init__(self, num_inputs: int, num_outputs: int, noise: str) -> None:
        super().__init__()

        def init_(module):
            # Presumably: orthogonal init for the weights, constant 0 for the
            # bias -- the exact contract is defined by the `init` helper.
            return init(module, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))

        self.action_output = init_(nn.Linear(num_inputs, num_outputs))
        # `get_noise(noise)` returns a factory that is instantiated with the
        # action dimensionality; the resulting object must expose `sample()`.
        self.noise = get_noise(noise)(num_outputs)

    def forward(self, x: torch.Tensor, deterministic: bool = True):
        """
        Predict mu from x (obs features) and return the (optionally noisy)
        action together with the same action clipped to [-1, 1].

        Parameters
        ----------
        x : torch.tensor
            Feature maps extracted from environment observations.
        deterministic : bool
            If True (default), no noise is added to the predicted mu.
            If False, a sample from ``self.noise`` is added to mu.

        Returns
        -------
        action : torch.tensor
            ``tanh`` of the linear output, plus noise when
            ``deterministic`` is False. Not clipped.
        clipped_action : torch.tensor
            ``action`` clamped to [-1, 1].
        logp : None
            Always None; returned to match the output of other distributions.
        entropy_dist : None
            Always None; returned to match the output of other distributions.
        dist : torch.tensor
            The same tensor as ``clipped_action``. There is no distribution
            object for a deterministic prediction.
        """
        mu = torch.tanh(self.action_output(x))
        if not deterministic:
            # Move the sampled noise to mu's device before adding it.
            mu = mu + self.noise.sample().to(mu.device)

        # Added noise can push mu outside [-1, 1], so clip explicitly.
        clipped_action = torch.clamp(mu, min=-1, max=1)

        return mu, clipped_action, None, None, clipped_action

    def evaluate_pred(self, x: torch.Tensor, pred):
        """
        Predict mu from x (obs features) and return it clipped to [-1, 1].
        Ignores the pred input parameter.

        Parameters
        ----------
        x : torch.tensor
            Feature maps extracted from environment observations.
        pred : torch.tensor
            Unused. Accepted only so the signature matches other
            distributions.

        Returns
        -------
        logp : None
            Always None; returned to match the output of other distributions.
        entropy_dist : None
            Always None; returned to match the output of other distributions.
        dist : torch.tensor
            Noise-free predicted mu, clamped to [-1, 1].
        """
        mu = torch.tanh(self.action_output(x))
        # tanh already bounds mu; the clamp mirrors the clipping in forward.
        mu = torch.clamp(mu, min=-1, max=1)

        return None, None, mu

class DeterministicMB(nn.Module):
    """
    Deterministic linear output layer.

    Applies a single ``nn.Linear`` projection to the input features and
    returns the result unchanged (no activation, no noise, no clipping).

    Parameters
    ----------
    num_inputs : int
        Size of input feature maps.
    num_outputs : int
        Output size of the linear layer.
    """

    def __init__(self, num_inputs: int, num_outputs: int) -> None:
        super().__init__()

        self.num_outputs = num_outputs
        self.output = nn.Linear(in_features=num_inputs, out_features=num_outputs)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Project the input features to the output space.

        Parameters
        ----------
        x : torch.Tensor
            Input features whose last dimension is ``num_inputs``.

        Returns
        -------
        mean : torch.Tensor
            Linear projection of ``x`` with last dimension ``num_outputs``.
        """
        return self.output(x)