# Source code for pytorchrl.agent.storages.on_policy.gae_buffer

import torch
import pytorchrl as prl
from pytorchrl.agent.storages.on_policy.vanilla_on_policy_buffer import VanillaOnPolicyBuffer as B


class GAEBuffer(B):
    """
    Storage class for On-Policy algorithms with Generalized Advantage
    Estimator (GAE). https://arxiv.org/abs/1506.02438

    Parameters
    ----------
    size : int
        Storage capacity along time axis.
    device : torch.device
        CPU or specific GPU where data tensors will be placed and class
        computations will take place. Should be the same device where the
        actor model is located.
    actor : Actor
        Actor class instance.
    algorithm : Algorithm
        Algorithm class instance.
    envs : VecEnv
        Vector of environments instance.
    gae_lambda : float
        GAE lambda parameter. Defaults to 0.95.
    """

    # Data fields to store in buffer and contained in generated batches
    storage_tensors = prl.OnPolicyDataKeys

    def __init__(self, size, device, actor, algorithm, envs, gae_lambda=0.95):

        super(GAEBuffer, self).__init__(
            size=size,
            envs=envs,
            device=device,
            actor=actor,
            algorithm=algorithm)

        # Only attribute added on top of the parent buffer; read back in
        # compute_returns.
        self.gae_lambda = gae_lambda

    @classmethod
    def create_factory(cls, size, gae_lambda=0.95):
        """
        Returns a function that creates GAEBuffer instances.

        Parameters
        ----------
        size : int
            Storage capacity along time axis.
        gae_lambda : float
            GAE lambda parameter.

        Returns
        -------
        create_buffer_instance : func
            Function taking (device, actor, algorithm, envs) that creates a
            new instance of this class (``cls``) with the ``size`` and
            ``gae_lambda`` given here.
        """
        def create_buffer_instance(device, actor, algorithm, envs):
            """Create and return a GAEBuffer (``cls``) instance."""
            return cls(size, device, actor, algorithm, envs, gae_lambda)
        return create_buffer_instance

    @property
    def used_capacity(self):
        """
        Returns the step up to which storage is full with env transitions.

        Evaluates to ``self.step - 1`` when ``self.step`` is non-zero and to
        ``self.max_size`` otherwise.
        """
        # NOTE(review): step == 0 presumably means the write index wrapped
        # around after filling the buffer; ``step`` and ``max_size`` are
        # maintained outside this class - confirm against the parent buffer.
        return self.step - 1 if self.step != 0 else self.max_size

    def compute_returns(self, rewards, returns, values, dones, gamma):
        """
        Compute return values with GAE, writing them into ``returns`` in place.

        Iterating backwards from ``self.used_capacity``, for every step t:

            delta_t   = rewards[t] + gamma * values[t+1] * (1 - dones[t+1])
                        - values[t]
            gae_t     = delta_t + gamma * gae_lambda * (1 - dones[t+1])
                        * gae_{t+1}
            returns[t] = gae_t + values[t]

        Parameters
        ----------
        rewards : indexable along its first axis by step
            Rewards; read at indices 0 .. used_capacity - 1.
        returns : indexable along its first axis by step
            Output storage; written at indices 0 .. used_capacity.
        values : indexable along its first axis by step
            Value predictions; read at indices 0 .. used_capacity.
        dones : indexable along its first axis by step
            Termination flags; read at indices 1 .. used_capacity.
        gamma : float
            Discount factor.
        """
        # NOTE(review): arguments are presumably time-major torch tensors of
        # matching shape - confirm with the caller.
        gae_lambda, length, gae = self.gae_lambda, self.used_capacity, 0

        # Bootstrap: the return at the last stored index is its value estimate.
        returns[length].copy_(values[length])

        for step in reversed(range(length)):
            # Zeroes every term that would leak across an episode boundary.
            not_done = 1.0 - dones[step + 1]
            delta = rewards[step] + gamma * values[step + 1] * not_done - values[step]
            gae = delta + gamma * gae_lambda * not_done * gae
            returns[step] = gae + values[step]