Source code for pytorchrl.envs.mujoco.mujoco_env_factory

import gym
from pytorchrl.envs.common import FrameStack, FrameSkip, DelayedReward, CartPoleActionWrapper


[docs]def mujoco_train_env_factory(env_id, index_col_worker, index_grad_worker, index_env=0, seed=0, frame_skip=0, frame_stack=1, reward_delay=1): """ Create train MuJoCo environment. Parameters ---------- env_id : str Environment name. index_col_worker : int Index of the collection worker running this environment. index_grad_worker : int Index of the gradient worker running the collection worker running this environment. index_env : int Index of this environment withing the vector of environments. seed : int Environment random seed. frame_skip : int Return only every `frame_skip`-th observation. frame_stack : int Observations composed of last `frame_stack` frames stacked. reward_delay : int Only return accumulated reward every `reward_delay` steps to simulate sparse reward environment. Returns ------- env : gym.Env Train environment. """ env = gym.make(env_id) env.seed(index_grad_worker * 1000 + 100 * index_col_worker + index_env + seed) if frame_skip > 0: env = FrameSkip(env, skip=frame_skip) if frame_stack > 1: env = FrameStack(env, k=frame_stack) if reward_delay > 1: env = DelayedReward(env, delay=reward_delay) if env_id == "CartPole-v0" or env_id == "CartPole-v1": env = CartPoleActionWrapper(env) return env
[docs]def mujoco_test_env_factory(env_id, index_col_worker, index_grad_worker, index_env=0, seed=0, frame_skip=0, frame_stack=1, reward_delay=1): """ Create test MuJoCo environment. Parameters ---------- env_id : str Environment name. index_col_worker : int Index of the collection worker running this environment. index_grad_worker : int Index of the gradient worker running the collection worker running this environment. index_env : int Index of this environment withing the vector of environments. seed : int Environment random seed. frame_skip : int Return only every `frame_skip`-th observation. frame_stack : int Observations composed of last `frame_stack` frames stacked. reward_delay : int Only return accumulated reward every `reward_delay` steps to simulate sparse reward environment. Returns ------- env : gym.Env Test environment. """ env = gym.make(env_id) env.seed(index_grad_worker * 1000 + 100 * index_col_worker + index_env + seed) if frame_skip > 0: env = FrameSkip(env, skip=frame_skip) if frame_stack > 1: env = FrameStack(env, k=frame_stack) if reward_delay > 1: env = DelayedReward(env, delay=reward_delay) if env_id == "CartPole-v0" or env_id == "CartPole-v1": env = CartPoleActionWrapper(env) return env