PufferLib’s GymnasiumPufferEnv wrapper converts any Gymnasium environment into the PufferEnv interface, providing vectorized batching, in-place updates, and automatic space emulation.
Basic usage
Wrap any Gymnasium environment:
import gymnasium
import pufferlib.emulation

# Create a Gymnasium environment
env = gymnasium.make('CartPole-v1')

# Wrap it with PufferLib
env = pufferlib.emulation.GymnasiumPufferEnv(env=env)

# Use the vectorized interface
obs, info = env.reset()
actions = env.action_space.sample()
obs, reward, terminal, truncation, info = env.step(actions)
Even though the original environment has a single agent, the PufferLib wrapper adds a batch dimension:
print(obs.shape)     # (1, obs_size) - batch dimension added
print(reward.shape)  # (1,) - single reward per environment
Constructor parameters
The GymnasiumPufferEnv wrapper accepts these parameters:
GymnasiumPufferEnv(
    env=None,          # Gymnasium environment instance
    env_creator=None,  # Or a callable that creates the environment
    env_args=[],       # Positional args passed to env_creator
    env_kwargs={},     # Keyword args passed to env_creator
    buf=None,          # Pre-allocated buffers (optional)
    seed=0,            # Random seed
)
# NOTE(review): provide either env or env_creator, not both —
# confirm precedence against pufferlib/emulation.py
Using env instance
Using env_creator
With custom buffers
import gymnasium
import pufferlib.emulation

# Pass an already-constructed environment instance
env = gymnasium.make('CartPole-v1')
puffer_env = pufferlib.emulation.GymnasiumPufferEnv(env=env)
import gymnasium
import pufferlib.emulation

def make_env():
    """Factory that builds the environment on demand."""
    return gymnasium.make('CartPole-v1')

# Pass a factory instead of an instance
puffer_env = pufferlib.emulation.GymnasiumPufferEnv(
    env_creator=make_env
)
import gymnasium
import numpy as np
import pufferlib.emulation

# Pre-allocated buffers; leading dimension is the batch size (1 agent here).
# CartPole observations are 4-dimensional float32 vectors.
buf = {
    'observations': np.zeros((1, 4), dtype=np.float32),
    'rewards': np.zeros(1, dtype=np.float32),
    'terminals': np.zeros(1, dtype=bool),
    'truncations': np.zeros(1, dtype=bool),
    'masks': np.ones(1, dtype=bool),
    'actions': np.zeros(1, dtype=np.int32),
}

env = gymnasium.make('CartPole-v1')
puffer_env = pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
Space emulation
GymnasiumPufferEnv automatically “emulates” complex observation and action spaces as flat arrays for neural network compatibility.
Observation space emulation
Dict space
Tuple space
Box space (no emulation)
import gymnasium
import numpy as np
import pufferlib.emulation

# Environment with a Dict observation space
class MyEnv(gymnasium.Env):
    def __init__(self):
        self.observation_space = gymnasium.spaces.Dict({
            'image': gymnasium.spaces.Box(0, 255, (64, 64, 3), dtype=np.uint8),
            'vector': gymnasium.spaces.Box(-1, 1, (10,), dtype=np.float32),
        })
        self.action_space = gymnasium.spaces.Discrete(4)

env = MyEnv()
puffer_env = pufferlib.emulation.GymnasiumPufferEnv(env=env)

# Observation space is automatically flattened
print(puffer_env.observation_space)
# Box(0, 255, (12298,), uint8) - all fields flattened to a single array
# (64*64*3 image elements + 10 vector elements = 12298)
Action space emulation
Action spaces are similarly emulated:
import gymnasium
import pufferlib.emulation

class MyEnv(gymnasium.Env):
    def __init__(self):
        self.observation_space = gymnasium.spaces.Box(0, 1, (4,))
        # Complex action space
        self.action_space = gymnasium.spaces.Tuple((
            gymnasium.spaces.Discrete(4),
            gymnasium.spaces.Discrete(2),
        ))

env = MyEnv()
puffer_env = pufferlib.emulation.GymnasiumPufferEnv(env=env)

# Action space converted to MultiDiscrete
print(puffer_env.action_space)
# MultiDiscrete([4 2])
print(puffer_env.is_atn_emulated)
# True
Actions are automatically unpacked when stepping:
# Sample from the emulated action space
action = puffer_env.action_space.sample()  # e.g. [2, 1]

# The wrapper unpacks it back to the original Tuple format: (2, 1)
obs, reward, terminal, truncation, info = puffer_env.step(action)
Atari example
PufferLib includes optimized Atari wrappers. Here’s the implementation from pufferlib/environments/atari/environment.py:
import gymnasium as gym
import pufferlib
import pufferlib.emulation
from ale_py import AtariEnv

def make(name='breakout', obs_type='grayscale', frameskip=4,
         full_action_space=False, framestack=1, buf=None):
    """Create an Atari environment wrapped in the PufferEnv interface."""
    # Create base Atari environment
    env = AtariEnv(
        name,
        obs_type=obs_type,
        frameskip=frameskip,
        full_action_space=full_action_space,
        render_mode='rgb_array',
    )

    # Fast downscaling wrapper
    env = pufferlib.ResizeObservation(env, downscale=2)

    # Optional frame stacking
    if framestack > 1:
        env = gym.wrappers.FrameStack(env, framestack)

    # Track episode statistics
    env = pufferlib.EpisodeStats(env)

    # Convert to the PufferEnv interface
    env = pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
    return env
Usage:
from pufferlib.environments import atari

# Create Breakout environment with 4-frame stacking
env = atari.make('breakout', framestack=4)

obs, info = env.reset()
for _ in range(100):
    action = env.action_space.sample()
    obs, reward, terminal, truncation, info = env.step(action)
    if terminal or truncation:
        print(f"Episode return: {info['episode_return']}")
        print(f"Episode length: {info['episode_length']}")
        obs, info = env.reset()
Procgen example
Procgen environments use vectorized Gym envs. From pufferlib/environments/procgen/environment.py:
import gym
import gymnasium
import shimmy
import pufferlib
import pufferlib.emulation
import procgen

def make(name='bigfish', num_envs=1, num_levels=0,
         start_level=0, distribution_mode='easy', buf=None):
    """Create a Procgen environment wrapped in the PufferEnv interface."""
    # Procgen provides vectorized envs by default
    envs = procgen.ProcgenEnv(
        env_name=name,
        num_envs=num_envs,
        num_levels=num_levels,
        start_level=start_level,
        distribution_mode=distribution_mode,
    )

    # Extract RGB observations
    envs = gym.wrappers.TransformObservation(envs, lambda obs: obs["rgb"])
    envs.single_action_space = envs.action_space
    envs.single_observation_space = envs.observation_space["rgb"]

    # Apply preprocessing
    envs = gym.wrappers.RecordEpisodeStatistics(envs)
    envs = gym.wrappers.NormalizeReward(envs)

    # Convert the old Gym API to the Gymnasium API
    envs = shimmy.GymV21CompatibilityV0(env=envs)
    envs = pufferlib.EpisodeStats(envs)

    # Convert to PufferEnv
    return pufferlib.emulation.GymnasiumPufferEnv(env=envs, buf=buf)
MiniGrid example
MiniGrid environments have dict observations with mission strings. From pufferlib/environments/minigrid/environment.py:
import gymnasium
import pufferlib
import pufferlib.emulation

class MiniGridWrapper:
    """Remove the 'mission' string from MiniGrid observations."""

    def __init__(self, env):
        self.env = env
        # Filter out the 'mission' key from the observation space
        self.observation_space = gymnasium.spaces.Dict({
            k: v for k, v in env.observation_space.items()
            if k != 'mission'
        })
        self.action_space = env.action_space

    def reset(self, seed=None, options=None):
        # Forward options too (the original accepted it but dropped it)
        obs, info = self.env.reset(seed=seed, options=options)
        del obs['mission']
        return obs, info

    def step(self, action):
        obs, reward, done, truncated, info = self.env.step(action)
        del obs['mission']
        return obs, reward, done, truncated, info

def make(name='MiniGrid-LavaGapS7-v0', buf=None):
    """Create a MiniGrid environment wrapped in the PufferEnv interface."""
    env = gymnasium.make(name, render_mode='rgb_array')
    env = MiniGridWrapper(env)  # Remove mission string
    env = pufferlib.EpisodeStats(env)
    return pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
MuJoCo example
Continuous control environments work seamlessly:
from pufferlib.environments import mujoco

env = mujoco.make('Ant-v4')
obs, info = env.reset()

for _ in range(1000):
    # Sample continuous actions
    action = env.action_space.sample()
    obs, reward, terminal, truncation, info = env.step(action)
    if terminal or truncation:
        obs, info = env.reset()
Converting old Gym environments
For environments using the old Gym API (pre-v26), use GymToGymnasium:
import pufferlib
import pufferlib.emulation
from old_package import OldGymEnv  # Old API: step returns (obs, reward, done, info)

old_env = OldGymEnv()

# Convert to the Gymnasium API
env = pufferlib.GymToGymnasium(old_env)

# Now wrap with PufferLib
env = pufferlib.emulation.GymnasiumPufferEnv(env=env)
API reference
GymnasiumPufferEnv
See pufferlib/emulation.py:141 for the full implementation.
Attributes
single_observation_space - Observation space for one agent
single_action_space - Action space for one agent
observation_space - Batched observation space (adds batch dim)
action_space - Batched action space (adds batch dim)
num_agents - Always 1 for Gymnasium envs
is_obs_emulated - Whether observations are emulated
is_atn_emulated - Whether actions are emulated
observations - NumPy array buffer (shape: (1, *obs_shape))
rewards - NumPy array buffer (shape: (1,))
terminals - NumPy array buffer (shape: (1,))
truncations - NumPy array buffer (shape: (1,))
masks - NumPy array buffer (shape: (1,))
Methods
reset(seed=None) - Reset environment, returns (observations, info)
step(action) - Step environment, returns (observations, reward, terminal, truncation, info)
render() - Render environment
close() - Close environment
Next steps
PettingZoo integration Wrap multi-agent environments
Custom wrappers Create custom environment wrappers