PufferLib supports creating custom environments that leverage its high-performance vectorization and in-place array updates. This guide shows you how to create PufferEnv environments from scratch and handle structured observation spaces.
Creating a PufferEnv
The PufferEnv base class provides an efficient interface for multi-agent and vectorized environments. Here’s how to create a custom environment:
import gymnasium
import numpy as np

import pufferlib
class SamplePufferEnv(pufferlib.PufferEnv):
    """Minimal two-agent PufferEnv demonstrating the native interface.

    Observations, rewards, terminals, and truncations live in
    preallocated arrays that are updated in-place each step.
    """

    def __init__(self, buf=None, seed=0):
        # Per-agent spaces; PufferLib derives the vectorized versions.
        self.single_observation_space = gymnasium.spaces.Box(
            low=-1, high=1, shape=(1,))
        self.single_action_space = gymnasium.spaces.Discrete(2)
        self.num_agents = 2

        # Parent constructor allocates the shared buffers.
        super().__init__(buf)

    def reset(self, seed=0):
        # Write fresh observations into the preallocated buffer.
        self.observations[:] = self.observation_space.sample()
        return self.observations, []

    def step(self, action):
        self.observations[:] = self.observation_space.sample()
        # infos is a list of dictionaries, one per reporting agent.
        infos = [{'infos': 'is a list of dictionaries'}]
        return (self.observations, self.rewards, self.terminals,
            self.truncations, infos)
Key principles
Define single-agent spaces
Set single_observation_space and single_action_space for individual agents. PufferLib automatically creates vectorized versions:
self.single_observation_space = gymnasium.spaces.Box(low=-1, high=1, shape=(1,))
self.single_action_space = gymnasium.spaces.Discrete(2)
self.num_agents = 2
Initialize with buffer
Call super().__init__(buf) to initialize preallocated arrays for observations, rewards, terminals, and truncations.
Use in-place updates
Update the preallocated arrays in-place for maximum performance: self.observations[:] = new_values
# Not: self.observations = new_values
Using your PufferEnv
Once created, you can use your custom environment like any other PufferLib environment:
# Drive the environment through one reset/step cycle.
puffer_env = SamplePufferEnv()
observations, infos = puffer_env.reset()

actions = puffer_env.action_space.sample()
observations, rewards, terminals, truncations, infos = puffer_env.step(actions)

print('Puffer envs use a vector interface and in-place array updates')
print('Observation:', observations)
print('Reward:', rewards)
print('Terminal:', terminals)
print('Truncation:', truncations)
The environment automatically handles vectorization across all agents, providing a clean interface for batch processing.
Vectorizing custom environments
PufferLib’s vectorization system works seamlessly with custom environments:
Serial backend
For debugging and small-scale testing:
import pufferlib.vector

# Serial backend: every env runs in the main process. Slow, but ideal
# for debugging because errors surface directly.
serial_vecenv = pufferlib.vector.make(
    SamplePufferEnv,
    num_envs=2,
    backend=pufferlib.vector.Serial,
)
observations, infos = serial_vecenv.reset()
actions = serial_vecenv.action_space.sample()

# step() returns (observations, rewards, terminals, truncations, infos)
o, r, d, t, i = serial_vecenv.step(actions)
print('Serial VecEnv:')
print('Observations:', o)
print('Rewards:', r)
print('Terminals:', d)    # d is the third return value (terminals)
print('Truncations:', t)  # t is the fourth return value (truncations)
Multiprocessing backend
For production training with parallel execution:
# Multiprocessing backend: envs execute in background worker processes.
vecenv = pufferlib.vector.make(
    SamplePufferEnv,
    num_envs=2,
    num_workers=2,
    batch_size=1,
    backend=pufferlib.vector.Multiprocessing,
)

# Asynchronous API: dispatch the reset, then receive the first batch.
vecenv.async_reset()
o, r, d, t, i, env_ids, masks = vecenv.recv()

actions = vecenv.action_space.sample()
print('Actions:', actions)
vecenv.send(actions)

# Receive results from background workers
o, r, d, t, i, env_ids, masks = vecenv.recv()
print('Observations:', o)

vecenv.close()
Passing arguments to environments
You can customize environment initialization by passing arguments:
Environment with arguments
Same args for all environments
Different args per environment
class SamplePufferEnv(pufferlib.PufferEnv):
    """Two-agent sample env that accepts custom constructor arguments."""

    def __init__(self, foo=0, bar=1, buf=None, seed=0):
        # Spaces must be declared before the parent constructor runs.
        self.single_observation_space = gymnasium.spaces.Box(
            low=-1, high=1, shape=(1,))
        self.single_action_space = gymnasium.spaces.Discrete(2)
        self.num_agents = 2
        super().__init__(buf)

        # Custom arguments
        self.foo = foo
        self.bar = bar

    def reset(self, seed=0):
        self.observations[:] = self.observation_space.sample()
        return self.observations, []

    def step(self, action):
        self.observations[:] = self.observation_space.sample()
        infos = [{'infos': 'is a list of dictionaries'}]
        return (self.observations, self.rewards, self.terminals,
            self.truncations, infos)
Structured observation spaces
PufferLib can handle complex observation spaces including Dict, Tuple, and MultiDiscrete spaces. The library automatically flattens these for efficient processing.
import gymnasium
import pufferlib.emulation
class SampleGymnasiumEnv(gymnasium.Env):
    """Gymnasium env with a structured Dict observation space.

    PufferLib's emulation layer flattens the Dict observations and
    MultiDiscrete actions automatically.
    """

    def __init__(self):
        self.observation_space = gymnasium.spaces.Dict({
            'foo': gymnasium.spaces.Box(low=-1, high=1, shape=(2,)),
            'bar': gymnasium.spaces.Box(low=2, high=3, shape=(3,)),
        })
        self.action_space = gymnasium.spaces.MultiDiscrete([2, 5])

    def reset(self, seed=None, options=None):
        # Accept seed/options per the Gymnasium API; returns (obs, info).
        return self.observation_space.sample(), {}

    def step(self, action):
        # Returns (obs, reward, terminated, truncated, info).
        return self.observation_space.sample(), 0.0, False, False, {}
# Wrap the Gymnasium env so PufferLib can flatten its structured spaces.
gymnasium_env = SampleGymnasiumEnv()
puffer_env = pufferlib.emulation.GymnasiumPufferEnv(gymnasium_env)

flat_observation, info = puffer_env.reset()
flat_action = puffer_env.action_space.sample()
print(f'PufferLib flattens observations and actions:\n{flat_observation}\n{flat_action}')
Unflattening structured observations
You can reconstruct the original structure in your neural network:
NumPy
PyTorch (recommended)
# NumPy: reinterpret the flat buffer with the env's structured dtype.
observation = flat_observation.view(puffer_env.obs_dtype)
print(f'Unflattened with numpy:\n{observation}')

# PyTorch (recommended): unflatten inside the model's forward pass.
import torch
import pufferlib.pytorch

flat_torch_observation = torch.from_numpy(flat_observation)
torch_dtype = pufferlib.pytorch.nativize_dtype(puffer_env.emulated)
torch_observation = pufferlib.pytorch.nativize_tensor(
    flat_torch_observation, torch_dtype)
print(f'Unflattened with torch:\n{torch_observation}')
For neural network integration, we recommend unflattening observations with PyTorch in your model’s forward pass. This provides better performance and easier gradient computation.
Multi-agent environments
PufferLib’s PufferEnv naturally supports multi-agent environments:
class MultiAgentPufferEnv(pufferlib.PufferEnv):
    """PufferEnv stepping a configurable number of agents in lockstep.

    All per-agent arrays (observations, rewards, terminals, truncations)
    are preallocated by the parent class and updated in-place.
    """

    def __init__(self, num_agents=4, buf=None, seed=0):
        self.single_observation_space = gymnasium.spaces.Box(
            low=-1, high=1, shape=(10,))
        self.single_action_space = gymnasium.spaces.Discrete(5)
        self.num_agents = num_agents
        super().__init__(buf)

    def reset(self, seed=0):
        # Reset all agents at once via the vectorized observation buffer.
        self.observations[:] = self.observation_space.sample()
        return self.observations, []

    def step(self, actions):
        # Step all agents simultaneously.
        self.observations[:] = self.observation_space.sample()

        # Placeholder per-agent rewards (numpy must be imported as np).
        self.rewards[:] = np.random.randn(self.num_agents)

        # No episode boundaries in this sample.
        self.terminals[:] = False
        self.truncations[:] = False

        # One info dict per agent.
        infos = [{'agent_id': i} for i in range(self.num_agents)]
        return (self.observations, self.rewards, self.terminals,
            self.truncations, infos)
The vectorized interface handles all agents uniformly, making it easy to scale to hundreds or thousands of agents.
Validation
PufferLib enforces certain constraints on vectorized environments:
Valid configuration
Invalid configuration
# A valid configuration for the multiprocessing backend.
vecenv = pufferlib.vector.make(
    SamplePufferEnv,
    num_envs=4,     # must be divisible by num_workers
    num_workers=2,  # must divide batch_size
    batch_size=4,   # should be divisible by both
    backend=pufferlib.vector.Multiprocessing,
)
Best practices
Use in-place updates : Always update preallocated arrays in-place with array[:] = values instead of array = values
Define single-agent spaces : Set single_observation_space and single_action_space for individual agents, not the full batch
Initialize properly : Always call super().__init__(buf) after setting space attributes
Return correct types : reset() should return observations and a list of infos; step() should return observations, rewards, terminals, truncations, and a list of infos
Close resources : If your environment allocates resources, implement a close() method and call vecenv.close() when done
class ResourcefulPufferEnv(pufferlib.PufferEnv):
    """Sample env that owns an external resource and releases it in close()."""

    def __init__(self, buf=None, seed=0):
        self.single_observation_space = gymnasium.spaces.Box(
            low=-1, high=1, shape=(1,))
        self.single_action_space = gymnasium.spaces.Discrete(2)
        self.num_agents = 2
        super().__init__(buf)

        # Allocate resources
        self.resource = self.allocate_resource()

    def close(self):
        # Clean up resources; guard in case __init__ failed before allocation.
        if hasattr(self, 'resource'):
            self.resource.cleanup()

    def reset(self, seed=0):
        self.observations[:] = self.observation_space.sample()
        return self.observations, []

    def step(self, action):
        self.observations[:] = self.observation_space.sample()
        infos = [{}]
        return (self.observations, self.rewards, self.terminals,
            self.truncations, infos)