from pettingzoo.butterfly import cooperative_pong_v5

import pufferlib.emulation

# Create a PettingZoo parallel environment
env = cooperative_pong_v5.parallel_env()

# Wrap it with PufferLib
env = pufferlib.emulation.PettingZooPufferEnv(env=env)

# Use the vectorized interface
obs, info = env.reset()
actions = env.action_space.sample()
obs, rewards, terminals, truncations, info = env.step(actions)
The wrapper automatically handles multiple agents:
print(obs.shape)       # (num_agents, obs_size)
print(rewards.shape)   # (num_agents,)
print(env.num_agents)  # Number of agents in environment
PettingZoo environments can have variable numbers of active agents. PufferLib handles this with agent masks:
from pettingzoo.butterfly import pistonball_v6

import pufferlib.emulation

env = pistonball_v6.parallel_env()
env = pufferlib.emulation.PettingZooPufferEnv(env=env)
obs, info = env.reset()

# Check which agents are active
print(env.masks)  # Boolean array: [True, True, True, ...]

# Some agents may finish early
actions = env.action_space.sample()
obs, rewards, terminals, truncations, info = env.step(actions)

# Masks indicate which agents are still active
active_agents = env.masks.sum()
print(f"{active_agents} agents remaining")
Inactive agents have their observations zeroed, terminals set to True, and masks set to False.
Butterfly environments are cooperative PettingZoo games. From pufferlib/environments/butterfly/environment.py:
from pettingzoo.utils.conversions import aec_to_parallel_wrapper

import pufferlib.emulation


def make(name='cooperative_pong_v5', buf=None):
    """Build a PufferLib-wrapped Butterfly environment by name.

    Args:
        name: Butterfly environment id ('cooperative_pong_v5' or
            'knights_archers_zombies_v10').
        buf: Optional buffer forwarded to PettingZooPufferEnv.

    Raises:
        ValueError: If `name` is not a recognized environment.
    """
    if name == 'cooperative_pong_v5':
        from pettingzoo.butterfly import cooperative_pong_v5 as pong
        env_cls = pong.raw_env
    elif name == 'knights_archers_zombies_v10':
        from pettingzoo.butterfly import knights_archers_zombies_v10 as kaz
        env_cls = kaz.raw_env
    else:
        raise ValueError(f'Unknown environment: {name}')

    # Convert AEC to parallel
    env = env_cls()
    env = aec_to_parallel_wrapper(env)
    return pufferlib.emulation.PettingZooPufferEnv(env=env, buf=buf)
Usage:
from pufferlib.environments import butterfly

env = butterfly.make('cooperative_pong_v5')
obs, info = env.reset()
for _ in range(100):
    # All agents act simultaneously
    actions = env.action_space.sample()
    obs, rewards, terminals, truncations, info = env.step(actions)
    # Reset once every agent has terminated or truncated
    if all(terminals) or all(truncations):
        # Fix: was an f-string with no placeholders (ruff F541)
        print("Episode ended")
        obs, info = env.reset()
from pufferlib.environments import magent

env = magent.make('battle_v4')
print(f"Agents: {env.num_agents}")  # Many agents!
obs, info = env.reset()
for _ in range(1000):
    actions = env.action_space.sample()
    obs, rewards, terminals, truncations, info = env.step(actions)
    # Check how many agents are still active
    active = env.masks.sum()
    print(f"Active agents: {active}/{env.num_agents}")
    if env.done:
        obs, info = env.reset()
from pettingzoo.butterfly import cooperative_pong_v5

import pufferlib
import pufferlib.emulation

env = cooperative_pong_v5.parallel_env()
env = pufferlib.MultiagentEpisodeStats(env)
env = pufferlib.emulation.PettingZooPufferEnv(env=env)
obs, info = env.reset()
for _ in range(100):
    actions = env.action_space.sample()
    obs, rewards, terminals, truncations, info = env.step(actions)
    # Info contains per-agent statistics
    if any(terminals):
        for agent, agent_info in info.items():
            if 'episode_return' in agent_info:
                print(f"{agent}: {agent_info['episode_return']}")
from pettingzoo.butterfly import cooperative_pong_v5

import pufferlib
import pufferlib.emulation

env = cooperative_pong_v5.parallel_env()
env = pufferlib.MultiagentEpisodeStats(env)
env = pufferlib.MeanOverAgents(env)  # Average stats
env = pufferlib.emulation.PettingZooPufferEnv(env=env)
obs, info = env.reset()
for _ in range(100):
    actions = env.action_space.sample()
    obs, rewards, terminals, truncations, info = env.step(actions)
    # Info now contains mean values
    if any(terminals):
        print(f"Mean return: {info.get('episode_return', 0)}")