StatefulToolEnv
Environment for tasks where tools require per-rollout state (e.g., sandbox IDs, database connections, sessions).
Overview
StatefulToolEnv extends ToolEnv to support tools that need persistent state across multiple calls within a rollout:
- Hidden arguments: Some tool arguments can be hidden from the model and injected automatically
- State injection: Use update_tool_args() to inject state into tool calls
- Per-rollout isolation: Each rollout maintains its own state
Inheritance
Environment
└── MultiTurnEnv
└── ToolEnv
└── StatefulToolEnv
Constructor
StatefulToolEnv(
tools: list[Callable] | None = None,
max_turns: int = 10,
error_formatter: Callable[[Exception], str] = lambda e: f"{e}",
stop_errors: list[type[Exception]] | None = None,
**kwargs
)
Same parameters as ToolEnv.
Do NOT pass tools in the constructor. Use add_tool() instead to specify hidden arguments.
Core Methods
def update_tool_args(
tool_name: str,
tool_args: dict,
messages: vf.Messages,
state: vf.State,
**kwargs
) -> dict
Abstract method - Must be implemented by subclasses.
Update tool arguments and/or state before calling the tool. Use this to inject hidden arguments.
tool_name: Name of the tool being called.
tool_args: Arguments from the model’s tool call.
state: Current rollout state (can be modified in-place).
Returns: dict - Updated tool arguments (including hidden args).
def add_tool(tool: Callable, args_to_skip: list[str] = [])
Add a tool with optionally hidden arguments.
tool: Python function to add as a tool.
args_to_skip: List of argument names to hide from the model’s view. These arguments will be removed from the tool schema but can be injected via update_tool_args().
def remove_tool(tool: Callable)
Remove a tool from the environment.
env_response
async def env_response(
messages: vf.Messages,
state: vf.State,
**kwargs
) -> vf.Messages
Process tool calls, calling update_tool_args() before each tool execution. Implemented by StatefulToolEnv - do not override.
Example Usage
Sandbox Environment
import verifiers as vf
def execute_code(code: str, sandbox_id: str) -> str:
    """Execute code in a sandbox.

    Args:
        code: Python code to execute
        sandbox_id: Sandbox identifier (hidden from model)
    """
    # Hand the snippet to the sandbox selected by sandbox_id and pass its
    # result straight back to the caller.
    output = run_in_sandbox(sandbox_id, code)
    return output
def read_file(path: str, sandbox_id: str) -> str:
    """Read a file from the sandbox.

    Args:
        path: File path to read
        sandbox_id: Sandbox identifier (hidden from model)
    """
    # The read is always scoped to the sandbox identified by sandbox_id.
    contents = read_from_sandbox(sandbox_id, path)
    return contents
class SandboxEnv(vf.StatefulToolEnv):
    """Tool environment in which every rollout works inside its own sandbox.

    setup_state() stores a sandbox ID in the rollout state,
    update_tool_args() injects it into each tool call as the hidden
    ``sandbox_id`` argument, and cleanup_sandbox() destroys the sandbox.
    """

    def __init__(self, **kwargs):
        # Default to 10 turns but let callers override it. Hard-coding
        # max_turns=10 next to **kwargs raises "got multiple values for
        # keyword argument 'max_turns'" as soon as a caller passes max_turns.
        kwargs.setdefault("max_turns", 10)
        super().__init__(**kwargs)
        # Add tools with sandbox_id hidden
        self.add_tool(execute_code, args_to_skip=["sandbox_id"])
        self.add_tool(read_file, args_to_skip=["sandbox_id"])

    async def setup_state(self, state: vf.State) -> vf.State:
        """Create a sandbox for this rollout."""
        state["sandbox_id"] = create_sandbox()
        return state

    def update_tool_args(
        self,
        tool_name: str,
        tool_args: dict,
        messages: vf.Messages,
        state: vf.State,
        **kwargs
    ) -> dict:
        """Inject sandbox_id into tool calls."""
        # Add the hidden sandbox_id argument
        tool_args["sandbox_id"] = state["sandbox_id"]
        return tool_args

    @vf.cleanup
    async def cleanup_sandbox(self, state: vf.State):
        """Clean up sandbox after rollout."""
        # The key is only present if setup_state() created a sandbox.
        if "sandbox_id" in state:
            destroy_sandbox(state["sandbox_id"])
def load_environment():
    """Build a SandboxEnv with a one-question dataset and a simple rubric."""
    questions = [
        {"question": "Write a Python function to compute fibonacci numbers"},
    ]
    dataset = vf.Environment.make_dataset(questions)

    def code_works(completion: vf.Messages) -> float:
        # Check if code executed successfully: score 0.0 as soon as the
        # stringified completion mentions "error" (case-insensitive).
        transcript = str(completion).lower()
        if "error" in transcript:
            return 0.0
        return 1.0

    return SandboxEnv(
        dataset=dataset,
        rubric=vf.Rubric(code_works),
        system_prompt="Write and test code using the available tools.",
    )
Database Session
import verifiers as vf
import sqlite3
def query(sql: str, conn: sqlite3.Connection) -> list[dict]:
    """Execute a SQL query.

    Args:
        sql: SQL query to execute
        conn: Database connection (hidden from model)

    Returns:
        One dict per result row, keyed by column name. Statements that
        produce no result set (e.g. INSERT, UPDATE, CREATE) return [].
    """
    cursor = conn.execute(sql)
    # cursor.description is None when the statement returns no rows at all;
    # iterating it would raise TypeError, so report "no rows" instead.
    if cursor.description is None:
        return []
    columns = [desc[0] for desc in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]
def _quote_identifier(name: str) -> str:
    """Quote *name* as a SQLite identifier, doubling embedded double quotes."""
    return '"' + str(name).replace('"', '""') + '"'


def insert(table: str, data: dict, conn: sqlite3.Connection) -> str:
    """Insert data into a table.

    Args:
        table: Table name
        data: Dictionary of column:value pairs
        conn: Database connection (hidden from model)

    Returns:
        "Inserted successfully" once the row has been committed.

    Raises:
        sqlite3.Error: If the table or a column does not exist, or the
            insert otherwise fails.
    """
    # table and the keys of data come from the model, so they are quoted as
    # identifiers rather than pasted into the statement; values are always
    # bound through "?" placeholders.
    quoted_table = _quote_identifier(table)
    if data:
        columns = ", ".join(_quote_identifier(name) for name in data)
        placeholders = ", ".join("?" for _ in data)
        sql = f"INSERT INTO {quoted_table} ({columns}) VALUES ({placeholders})"
    else:
        # "INSERT INTO t () VALUES ()" is a syntax error in SQLite.
        sql = f"INSERT INTO {quoted_table} DEFAULT VALUES"
    conn.execute(sql, list(data.values()))
    conn.commit()
    return "Inserted successfully"
class DatabaseEnv(vf.StatefulToolEnv):
    """Tool environment in which every rollout gets its own in-memory database.

    setup_state() opens the connection and creates the ``users`` table,
    update_tool_args() injects the connection as the hidden ``conn``
    argument, and cleanup_db() closes it.
    """

    def __init__(self, **kwargs):
        # Default to 10 turns but let callers override it. Hard-coding
        # max_turns=10 next to **kwargs raises "got multiple values for
        # keyword argument 'max_turns'" as soon as a caller passes max_turns.
        kwargs.setdefault("max_turns", 10)
        super().__init__(**kwargs)
        # Hide database connection from model
        self.add_tool(query, args_to_skip=["conn"])
        self.add_tool(insert, args_to_skip=["conn"])

    async def setup_state(self, state: vf.State) -> vf.State:
        """Create database connection for this rollout."""
        state["db_conn"] = sqlite3.connect(":memory:")
        # Setup schema
        state["db_conn"].execute(
            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)"
        )
        return state

    def update_tool_args(
        self,
        tool_name: str,
        tool_args: dict,
        messages: vf.Messages,
        state: vf.State,
        **kwargs
    ) -> dict:
        """Inject database connection."""
        tool_args["conn"] = state["db_conn"]
        return tool_args

    @vf.cleanup
    async def cleanup_db(self, state: vf.State):
        """Close database connection."""
        # The key is only present if setup_state() opened a connection.
        if "db_conn" in state:
            state["db_conn"].close()
def load_environment():
    """Build a DatabaseEnv with a one-question dataset and a constant rubric."""
    questions = [
        {"question": "Add a user named Alice and then query all users"},
    ]
    dataset = vf.Environment.make_dataset(questions)
    # The rubric scores every completion 1.0.
    rubric = vf.Rubric(lambda completion: 1.0)
    return DatabaseEnv(
        dataset=dataset,
        rubric=rubric,
        system_prompt="Use SQL tools to manage the database.",
    )
API Session with Authentication
import verifiers as vf
import httpx
def api_get(endpoint: str, session: httpx.Client) -> dict:
    """Make GET request to API.

    Args:
        endpoint: API endpoint path
        session: Authenticated HTTP session (hidden from model)
    """
    # endpoint is chosen by the model. Gluing it directly after the host lets
    # values such as "@evil.test/x" or ".evil.test/x" change the host and send
    # the session's Authorization header elsewhere, so always separate host
    # and path with exactly one "/".
    url = "https://api.example.com/" + endpoint.lstrip("/")
    response = session.get(url)
    return response.json()
def api_post(endpoint: str, data: dict, session: httpx.Client) -> dict:
    """Make POST request to API.

    Args:
        endpoint: API endpoint path
        data: Request body
        session: Authenticated HTTP session (hidden from model)
    """
    # endpoint is chosen by the model. Gluing it directly after the host lets
    # values such as "@evil.test/x" or ".evil.test/x" change the host and send
    # the session's Authorization header elsewhere, so always separate host
    # and path with exactly one "/".
    url = "https://api.example.com/" + endpoint.lstrip("/")
    response = session.post(url, json=data)
    return response.json()
class APIEnv(vf.StatefulToolEnv):
    """Tool environment in which every rollout gets an authenticated HTTP session.

    setup_state() creates an httpx.Client carrying the bearer token,
    update_tool_args() injects it as the hidden ``session`` argument, and
    cleanup_session() closes it.
    """

    def __init__(self, api_key: str, **kwargs):
        self.api_key = api_key
        # Default to 10 turns but let callers override it. Hard-coding
        # max_turns=10 next to **kwargs raises "got multiple values for
        # keyword argument 'max_turns'" as soon as a caller passes max_turns.
        kwargs.setdefault("max_turns", 10)
        super().__init__(**kwargs)
        self.add_tool(api_get, args_to_skip=["session"])
        self.add_tool(api_post, args_to_skip=["session"])

    async def setup_state(self, state: vf.State) -> vf.State:
        """Create authenticated session."""
        state["http_session"] = httpx.Client(
            headers={"Authorization": f"Bearer {self.api_key}"}
        )
        return state

    def update_tool_args(
        self,
        tool_name: str,
        tool_args: dict,
        messages: vf.Messages,
        state: vf.State,
        **kwargs
    ) -> dict:
        """Inject authenticated session."""
        tool_args["session"] = state["http_session"]
        return tool_args

    @vf.cleanup
    async def cleanup_session(self, state: vf.State):
        """Close HTTP session."""
        # The key is only present if setup_state() created a session.
        if "http_session" in state:
            state["http_session"].close()
def load_environment():
    """Build an APIEnv with a one-question dataset and a constant rubric."""
    questions = [{"question": "Fetch user data and create a new post"}]
    dataset = vf.Environment.make_dataset(questions)
    # The rubric scores every completion 1.0.
    rubric = vf.Rubric(lambda completion: 1.0)
    return APIEnv(
        api_key="your-api-key",
        dataset=dataset,
        rubric=rubric,
    )
Multi-Argument State Injection
import verifiers as vf
def process_data(
    query: str,
    user_id: str,
    session_id: str,
    api_key: str
) -> dict:
    """Process data with multiple hidden arguments.

    Args:
        query: User query
        user_id: User identifier (hidden)
        session_id: Session identifier (hidden)
        api_key: API key (hidden)
    """
    # Only query and user_id appear in the result; session_id and api_key
    # are accepted but unused here.
    summary = "Processed {} for {}".format(query, user_id)
    return {"result": summary}
class MultiStateEnv(vf.StatefulToolEnv):
    """Environment that injects several hidden arguments into process_data.

    user_id and session_id come from the rollout state; api_key comes from
    the environment instance.
    """

    def __init__(self, api_key: str, **kwargs):
        self.api_key = api_key
        super().__init__(**kwargs)
        # Hide multiple arguments
        hidden_args = ["user_id", "session_id", "api_key"]
        self.add_tool(process_data, args_to_skip=hidden_args)

    async def setup_state(self, state: vf.State) -> vf.State:
        """Initialize multiple state values."""
        # Fall back to "default_user" when the input carries no user_id.
        user_id = state["input"].get("user_id", "default_user")
        state["user_id"] = user_id
        state["session_id"] = generate_session_id()
        return state

    def update_tool_args(
        self,
        tool_name: str,
        tool_args: dict,
        messages: vf.Messages,
        state: vf.State,
        **kwargs
    ) -> dict:
        """Inject all hidden arguments."""
        tool_args.update(
            user_id=state["user_id"],
            session_id=state["session_id"],
            api_key=self.api_key,
        )
        return tool_args
Conditional State Injection
import verifiers as vf
def read_file(path: str, sandbox_id: str | None = None) -> str:
    """Read file, optionally from sandbox."""
    # A missing or empty sandbox_id means "read from disk".
    if not sandbox_id:
        return read_from_disk(path)
    return read_from_sandbox(sandbox_id, path)
def write_file(path: str, content: str, sandbox_id: str | None = None) -> str:
    """Write file, optionally to sandbox."""
    # A missing or empty sandbox_id means "write to disk".
    if not sandbox_id:
        return write_to_disk(path, content)
    return write_to_sandbox(sandbox_id, path, content)
class ConditionalEnv(vf.StatefulToolEnv):
    """Environment that injects a sandbox ID only when sandboxing is enabled.

    With use_sandbox=False no sandbox is created and the tools receive no
    sandbox_id, so they use their default of None.
    """

    def __init__(self, use_sandbox: bool = True, **kwargs):
        self.use_sandbox = use_sandbox
        super().__init__(**kwargs)
        self.add_tool(read_file, args_to_skip=["sandbox_id"])
        self.add_tool(write_file, args_to_skip=["sandbox_id"])

    async def setup_state(self, state: vf.State) -> vf.State:
        """Create a sandbox for this rollout when sandboxing is enabled."""
        if self.use_sandbox:
            state["sandbox_id"] = create_sandbox()
        return state

    def update_tool_args(
        self,
        tool_name: str,
        tool_args: dict,
        messages: vf.Messages,
        state: vf.State,
        **kwargs
    ) -> dict:
        """Inject sandbox_id into the tool call when sandboxing is enabled."""
        # Only inject if sandbox is enabled
        if self.use_sandbox:
            tool_args["sandbox_id"] = state["sandbox_id"]
        return tool_args

    @vf.cleanup
    async def cleanup_sandbox(self, state: vf.State):
        """Destroy the sandbox, if one was created for this rollout."""
        # Without this hook every sandboxed rollout would leave its sandbox
        # behind. The key is absent when use_sandbox is False.
        if "sandbox_id" in state:
            destroy_sandbox(state["sandbox_id"])
Schema Filtering
Hidden arguments are removed from the tool schema shown to the model:
def my_tool(visible_arg: str, hidden_arg: int) -> str:
    """Tool with hidden argument."""
    # Join the two arguments with a dash.
    return "{}-{}".format(visible_arg, hidden_arg)
# Register my_tool while hiding hidden_arg from the schema shown to the model.
# NOTE(review): MyEnv is not defined in this snippet; presumably a
# StatefulToolEnv subclass that implements update_tool_args().
env = MyEnv()
env.add_tool(my_tool, args_to_skip=["hidden_arg"])
# Model sees:
# {
#   "name": "my_tool",
#   "parameters": {
#     "properties": {"visible_arg": {"type": "string"}},
#     "required": ["visible_arg"]
#   }
# }
# But actual call includes hidden_arg via update_tool_args()
Common Patterns
Setup and Cleanup
Use setup_state() and @vf.cleanup for resource management:
class ResourceEnv(vf.StatefulToolEnv):
    """Pairs resource creation in setup_state() with a cleanup hook."""

    async def setup_state(self, state: vf.State) -> vf.State:
        # Acquire the resource and keep it in the rollout state.
        resource = create_resource()
        state["resource"] = resource
        return state

    @vf.cleanup
    async def cleanup_resource(self, state: vf.State):
        # Nothing to release if no resource was stored in the state.
        if "resource" not in state:
            return
        destroy_resource(state["resource"])
Inject different state for different tools:
def update_tool_args(self, tool_name, tool_args, messages, state, **kwargs):
    """Inject the state entry that belongs to the tool being called.

    "tool_a" receives state["resource_a"] and "tool_b" receives
    state["resource_b"] as the ``resource`` argument; any other tool's
    arguments are returned unchanged.
    """
    state_key_by_tool = (
        ("tool_a", "resource_a"),
        ("tool_b", "resource_b"),
    )
    for candidate, state_key in state_key_by_tool:
        if tool_name == candidate:
            tool_args["resource"] = state[state_key]
            break
    return tool_args
Dynamic State Updates
Modify state based on tool calls:
def update_tool_args(self, tool_name, tool_args, messages, state, **kwargs):
    """Track tool usage in the state and inject the running call count.

    Records the tool name under state["last_tool"], increments
    state["call_count"] (starting from 0), and passes the new count to the
    tool as the ``context`` argument.
    """
    # Update state based on tool being called
    state["last_tool"] = tool_name
    call_number = state.get("call_count", 0) + 1
    state["call_count"] = call_number
    # Inject state
    tool_args["context"] = call_number
    return tool_args
When to Use
Use StatefulToolEnv for:
- Sandbox environments (code execution, file operations)
- Database transactions
- Authenticated API sessions
- Stateful workflows
- Tools requiring persistent connections
Use ToolEnv for:
- Stateless, idempotent tools
- Read-only operations
- Tools with no shared state
See Also