The Browser Automation toolkit enables AI agents to interact with websites autonomously, performing complex web-based tasks like research, form filling, e-commerce transactions, and data extraction without manual intervention.
import asyncio

from browser_agent import BrowserTool

# Initialize the tool
browser_tool = BrowserTool()


# Run a simple task
async def browse():
    """Run a single browsing task asynchronously and print the tool's result."""
    result = await browser_tool._arun(
        task="Go to Wikipedia and summarize the article about Python programming"
    )
    print(result)


asyncio.run(browse())
# Use the synchronous wrapper
result = browser_tool._run(
    task="Search Google for 'machine learning tutorials' and return the top 3 results"
)
print(result)
from typing import Literal

from browser_use import Agent, Browser, BrowserConfig
from langchain_anthropic import ChatAnthropic
from langchain_core.tools import BaseTool
from pydantic import Field


class BrowserTool(BaseTool):
    """Tool for autonomous web browsing and research."""

    # Fixed identifier under which the tool is registered.
    name: Literal["browser_agent"] = "browser_agent"

    # Usage guidance for the tool, including example tasks.
    description: str = """Use this tool for web-based tasks requiring browser interaction.
    Input should be a clear description of what you want to accomplish online.

    Examples:
    - "Order a large pepperoni pizza from Domino's"
    - "Browse Amazon and add a Nintendo Switch to cart"
    - "Research World War II and summarize key points"
    - "Compare flight prices from NYC to London"
    """

    # Chat model used by the tool; a factory is used so every tool instance
    # gets its own client object.
    llm: ChatAnthropic = Field(
        default_factory=lambda: ChatAnthropic(model="claude-3-5-sonnet-latest")
    )

    # Browser the tool drives. The default points at the standard macOS
    # install location of Google Chrome; pass a custom Browser on other
    # platforms.
    browser: Browser = Field(
        default_factory=lambda: Browser(
            config=BrowserConfig(
                chrome_instance_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
            )
        )
    )

    # NOTE(review): the `_run` / `_arun` methods are not part of this snippet;
    # presumably they are defined in the full module - confirm.
from langchain_anthropic import ChatAnthropic
from browser_agent import BrowserTool

# Use a different Claude model
custom_llm = ChatAnthropic(
    model="claude-3-opus-20240229",
    temperature=0.7,
    max_tokens=4096,
)
browser_tool = BrowserTool(llm=custom_llm)
# Order food delivery
result = browser_tool._run(
    task="Order a large pepperoni pizza from Domino's for delivery to my address"
)

# Shopping cart management
result = browser_tool._run(
    task="Browse Amazon and add a Nintendo Switch, two controllers, and Zelda game to cart"
)

# Price comparison
result = browser_tool._run(
    task="Compare prices for iPhone 15 Pro on Amazon, Best Buy, and Apple.com"
)
The tool assumes that billing and shipping information is already saved on the target websites. It will not ask for payment details.
# Academic research
result = browser_tool._run(
    task="Research the key events of World War II and create a timeline with dates"
)

# Technical documentation
result = browser_tool._run(
    task="Find the official Python documentation for asyncio and summarize the main concepts"
)

# Market research
result = browser_tool._run(
    task="Research the top 5 CRM software solutions and compare their pricing and features"
)
# Newsletter signup
result = browser_tool._run(
    task="Sign up for the TechCrunch newsletter using my email"
)

# Gym membership
result = browser_tool._run(
    task="Navigate to Planet Fitness website and start the membership signup process"
)

# Service scheduling
result = browser_tool._run(
    task="Schedule a grocery delivery from Whole Foods for tomorrow between 3-5 PM"
)
# Flight search
result = browser_tool._run(
    task="Search for round-trip flights from NYC to London departing next month, return cheapest options"
)

# Hotel booking research
result = browser_tool._run(
    task="Find hotels in San Francisco near Moscone Center under $200/night with good reviews"
)
from typing import List

from langchain_core.tools import BaseTool

from .browser_tool import BrowserTool


class BrowserToolkit:
    """Toolkit for browser automation capabilities.

    Args:
        llm: Optional chat model passed to the tools this toolkit creates.
            When omitted, the tools are constructed without an ``llm``
            argument.
    """

    def __init__(self, llm=None):
        self.llm = llm

    def get_tools(self) -> List[BaseTool]:
        """Get the list of tools in the toolkit."""
        # Forward `llm` only when one was supplied: an explicit `llm=None`
        # would replace whatever default the tool defines for that field.
        tool_kwargs = {} if self.llm is None else {"llm": self.llm}
        return [BrowserTool(**tool_kwargs)]

    @classmethod
    def from_llm(cls, llm=None) -> "BrowserToolkit":
        """Create a BrowserToolkit from an LLM."""
        return cls(llm=llm)
import asyncio

from browser_agent import BrowserTool


async def complex_workflow():
    """Run a three-step restaurant workflow and collect each step's output.

    Returns:
        A dict with the keys ``"research"``, ``"prices"`` and
        ``"availability"``, holding the result of the matching step.
    """
    browser_tool = BrowserTool()

    # Step 1: Research
    research = await browser_tool._arun(
        task="Find the top-rated Italian restaurants in San Francisco on Yelp"
    )

    # Each call receives only its task string, so the earlier findings are
    # passed along explicitly.
    # NOTE(review): assumes the tool keeps no memory between calls - confirm.

    # Step 2: Price comparison
    prices = await browser_tool._arun(
        task=(
            "Compare menu prices for the top 3 restaurants from the previous search. "
            f"Previous search results:\n{research}"
        )
    )

    # Step 3: Reservation (example - won't actually book)
    info = await browser_tool._arun(
        task=(
            "Get reservation availability for tonight at the highest-rated restaurant. "
            f"Previous search results:\n{research}"
        )
    )

    return {
        "research": research,
        "prices": prices,
        "availability": info,
    }


result = asyncio.run(complex_workflow())
from browser_use import Browser, BrowserConfig
from browser_agent import BrowserTool

# Configure browser to save screenshots
config = BrowserConfig(
    chrome_instance_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    save_screenshots=True,
    screenshot_dir='./browser_screenshots',
)
browser = Browser(config=config)
browser_tool = BrowserTool(browser=browser)

# Screenshots will be automatically saved during execution
result = browser_tool._run(
    task="Navigate to the OpenAI website and capture the homepage"
)
# Specific and actionable
"Order a large pepperoni pizza from Domino's for delivery"
"Find the documentation for React hooks and summarize useState"
"Search Amazon for wireless headphones under $100 and sort by rating"
"Navigate to GitHub, search for 'langchain', and get the star count"
# Too vague
"Buy something"  # What? Where?
"Research AI"  # Too broad, no specific goal
"Go to Google"  # No action specified
"Find prices"  # For what product? Which sites?
import os

# Never hardcode sensitive information

# BAD
result = browser_tool._run(
    task="Login to example.com with username: admin, password: secret123"
)

# GOOD - Use environment variables or secure credential storage
username = os.getenv('APP_USERNAME')
password = os.getenv('APP_PASSWORD')

# NOTE(review): `username` and `password` are read above but never handed to
# the tool. Presumably they are meant to reach the site through a secure
# mechanism rather than being written into the task text - confirm.
result = browser_tool._run(
    task="Login to example.com with saved credentials"
)
For production use, integrate with secure credential management systems like AWS Secrets Manager or HashiCorp Vault.
from browser_use import Browser, BrowserConfig
from browser_agent import BrowserTool

config = BrowserConfig(
    chrome_instance_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    headless=False,  # See the browser in action
    save_screenshots=True,
    screenshot_dir='./debug_screenshots',
)
browser = Browser(config=config)
browser_tool = BrowserTool(browser=browser)

# Run task with visible browser
result = browser_tool._run(
    task="Navigate to example.com and click the login button"
)