Overview
JARVIS uses Browser Use for autonomous web navigation and data extraction. Browser Use provides both a local SDK and a Cloud API for running browser agents at scale.
Architecture Decision
backend/agents/browser_use_client.py
# RESEARCH: Browser Use Cloud API v2 (docs.cloud.browser-use.com)
# DECISION: Using httpx async client — pure REST, no SDK needed
# ALT: browser-use pip package (not needed for Cloud API)
- Browser Use Cloud API (REST) - For marketplace skills and task execution
- Browser Use SDK (Python) - For custom agents with browser-use Agent class
Cloud API Client
Implementation
backend/agents/browser_use_client.py
import httpx
from loguru import logger
BU_BASE = "https://api.browser-use.com/api/v2"
class BrowserUseClient:
    """Async client for Browser Use Cloud API v2."""

    def __init__(self, settings: Settings):
        """Store the API key from ``settings`` and prebuild the auth header."""
        api_key = settings.browser_use_api_key
        self._key = api_key
        # A missing key is sent as an empty header value rather than omitted.
        self._headers = {"X-Browser-Use-API-Key": api_key or ""}

    def _check(self, resp: httpx.Response) -> dict[str, Any]:
        """Return the decoded JSON body of ``resp``.

        Raises:
            BrowserUseError: when the status code is 400 or above; carries
                the status code and the first 200 characters of the body.
        """
        if resp.status_code < 400:
            return resp.json()
        logger.error("Browser Use API error: {} {}", resp.status_code, resp.text[:200])
        raise BrowserUseError(resp.status_code, resp.text[:200])
Creating Sessions
backend/agents/browser_use_client.py
async def create_session(
    self,
    start_url: str | None = None,
    width: int = 1280,
    height: int = 800,
) -> dict[str, Any]:
    """POST /api/v2/sessions — create a cloud browser session.

    The session is requested with ``keepAlive`` enabled and the given
    screen size; ``startUrl`` is sent only when ``start_url`` is truthy.
    Returns the parsed JSON response after it has passed ``_check``.
    """
    payload: dict[str, Any] = {
        "browserScreenWidth": width,
        "browserScreenHeight": height,
        "keepAlive": True,
    }
    if start_url:
        payload["startUrl"] = start_url

    endpoint = f"{BU_BASE}/sessions"
    async with httpx.AsyncClient(timeout=30) as http:
        response = await http.post(endpoint, json=payload, headers=self._headers)
        session = self._check(response)
        logger.info("Created BU session: {}", session.get("id"))
        return session
Creating Tasks
backend/agents/browser_use_client.py
async def create_task(
    self,
    session_id: str,
    task: str,
    start_url: str | None = None,
    allowed_domains: list[str] | None = None,
) -> dict[str, Any]:
    """POST /api/v2/tasks — create a task attached to a session.

    The request always asks for at most 50 steps with vision enabled.
    ``startUrl`` and ``allowedDomains`` are included only when the
    corresponding argument is truthy. Returns the parsed JSON response
    after it has passed ``_check``.
    """
    payload: dict[str, Any] = {
        "task": task,
        "sessionId": session_id,
        "maxSteps": 50,
        "vision": True,
    }
    # Optional fields, appended in the same order as they are declared.
    optional_fields = {"startUrl": start_url, "allowedDomains": allowed_domains}
    payload.update({key: value for key, value in optional_fields.items() if value})

    endpoint = f"{BU_BASE}/tasks"
    async with httpx.AsyncClient(timeout=30) as http:
        response = await http.post(endpoint, json=payload, headers=self._headers)
        created = self._check(response)
        logger.info("Created BU task: {} on session {}", created.get("id"), session_id)
        return created
Getting Task Status
backend/agents/browser_use_client.py
async def get_task(self, task_id: str) -> dict[str, Any]:
    """GET /api/v2/tasks/{id} — get task with steps and status.

    Returns the parsed JSON response; ``_check`` raises on HTTP errors.
    """
    endpoint = f"{BU_BASE}/tasks/{task_id}"
    async with httpx.AsyncClient(timeout=30) as http:
        response = await http.get(endpoint, headers=self._headers)
        return self._check(response)
Making Sessions Public
backend/agents/browser_use_client.py
async def make_session_public(self, session_id: str) -> dict[str, Any]:
    """POST /api/v2/sessions/{id}/public-share — get public replay URL.

    Returns the parsed JSON response; its ``shareUrl`` field (if any) is
    written to the log.
    """
    endpoint = f"{BU_BASE}/sessions/{session_id}/public-share"
    async with httpx.AsyncClient(timeout=30) as http:
        response = await http.post(endpoint, headers=self._headers)
        share = self._check(response)
        logger.info("Public share created for session {}: {}", session_id, share.get("shareUrl"))
        return share
Cloud Skills Runner
The CloudSkillRunner wraps the Cloud API to run marketplace skills:
backend/agents/cloud_skills.py
# Browser Use Cloud SDK skill runner + autonomous task executor
from browser_use_sdk import AsyncBrowserUse
class CloudSkillRunner:
    """Runs Browser Use Cloud tasks — with or without marketplace skills.

    Two modes:
    1. run_skill(skill_name, task) — guided by a marketplace/cloned skill
    2. run_task(task) — freeform browser task (for deep URL extraction)
    """

    def __init__(self, settings: Settings):
        """Copy the Browser Use credentials from ``settings``.

        The SDK client itself is not built here; it is created on first
        use by ``_get_client``.
        """
        self._client = None
        self._api_key = settings.browser_use_api_key
        self._profile_id = settings.browser_use_profile_id
        self._op_vault_id = settings.op_vault_id

    def _get_client(self):
        """Return the cached ``AsyncBrowserUse`` client, creating it on demand.

        Returns ``None`` (after logging a warning) when an ``ImportError``
        is raised while importing or constructing the SDK client.
        """
        if self._client is None:
            try:
                # Imported lazily so a missing SDK degrades to None.
                from browser_use_sdk import AsyncBrowserUse

                self._client = AsyncBrowserUse(api_key=self._api_key)
            except ImportError:
                logger.warning("browser_use_sdk not installed")
                return None
        return self._client
Marketplace Skills
backend/agents/cloud_skills.py
# Cloned skill registry (26 skills in account).
# Maps the short skill name accepted by run_skill() to the skill's UUID.
# NOTE(review): only 15 entries are mapped below — confirm whether the
# remaining account skills are intentionally left out.
SKILL_IDS: dict[str, str] = {
    # Social Media — person research
    "tiktok_profile": "60e6940b-eb2c-4d54-aa54-0012ff6b6a8d",
    "instagram_posts": "dc6b4dac-f983-488b-9f15-81cdef09a2a5",
    "facebook_page": "07bca652-0668-4836-9a89-68f728a6b922",
    "linktree_profile": "4d3cc402-f840-4939-9f2b-0f707adf67d1",
    "pinterest_pins": "7dffd877-5c0f-4c72-a1c2-6f7d641a3283",
    "youtube_filmography": "f5d4c977-126e-4374-b1b3-8b02942dfa1c",
    "reddit_subreddit": "4693909a-7d0e-4628-8e0d-1cab01271a3c",
    # Professional / OSINT
    "linkedin_company_posts": "b50a1f27-22a6-489f-9768-83c0c52e8207",
    "company_employees": "47634a96-110e-4da0-ace5-8c520af7d9c3",
    "github_profile": "bcc9987f-7bc7-4af5-8c8e-7046be487e64",
    "sec_filings": "ce6cc606-837a-44d0-94e7-977d8b470113",
    "whois_lookup": "a9bf3a53-d58f-4a09-9a29-d5663a33937b",
    "osint_scraper": "2a607934-bf3e-43d9-aaa2-30f2b419eaf5",
    "yc_company": "cc661bb6-e3f3-4a48-9012-08b252a0a0bb",
    "ancestry_records": "b985c331-4515-4702-bc64-be75f429aee3",
}
Running Skills
backend/agents/cloud_skills.py
async def run_skill(
    self,
    skill_name: str,
    task: str,
    *,
    max_steps: int = 5,
    timeout: float = 60.0,
    secrets: dict[str, str] | None = None,
    allowed_domains: list[str] | None = None,
) -> dict | None:
    """Run a skill-guided task and wait for completion.

    ``skill_name`` is looked up in ``SKILL_IDS``; an unknown name is logged
    and yields ``None``. Otherwise the call is delegated to
    ``_create_and_poll`` with the resolved skill id, using ``skill_name``
    as the log label, and its result is returned unchanged.
    """
    skill_id = SKILL_IDS.get(skill_name)
    if skill_id:
        return await self._create_and_poll(
            task,
            skill_ids=[skill_id],
            max_steps=max_steps,
            timeout=timeout,
            secrets=secrets,
            allowed_domains=allowed_domains,
            label=skill_name,
        )

    logger.error("cloud_skills: unknown skill '{}'", skill_name)
    return None
Polling Tasks
backend/agents/cloud_skills.py
POLL_INTERVAL_SECONDS = 2.0
async def _create_and_poll(
    self,
    task: str,
    *,
    skill_ids: list[str] | None = None,
    max_steps: int = 10,
    timeout: float = 60.0,
    secrets: dict[str, str] | None = None,
    allowed_domains: list[str] | None = None,
    label: str = "task",
) -> dict | None:
    """Create a cloud task, then poll until it ends or ``timeout`` elapses.

    Args:
        task: Natural-language instruction for the browser agent.
        skill_ids: Skill ids to attach; omitted from the request when empty.
        max_steps: Step limit forwarded to the task.
        timeout: Polling budget in seconds; the status is checked every
            ``POLL_INTERVAL_SECONDS``.
        secrets: Secrets forwarded to the task; omitted when empty.
        allowed_domains: Domains the task may visit; omitted when empty.
        label: Name used in log lines and echoed in the result.

    Returns:
        A dict with ``output``, ``success``, ``cost``, ``task_id``,
        ``live_url`` and ``label`` once the task is ``finished`` or
        ``stopped``. On timeout the same keys are returned with
        ``success=False`` plus ``timed_out=True``. ``None`` when the SDK
        client is unavailable or any exception is raised.
    """
    client = self._get_client()
    if client is None:
        return None

    try:
        create_kwargs: dict = {
            "task": task,
            "llm": "browser-use-2.0",
            "max_steps": max_steps,
        }
        # Optional arguments are only sent when the caller supplied them.
        if skill_ids:
            create_kwargs["skill_ids"] = skill_ids
        if secrets:
            create_kwargs["secrets"] = secrets
        if allowed_domains:
            # Forward the restriction; it used to be accepted by this
            # method but never reached the task.
            create_kwargs["allowed_domains"] = allowed_domains

        result = await client.tasks.create_task(**create_kwargs)
        task_id = result.id
        live_url = getattr(result, "live_url", None)

        max_polls = int(timeout / POLL_INTERVAL_SECONDS)
        for _ in range(max_polls):
            await asyncio.sleep(POLL_INTERVAL_SECONDS)
            status = await client.tasks.get_task_status(task_id)
            if status.status not in ("finished", "stopped"):
                continue

            output = {
                "output": status.output or "",
                "success": bool(status.is_success),
                "cost": str(status.cost) if status.cost else "unknown",
                "task_id": task_id,
                "live_url": live_url,
                "label": label,
            }
            logger.info(
                "cloud_skills: completed label={} success={} cost={}",
                label, output["success"], output["cost"],
            )
            return output

        logger.warning("cloud_skills: timed out label={} after {:.0f}s", label, timeout)
        # Same keys as the success result so callers can read either shape.
        return {
            "output": "",
            "success": False,
            "cost": "unknown",
            "task_id": task_id,
            "live_url": live_url,
            "label": label,
            "timed_out": True,
        }
    except Exception as exc:
        # Best-effort boundary: any SDK/network failure is logged and
        # reported to the caller as None.
        logger.error("cloud_skills: error label={}: {}", label, str(exc))
        return None
Browser Use SDK Integration
BaseBrowserAgent
Agents that need custom browser automation use the SDK:
backend/agents/browser_agent.py
from browser_use import Agent, Browser, ChatBrowserUse
from langchain_openai import ChatOpenAI
class BaseBrowserAgent(ABC):
    """Shared helpers for agents built on the browser-use SDK."""

    def _build_llm(self):
        """Build the LLM instance for browser-use agents.

        Prefers ChatBrowserUse (optimized for browser automation, 3-5x faster).
        Falls back to ChatOpenAI with gpt-4o-mini if OPENAI_API_KEY is set.

        Raises:
            RuntimeError: when neither backend could be configured.
        """
        if self._settings.browser_use_api_key:
            try:
                from browser_use import ChatBrowserUse

                logger.debug("agent={} using ChatBrowserUse bu-2-0", self.agent_name)
                return ChatBrowserUse(model="bu-2-0")
            except Exception as exc:
                # Deliberately broad: any import or construction failure
                # falls through to the OpenAI path. The cause is logged so
                # the fallback is diagnosable.
                logger.debug(
                    "agent={} ChatBrowserUse unavailable ({}), trying ChatOpenAI",
                    self.agent_name, exc,
                )

        if self._settings.openai_api_key:
            from langchain_openai import ChatOpenAI

            logger.debug("agent={} using ChatOpenAI gpt-4o-mini", self.agent_name)
            return ChatOpenAI(
                model="gpt-4o-mini",
                api_key=self._settings.openai_api_key,
            )

        raise RuntimeError("No LLM configured: set BROWSER_USE_API_KEY or OPENAI_API_KEY")
Creating Agents
backend/agents/browser_agent.py
def _create_browser_agent(
    self, task: str, *, max_steps: int = 10, needs_login: bool = False,
):
    """Create a Browser Use Agent optimized for speed.

    Speed optimizations:
    - flash_mode=True for faster LLM responses
    - max_failures=2 to fail fast instead of retrying 5x
    - enable_planning=False to skip planning overhead on focused tasks
    - step_timeout=60 to kill stuck navigation steps quickly
    - max_actions_per_step=3 to keep each step small
    - use_vision="auto" to only send screenshots when needed

    When a Browser Use API key is configured, a cloud ``Browser`` is
    attached; if that setup fails the agent is still returned without it.
    """
    # NOTE(review): max_steps and needs_login are not used anywhere in this
    # body — confirm whether callers are expected to pass max_steps to
    # agent.run() themselves.
    from browser_use import Agent

    options: dict = {
        "task": task,
        "llm": self._build_llm(),
        "max_failures": 2,
        "flash_mode": True,
        "enable_planning": False,
        "step_timeout": 60,
        "max_actions_per_step": 3,
        "use_vision": "auto",
    }

    if self._settings.browser_use_api_key:
        try:
            from browser_use import Browser

            options["browser"] = Browser(use_cloud=True)
            logger.debug("agent={} using Browser Use cloud", self.agent_name)
        except Exception as exc:
            # Best effort: log and continue without the cloud browser.
            logger.warning("agent={} cloud browser setup failed: {}", self.agent_name, str(exc))

    return Agent(**options)
Usage Examples
Using Cloud Skills
from agents.cloud_skills import CloudSkillRunner
from config import Settings
settings = Settings()
runner = CloudSkillRunner(settings)
# Run a marketplace skill
result = await runner.run_skill(
"tiktok_profile",
"Get TikTok profile info for Elon Musk",
timeout=60.0,
)
if result and result["success"]:
print(f"Output: {result['output']}")
print(f"Cost: {result['cost']}")
print(f"Live URL: {result['live_url']}")
Using SDK Agents
from agents.browser_agent import BaseBrowserAgent
# AgentStatus is used below, so it has to be imported as well.
# NOTE(review): assumed to live next to AgentResult in agents.models — confirm.
from agents.models import AgentResult, AgentStatus, ResearchRequest


class CustomAgent(BaseBrowserAgent):
    """Example agent that runs a single search-and-extract browser task."""

    agent_name = "custom"

    async def _run_task(self, request: ResearchRequest) -> AgentResult:
        """Run the browser agent for ``request`` and wrap its final result."""
        query = self._build_search_query(request)
        task = f"Search for {query} and extract information"
        agent = self._create_browser_agent(task, max_steps=5)
        result = await agent.run()
        # final_result is None when the run produced no history object.
        final_result = result.final_result() if result else None
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            snippets=[str(final_result)],
        )
Deep URL Extraction
from agents.cloud_skills import CloudSkillRunner
runner = CloudSkillRunner(settings)
result = await runner.deep_extract_url(
url="https://example.com/profile",
person_name="John Doe",
timeout=60.0,
)
if result and result["success"]:
print(f"Extracted: {result['output']}")
Environment Variables
.env
# Browser Use Cloud API key
BROWSER_USE_API_KEY=bu_...
# Optional: Browser Use Profile ID for synced cookies
BROWSER_USE_PROFILE_ID=profile_...
# Optional: 1Password Vault ID for cross-session auth
OP_VAULT_ID=vault_...
# Fallback: OpenAI API key for local browser agents
OPENAI_API_KEY=sk-...
Troubleshooting
API Key Not Working
# Verify the API key is set: prints True when settings.browser_use_api_key
# is non-empty, False otherwise.
from config import Settings
settings = Settings()
print(f"Browser Use configured: {bool(settings.browser_use_api_key)}")
Task Timing Out
# Raise max_steps and timeout above run_skill's defaults when a task
# keeps timing out.
result = await runner.run_skill(
"tiktok_profile",
"Get TikTok profile",
max_steps=10, # Default is 5
timeout=120.0, # Default is 60s
)
Session Not Loading
# Check session status by fetching the session and printing its "status"
# and "liveUrl" fields.
# NOTE(review): get_session is not among the BrowserUseClient methods shown
# on this page — confirm it exists on the client.
client = BrowserUseClient(settings)
session = await client.get_session(session_id)
print(f"Session status: {session.get('status')}")
print(f"Live URL: {session.get('liveUrl')}")
Cloud SDK Import Error
# Install browser-use-sdk
pip install browser-use-sdk
Performance Tips
- Use Skills for Known Platforms: Marketplace skills are faster than freeform tasks
- Limit max_steps: Default to 5 for focused tasks, use 10+ only for complex workflows
- Set Tight Timeouts: Use 30-60s for simple tasks, 120s+ for complex multi-step workflows
- Parallel Execution: Use run_parallel() to run multiple skills concurrently
- Semaphore Limiting: DeepResearcher limits to 25 concurrent sessions to avoid rate limits
Next Steps
LinkedIn Agent
See Browser Use in action for LinkedIn scraping
Deep Researcher
Multi-phase pipeline using Cloud Skills