Overview
The LinkedIn agent extracts professional information from LinkedIn profiles using a two-tier approach: Browser Use Cloud SDK skills (primary) and browser-use Agent (fallback).Implementation
backend/agents/linkedin_agent.py
Architecture Decision
backend/agents/linkedin_agent.py
Two-Tier Approach
async def _run_task(self, request: ResearchRequest) -> AgentResult:
    """Resolve a research request, preferring the Cloud SDK skill path.

    The Cloud SDK skill is attempted only when ``self._cloud.configured`` is
    truthy. Its result is returned only if it is truthy, has status
    ``AgentStatus.SUCCESS`` and carries at least one profile; in every other
    case the browser-use fallback produces the result.
    """
    # Primary tier: skipped entirely when the cloud client is not configured.
    primary = (
        await self._try_cloud_skill(request) if self._cloud.configured else None
    )
    if primary and primary.status == AgentStatus.SUCCESS and primary.profiles:
        return primary

    # Secondary tier: Google-scraping through the browser-use agent.
    return await self._try_browser_use(request)
async def _try_cloud_skill(self, request: ResearchRequest) -> AgentResult | None:
    """Attempt the lookup through a LinkedIn marketplace skill on the Cloud SDK.

    Returns:
        An ``AgentResult`` with status ``AgentStatus.SUCCESS`` built from the
        parsed skill output, or ``None`` when the skill reports no successful
        result or anything inside the skill call / parsing raises. ``None``
        lets the caller decide to fall back.
    """
    search_terms = self._build_search_query(request)
    prompt = (
        f"Find the LinkedIn profile for {search_terms} and extract their professional information "
        f"including full name, headline, current company, title, location, and about section."
    )
    try:
        # The skill invoked is the one registered as "linkedin_company_posts".
        response = await self._cloud.run_skill(
            "linkedin_company_posts",
            prompt,
            timeout=45.0,
        )
        if not (response and response.get("success")):
            logger.info("linkedin cloud skill returned no result, falling back")
            return None

        extracted = _parse_linkedin_output(
            response.get("output", ""), request.person_name
        )
        profile = extracted["profile"]
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            profiles=[profile],
            snippets=extracted["snippets"],
            # Only report a URL when the parsed profile actually has one.
            urls_found=[profile.url] if profile.url else [],
        )
    except Exception as exc:
        # Best-effort tier: any failure is logged and signalled with None.
        logger.warning("linkedin cloud skill error: {}", str(exc))
        return None
async def _try_browser_use(self, request: ResearchRequest) -> AgentResult:
    """Fallback: Google-first scraping via browser-use Agent.

    Builds a Google search URL for the request's search query and asks a
    browser-use agent to extract profile fields as JSON from the results
    page, without visiting LinkedIn itself.

    Returns:
        - ``FAILED`` with an explanatory error when ``self.configured`` is
          falsy.
        - ``SUCCESS`` with one parsed profile when the agent produced a
          final result.
        - ``SUCCESS`` with the snippet ``"No LinkedIn profile found"`` when
          the agent produced no final result.
        - ``FAILED`` with the exception text when anything raises while
          building the task, running the agent or parsing its output.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    if not self.configured:
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error="Browser Use not configured (BROWSER_USE_API_KEY or OPENAI_API_KEY missing)",
        )
    query = self._build_search_query(request)
    logger.info("linkedin agent (fallback) searching: {}", query)
    try:
        # Percent-encode the query so reserved characters in a name or
        # company ("&", "#", "+", "%", non-ASCII) cannot truncate or
        # corrupt the q= parameter. Spaces still become "+", so plain
        # queries yield the same URL as before.
        encoded_query = quote_plus(query)
        task = (
            f"Go to https://www.google.com/search?q={encoded_query}+LinkedIn+profile "
            f"and use the extract tool on the search results to get this JSON:\n"
            f'{{"full_name": "", "headline": "", "location": "", "about": "", '
            f'"current_company": "", "current_title": "", "profile_url": ""}}\n'
            f"Extract from Google's snippets and knowledge panel. "
            f"Do NOT click into LinkedIn. Do NOT scroll. "
            f"After extracting, immediately call done with the JSON result."
        )
        # max_steps=3 is handed to the agent factory; presumably it caps the
        # number of agent steps -- confirm against _create_browser_agent.
        agent = self._create_browser_agent(task, max_steps=3)
        result = await agent.run()
        final_result = result.final_result() if result else None
        if final_result:
            parsed = _parse_linkedin_output(str(final_result), request.person_name)
            profile = parsed["profile"]
            return AgentResult(
                agent_name=self.agent_name,
                status=AgentStatus.SUCCESS,
                profiles=[profile],
                snippets=parsed["snippets"],
                # Only report a URL when the parsed profile actually has one.
                urls_found=[profile.url] if profile.url else [],
            )
        # The agent ran but produced nothing usable: still a completed run.
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            snippets=["No LinkedIn profile found"],
        )
    except Exception as exc:
        logger.error("linkedin agent error: {}", str(exc))
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error=f"LinkedIn agent error: {exc}",
        )
Output Parsing
JSON Extraction
Robustly extracts JSON from Cloud SDK or browser-use output:backend/agents/linkedin_agent.py
Profile Parsing
backend/agents/linkedin_agent.py
Extracted Data
The LinkedIn agent extracts:- Profile Info: Full name, headline, location, about section
- Current Role: Company and title
- Experience: Job history with titles and companies
- Education: Degrees, fields of study, schools
- Skills: Endorsed skills list
- Connections: Connection count (converted to followers)
- Recent Posts: Latest LinkedIn activity
- Profile URL: Direct link to LinkedIn profile
Usage Example
Performance
Cloud SDK Path
- Duration: 20-30s typical
- Cost: $0.01 per run
- Success Rate: ~85%
- Data Quality: High (direct LinkedIn extraction)
Browser-Use Fallback
- Duration: 5-10s (Google snippets only)
- Cost: Browser Use API usage
- Success Rate: ~60% (limited to Google knowledge panel)
- Data Quality: Medium (summary data only)
Troubleshooting
Cloud Skill Failing
Empty Results
Timeout Issues
Rate Limiting
Next Steps
Twitter Agent
Twitter/X scraping with twscrape
Instagram Agent
Instagram profile extraction
Browser Use Integration
Deep dive into Cloud SDK skills
Agent Overview
Full agent system architecture