Overview
The Twitter agent extracts profile information and tweets from Twitter/X using a two-tier approach: twscrape (primary) for reverse-engineered GraphQL access, and browser-use Agent (fallback) for Google-first scraping.

Implementation
backend/agents/twitter_agent.py
Architecture Decision
backend/agents/twitter_agent.py
Why twscrape (primary):
- No API key required (uses reverse-engineered GraphQL)
- Fast and reliable (~2-5s per search)
- Returns structured data directly
- Free and actively maintained

Why not the official Twitter API:
- Requires Twitter API v2 access ($100/month minimum)
- Rate limits are strict
- No free tier for comprehensive data
Two-Tier Approach
async def _run_task(self, request: ResearchRequest) -> AgentResult:
    """Research a person on Twitter/X: twscrape first, browser-use as fallback.

    twscrape avoids launching a browser entirely, so it is always attempted
    first; the browser path only runs when twscrape is unavailable, errors,
    or finds nothing.
    """
    primary = await self._try_twscrape(request)
    if primary and primary.status == AgentStatus.SUCCESS:
        return primary
    logger.info("twscrape failed or empty, falling back to browser-use for twitter")
    return await self._try_browser_use(request)
async def _try_twscrape(self, request: ResearchRequest) -> AgentResult | None:
    """Search Twitter/X through twscrape's reverse-engineered GraphQL API.

    Returns a successful AgentResult with up to three profiles, or None in
    every failure case (twscrape not installed, search error, zero results)
    so the caller can fall back to the browser-based path.
    """
    try:
        # Imported lazily: twscrape is an optional dependency, and the
        # ImportError below is how we detect its absence.
        from twscrape import API

        api = API()
        query = self._build_search_query(request)
        logger.info("twitter twscrape searching: {}", query)

        matches = []
        async for hit in api.search(query, limit=5):
            matches.append(hit)
        if not matches:
            return None

        profiles: list[SocialProfile] = []
        snippets: list[str] = []

        # Keep only the top three candidates; work from a plain dict so a
        # missing field degrades to a default instead of raising.
        for hit in matches[:3]:
            data = hit.dict() if hasattr(hit, "dict") else {}
            username = data.get("username", "")
            display_name = data.get("displayname", "")
            bio = data.get("rawDescription", "")
            followers = data.get("followersCount", 0)
            following = data.get("friendsCount", 0)
            location = data.get("location", "")
            # Legacy checkmark or paid "blue" both count as verified.
            verified = data.get("verified", False) or data.get("blue", False)
            tweets_count = data.get("statusesCount", 0)
            created = data.get("created")

            raw_data = {
                **data,
                "tweets_count": tweets_count,
                "account_created": str(created) if created else None,
            }
            profiles.append(
                SocialProfile(
                    platform="twitter",
                    url=f"https://x.com/{username}" if username else "",
                    username=username,
                    display_name=display_name,
                    bio=bio,
                    # Zero counts are stored as None rather than 0.
                    followers=followers if followers else None,
                    following=following if following else None,
                    location=location or None,
                    verified=verified,
                    raw_data=raw_data,
                )
            )
            if bio:
                snippets.append(f"@{username}: {bio}")
            if tweets_count:
                snippets.append(f"@{username} tweets: {tweets_count}")

        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            profiles=profiles,
            snippets=snippets,
            urls_found=[p.url for p in profiles if p.url],
        )
    except ImportError:
        logger.warning("twscrape not available")
        return None
    except Exception as exc:
        logger.warning("twscrape search failed: {}", str(exc))
        return None
async def _try_browser_use(self, request: ResearchRequest) -> AgentResult:
    """Fallback path: extract the profile from Google search snippets via browser-use.

    The agent is deliberately kept on Google's results page — snippets are
    enough, and navigating into X/Twitter tends to hit login walls.
    """
    if not self.configured:
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error="Neither twscrape nor Browser Use available",
        )
    try:
        query = self._build_search_query(request)
        task = (
            f"Go to https://www.google.com/search?q={query.replace(' ', '+')}+site:x.com+OR+site:twitter.com "
            f"and use the extract tool to get this JSON from the Google results:\n"
            f'{{"username": "", "display_name": "", "bio": "", '
            f'"followers": 0, "following": 0, "profile_url": ""}}\n'
            f"Extract from Google snippets. Do NOT click into X/Twitter. Do NOT scroll. "
            f"After extracting, immediately call done with the JSON result."
        )
        agent = self._create_browser_agent(task, max_steps=3)
        run_outcome = await agent.run()
        final = run_outcome.final_result() if run_outcome else None

        if not final:
            # No extraction: still report SUCCESS with an explanatory
            # snippet rather than failing the whole research request.
            return AgentResult(
                agent_name=self.agent_name,
                status=AgentStatus.SUCCESS,
                snippets=["No Twitter profile found"],
            )

        parsed = _parse_twitter_output(str(final), request.person_name)
        profile = parsed["profile"]
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            profiles=[profile],
            snippets=parsed["snippets"],
            urls_found=[profile.url] if profile.url else [],
        )
    except Exception as exc:
        logger.error("twitter browser-use fallback error: {}", str(exc))
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error=f"Twitter browser fallback error: {exc}",
        )
Output Parsing
JSON Extraction (Browser-Use Path)
backend/agents/twitter_agent.py
Profile Parsing
backend/agents/twitter_agent.py
Human Number Parsing
backend/agents/models.py
Extracted Data
The Twitter agent extracts:

- Profile Info: Username, display name, bio
- Metrics: Followers, following, tweet count
- Location: User’s listed location
- Verification: Verified badge status (blue checkmark)
- Recent Tweets: Latest tweet content
- Interests: Inferred topics of interest
- Account Age: Account creation date (twscrape only)
- Profile URL: Direct link to Twitter/X profile
Usage Example
Performance
twscrape Path
- Duration: 2-5s typical
- Cost: Free (no API key)
- Success Rate: ~90%
- Data Quality: High (direct Twitter GraphQL)
Browser-Use Fallback
- Duration: 5-10s (Google snippets only)
- Cost: Browser Use API usage
- Success Rate: ~55%
- Data Quality: Medium (summary from Google knowledge panel)
Installing twscrape
Troubleshooting
twscrape Not Installed
Empty Results
Rate Limiting (twscrape)
Username vs Display Name
Follower Count Parsing
Advanced Usage
Getting Recent Tweets (twscrape)
Multiple Profile Search
Next Steps
Instagram Agent
Instagram profile extraction with Cloud SDK
Google Agent
Google search-based intelligence gathering
LinkedIn Agent
Professional profile scraping
Agent Overview
Full agent system architecture