Skip to main content

Overview

The Twitter agent extracts profile information and tweets from Twitter/X using a two-tier approach: twscrape (primary) for reverse-engineered GraphQL access, and browser-use Agent (fallback) for Google-first scraping.

Implementation

backend/agents/twitter_agent.py
class TwitterAgent(BaseBrowserAgent):
    """Scrapes Twitter/X profiles using twscrape, falls back to browser-use.
    
    Extracts: bio, tweets, followers/following, interests, verified status.
    twscrape uses reverse-engineered GraphQL (no API key needed).
    """
    
    # Identifier stamped onto every AgentResult this agent produces.
    agent_name = "twitter"
    
    def __init__(self, settings: Settings, *, inbox_pool=None):
        # No Twitter-specific setup; the base class wires settings and the
        # (optional) inbox pool.
        super().__init__(settings, inbox_pool=inbox_pool)

Architecture Decision

backend/agents/twitter_agent.py
# RESEARCH: Checked twscrape (2.5k stars, updated Jan 2026), tweepy (10k stars, API v2 only)
# DECISION: twscrape primary (reverse GraphQL, no API key needed), browser-use fallback
# ALT: tweepy requires paid API access ($100/mo minimum)
Why twscrape?
  • No API key required (uses reverse-engineered GraphQL)
  • Fast and reliable (~2-5s per search)
  • Returns structured data directly
  • Free and actively maintained
Why not tweepy?
  • Requires Twitter API v2 access ($100/month minimum)
  • Rate limits are strict
  • No free tier for comprehensive data

Two-Tier Approach

Step 1: Try twscrape First

Uses reverse-engineered GraphQL for fast, structured extraction:
async def _run_task(self, request: ResearchRequest) -> AgentResult:
    """Run the two-tier lookup: twscrape first, browser-use as fallback."""
    primary = await self._try_twscrape(request)
    if primary and primary.status == AgentStatus.SUCCESS:
        return primary

    # twscrape yielded nothing usable (missing, errored, or empty) — take
    # the slower browser-based path instead.
    logger.info("twscrape failed or empty, falling back to browser-use for twitter")
    return await self._try_browser_use(request)
Step 2: twscrape Implementation
async def _try_twscrape(self, request: ResearchRequest) -> AgentResult | None:
    """Search Twitter via twscrape's reverse-engineered GraphQL client.

    Returns a SUCCESS AgentResult with up to 3 profiles, or None when
    twscrape is unavailable, errors out, or finds nothing — the caller
    then falls back to browser-use.
    """
    try:
        # Imported lazily so the agent still loads when twscrape isn't installed.
        from twscrape import API
        
        api = API()
        query = self._build_search_query(request)
        logger.info("twitter twscrape searching: {}", query)
        
        profiles: list[SocialProfile] = []
        snippets: list[str] = []
        
        # NOTE(review): current twscrape releases yield Tweet objects from
        # api.search(); the user-shaped fields read below (username,
        # followersCount, ...) imply a user-search API — confirm against the
        # pinned twscrape version.
        users = []
        async for user in api.search(query, limit=5):
            users.append(user)
        
        if not users:
            # Empty result set: signal the caller to use the fallback path.
            return None
        
        # Cap at the top 3 candidates to bound result size.
        for user in users[:3]:
            # Defensive: objects without .dict() degrade to an empty mapping,
            # producing a mostly-empty profile instead of raising.
            user_dict = user.dict() if hasattr(user, "dict") else {}
            username = user_dict.get("username", "")
            display_name = user_dict.get("displayname", "")
            bio = user_dict.get("rawDescription", "")
            followers = user_dict.get("followersCount", 0)
            following = user_dict.get("friendsCount", 0)
            location = user_dict.get("location", "")
            # Legacy verification OR the paid "blue" badge both count as verified.
            verified = user_dict.get("verified", False) or user_dict.get("blue", False)
            tweets_count = user_dict.get("statusesCount", 0)
            created = user_dict.get("created")
            
            # Keep the full raw payload alongside a few normalized extras.
            raw_data = {
                **user_dict,
                "tweets_count": tweets_count,
                "account_created": str(created) if created else None,
            }
            
            profile = SocialProfile(
                platform="twitter",
                url=f"https://x.com/{username}" if username else "",
                username=username,
                display_name=display_name,
                bio=bio,
                # 0 is treated as "unknown" and normalized to None.
                followers=followers if followers else None,
                following=following if following else None,
                location=location or None,
                verified=verified,
                raw_data=raw_data,
            )
            profiles.append(profile)
            
            if bio:
                snippets.append(f"@{username}: {bio}")
            if tweets_count:
                snippets.append(f"@{username} tweets: {tweets_count}")
        
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            profiles=profiles,
            snippets=snippets,
            urls_found=[p.url for p in profiles if p.url],
        )
    
    except ImportError:
        # twscrape not installed — non-fatal, caller falls back to browser-use.
        logger.warning("twscrape not available")
        return None
    
    except Exception as exc:
        # Any scraper failure is non-fatal for the same reason.
        logger.warning("twscrape search failed: {}", str(exc))
        return None
Step 3: Browser-Use Fallback

If twscrape fails or isn’t available, falls back to Google-first scraping:
async def _try_browser_use(self, request: ResearchRequest) -> AgentResult:
    """Fallback path: scrape Google snippets for the profile via browser-use.

    Deliberately never visits x.com itself (the prompt forbids it);
    everything is extracted from the Google results page.
    """
    if not self.configured:
        # Neither backend is usable — this is the only hard-failure exit
        # besides an exception.
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error="Neither twscrape nor Browser Use available",
        )
    
    try:
        query = self._build_search_query(request)
        # The prompt pins a fixed JSON shape so _parse_twitter_output can
        # reliably parse the agent's final answer, and constrains the agent
        # to the Google results page only.
        task = (
            f"Go to https://www.google.com/search?q={query.replace(' ', '+')}+site:x.com+OR+site:twitter.com "
            f"and use the extract tool to get this JSON from the Google results:\n"
            f'{{"username": "", "display_name": "", "bio": "", '
            f'"followers": 0, "following": 0, "profile_url": ""}}\n'
            f"Extract from Google snippets. Do NOT click into X/Twitter. Do NOT scroll. "
            f"After extracting, immediately call done with the JSON result."
        )
        
        # Tight step budget: load results -> extract -> done.
        agent = self._create_browser_agent(task, max_steps=3)
        result = await agent.run()
        final_result = result.final_result() if result else None
        
        if final_result:
            parsed = _parse_twitter_output(str(final_result), request.person_name)
            return AgentResult(
                agent_name=self.agent_name,
                status=AgentStatus.SUCCESS,
                profiles=[parsed["profile"]],
                snippets=parsed["snippets"],
                urls_found=[parsed["profile"].url] if parsed["profile"].url else [],
            )
        
        # No output is still SUCCESS: the search completed, nothing matched.
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            snippets=["No Twitter profile found"],
        )
    
    except Exception as exc:
        logger.error("twitter browser-use fallback error: {}", str(exc))
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error=f"Twitter browser fallback error: {exc}",
        )

Output Parsing

JSON Extraction (Browser-Use Path)

backend/agents/twitter_agent.py
def _extract_json(raw: str) -> dict:
    """Robustly extract JSON from browser-use output.
    
    Handles: markdown code fences, escaped quotes, surrounding text.
    """
    # Strip markdown code fences if present
    cleaned = raw.strip()
    if "```json" in cleaned:
        cleaned = cleaned.split("```json", 1)[1]
        cleaned = cleaned.split("```", 1)[0]
    elif "```" in cleaned:
        cleaned = cleaned.split("```", 1)[1]
        cleaned = cleaned.split("```", 1)[0]
    
    cleaned = cleaned.strip()
    
    # Try direct parse first
    try:
        return json.loads(cleaned)
    except (json.JSONDecodeError, ValueError):
        pass
    
    # Find the outermost { } and parse
    start = cleaned.find("{")
    end = cleaned.rfind("}") + 1
    if start >= 0 and end > start:
        try:
            return json.loads(cleaned[start:end])
        except (json.JSONDecodeError, ValueError):
            pass
    
    # Try on original raw input
    start = raw.find("{")
    end = raw.rfind("}") + 1
    if start >= 0 and end > start:
        try:
            return json.loads(raw[start:end])
        except (json.JSONDecodeError, ValueError):
            pass
    
    return {}

Profile Parsing

backend/agents/twitter_agent.py
def _parse_twitter_output(raw_output: str, person_name: str) -> dict:
    """Parse browser-use output into structured Twitter profile data.

    Returns {"profile": SocialProfile, "snippets": list[str]}. Missing
    fields fall back to empty/None values rather than raising.
    """
    data = _extract_json(raw_output)
    
    username = data.get("username", "")
    display_name = data.get("display_name", person_name)
    bio = data.get("bio", "")
    # parse_human_number tolerates "5.2M"-style strings from Google snippets.
    followers = parse_human_number(data.get("followers"))
    following = parse_human_number(data.get("following"))
    tweets_count = parse_human_number(data.get("tweets_count"))
    location = data.get("location")
    verified = data.get("verified", False)
    recent_tweets = data.get("recent_tweets", [])
    interests = data.get("interests", [])
    profile_url = data.get("profile_url", "")
    
    # Keep the untouched model output for debugging/auditing.
    raw_data = {
        "tweets_count": tweets_count,
        "recent_tweets": recent_tweets,
        "interests": interests,
        "browser_use_output": raw_output,
    }
    
    profile = SocialProfile(
        platform="twitter",
        # Without a direct URL, fall back to an x.com search link.
        url=profile_url if profile_url else f"https://x.com/search?q={person_name}",
        username=username or None,
        display_name=display_name,
        bio=bio or None,
        followers=followers,
        following=following,
        location=location or None,
        verified=bool(verified),
        raw_data=raw_data,
    )
    
    snippets: list[str] = []
    if bio:
        snippets.append(f"@{username}: {bio}" if username else f"Twitter: {bio}")
    if followers is not None:
        # parse_human_number only yields int or None, so the isinstance check
        # always holds here; the "" branch is removed by the filter below.
        snippets.append(f"Followers: {followers:,}" if isinstance(followers, int) else "")
    if recent_tweets:
        for tweet in recent_tweets[:3]:
            text = tweet.get("text", "")
            if text:
                snippets.append(f"Tweet: {text[:150]}")
    if interests:
        snippets.append(f"Interests: {', '.join(interests[:8])}")
    if not snippets:
        # Last resort: surface the raw output so downstream still sees something.
        snippets.append(raw_output[:500])
    
    # Filter empty strings
    snippets = [s for s in snippets if s]
    
    return {"profile": profile, "snippets": snippets}

Human Number Parsing

backend/agents/models.py
def parse_human_number(value) -> int | None:
    """Parse human-readable numbers like '5.5K+', '10M', '1,234' into integers."""
    if value is None:
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        return int(value)
    s = str(value).strip().replace(",", "").replace("+", "").upper()
    if not s:
        return None
    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
    for suffix, mult in multipliers.items():
        if s.endswith(suffix):
            try:
                return int(float(s[:-1]) * mult)
            except (ValueError, TypeError):
                return None
    try:
        return int(float(s))
    except (ValueError, TypeError):
        return None

Extracted Data

The Twitter agent extracts:
  • Profile Info: Username, display name, bio
  • Metrics: Followers, following, tweet count
  • Location: User’s listed location
  • Verification: Verified badge status (blue checkmark)
  • Recent Tweets: Latest tweet content
  • Interests: Inferred topics of interest
  • Account Age: Account creation date (twscrape only)
  • Profile URL: Direct link to Twitter/X profile

Usage Example

from agents.twitter_agent import TwitterAgent
from agents.models import ResearchRequest, AgentStatus
from config import Settings

settings = Settings()
agent = TwitterAgent(settings)

request = ResearchRequest(
    person_name="Elon Musk",
    timeout_seconds=30.0,
)

# NOTE: top-level await shown for brevity — run inside an async function
# (e.g. asyncio.run(main())) in real code.
result = await agent.run(request)

if result.status == AgentStatus.SUCCESS:
    for profile in result.profiles:
        print(f"Found: @{profile.username} ({profile.display_name})")
        print(f"  URL: {profile.url}")
        print(f"  Bio: {profile.bio}")
        # NOTE(review): followers/following may be None (see SocialProfile
        # construction); the ',' format spec raises TypeError on None — guard
        # before formatting in production code.
        print(f"  Followers: {profile.followers:,}")
        print(f"  Following: {profile.following:,}")
        print(f"  Verified: {profile.verified}")
        print(f"  Location: {profile.location}")
    
    print("\nSnippets:")
    for snippet in result.snippets:
        print(f"  - {snippet}")

Performance

twscrape Path

  • Duration: 2-5s typical
  • Cost: Free (no API key)
  • Success Rate: ~90%
  • Data Quality: High (direct Twitter GraphQL)

Browser-Use Fallback

  • Duration: 5-10s (Google snippets only)
  • Cost: Browser Use API usage
  • Success Rate: ~55%
  • Data Quality: Medium (summary from Google knowledge panel)

Installing twscrape

pip install twscrape

Troubleshooting

twscrape Not Installed

# Agent automatically falls back to browser-use
# To check if twscrape is available:
try:
    import twscrape
    print("twscrape available")
except ImportError:
    print("twscrape not installed, will use browser-use fallback")

Empty Results

# Check if any profiles were found
if result.status == AgentStatus.SUCCESS and not result.profiles:
    print("No Twitter profiles found for this person")
    # This might mean:
    # 1. Person doesn't have a Twitter account
    # 2. Account is suspended/deleted
    # 3. Search query didn't match any profiles

Rate Limiting (twscrape)

# twscrape uses internal rate limiting, but if you hit limits:
# 1. Wait a few minutes before retrying
# 2. The agent will automatically fall back to browser-use
# 3. No action needed - fallback is automatic

Username vs Display Name

# twscrape returns both username and display name
for profile in result.profiles:
    print(f"Username: @{profile.username}")  # @elonmusk
    print(f"Display Name: {profile.display_name}")  # Elon Musk

Follower Count Parsing

# Handles human-readable formats like "5.2M" or "1.5K"
from agents.models import parse_human_number

followers = parse_human_number("5.2M")  # Returns 5200000
followers = parse_human_number("1.5K+")  # Returns 1500
followers = parse_human_number("1,234")  # Returns 1234

Advanced Usage

Getting Recent Tweets (twscrape)

# twscrape can fetch tweets, but it's not in the default agent
# To extend the agent:
from twscrape import API

api = API()
# NOTE: top-level await shown for brevity — run inside an async function.
user = await api.user_by_login("elonmusk")
tweets = []
async for tweet in api.user_tweets(user.id, limit=10):
    # Flatten each tweet into a plain dict for easy serialization.
    tweets.append({
        "text": tweet.rawContent,
        "likes": tweet.likeCount,
        "retweets": tweet.retweetCount,
        "date": str(tweet.date),
    })
# The agent returns up to 3 profiles from twscrape search
result = await agent.run(request)
print(f"Found {len(result.profiles)} Twitter profiles")

for i, profile in enumerate(result.profiles, 1):
    print(f"{i}. @{profile.username} - {profile.display_name}")
    # NOTE(review): followers may be None — guard before the ',' format spec.
    print(f"   Followers: {profile.followers:,}")
    print(f"   Followers: {profile.followers:,}")

Next Steps

Instagram Agent

Instagram profile extraction with Cloud SDK

Google Agent

Google search-based intelligence gathering

LinkedIn Agent

Professional profile scraping

Agent Overview

Full agent system architecture

Build docs developers (and LLMs) love