Skip to main content

Overview

The Instagram agent extracts profile information and posts from Instagram. It uses a Browser Use Cloud SDK skill as its primary method and falls back to a browser-use Agent that performs Google-first scraping.

Implementation

backend/agents/instagram_agent.py
class InstagramAgent(BaseBrowserAgent):
    """Scrapes Instagram profiles via Cloud SDK skill, falls back to browser-use.

    Primary: Cloud SDK Instagram Profile Posts skill (deterministic, $0.01/run)
    Fallback: browser-use Agent + Google snippet extraction
    """

    # Identifier stamped onto every AgentResult produced by this agent.
    agent_name = "instagram"

    def __init__(self, settings: Settings, *, inbox_pool=None):
        # BaseBrowserAgent owns browser-use setup; inbox_pool is forwarded
        # unchanged (its semantics are defined by the base class).
        super().__init__(settings, inbox_pool=inbox_pool)
        # Runner for the primary (marketplace skill) extraction path.
        self._cloud = CloudSkillRunner(settings)

Architecture Decision

backend/agents/instagram_agent.py
# RESEARCH: Checked instaloader (8k stars), instagram-private-api (archived), instagrapi (5k stars)
# DECISION: Browser Use Cloud SDK skill (Instagram Profile Posts, $0.01/run, ~23s, deterministic)
# FALLBACK: browser-use Agent with Google-first scraping
# ALT: instagrapi for heavier scraping needs (risk of account bans)
Why Cloud SDK?
  • Marketplace skill handles Instagram’s anti-bot measures
  • Deterministic results (~$0.01/run, 20-30s)
  • No account bans or IP blocks
  • Structured output parsing
Why not instagrapi/instaloader?
  • Risk of account bans
  • Requires managing Instagram sessions
  • More complex setup and maintenance

Two-Tier Approach

1
Try Cloud SDK Skill First
2
Uses the instagram_posts marketplace skill for reliable extraction:
3
async def _run_task(self, request: ResearchRequest) -> AgentResult:
    """Run the research task: Cloud SDK skill first, browser-use second.

    The cloud result is only trusted when it both succeeded and produced
    at least one profile; anything less falls through to the fallback.
    """
    if self._cloud.configured:
        via_cloud = await self._try_cloud_skill(request)
        if via_cloud and via_cloud.status == AgentStatus.SUCCESS and via_cloud.profiles:
            return via_cloud
    # Cloud path unavailable or came back empty -- scrape Google snippets.
    return await self._try_browser_use(request)
4
Cloud SDK Implementation
5
async def _try_cloud_skill(self, request: ResearchRequest) -> AgentResult | None:
    """Attempt extraction via the Instagram Profile Posts marketplace skill.

    Returns None (so the caller can fall back) when the skill reports
    failure, returns nothing, or raises.
    """
    query = self._build_search_query(request)
    task = (
        f"Search for Instagram profile of {query} and extract profile info "
        f"including username, bio, followers, following, and post count."
    )

    try:
        skill_response = await self._cloud.run_skill(
            "instagram_posts",
            task,
            timeout=60.0,
        )

        # A missing or unsuccessful response is not an error -- just a
        # signal that the fallback path should run.
        if not skill_response or not skill_response.get("success"):
            logger.info("instagram cloud skill returned no result, falling back")
            return None

        parsed = _parse_instagram_output(
            skill_response.get("output", ""), request.person_name
        )
        profile = parsed["profile"]
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            profiles=[profile],
            snippets=parsed["snippets"],
            urls_found=[profile.url] if profile.url else [],
        )

    except Exception as exc:
        logger.warning("instagram cloud skill error: {}", str(exc))
        return None
6
Browser-Use Fallback
7
If Cloud SDK fails, falls back to Google-first scraping:
8
async def _try_browser_use(self, request: ResearchRequest) -> AgentResult:
    """Fallback path: scrape Google result snippets with a browser-use Agent.

    Never clicks through to instagram.com; the agent is instructed to
    extract only from what Google surfaces in its results page.
    """
    if not self.configured:
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error="Browser Use not configured (BROWSER_USE_API_KEY or OPENAI_API_KEY missing)",
        )

    query = self._build_search_query(request)
    logger.info("instagram agent (fallback) searching: {}", query)

    try:
        task = (
            "Go to https://www.google.com/search"
            f"?q={query.replace(' ', '+')}+site:instagram.com "
            f"and use the extract tool to get this JSON from the Google results:\n"
            f'{{"username": "", "display_name": "", "bio": "", '
            f'"followers": 0, "following": 0, "post_count": 0, '
            f'"profile_url": ""}}\n'
            f"Extract from Google snippets. Do NOT click into Instagram. Do NOT scroll. "
            f"After extracting, immediately call done with the JSON result."
        )

        browser_agent = self._create_browser_agent(task, max_steps=3)
        run_outcome = await browser_agent.run()
        payload = run_outcome.final_result() if run_outcome else None

        if not payload:
            # Agent finished without producing output: report "nothing
            # found" as a successful (empty) result, not a failure.
            return AgentResult(
                agent_name=self.agent_name,
                status=AgentStatus.SUCCESS,
                snippets=["No Instagram profile found"],
            )

        parsed = _parse_instagram_output(str(payload), request.person_name)
        profile = parsed["profile"]
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.SUCCESS,
            profiles=[profile],
            snippets=parsed["snippets"],
            urls_found=[profile.url] if profile.url else [],
        )

    except Exception as exc:
        logger.error("instagram agent error: {}", str(exc))
        return AgentResult(
            agent_name=self.agent_name,
            status=AgentStatus.FAILED,
            error=f"Instagram agent error: {exc}",
        )

Output Parsing

JSON Extraction

backend/agents/instagram_agent.py
def _extract_json(raw: str) -> dict:
    """Robustly extract JSON from browser-use output."""
    cleaned = raw.strip()
    if "```json" in cleaned:
        cleaned = cleaned.split("```json", 1)[1].split("```", 1)[0]
    elif "```" in cleaned:
        cleaned = cleaned.split("```", 1)[1].split("```", 1)[0]
    cleaned = cleaned.strip()
    
    for text in [cleaned, raw]:
        try:
            return json.loads(text)
        except (json.JSONDecodeError, ValueError):
            pass
        start = text.find("{")
        end = text.rfind("}") + 1
        if start >= 0 and end > start:
            try:
                return json.loads(text[start:end])
            except (json.JSONDecodeError, ValueError):
                pass
    return {}

Profile Parsing

backend/agents/instagram_agent.py
def _parse_instagram_output(raw_output: str, person_name: str) -> dict:
    """Parse browser-use or Cloud SDK output into structured Instagram profile data.

    Args:
        raw_output: Raw scraper text; may wrap JSON in markdown fences or prose.
        person_name: Fallback display name when the output lacks one.

    Returns:
        Dict with keys ``profile`` (SocialProfile) and ``snippets`` (list of
        non-empty human-readable summary strings; falls back to a truncated
        copy of ``raw_output`` when nothing structured was parsed).
    """
    data = _extract_json(raw_output)

    username = data.get("username", "")
    display_name = data.get("display_name", person_name)
    bio = data.get("bio", "")
    followers = parse_human_number(data.get("followers"))
    following = parse_human_number(data.get("following"))
    post_count = parse_human_number(data.get("post_count"))
    is_verified = data.get("is_verified", False)
    is_private = data.get("is_private", False)
    recent_posts = data.get("recent_posts", [])
    profile_url = data.get("profile_url", "")

    # Fields without a dedicated SocialProfile column ride along in raw_data.
    raw_data = {
        "post_count": post_count,
        "is_private": is_private,
        "recent_posts": recent_posts,
        "browser_use_output": raw_output,
    }

    profile = SocialProfile(
        platform="instagram",
        url=profile_url if profile_url else f"https://instagram.com/{username}" if username else "",
        username=username or None,
        display_name=display_name,
        bio=bio or None,
        followers=followers,
        following=following,
        verified=bool(is_verified),
        raw_data=raw_data,
    )

    snippets: list[str] = []
    if bio:
        snippets.append(
            f"@{username}: {bio}" if username else f"Instagram: {bio}"
        )
    # BUG FIX: previously a non-None, non-int followers value appended an
    # empty string, which made the "no snippets" fallback below never fire
    # (the empty string was only filtered out afterwards). Only append a
    # snippet when the count can actually be formatted.
    if isinstance(followers, int):
        snippets.append(f"Followers: {followers:,}")
    if post_count is not None:
        snippets.append(f"Posts: {post_count}")
    if is_private:
        snippets.append("Account is private")
    for post in recent_posts[:3]:
        caption = post.get("caption", "")
        if caption:
            snippets.append(f"Post: {caption[:150]}")

    # Filter empties BEFORE the fallback check so the fallback fires
    # whenever nothing meaningful was parsed.
    snippets = [s for s in snippets if s]
    if not snippets and raw_output:
        snippets.append(raw_output[:500])

    return {"profile": profile, "snippets": snippets}

Extracted Data

The Instagram agent extracts:
  • Profile Info: Username, display name, bio
  • Metrics: Followers, following, post count
  • Verification: Blue checkmark status
  • Privacy: Whether account is private
  • Recent Posts: Latest post captions and media
  • Profile URL: Direct link to Instagram profile

Usage Example

from agents.instagram_agent import InstagramAgent
from agents.models import ResearchRequest, AgentStatus
from config import Settings

settings = Settings()
agent = InstagramAgent(settings)

request = ResearchRequest(
    person_name="Cristiano Ronaldo",
    timeout_seconds=60.0,  # per-run budget; the skill path typically takes 20-30s
)

# NOTE: top-level `await` requires an async context (e.g. asyncio.run(main())).
result = await agent.run(request)

if result.status == AgentStatus.SUCCESS:
    for profile in result.profiles:
        print(f"Found: @{profile.username} ({profile.display_name})")
        print(f"  URL: {profile.url}")
        print(f"  Bio: {profile.bio}")
        # NOTE(review): followers/following may be None when parsing failed;
        # the {:,} format would then raise -- this example assumes they parsed.
        print(f"  Followers: {profile.followers:,}")
        print(f"  Following: {profile.following:,}")
        print(f"  Posts: {profile.raw_data.get('post_count')}")
        print(f"  Verified: {profile.verified}")
        print(f"  Private: {profile.raw_data.get('is_private')}")
    
    print("\nRecent Posts:")
    for snippet in result.snippets:
        if snippet.startswith("Post:"):
            print(f"  - {snippet}")

Performance

Cloud SDK Path

  • Duration: 20-30s typical
  • Cost: $0.01 per run
  • Success Rate: ~80%
  • Data Quality: High (direct Instagram extraction)

Browser-Use Fallback

  • Duration: 5-10s (Google snippets only)
  • Cost: Browser Use API usage
  • Success Rate: ~50%
  • Data Quality: Medium (summary from Google knowledge panel)

Handling Private Accounts

# Private accounts return limited data; the flag lives in raw_data.
if result.status == AgentStatus.SUCCESS:
    for profile in result.profiles:
        if profile.raw_data.get("is_private"):
            print(f"Account @{profile.username} is private")
            print("Available data: username, bio, follower count only")
            print("Recent posts: Not accessible")

Troubleshooting

Cloud Skill Failing

# Check if Cloud SDK is configured.
# NOTE(review): `configured` presumably reflects cloud API credentials in
# Settings -- confirm against CloudSkillRunner.
from agents.cloud_skills import CloudSkillRunner
from config import Settings

settings = Settings()
runner = CloudSkillRunner(settings)
print(f"Cloud configured: {runner.configured}")

Empty Results

# The agent returns SUCCESS with empty snippets if no profile found --
# an empty result is not treated as a failure.
if result.status == AgentStatus.SUCCESS and not result.profiles:
    print("No Instagram profile found for this person")
    # Possible reasons:
    # 1. Person doesn't have Instagram
    # 2. Account was deleted/banned
    # 3. Username doesn't match search query

Timeout Issues

# Instagram skill can take 30-60s, increase timeout if needed
# The Instagram skill can take 30-60s; raise the request timeout if runs
# are being cut off.
request = ResearchRequest(
    person_name="John Doe",
    timeout_seconds=90.0,  # Default is 60s
)

Post Count Parsing

# Handles human-readable count formats as emitted by Instagram/Google UIs.
from agents.models import parse_human_number

post_count = parse_human_number("1.2K")  # Returns 1200
post_count = parse_human_number("500")   # Returns 500
post_count = parse_human_number("5M+")   # Returns 5000000

Private vs Public Profiles

# Check if profile is private; privacy is stored in raw_data, not as a
# top-level SocialProfile field.
for profile in result.profiles:
    is_private = profile.raw_data.get("is_private", False)
    if is_private:
        print("Limited data available (account is private)")
    else:
        print("Full profile data available")
        recent_posts = profile.raw_data.get("recent_posts", [])
        print(f"Found {len(recent_posts)} recent posts")

Advanced Usage

Extracting Recent Posts

# Access recent posts from raw_data.
# NOTE(review): post dict keys (caption/likes/comments/timestamp) depend on
# the Cloud SDK skill's output shape -- confirm against actual skill runs.
for profile in result.profiles:
    recent_posts = profile.raw_data.get("recent_posts", [])
    for post in recent_posts:
        print(f"Caption: {post.get('caption', 'No caption')}")
        print(f"Likes: {post.get('likes', 0)}")
        print(f"Comments: {post.get('comments', 0)}")
        print(f"Posted: {post.get('timestamp', 'Unknown')}")
        print()

Checking Verification Status

# Verified accounts have the blue checkmark; `verified` is a plain bool
# on the profile model.
for profile in result.profiles:
    if profile.verified:
        print(f"@{profile.username} is verified")
    else:
        print(f"@{profile.username} is not verified")

Follower Growth Analysis

# Store follower counts over time for growth tracking.
import json
from datetime import datetime

result = await agent.run(request)
for profile in result.profiles:
    data_point = {
        # NOTE(review): datetime.now() is naive local time; prefer
        # datetime.now(timezone.utc) for comparable time-series data.
        "date": datetime.now().isoformat(),
        "username": profile.username,
        "followers": profile.followers,
        "following": profile.following,
        "posts": profile.raw_data.get("post_count"),
    }
    # Save to database or file for time-series analysis
    print(json.dumps(data_point, indent=2))

Next Steps

Twitter Agent

Twitter/X profile scraping with twscrape

Google Agent

Google search-based intelligence gathering

Browser Use Integration

Deep dive into Cloud SDK skills

Agent Overview

Full agent system architecture

Build docs developers (and LLMs) love