Skip to main content

Overview

VIGIA integrates with VigiAccess, the WHO’s global database of reported adverse drug reactions, to retrieve safety data and distribution statistics. The system supports both the unofficial protocol-based scraper and the official WHO-UMC API.

Key Capabilities

Drug Search

Search for medicines by trade name or active ingredient

SOC Distribution

System Organ Class distribution with percentages and counts

Demographics

Geographic, age, sex, and temporal distributions

Signal Detection

Identify disproportionate adverse event patterns

Architecture

VIGIA supports two integration methods: the protocol-based scraper and the official WHO-UMC API.
Protocol-based scraper: an unofficial scraper that mimics browser behavior to access VigiAccess data.
Pros:
  • No API key required
  • Real-time data access
  • Mirrors official web interface
Cons:
  • Subject to website changes
  • Rate limited by session cookies

Data Structure

SOC Distribution

System Organ Class data with counts and percentages:
@dataclass
class SocRow:
    """One System Organ Class (SOC) entry of a drug's ADR distribution."""

    # System Organ Class name
    soc: str
    # Percentage of total reports (may be None)
    percent: Optional[float]
    # Number of ADR reports (may be None)
    adr_count: Optional[int]
Example Response:
{
  "drug": "Atorvastatin",
  "token": "cid-v1:xyz123",
  "total_reports": 45230,
  "soc_rows": [
    {
      "soc": "Musculoskeletal and connective tissue disorders",
      "percent": 28.5,
      "adr_count": 12890
    },
    {
      "soc": "Nervous system disorders",
      "percent": 18.2,
      "adr_count": 8232
    },
    {
      "soc": "Gastrointestinal disorders",
      "percent": 15.7,
      "adr_count": 7101
    }
  ],
  "source": "VigiAccess (scraper no-oficial)"
}

Implementation

Protocol-Based Scraper

class VigiAccessScraper:
    """Unofficial VigiAccess client that mimics the browser protocol.

    Request flow:
    1) GET "/" → cookies (ASLBSA, ASLBSACORS)
    2) POST /protocol/IProtocol/search (JSON → msgpack)
    3) POST /protocol/IProtocol/distribution (JSON → msgpack)
    """

    def __init__(
        self,
        base_url: str | None = None,
        timeout: float = 15.0,
        max_retries: int = 2,
    ):
        """Store the settings and open the shared HTTP client.

        Args:
            base_url: Site root; a falsy value falls back to
                "https://www.vigiaccess.org". Trailing slashes are removed.
            timeout: Request timeout passed to ``httpx.Client``.
            max_retries: Stored on the instance as ``self.max_retries``.
        """
        default_base = "https://www.vigiaccess.org"
        self.base_url = (base_url or default_base).rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries

        # Browser-like headers so requests look like the official web UI.
        # NOTE(review): httpx only decodes "br"/"zstd" bodies when the optional
        # brotli/zstandard packages are installed — confirm they are, otherwise
        # a compressed response could reach the msgpack decoder undecoded.
        self.client = httpx.Client(
            base_url=self.base_url,
            timeout=self.timeout,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/142.0.0.0",
                "Accept": "*/*",
                "Accept-Encoding": "gzip, deflate, br, zstd",
                "Accept-Language": "es-ES,es;q=0.9",
            },
        )

    def scrape_drug(self, term: str) -> Dict[str, Any]:
        """Main entry point: look up a drug and return its SOC distribution.

        Args:
            term: Drug name to search for. ``None`` and blank strings are
                rejected with ``VigiAccessError``.

        Returns:
            {
                "drug": label,
                "token": encrypted_token,
                "total_reports": int,
                "soc_rows": [{soc, percent, adr_count}, ...],
                "source": "VigiAccess (scraper no-oficial)"
            }

        Raises:
            VigiAccessError: If ``term`` is empty after stripping.
        """
        # Tolerate None so callers get the domain error, not AttributeError.
        term = (term or "").strip()
        if not term:
            raise VigiAccessError("Debe indicar el nombre del fármaco")

        # 1) Search for drug candidates
        candidates = self._search_drug(term)
        chosen = self._pick_candidate(term, candidates)
        logger.info("[VigiAccess] candidato elegido: label=%s token=%s",
                    chosen["label"], chosen["token"])

        # 2) Fetch distribution data
        raw_dist = self._fetch_distribution_raw(chosen["token"])

        # 3) Parse to SOC rows
        soc_rows = self._parse_distribution_to_soc_rows(raw_dist)

        return {
            "drug": chosen["label"],
            "token": chosen["token"],
            # Sum of per-SOC counts, with missing counts treated as 0.
            # NOTE(review): if one report can appear under several SOCs this
            # sum may exceed the number of distinct reports — confirm.
            "total_reports": sum(r.adr_count or 0 for r in soc_rows),
            "soc_rows": [
                {"soc": r.soc, "percent": r.percent, "adr_count": r.adr_count}
                for r in soc_rows
            ],
            "source": "VigiAccess (scraper no-oficial)",
        }

WHO-UMC Official API

# Base URL of the WHO-UMC ICSR statistics API. It can be overridden with the
# documented VIGIACCESS_API_BASE_URL variable; an unset or empty value falls
# back to the public endpoint. Trailing slashes are dropped because request
# URLs are built as f"{API_BASE}/{path}".
API_BASE = (
    os.getenv("VIGIACCESS_API_BASE_URL")
    or "https://api.who-umc.org/vigibase/icsrstatistics"
).rstrip("/")
# API key sent with every request. Stray whitespace (e.g. a trailing newline
# from a .env file) is stripped; an empty string means "not configured".
API_KEY = os.getenv("VIGIACCESS_API_KEY", "").strip()

async def get_dashboard_tables_for_tradename(tradename: str) -> Dict[str, Any]:
    """
    Collect the dashboard distribution tables for a trade name.

    Each table is requested on its own; a failure on one table is recorded
    under "errors" and does not stop the others.

    Returns:
        {
            "query": tradename,
            "geo": [{label, count, percent}, ...],
            "sex": [{label, count, percent}, ...],
            "age": [{label, count, percent}, ...],
            "year": [{label, count, percent}, ...],
            "errors": {key: error_msg, ...}
        }

    Raises:
        WhoUmcVigiAccessError: If the trade name is blank, or if at least one
            request failed and every table ended up empty.
    """
    query = (tradename or "").strip()
    if query == "":
        raise WhoUmcVigiAccessError("tradename vacío")

    # (result key, API path) pairs, in the order they are requested.
    endpoints = (
        ("geo", "dimensions/continent"),
        ("sex", "dimensions/sex"),
        ("age", "dimensions/agegroup"),
        ("year", "dimensions/year"),
    )

    result: Dict[str, Any] = {"query": query}
    for table, _ in endpoints:
        result[table] = []
    failures: Dict[str, str] = {}

    for table, endpoint in endpoints:
        try:
            payload = await _get_json(endpoint, params={"tradename": query})
            result[table] = _normalize_rows(payload)
        except Exception as exc:
            # Best effort: keep the message and move on to the next table.
            failures[table] = str(exc)

    # Only give up when something failed AND there is nothing to show.
    if failures and not any(len(result[table]) for table, _ in endpoints):
        raise WhoUmcVigiAccessError(f"No pude obtener tablas. Detalle: {failures}")

    result["errors"] = failures
    return result

async def _get_json(path: str, params: Optional[Dict[str, Any]] = None) -> Any:
    """GET ``API_BASE/path`` and return the decoded JSON body.

    Args:
        path: Endpoint path relative to ``API_BASE``; leading slashes are ignored.
        params: Optional query-string parameters.

    Returns:
        The parsed JSON payload.

    Raises:
        WhoUmcVigiAccessError: If the API key is missing, the API answers with
            a status >= 400, or the body is not valid JSON.
    """
    url = f"{API_BASE}/{path.lstrip('/')}"
    # Build headers first so a missing API key fails before a client is opened.
    headers = _headers()

    # Honor the documented VIGIACCESS_API_TIMEOUT setting. Unset, malformed or
    # non-positive values fall back to the previous fixed 30 s.
    try:
        timeout = float(os.getenv("VIGIACCESS_API_TIMEOUT") or 30.0)
    except ValueError:
        timeout = 30.0
    if timeout <= 0:
        timeout = 30.0

    async with httpx.AsyncClient(timeout=timeout) as client:
        r = await client.get(url, headers=headers, params=params or {})
        if r.status_code >= 400:
            raise WhoUmcVigiAccessError(
                f"WHO-UMC API {r.status_code} url={url} params={params} body={r.text[:200]}"
            )
        try:
            return r.json()
        except ValueError as exc:
            # A non-JSON success body (e.g. an HTML gateway page) becomes a
            # domain error rather than a raw decode error.
            raise WhoUmcVigiAccessError(
                f"WHO-UMC API invalid JSON url={url} params={params} body={r.text[:200]}"
            ) from exc

def _headers() -> Dict[str, str]:
    """Return the request headers for the WHO-UMC API.

    Raises:
        WhoUmcVigiAccessError: If ``API_KEY`` is empty.
    """
    if API_KEY:
        headers = {"Accept": "application/json"}
        # The same key is sent under both header names.
        headers["X-API-Key"] = API_KEY
        headers["Ocp-Apim-Subscription-Key"] = API_KEY
        return headers
    raise WhoUmcVigiAccessError("Falta VIGIACCESS_API_KEY en .env")

Data Cleaning

VigiAccess returns strings with invisible Unicode characters:
@staticmethod
def _clean_soc_name(name: str) -> str:
    """
    Remove invisible characters (zero-width, etc.) from strings.
    Removes category "Cf" (format) characters like ZERO WIDTH SPACE.
    """
    import re
    return re.sub(r"[\u200b-\u200f\u202a-\u202e\u2060-\u206f\ufeff]", "", name).strip()

Configuration

Environment Variables

# Protocol scraper base URL (optional)
VIGIACCESS_BASE_URL=https://www.vigiaccess.org

# WHO-UMC Official API (optional, for official API method)
VIGIACCESS_API_BASE_URL=https://api.who-umc.org/vigibase/icsrstatistics
VIGIACCESS_API_KEY=your_api_key_here
VIGIACCESS_API_TIMEOUT=30

Scraper Configuration

class VigiAccessScraper:
    """Constructor signature of the protocol scraper (body elided in this snippet)."""

    def __init__(
        self,
        base_url: str | None = None,  # Site root; None falls back to https://www.vigiaccess.org
        timeout: float = 15.0,     # Request timeout handed to the HTTP client
        max_retries: int = 2,      # Retry failed requests
    ) -> None:
        ...

API Endpoints

Scrape Drug (Protocol Method)

GET /api/v1/vigiaccess/scrape?q={drug_name}
Response:
{
  "drug": "Atorvastatin",
  "token": "cid-v1:xyz123",
  "total_reports": 45230,
  "soc_rows": [
    {
      "soc": "Musculoskeletal and connective tissue disorders",
      "percent": 28.5,
      "adr_count": 12890
    },
    {
      "soc": "Nervous system disorders",
      "percent": 18.2,
      "adr_count": 8232
    }
  ],
  "source": "VigiAccess (scraper no-oficial)"
}

WHO-UMC API (Official Method)

GET /api/v1/vigiaccess/official?tradename={name}
Response:
{
  "query": "atorvastatin",
  "geo": [
    {"label": "Europe", "count": 15234, "percent": 33.7},
    {"label": "Americas", "count": 12456, "percent": 27.5}
  ],
  "sex": [
    {"label": "Female", "count": 24567, "percent": 54.3},
    {"label": "Male", "count": 20663, "percent": 45.7}
  ],
  "age": [
    {"label": "65-74 years", "count": 14523, "percent": 32.1},
    {"label": "55-64 years", "count": 11234, "percent": 24.8}
  ],
  "year": [
    {"label": "2023", "count": 5234, "percent": 11.6},
    {"label": "2022", "count": 4987, "percent": 11.0}
  ],
  "errors": {}
}

Error Handling

Common issues:
  • No candidates found: Drug name misspelled or not in database
  • Msgpack decode errors: VigiAccess changed response format
  • Cookie expiration: Session cookies expired, retry with new session
  • API key invalid: Check WHO-UMC subscription status
class VigiAccessError(Exception):
    """Errors raised while scraping VigiAccess."""

class WhoUmcVigiAccessError(Exception):
    """Errors raised by the official WHO-UMC API client."""

Usage Examples

Protocol Scraper

from app.services.vigiaccess import VigiAccessScraper

scraper = VigiAccessScraper()
try:
    result = scraper.scrape_drug("atorvastatin")

    print(f"Drug: {result['drug']}")
    print(f"Total reports: {result['total_reports']}")
    print("\nTop 5 SOCs:")
    for row in result['soc_rows'][:5]:
        # percent and adr_count are Optional in SocRow; formatting None with
        # ":.1f" would raise TypeError, so show a placeholder instead.
        percent = "n/a" if row['percent'] is None else f"{row['percent']:.1f}%"
        count = "n/a" if row['adr_count'] is None else row['adr_count']
        print(f"  {row['soc']}: {percent} ({count} reports)")
finally:
    # Close the scraper even when the scrape fails.
    scraper.close()

WHO-UMC API

from app.services.who_umc_vigiaccess_api import get_dashboard_tables_for_tradename
import asyncio

async def main() -> None:
    """Fetch the WHO-UMC tables for "atorvastatin" and print a short summary."""
    result = await get_dashboard_tables_for_tradename("atorvastatin")

    print(f"Query: {result['query']}")
    print("\nGeographic distribution:")
    for row in result['geo'][:5]:
        print(f"  {row['label']}: {row['percent']:.1f}% ({row['count']} reports)")

    # Per-table failures are returned under "errors" rather than raised.
    if result['errors']:
        print(f"\nWarnings: {result['errors']}")


# Guarded so importing this module does not trigger network calls.
if __name__ == "__main__":
    asyncio.run(main())

DIGEMID

Peruvian regulatory authority

FDA

US FDA device recalls

EMA

European Medicines Agency

Build docs developers (and LLMs) love