The identification system takes a face image and determines who the person is by searching across multiple face recognition and reverse image search engines. The system implements a waterfall strategy with automatic fallback.

Location: backend/identification/
class FaceSearchManager:
    """Orchestrates face search across PimEyes and reverse image engines.

    Searches run as a two-tier waterfall: PimEyes is tried first, and the
    reverse image searcher is only consulted when PimEyes yields no matches.
    """

    def __init__(self, settings: Settings):
        """Create the two tier searchers; only PimEyes receives ``settings``."""
        self._pimeyes = PimEyesSearcher(settings)
        self._reverse = ReverseImageSearcher()

    async def search_face(self, request: FaceSearchRequest) -> FaceSearchResult:
        """Run the waterfall search and return the first tier with matches.

        Returns the PimEyes result if it succeeded with at least one match,
        otherwise the reverse-search result under the same condition. When
        neither tier produced matches, returns a failed result whose ``error``
        combines the ``error`` fields of both tiers.
        """
        # Tier 1: PimEyes (purpose-built face search)
        logger.info("Face search: trying PimEyes first")
        pimeyes_result = await self._pimeyes.search_face(request)
        if pimeyes_result.success and pimeyes_result.matches:
            logger.info(
                "PimEyes found {} matches, skipping reverse search",
                len(pimeyes_result.matches),
            )
            return pimeyes_result

        # Tier 2: Reverse image search (Google, Yandex, Bing)
        logger.info("PimEyes returned no matches, falling back to reverse search")
        reverse_result = await self._reverse.search_face(request)
        if reverse_result.success and reverse_result.matches:
            logger.info("Reverse search found {} matches", len(reverse_result.matches))
            return reverse_result

        # Both failed — merge errors
        return FaceSearchResult(
            matches=[],
            success=False,
            error=f"PimEyes: {pimeyes_result.error} | ReverseSearch: {reverse_result.error}",
        )

    def best_name_from_results(self, result: FaceSearchResult) -> str | None:
        """Extract most likely person name using frequency analysis.

        Names are compared after stripping surrounding whitespace. Matches
        with a missing or whitespace-only name are ignored, so an empty
        string is never returned. On a tie, the name seen first wins.

        Returns:
            The most frequent name, or ``None`` when no match carries a name.
        """
        if not result.matches:
            return None

        name_counts: dict[str, int] = {}
        for match in result.matches:
            if not match.person_name:
                continue
            name = match.person_name.strip()
            # A whitespace-only name is truthy before stripping but carries
            # no information; counting it could make "" the winning name.
            if not name:
                continue
            name_counts[name] = name_counts.get(name, 0) + 1

        if not name_counts:
            return None

        # Return the most frequent name
        return max(name_counts, key=name_counts.get)
Location: backend/identification/pimeyes.py

PimEyes is a specialized facial recognition search engine. It’s the most accurate for face-based searches but requires careful anti-bot measures.
Anti-Bot Strategy

PimEyes actively blocks automated searches. The system uses account pooling and rate limiting to avoid detection:
Pool of 3-5 PimEyes accounts
Round-robin selection per search
Minimum 30 seconds between uses per account
Exponential backoff on detected blocks
class AccountPool:
    """Manages PimEyes account rotation with rate limiting.

    Each account is a dict with at least the keys ``"email"``, ``"blocked"``
    and ``"last_used"`` (a ``time.time()`` timestamp). ``self._accounts`` and
    ``self._lock`` are expected to be set up outside the code shown here.
    """

    MIN_INTERVAL = 30  # seconds between uses per account

    async def get_account(self) -> dict:
        """Return the first unblocked account whose cooldown has elapsed.

        If every unblocked account is still cooling down, waits for the
        shortest remaining cooldown and tries again.

        Raises:
            RuntimeError: If there is no unblocked account to wait for.
        """
        while True:
            async with self._lock:
                now = time.time()
                for acc in self._accounts:
                    if not acc["blocked"] and (now - acc["last_used"]) > self.MIN_INTERVAL:
                        acc["last_used"] = now
                        return acc

                # All accounts busy — wait for shortest cooldown
                wait_times = [
                    self.MIN_INTERVAL - (now - acc["last_used"])
                    for acc in self._accounts
                    if not acc["blocked"]
                ]
                if not wait_times:
                    raise RuntimeError("All PimEyes accounts blocked")
                delay = max(min(wait_times), 0)

            # Sleep and retry only after the lock is released: asyncio.Lock
            # is not reentrant, so re-entering get_account() while holding it
            # would deadlock, and holding it while asleep would stall every
            # other waiter for the full cooldown.
            await asyncio.sleep(delay)

    def mark_blocked(self, email: str):
        """Flag every account with the given email as blocked."""
        for acc in self._accounts:
            if acc["email"] == email:
                acc["blocked"] = True
class ReverseImageSearcher:
    """Searches face across Google, Yandex, and Bing reverse image engines."""

    async def search_face(self, request: FaceSearchRequest) -> FaceSearchResult:
        """Query all three engines concurrently and merge their matches.

        An engine that fails is logged and skipped; the remaining engines
        still contribute. The result is successful when at least one match
        was collected, and its matches are passed through
        ``self._deduplicate``.
        """
        # Launch all engines in parallel
        tasks = [
            self._search_google(request.image_data),
            self._search_yandex(request.image_data),
            self._search_bing(request.image_data),
        ]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Merge results from all engines
        all_matches = []
        for result in results:
            # gather(return_exceptions=True) can hand back any BaseException,
            # including asyncio.CancelledError (not an Exception subclass).
            # Checking only Exception would let that fall through to
            # extend() and raise TypeError.
            if isinstance(result, BaseException):
                logger.warning("Reverse search engine failed: {}", result)
                continue
            if result:
                all_matches.extend(result)

        return FaceSearchResult(
            matches=self._deduplicate(all_matches),
            success=len(all_matches) > 0,
            error=None if all_matches else "No matches across any engine",
        )
async def _search_google(self, image_data: bytes) -> list[FaceMatch]:
    """Search via Google Lens using Browser Use.

    Runs a headless browser agent with a fixed task prompt, parses the
    agent's final result as a JSON array, and converts each entry into a
    ``FaceMatch`` with ``source="google"`` and a fixed confidence of 0.7.

    Raises:
        Whatever ``agent.run()`` or ``json.loads`` raise, and ``KeyError``
        when an entry has no ``"url"``. The browser is closed before any
        of these propagate.
    """
    # NOTE(review): image_data is never handed to the agent or browser in
    # this function, although the task prompt says "the provided face
    # image" — confirm how the image is supposed to reach the agent.
    browser = Browser(config=BrowserConfig(headless=True))
    try:
        agent = Agent(
            task="""
            1. Go to https://lens.google.com
            2. Upload the provided face image
            3. Look for matches with names or social profiles
            4. Extract: URL, person name (if visible), thumbnail
            5. Return as JSON array
            """,
            llm="gemini-2.0-flash",
            browser=browser,
        )
        result = await agent.run()
    finally:
        # Close the browser even when the agent fails, so a failed search
        # does not leak a headless browser.
        await browser.close()

    parsed = json.loads(result.final_result())
    return [
        FaceMatch(
            url=m["url"],
            person_name=m.get("name"),
            thumbnail_url=m.get("thumbnail"),
            source="google",
            confidence=0.7,  # Google doesn't provide scores
        )
        for m in parsed
    ]
Why not use all engines for every search?

PimEyes is optimized specifically for faces and typically returns better results than generic reverse image search. Running all engines sequentially would add 15-30 seconds per search with diminishing returns. The waterfall strategy provides the best balance of speed and accuracy.
When multiple search results contain different names, the system uses frequency analysis:
def best_name_from_results(self, result: FaceSearchResult) -> str | None:
    """Extract most likely person name using frequency analysis.

    Names are compared after stripping surrounding whitespace. Matches with
    a missing or whitespace-only name are ignored, so an empty string is
    never returned. On a tie, the name seen first wins.

    Returns:
        The most frequent name, or ``None`` when there are no matches or no
        match carries a name.
    """
    if not result.matches:
        return None

    name_counts: dict[str, int] = {}
    for match in result.matches:
        if not match.person_name:
            continue
        name = match.person_name.strip()
        # A whitespace-only name is truthy before stripping but carries no
        # information; counting it could make "" the winning name.
        if not name:
            continue
        name_counts[name] = name_counts.get(name, 0) + 1

    if not name_counts:
        return None

    # Return the most frequent name
    return max(name_counts, key=name_counts.get)
def profile_urls_from_results(self, result: FaceSearchResult) -> list[str]:
    """Extract social profile URLs from search results.

    A URL counts as a social profile when its hostname is one of the known
    social domains or a subdomain of one (e.g. ``www.linkedin.com``).
    Matching on the parsed hostname, rather than on a substring of the whole
    URL, keeps hosts such as ``netflix.com`` from matching ``x.com``.

    Returns:
        The matching URLs in first-seen order, without duplicates.
    """
    from urllib.parse import urlsplit

    social_domains = {
        "linkedin.com", "twitter.com", "x.com",
        "instagram.com", "facebook.com", "github.com",
        "tiktok.com",
    }

    urls: list[str] = []
    seen: set[str] = set()
    for match in result.matches or []:
        url = match.url
        if not url or url in seen:
            continue
        try:
            # Scheme-less URLs ("github.com/user") only get a hostname from
            # urlsplit when prefixed with "//".
            host = urlsplit(url if "://" in url else f"//{url}").hostname or ""
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): not a profile link.
            continue
        if any(host == domain or host.endswith(f".{domain}") for domain in social_domains):
            urls.append(url)
            seen.add(url)
    return urls
Purpose: Feed discovered social profiles directly to the research orchestrator, skipping the agent’s own discovery phase.