While Convex handles real-time updates for the live demo, JARVIS uses MongoDB Atlas for persistent storage of:
Raw capture images and embeddings
Archived dossiers for cross-session memory
Face embedding vectors for similarity search
Historical intelligence data
MongoDB is optional in the current implementation. The system works with Convex alone for real-time demo purposes. MongoDB enables persistent memory and vector search for production deployments.
Architecture
JARVIS uses a DatabaseGateway protocol to abstract storage operations:
from typing import Protocol, Any
class DatabaseGateway ( Protocol ):
@ property
def configured ( self ) -> bool : ...
async def store_person ( self , person_id : str , data : dict[ str , Any]) -> str :
"""Persist a person record."""
...
async def get_person ( self , person_id : str ) -> dict[ str , Any] | None :
"""Retrieve a person record by ID."""
...
async def update_person ( self , person_id : str , data : dict[ str , Any]) -> None :
"""Merge new data into existing person record."""
...
async def store_capture ( self , capture_id : str , metadata : dict[ str , Any]) -> str :
"""Persist capture metadata."""
...
Three implementations exist:
ConvexGateway — Real-time updates via Convex HTTP API
InMemoryDatabaseGateway — Local testing without external dependencies
MongoDBGateway — (Planned) Persistent storage with vector search
Configuration
Set the MongoDB connection string in your environment:
MONGODB_URI=mongodb+srv://username:password@cluster0.mongodb.net/jarvis?retryWrites=true&w=majority
The backend automatically detects MongoDB availability:
from pydantic import Field
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings loaded from environment variables."""

    # Optional: when MONGODB_URI is absent, MongoDB features are disabled
    # and the system runs on Convex alone.
    mongodb_uri: str | None = Field(default=None, alias="MONGODB_URI")

    def service_flags(self) -> dict[str, bool]:
        """Return a map of each optional service to whether it is configured."""
        return {
            "mongodb": bool(self.mongodb_uri),
            # ... other services
        }
Check MongoDB status:
curl http://localhost:8000/api/services
Response:
[
{
"name" : "mongodb" ,
"configured" : true ,
"notes" : "Persistent raw captures and dossiers"
}
]
Data Models
Persons Collection
Stores enriched person records with embeddings:
{
"_id" : ObjectId( "..." ),
"person_id" : "person_abc123" ,
"name" : "Alice Smith" ,
"photo_url" : "https://storage.example.com/faces/abc123.jpg" ,
"confidence" : 0.95 ,
"embedding" : [ 0.123 , - 0.456 , ... ], # 512-dim ArcFace vector
"status" : "enriched" ,
"dossier" : {
"summary" : "AI researcher at OpenAI. Stanford PhD." ,
"title" : "Research Scientist" ,
"company" : "OpenAI" ,
"work_history" : [
{
"role" : "Research Scientist" ,
"company" : "OpenAI" ,
"period" : "2022-present"
}
],
"education" : [
{
"school" : "Stanford University" ,
"degree" : "PhD Computer Science"
}
],
"social_profiles" : {
"linkedin" : "https://linkedin.com/in/alicesmith" ,
"twitter" : "https://twitter.com/alicesmith" ,
"github" : "https://github.com/alicesmith"
},
"notable_activity" : [
"Published 15 papers on transformer architectures" ,
"Co-authored GPT-4 technical report"
],
"conversation_hooks" : [
"Ask about recent work on multimodal models" ,
"Discuss Stanford AI Lab research"
],
"risk_flags" : []
},
"created_at" : ISODate( "2024-01-15T10:30:00Z" ),
"updated_at" : ISODate( "2024-01-15T10:45:00Z" )
}
Captures Collection
Stores raw capture metadata and image references:
{
"_id" : ObjectId( "..." ),
"capture_id" : "cap_xyz789" ,
"image_url" : "s3://jarvis-captures/2024/01/15/xyz789.jpg" ,
"source" : "glasses" ,
"timestamp" : ISODate( "2024-01-15T10:30:00Z" ),
"status" : "identified" ,
"person_id" : "person_abc123" ,
"detection_metadata" : {
"face_count" : 1 ,
"bbox" : [ 120 , 80 , 300 , 260 ],
"landmarks" : { ... }
}
}
Vector Search
MongoDB Atlas supports vector search for finding similar faces using embeddings:
Create Vector Search Index
// In MongoDB Atlas UI or via API
{
"name" : "face_embedding_index" ,
"type" : "vectorSearch" ,
"definition" : {
"fields" : [
{
"type" : "vector" ,
"path" : "embedding" ,
"numDimensions" : 512 ,
"similarity" : "cosine"
}
]
}
}
Similarity Search Example
from motor.motor_asyncio import AsyncIOMotorClient
class MongoDBGateway:
    """Persistent storage backed by MongoDB Atlas via Motor (async driver)."""

    def __init__(self, uri: str):
        self.client = AsyncIOMotorClient(uri)
        self.db = self.client.jarvis
        self.persons = self.db.persons

    async def find_similar_faces(
        self,
        embedding: list[float],
        limit: int = 5,
        min_score: float = 0.7,
    ) -> list[dict]:
        """Find persons with similar face embeddings.

        Runs an Atlas ``$vectorSearch`` against ``face_embedding_index``
        and drops weak matches below ``min_score``.

        Args:
            embedding: Query face vector (same embedding space as stored docs).
            limit: Maximum number of matches to return.
            min_score: Minimum similarity score required to keep a match.

        Returns:
            Up to ``limit`` projected person documents, each carrying a
            ``score`` field from the vector search.
        """
        pipeline = [
            {
                "$vectorSearch": {
                    "index": "face_embedding_index",
                    "path": "embedding",
                    "queryVector": embedding,
                    "numCandidates": 100,
                    "limit": limit,
                }
            },
            {
                "$project": {
                    "person_id": 1,
                    "name": 1,
                    "photo_url": 1,
                    "confidence": 1,
                    "score": {"$meta": "vectorSearchScore"},
                }
            },
            # NOTE: $match runs after $vectorSearch has already applied its
            # limit, so fewer than `limit` docs may survive the score filter.
            {
                "$match": {
                    "score": {"$gte": min_score},
                }
            },
        ]
        results = await self.persons.aggregate(pipeline).to_list(length=limit)
        return results
Usage in Pipeline
backend/identification/pipeline.py
async def identify_person(
    face_image: bytes,
    embedder: FaceEmbedder,
    db: MongoDBGateway,
) -> dict | None:
    """Identify a person from a cropped face image.

    Embeds the face, then vector-searches MongoDB for the closest stored
    embedding above a high-confidence threshold.

    Args:
        face_image: Raw image bytes containing the face crop.
        embedder: Produces a face embedding from image bytes.
        db: Gateway exposing ``find_similar_faces``.

    Returns:
        The best-matching person document (including its ``score``),
        or ``None`` when no match clears the threshold.
    """
    # Generate embedding from face
    embedding = embedder.embed_from_bytes(face_image)

    # Search for similar faces in MongoDB
    matches = await db.find_similar_faces(
        embedding=embedding,
        limit=3,
        min_score=0.85,  # High confidence threshold
    )

    if matches:
        # Results come back ordered by similarity, so index 0 is best.
        best_match = matches[0]
        logger.info(
            f"Found match: {best_match['name']} "
            f"(score: {best_match['score']:.2f})"
        )
        return best_match
    return None
Dual-Write Strategy
For production, JARVIS uses a dual-write pattern:
Write to Convex — Real-time UI updates
Write to MongoDB — Persistent storage and vector search
backend/db/dual_gateway.py
class DualGateway:
    """Writes to both Convex (real-time) and MongoDB (persistent)."""

    def __init__(self, convex: ConvexGateway, mongo: MongoDBGateway):
        self.convex = convex
        self.mongo = mongo

    async def store_person(self, person_id: str, data: dict) -> str:
        """Persist a person to both backends and return Convex's result.

        MongoDB failures are logged and tolerated (best-effort persistence);
        a Convex failure is re-raised because the live UI depends on it.
        """
        # Write to both in parallel
        results = await asyncio.gather(
            self.convex.store_person(person_id, data),
            self.mongo.store_person(person_id, data),
            return_exceptions=True,
        )

        # Log any MongoDB failures but don't block on them
        if isinstance(results[1], Exception):
            logger.warning(f"MongoDB write failed: {results[1]}")

        # Bug fix: with return_exceptions=True a failed Convex write used to
        # be *returned* as an Exception object; surface it to the caller.
        if isinstance(results[0], Exception):
            raise results[0]
        return results[0]  # Return Convex result
Data Lifecycle
Capture → Storage Flow
Archival Policy
# Archive old captures after 30 days
async def archive_old_captures():
    """Move captures older than 30 days from Convex to MongoDB cold storage.

    Relies on module-level ``convex`` and ``mongo`` clients being configured.
    """
    # NOTE(review): datetime.now() is naive (local time) and .timestamp()
    # converts via the local zone -- confirm capture timestamps use the same
    # convention, or switch to datetime.now(tz=timezone.utc) for clarity.
    cutoff = datetime.now() - timedelta(days=30)

    # Move from Convex to MongoDB cold storage
    old_captures = await convex.query(
        "captures:listAll",
        {"before": cutoff.timestamp()},
    )
    for capture in old_captures:
        # Archive before delete: a crash between the two operations leaves
        # a duplicate rather than a lost capture.
        await mongo.archive_capture(capture)
        await convex.delete_capture(capture["_id"])
Face embeddings are 512-dimensional float32 vectors (~2KB each). Store them as BSON binary for efficient vector search:

import struct
def encode_embedding(embedding: list[float]) -> bytes:
    """Pack an embedding into native-endian float32 bytes (4 bytes per dim)."""
    return struct.pack(f"{len(embedding)}f", *embedding)


def decode_embedding(data: bytes) -> list[float]:
    """Unpack float32 bytes produced by ``encode_embedding`` back to a list."""
    # Each float32 occupies 4 bytes, hence len(data) // 4 dimensions.
    return list(struct.unpack(f"{len(data) // 4}f", data))
Create indexes for common queries:

// Compound index for status queries
db.persons.createIndex({ "status": 1, "updated_at": -1 })
// Text search on names
db.persons.createIndex({ "name": "text" })
// Capture source filtering
db.captures.createIndex({ "source": 1, "timestamp": -1 })
Configure Motor for optimal async performance:

client = AsyncIOMotorClient(
    mongodb_uri,
    maxPoolSize=50,
    minPoolSize=10,
    maxIdleTimeMS=30000,
    serverSelectionTimeoutMS=5000,
)
Migration Scripts
Export from Convex to MongoDB
scripts/export_to_mongo.py
import asyncio
from backend.db.convex_client import ConvexGateway
from backend.db.mongo_client import MongoDBGateway
from backend.config import get_settings
async def export_persons():
    """One-off migration: copy every person record from Convex to MongoDB."""
    settings = get_settings()
    convex = ConvexGateway(settings)
    mongo = MongoDBGateway(settings.mongodb_uri)

    # NOTE(review): _query is a private ConvexGateway method; prefer a public
    # listing API so this script survives gateway refactors.
    persons = await convex._query("persons:listAll", {})
    for person in persons:
        await mongo.store_person(person["_id"], person)
        print(f"Exported {person['name']}")


if __name__ == "__main__":
    asyncio.run(export_persons())
Next: Supermemory Learn about cross-session agent memory using Supermemory