Overview
The Recall API allows you to retrieve relevant memories based on semantic search. It uses embeddings to find facts that match your query, enabling context-aware AI applications.
recall()
Retrieve relevant memories based on a query string.
Parameters
The search query to find relevant memories. The query is embedded and compared against stored memory embeddings using semantic similarity. Examples:
"What is the user's favorite color?"
"user preferences"
"previous issues reported by this customer"
Maximum number of facts to return. If not specified, uses config.recall_facts_limit (default: 5). Range: typically 1-100.
Returns
Return type:
list[FactSearchResult | Mapping[str, object] | str]
List of relevant memory facts matching the query. The format depends on the storage backend:
- Local storage (PostgreSQL, etc.): Returns FactSearchResult objects with fields like fact, score, metadata
- Cloud storage: Returns dictionaries or strings containing fact data
Results are sorted by relevance (highest similarity first).
Usage Examples
Basic Recall
from memori import Memori
mem = Memori()
mem.attribution(entity_id="user-123")
facts = mem.recall("What are my preferences?", limit=10)
for fact in facts:
print(fact)
Recall with OpenAI
import os
from openai import OpenAI
from memori import Memori
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
engine = create_engine(os.getenv("DATABASE_CONNECTION_STRING"))
Session = sessionmaker(bind=engine)
mem = Memori(conn=Session).llm.register(client)
mem.attribution(entity_id="user-123", process_id="my-app")
mem.config.storage.build()
# First conversation - create memories
response1 = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{
"role": "user",
"content": "My favorite color is blue and I live in Paris"
}]
)
# Manual recall to check what was stored
facts = mem.recall("user's favorite color", limit=5)
print(f"Found {len(facts)} relevant facts:")
for i, fact in enumerate(facts, 1):
print(f"{i}. {fact}")
# Later conversation - automatic recall happens via LLM integration
response2 = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{
"role": "user",
"content": "What's my favorite color?"
}]
)
print(response2.choices[0].message.content) # "Your favorite color is blue"
mem.augmentation.wait()
Recall with Custom Limit
mem = Memori()
mem.attribution(entity_id="customer-456")
# Get top 3 most relevant facts
top_facts = mem.recall("customer issues", limit=3)
# Get many facts for comprehensive context
all_facts = mem.recall("customer history", limit=50)
Cloud Storage Recall
import os
os.environ["MEMORI_API_KEY"] = "your-api-key"
mem = Memori() # Uses cloud storage
mem.attribution(entity_id="user-789", process_id="my-app")
facts = mem.recall("user preferences", limit=10)
for fact in facts:
print(fact)
Processing Recall Results
from memori import Memori
mem = Memori()
mem.attribution(entity_id="user-123")
facts = mem.recall("user preferences", limit=10)
if not facts:
print("No relevant memories found")
else:
print(f"Found {len(facts)} relevant memories:")
for i, fact in enumerate(facts, 1):
# Cloud storage returns dicts or strings
if isinstance(fact, dict):
print(f"{i}. {fact.get('fact', fact)}")
elif isinstance(fact, str):
print(f"{i}. {fact}")
else:
# Local storage returns FactSearchResult objects
print(f"{i}. {fact}")
Building Context for LLM
from openai import OpenAI
from memori import Memori
client = OpenAI()
mem = Memori()
mem.attribution(entity_id="user-123")
user_query = "What products have I shown interest in?"
# Retrieve relevant memories
facts = mem.recall(user_query, limit=5)
# Build context from facts
context = "\n".join([str(fact) for fact in facts])
# Use context in LLM prompt
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{
"role": "system",
"content": f"Relevant context:\n{context}"
},
{
"role": "user",
"content": user_query
}
]
)
print(response.choices[0].message.content)
Customer Support Agent
from memori import Memori
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
engine = create_engine("sqlite:///support.db")
Session = sessionmaker(bind=engine)
model = OpenAIChat(id="gpt-4o-mini")
mem = Memori(conn=Session).llm.register(openai_chat=model)
mem.config.storage.build()
def handle_customer(customer_id: str, query: str):
mem.attribution(entity_id=customer_id, process_id="support")
# Recall customer history
facts = mem.recall(
f"customer issues and preferences: {query}",
limit=10
)
print(f"Recalled {len(facts)} relevant facts for customer {customer_id}")
# Agent uses recalled context automatically
agent = Agent(
model=model,
instructions=[
"You are a helpful customer support agent.",
"Use customer history to provide personalized support."
]
)
response = agent.run(query)
return response.content
# Usage
response = handle_customer("customer-123", "I need help with my order")
print(response)
Configuration
Recall Limits
mem = Memori()
# Set default fact limit
mem.config.recall_facts_limit = 10
# Set embeddings search limit (affects search performance)
mem.config.recall_embeddings_limit = 2000
# Set relevance threshold (0.0 to 1.0)
mem.config.recall_relevance_threshold = 0.1
facts = mem.recall("query") # Uses recall_facts_limit=10
Environment Variables
# Set embeddings limit via environment
export MEMORI_RECALL_EMBEDDINGS_LIMIT=5000
# Automatically picked up by Memori
mem = Memori()
print(mem.config.recall_embeddings_limit) # 5000
Embedding Model
# Change the embedding model
mem = Memori()
mem.config.embeddings.model = "all-MiniLM-L6-v2"
# Or via environment variable
import os
os.environ["MEMORI_EMBEDDINGS_MODEL"] = "all-MiniLM-L6-v2"
mem = Memori()
How Recall Works
- Query Embedding: Your query is converted to an embedding vector using the configured model
- Similarity Search: The embedding is compared against stored fact embeddings using cosine similarity
- Filtering: Results are filtered by entity_id (from attribution)
- Ranking: Facts are ranked by similarity score
- Limit: Top N facts are returned based on the limit parameter
# Internally, recall does this:
query = "user preferences"
query_embedding = embed_texts(query, model="all-MiniLM-L6-v2")[0]
facts = search_facts(
driver=storage.driver.entity_fact,
entity_id=entity_id,
query_embedding=query_embedding,
limit=5,
embeddings_limit=1000
)
Recall Class (Internal)
The Recall class is used internally by mem.recall(). You typically don’t instantiate it directly, but it’s available if needed:
from memori.memory.recall import Recall
recall = Recall(mem.config)
facts = recall.search_facts(
query="user preferences",
limit=10,
entity_id=None, # Uses config.entity_id
cloud=False # Uses config.cloud
)
Performance Tips
1. Limit Results
Only fetch what you need:
# Good: Focused recall
facts = mem.recall("recent user activity", limit=5)
# Avoid: Fetching too many facts
facts = mem.recall("everything", limit=1000)
2. Embeddings Limit
Adjust recall_embeddings_limit for performance vs. accuracy:
# Fast search, might miss some relevant facts
mem.config.recall_embeddings_limit = 500
# Slower but more comprehensive
mem.config.recall_embeddings_limit = 5000
3. Query Specificity
Use specific queries for better results:
# Good: Specific query
facts = mem.recall("user's favorite programming language")
# Less effective: Vague query
facts = mem.recall("user info")
Error Handling
from memori import Memori
mem = Memori()
try:
# Recall without attribution
facts = mem.recall("query")
# Returns empty list if entity_id not set
print(f"Found {len(facts)} facts")
except Exception as e:
print(f"Recall error: {e}")
Best Practices
1. Set Attribution Before Recall
mem = Memori()
mem.attribution(entity_id="user-123") # Required!
facts = mem.recall("user preferences")
2. Use Appropriate Limits
# For LLM context injection
facts = mem.recall(query, limit=5)
# For comprehensive analysis
facts = mem.recall(query, limit=20)
3. Check for Empty Results
facts = mem.recall("query")
if not facts:
print("No relevant memories found")
else:
# Process facts
pass
4. Combine with LLM Integration
Let Memori handle recall automatically:
from openai import OpenAI
client = OpenAI()
mem = Memori().llm.register(client)
mem.attribution(entity_id="user-123")
# Recall happens automatically during chat
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "What's my favorite color?"}]
)
See Also