Overview
The Embeddings API converts text into vector representations (embeddings) that capture semantic meaning. These embeddings power Memori’s semantic search and memory recall capabilities.
embed_texts()
Generate embeddings for one or more text inputs.
Parameters
A single text string, or a list of text strings, to embed. Examples:
"Hello world"
["Hello world", "Goodbye world"]
["Short text", "A much longer piece of text with more content"]
If True, returns an awaitable that runs embedding in a threadpool executor. Useful for async frameworks. Note: the parameter name is async_ (with a trailing underscore) to avoid a conflict with Python's async keyword.
Returns
Return type: list[list[float]] | Awaitable[list[list[float]]]
List of embedding vectors, one per input text. Each embedding is a list of floats.
- Synchronous mode (async_=False): returns list[list[float]]
- Asynchronous mode (async_=True): returns Awaitable[list[list[float]]]
The length of each embedding vector depends on the model:
- all-MiniLM-L6-v2 (default): 384 dimensions
- Other models: varies (typically 384-1536 dimensions)
Usage Examples
Basic Embedding
from memori import Memori
mem = Memori()
# Embed a single text
embedding = mem.embed_texts("Hello world")
print(len(embedding)) # 1 (one text)
print(len(embedding[0])) # 384 (embedding dimension)
print(type(embedding[0])) # <class 'list'>
print(type(embedding[0][0])) # <class 'float'>
Multiple Texts
from memori import Memori
mem = Memori()
texts = [
"The quick brown fox",
"jumps over the lazy dog",
"Machine learning is fascinating"
]
embeddings = mem.embed_texts(texts)
print(len(embeddings)) # 3 (three texts)
print(len(embeddings[0])) # 384 (each embedding)
# Access individual embeddings
for i, text in enumerate(texts):
print(f"Text: {text}")
print(f"Embedding: {embeddings[i][:5]}...") # Show first 5 dimensions
Asynchronous Embedding
import asyncio
from memori import Memori
mem = Memori()
async def embed_async():
# Returns awaitable
embeddings = await mem.embed_texts(
["First text", "Second text"],
async_=True
)
print(f"Generated {len(embeddings)} embeddings")
return embeddings
# Run async function
embeddings = asyncio.run(embed_async())
print(len(embeddings)) # 2
FastAPI Integration
from fastapi import FastAPI
from memori import Memori
from pydantic import BaseModel
app = FastAPI()
mem = Memori()
class EmbedRequest(BaseModel):
texts: list[str]
class EmbedResponse(BaseModel):
embeddings: list[list[float]]
count: int
dimensions: int
@app.post("/embed", response_model=EmbedResponse)
async def embed_endpoint(request: EmbedRequest):
# Use async embedding
embeddings = await mem.embed_texts(request.texts, async_=True)
return EmbedResponse(
embeddings=embeddings,
count=len(embeddings),
dimensions=len(embeddings[0]) if embeddings else 0
)
Custom Embedding Model
import os
from memori import Memori
# Set model via environment variable
os.environ["MEMORI_EMBEDDINGS_MODEL"] = "all-MiniLM-L6-v2"
mem = Memori()
# Or set via config
mem = Memori()
mem.config.embeddings.model = "all-MiniLM-L6-v2"
embeddings = mem.embed_texts("Hello world")
Direct API Usage
You can also use the embed_texts function directly:
from memori.embeddings import embed_texts
# Synchronous
embeddings = embed_texts(
texts=["Hello", "World"],
model="all-MiniLM-L6-v2"
)
print(len(embeddings)) # 2
Semantic Similarity
import numpy as np
from memori import Memori
mem = Memori()
def cosine_similarity(a, b):
"""Calculate cosine similarity between two vectors."""
a = np.array(a)
b = np.array(b)
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
# Embed texts
texts = [
"I love programming",
"Coding is my passion",
"The weather is nice today"
]
embeddings = mem.embed_texts(texts)
# Calculate similarity
sim_1_2 = cosine_similarity(embeddings[0], embeddings[1])
sim_1_3 = cosine_similarity(embeddings[0], embeddings[2])
print(f"Similarity between text 1 and 2: {sim_1_2:.3f}") # High (similar meaning)
print(f"Similarity between text 1 and 3: {sim_1_3:.3f}") # Low (different topics)
Batch Processing
from memori import Memori
mem = Memori()
def embed_large_dataset(texts: list[str], batch_size: int = 32):
"""Embed texts in batches."""
all_embeddings = []
for i in range(0, len(texts), batch_size):
batch = texts[i:i + batch_size]
embeddings = mem.embed_texts(batch)
all_embeddings.extend(embeddings)
print(f"Processed {min(i + batch_size, len(texts))}/{len(texts)} texts")
return all_embeddings
# Process 1000 texts in batches of 32
large_dataset = [f"Text {i}" for i in range(1000)]
embeddings = embed_large_dataset(large_dataset, batch_size=32)
print(f"Generated {len(embeddings)} embeddings")
Advanced Usage
TEI (Text Embeddings Inference)
Memori supports TEI for high-performance embedding generation:
from memori.embeddings import embed_texts, TEI
# Configure TEI endpoint
tei = TEI(url="http://localhost:8080")
embeddings = embed_texts(
texts=["Hello world"],
model="all-MiniLM-L6-v2",
tei=tei,
chunk_size=128
)
When storing embeddings in databases, use the formatting utility:
from memori.embeddings import embed_texts, format_embedding_for_db
embedding = embed_texts("Hello world", model="all-MiniLM-L6-v2")[0]
# Format for PostgreSQL
pg_embedding = format_embedding_for_db(embedding, dialect="postgresql")
# Format for MongoDB
mongo_embedding = format_embedding_for_db(embedding, dialect="mongodb")
# Format for OceanBase
ocean_embedding = format_embedding_for_db(embedding, dialect="oceanbase")
Direct Recall API Usage
The embed_texts function is used internally by the recall API:
from memori import Memori
mem = Memori()
mem.attribution(entity_id="user-123")
# This internally:
# 1. Calls mem.embed_texts("user preferences")
# 2. Searches for similar embeddings in storage
# 3. Returns matching facts
facts = mem.recall("user preferences", limit=5)
Configuration
Embedding Model
import os
from memori import Memori
# Via environment variable
os.environ["MEMORI_EMBEDDINGS_MODEL"] = "all-MiniLM-L6-v2"
mem = Memori()
# Via config
mem = Memori()
mem.config.embeddings.model = "all-MiniLM-L6-v2"
print(mem.config.embeddings.model) # "all-MiniLM-L6-v2"
Supported Models
Memori uses sentence-transformers models. Popular options:
all-MiniLM-L6-v2 (default) - 384 dimensions, fast, good quality
all-mpnet-base-v2 - 768 dimensions, higher quality
multi-qa-MiniLM-L6-cos-v1 - 384 dimensions, optimized for Q&A
Embedding Performance
Embedding performance depends on:
- Model size (larger = slower but better quality)
- Text length (longer = slower)
- Number of texts (batch processing is efficient)
- Hardware (GPU vs CPU)
import time
from memori import Memori
mem = Memori()
texts = [f"Sample text {i}" for i in range(100)]
start = time.time()
embeddings = mem.embed_texts(texts)
end = time.time()
print(f"Embedded {len(texts)} texts in {end - start:.2f} seconds")
print(f"Average: {(end - start) / len(texts) * 1000:.2f} ms per text")
import asyncio
from memori import Memori
mem = Memori()
async def embed_multiple_batches():
batches = [
["Text 1", "Text 2"],
["Text 3", "Text 4"],
["Text 5", "Text 6"]
]
# Process batches concurrently
tasks = [
mem.embed_texts(batch, async_=True)
for batch in batches
]
results = await asyncio.gather(*tasks)
return results
# Run
all_embeddings = asyncio.run(embed_multiple_batches())
print(f"Generated {len(all_embeddings)} batches")
Error Handling
from memori import Memori
mem = Memori()
try:
# Empty input
embeddings = mem.embed_texts([])
print(embeddings) # Returns []
# None input
embeddings = mem.embed_texts(None)
except Exception as e:
print(f"Error: {e}")
Best Practices
1. Batch Processing
Process multiple texts at once for efficiency:
# Good: Batch processing
texts = ["Text 1", "Text 2", "Text 3"]
embeddings = mem.embed_texts(texts)
# Avoid: One at a time
embeddings = [mem.embed_texts(text) for text in texts]
2. Use Async in Web Applications
# Good: Async in FastAPI/async frameworks
embeddings = await mem.embed_texts(texts, async_=True)
# Avoid: Blocking in async context
embeddings = mem.embed_texts(texts) # Blocks event loop
3. Choose Appropriate Model
# For speed: all-MiniLM-L6-v2
mem.config.embeddings.model = "all-MiniLM-L6-v2"
# For quality: all-mpnet-base-v2
mem.config.embeddings.model = "all-mpnet-base-v2"
4. Cache Embeddings
If embedding the same texts repeatedly:
from functools import lru_cache
from memori import Memori
mem = Memori()
@lru_cache(maxsize=1000)
def cached_embed(text: str) -> tuple[float, ...]:
"""Cache embeddings for repeated texts."""
embedding = mem.embed_texts(text)[0]
return tuple(embedding) # Tuples are hashable
# Reuse cached results
embed1 = cached_embed("Hello world")
embed2 = cached_embed("Hello world") # Returns cached result
See Also