Skip to main content

Overview

Memori performs memory operations asynchronously to avoid blocking your application. This guide covers async patterns and best practices.

Async LLM Calls

import asyncio

from openai import AsyncOpenAI

from memori import Memori


async def chat_async() -> None:
    """Make one async LLM call with Memori memory capture enabled."""
    client = AsyncOpenAI()
    # Register the client so Memori records this conversation.
    mem = Memori().llm.register(client)
    mem.attribution(entity_id="user-123", process_id="async-app")

    # Make async LLM call
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "What's the weather like?"}],
    )

    print(response.choices[0].message.content)

    # Wait for background memory processing to actually finish instead of
    # sleeping an arbitrary interval; run the blocking wait off the event loop.
    await asyncio.to_thread(mem.augmentation.wait, timeout=30)


asyncio.run(chat_async())

Async Recall

Manually recall memories asynchronously.
import asyncio

from memori import Memori


async def recall_memories():
    """Recall stored facts for a user and print each with its score."""
    mem = Memori()
    mem.attribution(entity_id="user-123")

    # Note: Python recall is currently synchronous but non-blocking
    # It runs in the background thread pool
    recalled = mem.recall("What are my preferences?")

    for item in recalled:
        score = item.get('score', 0)
        print(f"- {item['content']}")
        print(f"  Score: {score:.2f}")


asyncio.run(recall_memories())

Concurrent Operations

Handle multiple memory operations in parallel.
import asyncio

from openai import AsyncOpenAI

from memori import Memori


async def process_multiple_users() -> None:
    """Serve several users concurrently, each with isolated attribution."""
    client = AsyncOpenAI()

    async def handle_user(user_id: str, message: str) -> dict:
        # One Memori instance per user keeps attribution isolated.
        mem = Memori().llm.register(client)
        mem.attribution(entity_id=user_id, process_id="concurrent-app")

        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": message}],
        )

        # Deterministically wait for this user's memory processing instead of
        # a fixed sleep at the end; the blocking wait runs off the event loop.
        await asyncio.to_thread(mem.augmentation.wait, timeout=30)

        return {
            "user_id": user_id,
            "response": response.choices[0].message.content,
        }

    # Process multiple users concurrently
    tasks = [
        handle_user("user-1", "What's my favorite color?"),
        handle_user("user-2", "What do I like to eat?"),
        handle_user("user-3", "Where do I live?"),
    ]

    results = await asyncio.gather(*tasks)

    for result in results:
        print(f"{result['user_id']}: {result['response']}")


asyncio.run(process_multiple_users())

Waiting for Augmentation

In short-lived applications (CLI tools, scripts), wait for memory processing to complete.
from openai import OpenAI

from memori import Memori


def main():
    """CLI entry point: one chat turn, then block until memory is stored."""
    llm = OpenAI()
    mem = Memori().llm.register(llm)
    mem.attribution(entity_id="user-123", process_id="cli-tool")

    completion = llm.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Remember: I prefer dark mode"}],
    )
    print(completion.choices[0].message.content)

    # Wait for augmentation to complete before exiting — CRITICAL for
    # short-lived scripts, otherwise pending memory work may be dropped.
    mem.augmentation.wait(timeout=30)  # Wait up to 30 seconds


if __name__ == "__main__":
    main()

Async Web Server

In long-running servers, you don’t need to wait for augmentation.
import asyncio
from contextlib import asynccontextmanager

from fastapi import FastAPI
from openai import AsyncOpenAI

from memori import Memori


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan handler — replaces the deprecated @app.on_event("shutdown")."""
    yield
    # Graceful shutdown: give pending augmentations time to flush.
    await asyncio.sleep(5)


app = FastAPI(lifespan=lifespan)


@app.post("/chat")
async def chat(user_id: str, message: str):
    """Handle one chat turn; memory processing continues in the background."""
    client = AsyncOpenAI()
    mem = Memori().llm.register(client)
    mem.attribution(entity_id=user_id, process_id="web-server")

    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": message}],
    )

    # No need to wait - the long-running server keeps processing in background
    return {"response": response.choices[0].message.content}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

Background Tasks

Process memories in background tasks for better performance.
import asyncio

from celery import Celery
from openai import AsyncOpenAI

from memori import Memori

celery = Celery('tasks', broker='redis://localhost:6379')


@celery.task
def process_conversation(user_id: str, message: str) -> str:
    """Celery task: run one chat turn and wait for memory augmentation."""

    async def _process() -> str:
        client = AsyncOpenAI()
        mem = Memori().llm.register(client)
        mem.attribution(entity_id=user_id, process_id="background-task")

        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": message}],
        )

        # The worker process may be short-lived per task, so wait for the
        # augmentation to complete rather than sleeping a fixed interval.
        await asyncio.to_thread(mem.augmentation.wait, timeout=30)

        return response.choices[0].message.content

    return asyncio.run(_process())


# Usage — guarded so importing this module does not enqueue a task.
if __name__ == "__main__":
    result = process_conversation.delay("user-123", "Hello!")

Custom Embeddings (Async)

Generate embeddings asynchronously for better performance.
import asyncio

from memori import Memori


async def generate_embeddings():
    """Start embedding generation in a thread pool, then await the result."""
    mem = Memori()

    sample_texts = [
        "Machine learning is fascinating",
        "I love neural networks",
        "Deep learning powers AI",
    ]

    # async_=True makes this return an awaitable; the work runs in a
    # thread pool without blocking the event loop.
    pending = mem.embed_texts(sample_texts, async_=True)
    vectors = await pending

    print(f"Generated {len(vectors)} embeddings")
    print(f"First embedding dimension: {len(vectors[0])}")


asyncio.run(generate_embeddings())

Error Handling

Handle async errors gracefully.
import asyncio

from openai import AsyncOpenAI

from memori import Memori


async def safe_chat(user_id: str, message: str):
    """Run one chat turn; return the reply text, or None on any failure."""
    try:
        llm = AsyncOpenAI()
        mem = Memori().llm.register(llm)
        mem.attribution(entity_id=user_id, process_id="error-handling")

        completion = await llm.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": message}],
        )
        return completion.choices[0].message.content
    except Exception as e:
        print(f"Error: {e}")
        # Memory operations continue in background even if LLM call fails
        return None


async def main():
    reply = await safe_chat("user-123", "Hello!")
    if reply:
        print(reply)


asyncio.run(main())

Best Practices

Wait in Scripts

Always call mem.augmentation.wait() in short-lived CLI tools and scripts.

Don't Wait in Servers

Long-running servers don’t need to wait - memory processing happens in background.

Parallel Operations

Use asyncio.gather() in Python (or Promise.all() in JavaScript) to run multiple operations concurrently.

Handle Errors

Wrap async operations in try/except blocks (try-catch in JavaScript) to prevent unhandled exceptions from escaping your tasks.

Performance Tips

1

Use Async Clients

Always use AsyncOpenAI / AsyncAnthropic for async applications.
2

Avoid Blocking Operations

Memori operations are async - don’t block the event loop with synchronous code.
3

Batch Operations

Process multiple users concurrently with asyncio.gather() in Python (or Promise.all() in JavaScript).
4

Monitor Memory Usage

In high-throughput apps, monitor memory usage as Memori buffers conversations.

Next Steps

Custom Embeddings

Use custom embedding models

Basic Memory

Review basic memory operations

Build docs developers (and LLMs) love