Skip to main content

Overview

The Memory API provides persistent semantic storage for AXON programs. It enables remember and recall statements to store and retrieve values during execution.
from axon.runtime.memory_backend import InMemoryBackend

memory = InMemoryBackend()

# Store values
await memory.store(
    key="contract_type",
    value="NDA",
    metadata={"source": "extract_step", "confidence": 0.9}
)

await memory.store(
    key="risk_score",
    value=0.75,
    metadata={"source": "analyze_step"}
)

# Retrieve values
results = await memory.retrieve("contract", top_k=5)
for entry in results:
    print(f"{entry.key}: {entry.value} (score: {entry.score})")

# Output:
# contract_type: NDA (score: 0.7)

Class: MemoryBackend

Abstract base class for semantic memory storage.

Abstract Methods

Subclasses must implement:

async store(key, value, metadata) -> MemoryEntry

Store a value in semantic memory.
key
str
required
The storage key / identifier
value
Any
required
The value to store (any type)
metadata
dict
Optional key-value annotations
Returns: The created MemoryEntry with its timestamp set. Raises: ValueError if key is empty.
entry = await memory.store(
    key="extracted_clauses",
    value=["clause1", "clause2"],
    metadata={
        "source_step": "Extract",
        "confidence": 0.85,
        "type": "list[str]"
    }
)

print(f"Stored: {entry.key} at {entry.timestamp}")

async retrieve(query, top_k, scope) -> list[MemoryEntry]

Retrieve values from semantic memory.
query
str
required
The retrieval query (semantic search in vector backends, key prefix in simple backends)
top_k
int
default: 5
Maximum number of results to return
scope
str
Optional scope filter (memory name / namespace)
Returns: Ordered list of MemoryEntry results, highest relevance first
results = await memory.retrieve(
    query="contract clauses",
    top_k=3,
    scope="legal"
)

for entry in results:
    print(f"Key: {entry.key}")
    print(f"  Value: {entry.value}")
    print(f"  Score: {entry.score:.2f}")
    print(f"  Metadata: {entry.metadata}")

async clear(scope) -> int

Clear stored entries.
scope
str
If provided, only clear entries matching this scope. If None, clear everything.
Returns: The number of entries cleared
# Clear all entries
count = await memory.clear()
print(f"Cleared {count} entries")

# Clear specific scope
count = await memory.clear(scope="legal")
print(f"Cleared {count} legal entries")

Class: MemoryEntry

A single value stored in semantic memory.
class MemoryEntry:
    """A single value stored in semantic memory."""

    key: str  # The storage key / identifier
    value: Any  # The stored value (any type)
    metadata: dict[str, Any]  # Arbitrary key-value annotations (e.g. source step, confidence)
    score: float  # Relevance score (0.0-1.0)
    timestamp: float  # Unix timestamp
    
    def to_dict(self) -> dict[str, Any]:
        """Serialize the entry to a plain dict."""
        ...
key
str
required
The storage key / identifier
value
Any
required
The stored value (any type)
metadata
dict
Arbitrary key-value annotations (e.g., type, source step, confidence)
score
float
Relevance score from retrieval (0.0-1.0). Only meaningful in retrieval results.
timestamp
float
Unix timestamp when the entry was stored
Example:
import json

entry = await memory.store(
    key="risk_assessment",
    value={"score": 0.7, "factors": ["clause_ambiguity"]},
    metadata={"confidence": 0.85}
)

print(json.dumps(entry.to_dict(), indent=2))
# {
#   "key": "risk_assessment",
#   "value": {"score": 0.7, "factors": ["clause_ambiguity"]},
#   "timestamp": 1705329600.0,
#   "metadata": {"confidence": 0.85}
# }

Class: InMemoryBackend

Dict-based memory backend for testing and simple use cases.
from axon.runtime.memory_backend import InMemoryBackend

memory = InMemoryBackend()

Constructor

tracer
Tracer
Optional tracer for recording memory operations
from axon.runtime.tracer import Tracer

tracer = Tracer(program_name="MyProgram", backend_name="test")
memory = InMemoryBackend(tracer=tracer)

Retrieval Scoring

The InMemoryBackend uses simple substring matching:
  • Exact key match: score = 1.0
  • Key contains query: score = 0.7
  • Value string contains query: score = 0.4
await memory.store("contract_type", "NDA")
await memory.store("contract_date", "2024-01-15")
await memory.store("risk_factors", "Contract has ambiguous termination clause")

results = await memory.retrieve("contract")
for entry in results:
    print(f"{entry.key}: {entry.score}")

# Output:
# contract_type: 0.7 (key contains "contract")
# contract_date: 0.7 (key contains "contract")
# risk_factors: 0.4 (value contains "contract")

Properties

entry_count: int

The number of entries currently stored.
await memory.store("key1", "value1")
await memory.store("key2", "value2")

print(memory.entry_count)  # 2

get_all_entries() -> list[MemoryEntry]

Return all stored entries (for testing/debugging).
entries = memory.get_all_entries()
for entry in entries:
    print(f"{entry.key}: {entry.value}")

Usage with Executor

from axon.runtime import Executor, ModelClient
from axon.runtime.memory_backend import InMemoryBackend

# NOTE(review): MyModelClient is assumed to be a user-defined ModelClient
# subclass; it is not defined in this snippet — confirm against the
# Model Client docs.
client = MyModelClient()
memory = InMemoryBackend()

# Pass the memory backend to the executor so remember/recall statements
# resolve against it during execution.
executor = Executor(
    client=client,
    memory=memory
)

result = await executor.execute(compiled_program)

# Memory is automatically populated during execution
print(f"Stored entries: {memory.entry_count}")

AXON Integration

Remember Statement

flow Analyze(doc: Document) -> Report {
    step Extract {
        ask: "Extract key clauses"
    }
    
    remember(Extract.output) -> ContractMemory
}
This translates to:
# In the executor:
extract_result = await call_model(...)
await memory.store(
    key="Extract.output",
    value=extract_result,
    metadata={"scope": "ContractMemory"}
)

Recall Statement

flow Research(topic: String) -> Analysis {
    step Search {
        recall("previous analyses") from ContractMemory
        ask: "Analyze {topic} considering past findings"
    }
}
This translates to:
# In the executor:
recalled = await memory.retrieve(
    query="previous analyses",
    scope="ContractMemory",
    top_k=5
)

# Inject into prompt
prompt = f"Previous findings: {recalled}. Analyze {topic}..."

Creating Custom Backends

Example: Redis Backend

import json
import time
from typing import Any

import redis.asyncio as redis

from axon.runtime.memory_backend import MemoryBackend, MemoryEntry

class RedisMemoryBackend(MemoryBackend):
    """Memory backend that persists entries in Redis hashes.

    Each entry lives under the Redis key ``memory:<key>`` with the value
    and metadata JSON-serialized into hash fields. Scope is read from the
    entry's metadata ``"scope"`` field, matching how the executor records
    it (``metadata={"scope": ...}``).
    """

    def __init__(self, redis_url: str = "redis://localhost"):
        self.redis = redis.from_url(redis_url)

    async def store(
        self,
        key: str,
        value: Any,
        metadata: dict[str, Any] | None = None,
    ) -> MemoryEntry:
        """Store *value* under *key* and return the created entry.

        Raises:
            ValueError: if *key* is empty.
        """
        if not key:
            raise ValueError("Memory key must not be empty")

        entry = MemoryEntry(
            key=key,
            value=value,
            metadata=metadata or {},
            timestamp=time.time(),
        )

        # Serialize into a hash so fields can be read back individually.
        data = {
            "value": json.dumps(value),
            "metadata": json.dumps(metadata or {}),
            "timestamp": entry.timestamp,
        }

        await self.redis.hset(f"memory:{key}", mapping=data)
        return entry

    async def retrieve(
        self,
        query: str,
        top_k: int = 5,
        scope: str | None = None,
    ) -> list[MemoryEntry]:
        """Return up to *top_k* entries whose key contains *query*.

        If *scope* is given, only entries whose stored metadata carries a
        matching ``"scope"`` value are returned. (The original example
        accepted *scope* but silently ignored it.)
        """
        pattern = f"memory:*{query}*"
        entries: list[MemoryEntry] = []
        async for raw in self.redis.scan_iter(match=pattern):
            if len(entries) >= top_k:
                break
            redis_key = raw.decode() if isinstance(raw, bytes) else raw
            data = await self.redis.hgetall(redis_key)
            if not data:
                # Key vanished between scan and read; skip rather than
                # consuming a top_k slot (the original sliced keys first).
                continue
            metadata = json.loads(data[b"metadata"])
            if scope is not None and metadata.get("scope") != scope:
                continue
            entries.append(
                MemoryEntry(
                    key=redis_key.removeprefix("memory:"),
                    value=json.loads(data[b"value"]),
                    metadata=metadata,
                    score=1.0,  # Simple: every key match is treated as exact
                    timestamp=float(data[b"timestamp"]),
                )
            )
        return entries

    async def clear(self, scope: str | None = None) -> int:
        """Delete stored entries and return how many were removed.

        With *scope*, only entries whose metadata ``"scope"`` matches are
        deleted (consistent with retrieve()); otherwise everything under
        ``memory:*`` is removed.
        """
        count = 0
        async for raw in self.redis.scan_iter(match="memory:*"):
            redis_key = raw.decode() if isinstance(raw, bytes) else raw
            if scope is not None:
                data = await self.redis.hgetall(redis_key)
                metadata = json.loads(data[b"metadata"]) if data else {}
                if metadata.get("scope") != scope:
                    continue
            await self.redis.delete(redis_key)
            count += 1
        return count

Example: Vector Database Backend (Pinecone)

from axon.runtime.memory_backend import MemoryBackend, MemoryEntry
from pinecone import Pinecone
import openai
import time

class PineconeMemoryBackend(MemoryBackend):
    """Vector-search memory backend using Pinecone + OpenAI embeddings.

    NOTE(review): the snippet's signatures use ``Any`` but the imports
    shown do not include ``from typing import Any`` — add it.
    """

    def __init__(self, api_key: str, index_name: str, openai_key: str):
        self.pc = Pinecone(api_key=api_key)
        self.index = self.pc.Index(index_name)
        self.openai_client = openai.OpenAI(api_key=openai_key)
    
    def _embed(self, text: str) -> list[float]:
        """Embed *text* with OpenAI's text-embedding-3-small model.

        NOTE(review): this is a blocking network call invoked from async
        methods, so it will stall the event loop — consider the async
        OpenAI client or running it in a thread. Verify before shipping.
        """
        response = self.openai_client.embeddings.create(
            model="text-embedding-3-small",
            input=text
        )
        return response.data[0].embedding
    
    async def store(
        self,
        key: str,
        value: Any,
        metadata: dict[str, Any] | None = None,
    ) -> MemoryEntry:
        """Embed the entry and upsert it into the Pinecone index.

        Raises:
            ValueError: if *key* is empty.
        """
        if not key:
            raise ValueError("Memory key must not be empty")
        
        # Generate embedding from key + stringified value
        text = f"{key} {value}"
        embedding = self._embed(text)
        
        # Store in Pinecone.
        # NOTE(review): retrieve()/clear() filter on a top-level "scope"
        # metadata field, but store() never writes one — scoped queries
        # will match nothing unless scope is added to this upsert payload.
        # Also, Pinecone metadata values are limited to strings, numbers,
        # booleans, and lists of strings; the nested "metadata" dict below
        # may be rejected by the API — verify.
        self.index.upsert(
            vectors=[(
                key,
                embedding,
                {
                    "value": str(value),
                    "metadata": metadata or {},
                    "timestamp": time.time(),
                }
            )]
        )
        
        return MemoryEntry(
            key=key,
            value=value,
            metadata=metadata or {},
            timestamp=time.time(),
        )
    
    async def retrieve(
        self,
        query: str,
        top_k: int = 5,
        scope: str | None = None,
    ) -> list[MemoryEntry]:
        """Embed *query* and return the *top_k* nearest stored entries."""
        # Semantic search using query embedding
        query_embedding = self._embed(query)
        
        filter_dict = {"scope": scope} if scope else None
        
        results = self.index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True,
            filter=filter_dict
        )
        
        entries = []
        for match in results.matches:
            # NOTE(review): values were stringified by store(), so the
            # original Python object is not recoverable here.
            entry = MemoryEntry(
                key=match.id,
                value=match.metadata["value"],
                metadata=match.metadata.get("metadata", {}),
                score=match.score,
                timestamp=match.metadata.get("timestamp", 0.0),
            )
            entries.append(entry)
        
        return entries
    
    async def clear(self, scope: str | None = None) -> int:
        """Delete entries (all, or those matching *scope*).

        Always returns 0: Pinecone's delete API does not report how many
        vectors were removed.
        """
        if scope:
            # Delete by metadata filter
            self.index.delete(filter={"scope": scope})
        else:
            # Delete all
            self.index.delete(delete_all=True)
        
        # Pinecone doesn't return count, estimate
        return 0

Testing Memory Backends

import pytest
from axon.runtime.memory_backend import InMemoryBackend

@pytest.mark.asyncio
async def test_store_and_retrieve():
    """Round-trip a single entry through the in-memory backend."""
    backend = InMemoryBackend()

    stored = await backend.store(
        key="test_key",
        value="test_value",
        metadata={"source": "test"},
    )

    # The returned entry echoes exactly what was stored.
    assert stored.key == "test_key"
    assert stored.value == "test_value"
    assert stored.metadata["source"] == "test"

    # A substring query finds the entry again.
    hits = await backend.retrieve("test")
    assert len(hits) > 0
    assert hits[0].key == "test_key"
@pytest.mark.asyncio
async def test_clear():
    """clear() removes every entry and reports how many were deleted."""
    backend = InMemoryBackend()
    for name, val in (("key1", "value1"), ("key2", "value2")):
        await backend.store(name, val)

    removed = await backend.clear()

    assert removed == 2
    assert backend.entry_count == 0

@pytest.mark.asyncio
async def test_scoped_retrieval():
    """A scope filter restricts results to the matching namespace."""
    backend = InMemoryBackend()
    await backend.store("legal_doc", "contract", {"scope": "legal"})
    await backend.store("tech_doc", "specification", {"scope": "tech"})

    hits = await backend.retrieve("doc", scope="legal")

    # Only the legal-scoped entry should come back.
    assert [entry.key for entry in hits] == ["legal_doc"]

Complete Example

import asyncio
from axon.runtime import Executor, ModelClient, ModelResponse
from axon.runtime.memory_backend import InMemoryBackend

class SimpleClient(ModelClient):
    """Minimal model client that echoes the user prompt back."""

    async def call(self, system_prompt: str, user_prompt: str, **kwargs):
        return ModelResponse(content=f"Response to: {user_prompt}")

async def main():
    """Demonstrate remember/recall against an InMemoryBackend."""
    # Setup
    client = SimpleClient()
    memory = InMemoryBackend()
    
    executor = Executor(client=client, memory=memory)
    
    # Simulate remember
    await memory.store(
        key="previous_analysis",
        value={"conclusion": "High risk contract", "score": 0.8},
        metadata={"step": "Analyze", "flow": "ContractReview"}
    )
    
    # Simulate recall
    results = await memory.retrieve("analysis", top_k=3)
    
    for entry in results:
        print(f"Found: {entry.key}")
        print(f"  Value: {entry.value}")
        print(f"  Score: {entry.score}")
        print(f"  From: {entry.metadata.get('flow')}")
    
    # Statistics
    print(f"\nTotal entries: {memory.entry_count}")

# `await` is only valid inside a coroutine; at module level the example
# must be driven by the event loop (this is why asyncio is imported).
asyncio.run(main())

Next Steps

Executor API

Use memory in program execution

Context API

Manage execution state between steps

Build docs developers (and LLMs) love