Overview
The BaseConfig class in src/remem/utils/config_utils.py contains all configuration parameters for REMem. This guide documents every parameter with examples and recommended values.
Basic Setup
from remem.utils.config_utils import BaseConfig
config = BaseConfig(
    llm_name="gpt-4o-mini",
    embedding_model_name="nvidia/NV-Embed-v2",
    dataset="my_dataset",
)
LLM Parameters
Model Selection
config = BaseConfig(
    # Primary LLM for extraction and QA
    llm_name="gpt-4o-mini",   # Default: "gpt-4o-mini"
    # Separate LLMs for specific tasks
    extract_llm_label=None,   # Defaults to llm_name
    qa_llm_label=None,        # Defaults to llm_name
    # LLM API endpoint
    llm_base_url=None,        # Default: None (uses OpenAI)
    # Azure OpenAI
    use_azure=False,          # Set to True for Azure
)
Generation Parameters
config = BaseConfig(
    # Token limits
    max_new_tokens=2048,  # Default: 2048; None = model default
    max_model_len=4096,   # Context window for vLLM
    # Sampling
    temperature=0,        # Default: 0 (deterministic)
    seed=None,            # Random seed for reproducibility
    num_gen_choices=1,    # Completions per prompt
    # Response format
    extract_format=None,  # "json_object" or "json_schema"
)
For Llama models, set extract_format="json_schema" to ensure proper structured output.
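For instance (the model name shown here is the same Llama model used in the offline example later in this guide):
config = BaseConfig(
    llm_name="meta-llama/Llama-3.3-70B-Instruct",
    extract_format="json_schema",  # enforce structured extraction output
)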
Inference Mode
config = BaseConfig(
    # Online: real-time API calls
    llm_infer_mode="online",  # Default
    # Offline: batch processing with vLLM
    # llm_infer_mode="offline",
    # vllm_tensor_parallel_size=2,  # GPU parallelism
    # max_num_seqs=256,             # Max concurrent sequences
)
Async Configuration
config = BaseConfig(
    max_retries=10,  # Retry attempts for async API calls
)
Embedding Parameters
Model Selection
config = BaseConfig(
    embedding_model_name="nvidia/NV-Embed-v2",  # Default
    # Other options:
    # "text-embedding-3-large"  # OpenAI
    # "text-embedding-3-small"  # OpenAI
    # "GritLM/GritLM-7B"        # GritLM
)
Embedding Configuration
config = BaseConfig(
    embedding_batch_size=16,              # Batch size for encoding
    embedding_return_as_normalized=True,  # Normalize embeddings
    embedding_max_seq_len=2048,           # Max sequence length
)
Storage Parameters
Index Management
config = BaseConfig(
    # Rebuild index from scratch
    force_index_from_scratch=False,   # Default: reuse existing
    # Rebuild OpenIE results
    force_openie_from_scratch=False,  # Default: reuse existing
    # Save extracted information
    save_openie=True,                 # Default: saves to disk
)
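For instance, to rebuild both the extraction results and the index after changing extraction settings:
config = BaseConfig(
    force_openie_from_scratch=True,  # re-run OpenIE extraction
    force_index_from_scratch=True,   # rebuild the index on top of it
)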
Save Directory
config = BaseConfig(
    save_dir=None,      # Auto-generated if None
    dataset="musique",  # Used to create save_dir
)

# Auto-generated paths:
# - dataset="musique"   → "outputs/musique"
# - dataset="locomo_v1" → "outputs/locomo/locomo_v1"
# - dataset=None        → "outputs"
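A minimal sketch of how the default path might be derived, assuming the mapping above generalizes (the prefix rule for LoCoMo datasets is an inference from the single example; the real logic lives in BaseConfig):
def default_save_dir(dataset):
    # Mirror the documented dataset -> save_dir mapping
    if dataset is None:
        return "outputs"
    if dataset.startswith("locomo"):
        return f"outputs/locomo/{dataset}"  # e.g. "outputs/locomo/locomo_v1"
    return f"outputs/{dataset}"             # e.g. "outputs/musique"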
Extraction Method
config = BaseConfig(
    extract_method="openie",  # Default: OpenIE triples
    # Options: "openie", "episodic", "episodic_gist", "temporal"
)
Method Descriptions:
openie - Extracts subject-predicate-object triples
episodic - Conversational/temporal data extraction
episodic_gist - Gists, facts, entities, and metadata
temporal - Time-sensitive relationship extraction
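For example, a conversational benchmark such as LoCoMo would use the episodic extractor:
config = BaseConfig(
    extract_method="episodic",  # conversational/temporal extraction
    dataset="locomo_v1",
)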
Skip Graph Construction
config = BaseConfig(
    skip_graph=False,  # Set True to run extraction only (no graph)
)
Set skip_graph=True on the first pass of vLLM offline indexing to extract information without building the graph; a follow-up run with skip_graph=False can then construct the graph from the saved extraction (see the offline configuration example below).
Text Preprocessing
Chunking Parameters
config = BaseConfig(
    text_preprocessor_class_name="TextPreprocessor",  # Preprocessor class
    # Tokenizer for chunking
    preprocess_encoder_name="gpt-4o",
    # Chunk size
    preprocess_chunk_max_token_size=None,  # None = no chunking
    # Overlap between chunks
    preprocess_chunk_overlap_token_size=128,
    # Chunking function
    preprocess_chunk_func="by_token",
)
Example: Enable Chunking
config = BaseConfig(
    preprocess_chunk_max_token_size=512,      # 512 tokens per chunk
    preprocess_chunk_overlap_token_size=128,  # 128-token overlap
    preprocess_encoder_name="gpt-4o",         # Use the GPT-4o tokenizer
)
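Assuming by_token implements a standard sliding window, the effective stride is 512 - 128 = 384 tokens, so a 1,200-token document yields three chunks starting at token offsets 0, 384, and 768.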
Graph Construction
Graph Type
config = BaseConfig(
    graph_type="facts_and_sim_passage_node_unidirectional",  # Default
    # Options:
    # - "dpr_only"
    # - "facts_and_sim"
    # - "facts_and_sim_passage_node_unidirectional"
    is_directed_graph=False,  # Use an undirected graph
)
Synonymy Edges
Control entity linking via similarity:
config = BaseConfig(
    # KNN parameters for synonym detection
    synonymy_edge_topk=2047,              # Neighbors to consider
    synonymy_edge_query_batch_size=1000,  # Query batch size
    synonymy_edge_key_batch_size=10000,   # Key batch size
    # Similarity threshold
    synonymy_edge_sim_threshold=0.8,      # Range: 0.0-1.0
)
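A minimal sketch of what KNN-based synonymy detection with these parameters amounts to, assuming normalized entity embeddings (so cosine similarity reduces to a dot product); the function and variable names here are illustrative, not the internal REMem API:
import numpy as np

def synonymy_edges(entity_embs, topk=2047, threshold=0.8):
    # Pairwise cosine similarity between all entity embeddings
    sims = entity_embs @ entity_embs.T
    edges = []
    for i, row in enumerate(sims):
        row[i] = -1.0  # exclude self-matches
        for j in np.argsort(row)[::-1][:topk]:
            if row[j] < threshold:
                break  # neighbors are sorted descending, so stop early
            edges.append((i, int(j), float(row[j])))
    return edges

In practice the query and key batch sizes control how this similarity matrix is computed in tiles rather than all at once, keeping memory bounded for large entity sets.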
Episodic Graph Options
config = BaseConfig(
    # Concatenate gists per chunk into a single node
    concatenate_gists_per_chunk=False,
    # Split long verbatim content into separate nodes
    split_verbatim_per_chunk=True,
)
Retrieval Parameters
Basic Retrieval
config = BaseConfig(
    retrieval_top_k=200,  # Total passages to retrieve
    linking_top_k=5,      # Entities to link per step
    damping=0.5,          # PPR damping factor (0-1)
)
Passage Node Weight
Balance passage vs entity importance:
config = BaseConfig(
    passage_node_weight=0.05,  # Multiplicative factor for passages in PPR
)
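A minimal sketch of how passage_node_weight and damping might enter Personalized PageRank: passage nodes receive a down-weighted share of the reset probability relative to seed entities. This is illustrative, not the REMem internals:
import networkx as nx

def run_ppr(graph, seed_entities, passage_nodes, damping=0.5, passage_node_weight=0.05):
    # Seed entities get full reset mass; passages get a small multiplicative share
    personalization = {node: 1.0 for node in seed_entities}
    personalization.update({node: passage_node_weight for node in passage_nodes})
    return nx.pagerank(graph, alpha=damping, personalization=personalization)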
Reranking
config = BaseConfig(
    rerank_dspy_file_path=None,  # Path to a trained DSPy filter
    # Example:
    # rerank_dspy_file_path="src/remem/prompts/dspy_prompts/filter_llama3.3-70B-Instruct.json"
)
QA Parameters
Reading Configuration
config = BaseConfig(
    qa_top_k=5,                             # Passages to feed to the LLM
    qa_passage_prefix="Wikipedia Title: ",  # Prefix for each passage
    qa_prompt_template=None,                # Custom prompt template name
    qa_reader="remem",                      # "remem" or "tiser"
)
QA Prompt Template
# Auto-detect based on dataset
config = BaseConfig(
    dataset="musique",
    qa_prompt_template=None,  # Uses the "rag_qa_musique" template
)

# Or specify explicitly
config = BaseConfig(
    qa_prompt_template="rag_qa_custom",
)
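A plausible reconstruction of the auto-detection rule implied above; the actual lookup is internal to REMem, and the naming convention is an assumption generalized from the "rag_qa_musique" example:
def resolve_qa_template(dataset, qa_prompt_template):
    if qa_prompt_template is not None:
        return qa_prompt_template  # an explicit template always wins
    return f"rag_qa_{dataset}"     # e.g. dataset="musique" -> "rag_qa_musique"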
Agent Parameters
For episodic and temporal extraction methods:
config = BaseConfig(
    # Tool selection mode
    agent_fixed_tools=False,  # False = full tool selection
    # Reasoning steps
    agent_max_steps=5,        # Max steps for agent reasoning
    # Fixed tool mode configuration
    agent_fixed_retrieval_tool="semantic_retrieve",  # or "lexical_retrieve"
)
Fixed Tools Mode (agent_fixed_tools=True):
agent_max_steps=1 - Semantic retrieve only
agent_max_steps=2 - Semantic retrieve + output answer
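For example, a fixed pipeline that retrieves once and then answers:
config = BaseConfig(
    agent_fixed_tools=True,                          # fixed tool sequence
    agent_max_steps=2,                               # retrieve, then output answer
    agent_fixed_retrieval_tool="semantic_retrieve",  # or "lexical_retrieve"
)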
Evaluation Parameters
Enable Evaluation
config = BaseConfig(
    do_eval_retrieval=True,  # Evaluate retrieval quality
    do_eval_qa=True,         # Evaluate answer quality
)
Corpus Configuration
config = BaseConfig(
    corpus_len=None,  # Total number of documents in the corpus
)
Complete Configuration Examples
Minimal Configuration
config = BaseConfig(
    llm_name="gpt-4o-mini",
    embedding_model_name="nvidia/NV-Embed-v2",
)
Production Configuration
config = BaseConfig(
    # LLM
    llm_name="gpt-4o-mini",
    extract_llm_label="gpt-4o-mini",
    qa_llm_label="gpt-4o",  # More powerful model for QA
    max_new_tokens=2048,
    temperature=0,
    # Embeddings
    embedding_model_name="nvidia/NV-Embed-v2",
    embedding_batch_size=16,
    # Storage
    dataset="my_dataset",
    force_index_from_scratch=False,
    save_openie=True,
    # Preprocessing
    preprocess_chunk_max_token_size=512,
    preprocess_chunk_overlap_token_size=128,
    # Graph
    graph_type="facts_and_sim_passage_node_unidirectional",
    synonymy_edge_topk=2047,
    synonymy_edge_sim_threshold=0.8,
    # Retrieval
    retrieval_top_k=200,
    linking_top_k=5,
    damping=0.5,
    passage_node_weight=0.05,
    rerank_dspy_file_path="path/to/filter.json",
    # QA
    qa_top_k=5,
    qa_passage_prefix="Wikipedia Title: ",
    # Evaluation
    do_eval_retrieval=True,
    do_eval_qa=True,
)
Offline (vLLM) Configuration
config = BaseConfig(
    # Offline inference
    llm_infer_mode="offline",
    llm_name="meta-llama/Llama-3.3-70B-Instruct",
    extract_format="json_schema",
    vllm_tensor_parallel_size=2,
    max_num_seqs=256,
    max_model_len=4096,
    # First pass: extract only
    skip_graph=True,
    # Other settings...
    embedding_model_name="nvidia/NV-Embed-v2",
    dataset="my_dataset",
)
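Once extraction completes, a second run can build the graph from the saved results; a sketch, assuming the same dataset and save directory:
config = BaseConfig(
    llm_infer_mode="offline",
    llm_name="meta-llama/Llama-3.3-70B-Instruct",
    # Second pass: build the graph from the saved extraction
    skip_graph=False,
    force_openie_from_scratch=False,  # reuse the first pass's OpenIE output
    embedding_model_name="nvidia/NV-Embed-v2",
    dataset="my_dataset",
)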
Episodic Gist Configuration
config = BaseConfig(
    # Extraction
    extract_method="episodic_gist",
    # Agent
    agent_fixed_tools=False,
    agent_max_steps=5,
    # Graph options
    concatenate_gists_per_chunk=False,
    split_verbatim_per_chunk=True,
    # Other settings...
    llm_name="gpt-4o-mini",
    embedding_model_name="nvidia/NV-Embed-v2",
    retrieval_top_k=200,
    qa_top_k=5,
)
Next Steps
Indexing: learn how to index documents
Embeddings: configure embedding models