Overview

The BaseConfig class in src/remem/utils/config_utils.py contains all configuration parameters for REMem. This guide documents every parameter with examples and recommended values.

Basic Setup

from remem.utils.config_utils import BaseConfig

config = BaseConfig(
    llm_name="gpt-4o-mini",
    embedding_model_name="nvidia/NV-Embed-v2",
    dataset="my_dataset"
)

LLM Parameters

Model Selection

config = BaseConfig(
    # Primary LLM for extraction and QA
    llm_name="gpt-4o-mini",  # Default: "gpt-4o-mini"
    
    # Separate LLMs for specific tasks
    extract_llm_label=None,  # Defaults to llm_name
    qa_llm_label=None,  # Defaults to llm_name
    
    # LLM API endpoint
    llm_base_url=None,  # Default: None (uses OpenAI)
    
    # Azure OpenAI
    use_azure=False  # Set to True for Azure
)
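
For example, to point the client at a self-hosted OpenAI-compatible endpoint (the URL below is illustrative, not a REMem default):

config = BaseConfig(
    llm_base_url="http://localhost:8000/v1",  # illustrative, e.g. a local vLLM server
)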

Generation Parameters

config = BaseConfig(
    # Token limits
    max_new_tokens=2048,  # Default: 2048, None = model default
    max_model_len=4096,  # Context window for vLLM
    
    # Sampling
    temperature=0,  # Default: 0 (deterministic)
    seed=None,  # Random seed for reproducibility
    num_gen_choices=1,  # Completions per prompt
    
    # Response format
    extract_format=None,  # "json_object" or "json_schema"
)
For Llama models, set extract_format="json_schema" to ensure proper structured output.
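
For example:

config = BaseConfig(
    llm_name="meta-llama/Llama-3.3-70B-Instruct",
    extract_format="json_schema",  # enforce structured JSON output
)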

Inference Mode

config = BaseConfig(
    # Online: real-time API calls
    llm_infer_mode="online",  # Default
    
    # Offline: batch processing with vLLM
    # llm_infer_mode="offline",
    # vllm_tensor_parallel_size=2,  # GPU parallelism
    # max_num_seqs=256  # Max concurrent sequences
)

Async Configuration

config = BaseConfig(
    max_retries=10,  # Retry attempts for async API calls
)

Embedding Parameters

Model Selection

config = BaseConfig(
    embedding_model_name="nvidia/NV-Embed-v2",  # Default
    # Other options:
    # "text-embedding-3-large"  # OpenAI
    # "text-embedding-3-small"  # OpenAI
    # "GritLM/GritLM-7B"  # GritLM
)

Embedding Configuration

config = BaseConfig(
    embedding_batch_size=16,  # Batch size for encoding
    embedding_return_as_normalized=True,  # Normalize embeddings
    embedding_max_seq_len=2048,  # Max sequence length
)

Storage Parameters

Index Management

config = BaseConfig(
    # Rebuild index from scratch
    force_index_from_scratch=False,  # Default: reuse existing
    
    # Rebuild OpenIE results
    force_openie_from_scratch=False,  # Default: reuse existing
    
    # Save extracted information
    save_openie=True,  # Default: saves to disk
)

Save Directory

config = BaseConfig(
    save_dir=None,  # Auto-generated if None
    dataset="musique",  # Used to create save_dir
)

# Auto-generated paths:
# - dataset="musique" → "outputs/musique"
# - dataset="locomo_v1" → "outputs/locomo/locomo_v1"
# - dataset=None → "outputs"
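
To pin outputs to a specific location instead (the path below is just an example):

config = BaseConfig(
    dataset="musique",
    save_dir="outputs/musique_run1",  # overrides the auto-generated path
)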

Information Extraction

Extraction Method

config = BaseConfig(
    extract_method="openie",  # Default: OpenIE triples
    # Options: "openie", "episodic", "episodic_gist", "temporal"
)
Method Descriptions:
  • openie - Extracts subject-predicate-object triples
  • episodic - Extracts from conversational and temporal data
  • episodic_gist - Extracts gists, facts, entities, and metadata
  • temporal - Extracts time-sensitive relationships

Skip Graph Construction

config = BaseConfig(
    skip_graph=False,  # Set True to only do OpenIE
)
Set skip_graph=True when running vLLM offline indexing for the first time to extract information without building the graph.
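
A sketch of the two-pass workflow, using only parameters documented on this page:

# Pass 1: extract only, saving OpenIE results to disk
config = BaseConfig(
    llm_infer_mode="offline",
    skip_graph=True,
    save_openie=True,
)

# Pass 2: reuse the saved extractions and build the graph
config = BaseConfig(
    llm_infer_mode="offline",
    skip_graph=False,
    force_openie_from_scratch=False,  # reuse pass-1 results
)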

Text Preprocessing

Chunking Parameters

config = BaseConfig(
    text_preprocessor_class_name="TextPreprocessor",  # Preprocessor class
    
    # Tokenizer for chunking
    preprocess_encoder_name="gpt-4o",
    
    # Chunk size
    preprocess_chunk_max_token_size=None,  # None = no chunking
    
    # Overlap between chunks
    preprocess_chunk_overlap_token_size=128,
    
    # Chunking function
    preprocess_chunk_func="by_token",
)

Example: Enable Chunking

config = BaseConfig(
    preprocess_chunk_max_token_size=512,  # 512 tokens per chunk
    preprocess_chunk_overlap_token_size=128,  # 128 token overlap
    preprocess_encoder_name="gpt-4o"  # Use GPT-4o tokenizer
)
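
With these settings each chunk advances 512 - 128 = 384 tokens past the previous one. A standalone sanity check of token and chunk counts (this sketch assumes a simple sliding-window scheme and the tiktoken package; it is not REMem code):

import math
import tiktoken

document_text = "Example passage. " * 500  # placeholder input

enc = tiktoken.encoding_for_model("gpt-4o")  # matches preprocess_encoder_name
n_tokens = len(enc.encode(document_text))

chunk_size, overlap = 512, 128
stride = chunk_size - overlap  # 384 new tokens per chunk
n_chunks = 1 if n_tokens <= chunk_size else 1 + math.ceil((n_tokens - chunk_size) / stride)
print(n_tokens, n_chunks)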

Graph Construction

Graph Type

config = BaseConfig(
    graph_type="facts_and_sim_passage_node_unidirectional",  # Default
    # Options:
    # - "dpr_only"
    # - "facts_and_sim"
    # - "facts_and_sim_passage_node_unidirectional"
    
    is_directed_graph=False,  # Use undirected graph
)

Synonymy Edges

Control entity linking via similarity:
config = BaseConfig(
    # KNN parameters for synonym detection
    synonymy_edge_topk=2047,  # Neighbors to consider
    synonymy_edge_query_batch_size=1000,  # Query batch size
    synonymy_edge_key_batch_size=10000,  # Key batch size
    
    # Similarity threshold
    synonymy_edge_sim_threshold=0.8,  # 0.0-1.0
)
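
Conceptually, synonymy edges come from a thresholded k-nearest-neighbor search over entity embeddings. A toy numpy illustration of the idea (not REMem's implementation, which additionally batches queries and keys as configured above):

import numpy as np

rng = np.random.default_rng(0)
emb = rng.normal(size=(100, 64))  # toy entity embeddings
emb /= np.linalg.norm(emb, axis=1, keepdims=True)  # normalized: dot product = cosine

topk, threshold = 5, 0.8
scores = emb @ emb.T
np.fill_diagonal(scores, -1.0)  # exclude self-matches
neighbors = np.argsort(-scores, axis=1)[:, :topk]
edges = [(i, j) for i in range(len(emb)) for j in neighbors[i] if scores[i, j] >= threshold]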

Episodic Graph Options

config = BaseConfig(
    # Concatenate gists per chunk into single node
    concatenate_gists_per_chunk=False,
    
    # Split long verbatim content into separate nodes
    split_verbatim_per_chunk=True,
)

Retrieval Parameters

Basic Retrieval

config = BaseConfig(
    retrieval_top_k=200,  # Total passages to retrieve
    linking_top_k=5,  # Entities to link per step
    damping=0.5,  # PPR damping factor (0-1)
)
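
Under the usual PageRank convention, the damping factor is the probability of following an edge rather than restarting at the seed nodes, so lower values keep scores concentrated near the linked entities. A toy illustration with networkx (not REMem's retrieval code):

import networkx as nx

G = nx.karate_club_graph()  # stand-in for the knowledge graph
seeds = {0: 1.0}  # stand-in for a linked entity node
scores = nx.pagerank(G, alpha=0.5, personalization=seeds)  # alpha plays the role of damping
print(sorted(scores, key=scores.get, reverse=True)[:5])  # highest-scoring nodes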

Passage Node Weight

Balance the weight of passage nodes against entity nodes in the PPR scores:
config = BaseConfig(
    passage_node_weight=0.05,  # Multiplicative factor for passages in PPR
)

Reranking

config = BaseConfig(
    rerank_dspy_file_path=None,  # Path to trained DSPy filter
    # Example:
    # rerank_dspy_file_path="src/remem/prompts/dspy_prompts/filter_llama3.3-70B-Instruct.json"
)

QA Parameters

Reading Configuration

config = BaseConfig(
    qa_top_k=5,  # Passages to feed to LLM
    qa_passage_prefix="Wikipedia Title: ",  # Prefix for each passage
    qa_prompt_template=None,  # Custom prompt template name
    qa_reader="remem",  # "remem" or "tiser"
)

QA Prompt Template

# Auto-detect based on dataset
config = BaseConfig(
    dataset="musique",
    qa_prompt_template=None  # Uses "rag_qa_musique" template
)

# Or specify explicitly
config = BaseConfig(
    qa_prompt_template="rag_qa_custom"
)

Agent Parameters

For episodic and temporal extraction methods:
config = BaseConfig(
    # Tool selection mode
    agent_fixed_tools=False,  # False = full tool selection
    
    # Reasoning steps
    agent_max_steps=5,  # Max steps for agent reasoning
    
    # Fixed tool mode configuration
    agent_fixed_retrieval_tool="semantic_retrieve",  # or "lexical_retrieve"
)
Fixed Tools Mode:
  • agent_max_steps=1 - Semantic retrieve only
  • agent_max_steps=2 - Semantic retrieve + output answer
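
For example, to pin the agent to a single semantic retrieval step followed by answering:

config = BaseConfig(
    agent_fixed_tools=True,
    agent_fixed_retrieval_tool="semantic_retrieve",
    agent_max_steps=2,  # step 1: semantic retrieve, step 2: output answer
)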

Evaluation Parameters

Enable Evaluation

config = BaseConfig(
    do_eval_retrieval=True,  # Evaluate retrieval quality
    do_eval_qa=True,  # Evaluate answer quality
)

Corpus Configuration

config = BaseConfig(
    corpus_len=None,  # Total number of documents in corpus
)
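
For example, when you load the corpus yourself (the JSONL loading below is illustrative; only corpus_len is a BaseConfig parameter):

import json

with open("corpus.jsonl") as f:  # hypothetical corpus file, one JSON document per line
    corpus = [json.loads(line) for line in f]

config = BaseConfig(corpus_len=len(corpus))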

Complete Configuration Examples

Minimal Configuration

config = BaseConfig(
    llm_name="gpt-4o-mini",
    embedding_model_name="nvidia/NV-Embed-v2"
)

Production Configuration

config = BaseConfig(
    # LLM
    llm_name="gpt-4o-mini",
    extract_llm_label="gpt-4o-mini",
    qa_llm_label="gpt-4o",  # More powerful for QA
    max_new_tokens=2048,
    temperature=0,
    
    # Embeddings
    embedding_model_name="nvidia/NV-Embed-v2",
    embedding_batch_size=16,
    
    # Storage
    dataset="my_dataset",
    force_index_from_scratch=False,
    save_openie=True,
    
    # Preprocessing
    preprocess_chunk_max_token_size=512,
    preprocess_chunk_overlap_token_size=128,
    
    # Graph
    graph_type="facts_and_sim_passage_node_unidirectional",
    synonymy_edge_topk=2047,
    synonymy_edge_sim_threshold=0.8,
    
    # Retrieval
    retrieval_top_k=200,
    linking_top_k=5,
    damping=0.5,
    passage_node_weight=0.05,
    rerank_dspy_file_path="path/to/filter.json",
    
    # QA
    qa_top_k=5,
    qa_passage_prefix="Wikipedia Title: ",
    
    # Evaluation
    do_eval_retrieval=True,
    do_eval_qa=True,
)

Offline (vLLM) Configuration

config = BaseConfig(
    # Offline inference
    llm_infer_mode="offline",
    llm_name="meta-llama/Llama-3.3-70B-Instruct",
    extract_format="json_schema",
    vllm_tensor_parallel_size=2,
    max_num_seqs=256,
    max_model_len=4096,
    
    # First pass: extract only
    skip_graph=True,
    
    # Other settings...
    embedding_model_name="nvidia/NV-Embed-v2",
    dataset="my_dataset",
)

Episodic Gist Configuration

config = BaseConfig(
    # Extraction
    extract_method="episodic_gist",
    
    # Agent
    agent_fixed_tools=False,
    agent_max_steps=5,
    
    # Graph options
    concatenate_gists_per_chunk=False,
    split_verbatim_per_chunk=True,
    
    # Other settings...
    llm_name="gpt-4o-mini",
    embedding_model_name="nvidia/NV-Embed-v2",
    retrieval_top_k=200,
    qa_top_k=5,
)

Next Steps

Indexing

Learn how to index documents

Embeddings

Configure embedding models
