The RAGEngine class is the central orchestrator of Quest’s on-device RAG system. It combines exact matching, semantic retrieval, and local LLM inference to provide intelligent responses to coding questions.
def _build_exact_match_map(self) -> dict: """Build a hash map for exact match search.""" exact_match_map = {} for solution in self.retriever.solutions: normalized_title = self._normalize_title(solution.title) exact_match_map[normalized_title] = solution return exact_match_mapdef _normalize_title(self, title: str) -> str: """Normalize a title for exact match search.""" return title.strip().lower()
Key Features:
Title normalization (lowercase, stripped whitespace)
Direct solution retrieval without LLM inference
Bypasses semantic retrieval and LLM generation entirely whenever the query exactly matches a known problem title
When you know the exact problem title (e.g., “two sum”), Quest returns the solution instantly without using the LLM, saving time and compute resources.
# Pull the k nearest solutions (with similarity scores attached) from FAISS.
retrieved_solutions = self.retriever.search(
    query, k=k, return_scores=True
)

# Drop any hit that lacks a score or falls below the confidence floor.
filtered_solutions = [
    candidate
    for candidate in retrieved_solutions
    if hasattr(candidate, 'score') and float(candidate.score) >= min_confidence
]
The engine retrieves k similar solutions using FAISS and filters them based on a minimum confidence threshold.
def stop(self):
    """Raise the stop flag so any in-flight generation halts."""
    self.stop_generation = True
    logger.info("Generation process stopped.")


def reset(self):
    """Clear the stop flag so a fresh generation may begin."""
    self.stop_generation = False
    logger.info("Generation process reset.")
View complete answer_question method
def answer_question(
        self, query: str, k: int = 5, min_confidence: float = 0.6) -> str:
    """Answer a question using the enhanced RAG engine.

    Pipeline: exact-title shortcut -> FAISS retrieval -> confidence
    filtering -> prompt construction -> Ollama generation -> optional
    reasoning-mode post-filtering.

    Args:
        query: The user's question (also matched against known titles).
        k: Number of candidate solutions to retrieve from FAISS.
        min_confidence: Minimum similarity score a retrieved solution
            must carry to be used as context.

    Returns:
        A string prefixed with "Exact Match Solution:" or
        "Generated Solution:", or a generic error message if any step
        of the pipeline raises.
    """
    try:
        # Reset the stop flag so a previous stop() does not block this run.
        self.reset()
        # Check for exact match: a normalized-title hit skips retrieval
        # and LLM inference entirely.
        normalized_query = self._normalize_title(query)
        if normalized_query in self.exact_match_map:
            exact_match_solution = self.exact_match_map[normalized_query]
            logger.info("Exact match found. Returning solution directly.")
            return f"Exact Match Solution:\n{exact_match_solution.solution}"
        # Retrieve relevant context (scored candidates from FAISS).
        retrieved_solutions = self.retriever.search(
            query, k=k, return_scores=True)
        filtered_solutions = [
            sol for sol in retrieved_solutions
            if hasattr(sol, 'score') and float(sol.score) >= min_confidence
        ]
        # Fallback if no solutions meet confidence threshold: retry with a
        # wider net and a looser threshold.
        # NOTE(review): with the default k=5 the `k < 5` guard makes this
        # retry unreachable; it only fires when callers pass k < 5 (as in
        # the usage example). Also min_confidence can drift below 0 across
        # retries — confirm both are intended.
        if not filtered_solutions and k < 5:
            return self.answer_question(
                query, k=k + 2, min_confidence=min_confidence - 0.1)
        # Generate enhanced prompt from the query plus surviving context.
        prompt = self.generate_enhanced_prompt(query, filtered_solutions)
        # Get response from Ollama (local LLM inference).
        response = self.call_ollama(prompt)
        # Add to conversation history.
        # NOTE(review): history stores the raw response; the reasoning-mode
        # filter below only affects the returned text — confirm intended.
        self.conversation_history.add_query(query, response)
        # Filter response if in reasoning mode.
        if self.mode == "reasoning":
            response = self.filter_reasoning_response(response)
        return f"Generated Solution:\n{response}"
    except Exception as e:
        # Broad catch keeps the assistant responsive; details go to the log.
        logger.error(f"Failed to answer question: {e}")
        return "An error occurred while generating the response."
from src.DSAAssistant.components.retriever2 import LeetCodeRetriever
from rag_engine import RAGEngine

# Wire up the retriever and the engine, keeping the last 3 turns of history.
leetcode_retriever = LeetCodeRetriever()
engine = RAGEngine(leetcode_retriever, max_history=3)

# Use the general-purpose answering mode.
engine.set_mode("general")

# Ask a question and print the engine's answer.
result = engine.answer_question(
    "How do I solve the two sum problem?", k=3)
print(result)