Learn how to generate follow-up questions using GraphRAG’s entity-based question generation.
Question generation takes a list of user queries and generates candidate follow-up questions, enabling conversational exploration and deeper investigation of your dataset.
The question generation method combines structured data from the knowledge graph with unstructured data from input documents to generate candidate questions related to specific entities. This is useful for:
- Conversational AI: Generating follow-up questions in multi-turn conversations
- Guided exploration: Creating question lists for investigators to dive deeper into datasets
- Content discovery: Surfacing important themes and topics for further investigation
- Interactive dashboards: Suggesting next questions based on user interests
Question generation uses the same context-building approach as local search, ensuring that generated questions are grounded in actual data from the knowledge graph.
from graphrag.query.question_gen.local_gen import LocalQuestionGenfrom graphrag.query.context_builder.builders import LocalContextBuilderfrom graphrag_llm.completion import get_llm_completion# Initialize the question generatormodel = get_llm_completion(config)context_builder = LocalContextBuilder( entities=entities, relationships=relationships, reports=community_reports, text_units=text_units, # ... other required parameters)question_gen = LocalQuestionGen( model=model, context_builder=context_builder, system_prompt=custom_prompt, # Optional llm_params={"temperature": 0.7, "max_tokens": 500})# Generate questions based on prior queriesprior_questions = [ "What are the main research areas in the dataset?", "Who are the key researchers?"]follow_up_questions = await question_gen.generate( prior_questions=prior_questions, num_questions=5)for i, question in enumerate(follow_up_questions, 1): print(f"{i}. {question}")
# Custom system prompt for domain-specific question generation.
# The `{context_data}`, `{prior_questions}` and `{num_questions}` fields are
# template placeholders left for the question generator to fill in.
# NOTE(review): confirm which placeholder names your GraphRAG version
# substitutes -- an unknown `{name}` breaks `str.format`-style templating.
CUSTOM_QUESTION_PROMPT = """---Role---
You are a scientific research assistant helping to explore a medical research dataset.

---Goal---
Generate insightful follow-up questions that would help a researcher understand:
- Novel discoveries or findings
- Experimental methodologies
- Relationships between different studies
- Gaps in the research

---Context Data---
{context_data}

---Prior Questions---
{prior_questions}

---Instructions---
Generate {num_questions} follow-up questions that:
1. Build on the prior questions
2. Are specific and answerable from the dataset
3. Focus on scientific insights and methodological details
4. Help identify important patterns or anomalies

Return ONLY the questions, one per line, numbered."""

# `LocalQuestionGen`, `model` and `context_builder` are not defined in this
# snippet; they must already be in scope when it runs.
question_gen = LocalQuestionGen(
    model=model,
    context_builder=context_builder,
    system_prompt=CUSTOM_QUESTION_PROMPT,
    llm_params={"temperature": 0.7},
)
from typing import Listimport redef filter_questions( questions: List[str], prior_questions: List[str], min_length: int = 10, max_length: int = 150) -> List[str]: """Filter out low-quality or duplicate questions.""" filtered = [] prior_set = {q.lower().strip() for q in prior_questions} for q in questions: q = q.strip() # Remove numbering if present q = re.sub(r'^\d+\.\s*', '', q) # Check length if not (min_length <= len(q) <= max_length): continue # Check for duplicates if q.lower() in prior_set: continue # Check if it's actually a question if not q.endswith('?'): q += '?' filtered.append(q) return filtered# Use with question generationraw_questions = await question_gen.generate( prior_questions=prior_questions, num_questions=10)filtered_questions = filter_questions( questions=raw_questions, prior_questions=prior_questions)