Grounding in Vertex AI lets you use generative models to generate content grounded in your own documents and data. This capability ensures that model responses are anchored in specific information and reduces hallucinations.
Reduces Hallucinations
Prevents models from generating false information by anchoring in verified data
Increases Trust
Provides citations and sources for generated content
Current Information
Accesses real-time data beyond the model’s training cutoff
Private Data
Grounds responses in your organization’s proprietary information
Ground responses in publicly available web content indexed by Google:
from google import genai
from google.genai.types import Tool, GoogleSearch, GenerateContentConfig

# Create a client that targets Vertex AI (PROJECT_ID must be defined by the caller).
client = genai.Client(vertexai=True, project=PROJECT_ID, location="global")

# Use Google Search for grounding.
search_tool = Tool(google_search=GoogleSearch())

response = client.models.generate_content(
    model="gemini-2.0-flash-001",
    contents="What are the latest developments in quantum computing?",
    config=GenerateContentConfig(
        tools=[search_tool],
        temperature=0.2,
    ),
)

print(response.text)
If you use Google Search grounding in production, you must also implement a Google Search entry point to comply with usage requirements.
Split based on semantic boundaries (paragraphs, sections):
import re
from typing import List


def semantic_chunk(text: str, max_chunk_size: int = 1000) -> List[str]:
    """Split text on paragraph boundaries while respecting a max chunk size.

    Paragraphs (runs of text separated by one or more blank lines) are packed
    into chunks of roughly ``max_chunk_size`` characters.  A single paragraph
    longer than ``max_chunk_size`` becomes its own (oversized) chunk.

    Args:
        text: The document text to split.
        max_chunk_size: Soft upper bound on chunk length, in characters
            (the "\\n\\n" separators added between packed paragraphs are not
            counted toward the bound).

    Returns:
        A list of stripped, non-empty chunk strings.
    """
    # Split on double newlines (paragraph boundaries).
    paragraphs = re.split(r'\n\n+', text)
    chunks: List[str] = []
    current_chunk = ""
    for para in paragraphs:
        # If adding this paragraph would exceed the max size, flush the
        # current chunk and start a new one with this paragraph.
        if len(current_chunk) + len(para) > max_chunk_size:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = para
        else:
            current_chunk += "\n\n" + para if current_chunk else para
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks


# Example usage — guarded so importing this module has no side effects.
if __name__ == "__main__":
    # Context manager closes the file; explicit encoding avoids platform defaults.
    with open("document.txt", encoding="utf-8") as f:
        text = f.read()
    chunks = semantic_chunk(text, max_chunk_size=800)
Create small chunks for retrieval but provide larger context for generation:
def hierarchical_chunk(
    text: str, child_size: int = 256, parent_size: int = 1024
) -> List[dict]:
    """Create a parent-child chunk hierarchy.

    The text is first cut into fixed-size parent chunks; each parent is then
    cut into fixed-size child chunks.  Children are meant for embedding and
    retrieval; parents supply the larger context for generation.

    Args:
        text: The document text to split.
        child_size: Maximum length of each child chunk, in characters.
        parent_size: Maximum length of each parent chunk, in characters.

    Returns:
        One dict per child with keys "child_text", "parent_text",
        "parent_id", and "child_id".
    """
    # Split into parent chunks.
    parents = [
        text[i:i + parent_size] for i in range(0, len(text), parent_size)
    ]
    chunks = []
    for parent_idx, parent in enumerate(parents):
        # Split each parent into child chunks.
        children = [
            parent[i:i + child_size] for i in range(0, len(parent), child_size)
        ]
        for child_idx, child in enumerate(children):
            chunks.append({
                "child_text": child,
                "parent_text": parent,
                "parent_id": parent_idx,
                "child_id": child_idx,
            })
    return chunks


# Retrieve with the child, generate with the parent, e.g.:
#   chunks = hierarchical_chunk(text)
# Store child embeddings, but return the parent text during retrieval.
from google.genai.types import (
    Tool,
    Retrieval,
    VertexAISearch,
    ToolConfig,
    FunctionCallingConfig,
    FunctionCallingConfigMode,
    GenerateContentConfig,
)

# Configure dynamic retrieval: AUTO lets the model decide when to retrieve.
# (The mode enum lives in FunctionCallingConfigMode, not on the
# FunctionCallingConfig model itself.)
tool_config = ToolConfig(
    function_calling_config=FunctionCallingConfig(
        mode=FunctionCallingConfigMode.AUTO,
    )
)

response = client.models.generate_content(
    model="gemini-2.0-flash-001",
    contents="What is 2+2?",  # Simple query, may not trigger retrieval
    config=GenerateContentConfig(
        tools=[search_tool],
        tool_config=tool_config,
    ),
)
from google.genai.types import Tool, Retrieval, VertexRagStore, GenerateContentConfig

# Retrieve more candidates than will be used so a reranking step can
# choose the best ones.
# NOTE(review): rag_resources typically takes VertexRagStoreRagResource
# objects (rag_corpus=...) rather than bare corpus names — confirm against
# the installed SDK version.
rag_tool = Tool(
    retrieval=Retrieval(
        vertex_rag_store=VertexRagStore(
            rag_resources=[corpus_name],
            similarity_top_k=10,  # Retrieve 10 candidates
        )
    )
)

# Use the model to rerank and select the top 3 for generation.
response = client.models.generate_content(
    model="gemini-2.0-flash-001",
    contents="query",
    config=GenerateContentConfig(tools=[rag_tool]),
)
from google.genai.types import GenerateContentResponse
from IPython.display import Markdown, display


def print_grounding_data(response: GenerateContentResponse) -> None:
    """Render a Gemini response as Markdown with grounding citations.

    Inserts ``[n]`` citation markers at the offsets reported in the
    response's grounding metadata, then appends a numbered source list.
    Prints a notice and returns early when no grounding metadata exists.

    Args:
        response: A generate_content response, ideally produced with a
            grounding tool configured.
    """
    candidate = response.candidates[0] if response.candidates else None
    metadata = getattr(candidate, "grounding_metadata", None)
    if not metadata:
        print("Response does not contain grounding metadata.")
        return

    # Segment end indices are offsets into the encoded text, so slice the
    # encoded bytes rather than the str.
    ENCODING = "utf-8"
    text_bytes = response.text.encode(ENCODING)
    parts = []
    last = 0
    for support in metadata.grounding_supports or []:
        end = support.segment.end_index
        parts.append(text_bytes[last:end].decode(ENCODING))
        parts.append(
            " " + "".join(f"[{i + 1}]" for i in support.grounding_chunk_indices)
        )
        last = end
    parts.append(text_bytes[last:].decode(ENCODING))
    parts.append("\n\n---\n## Grounding Sources\n")

    # List grounding chunks: web results and retrieved private context both
    # expose .uri and .title, so the walrus-bound value is used directly.
    if chunks := metadata.grounding_chunks:
        parts.append("### Sources\n")
        for i, chunk in enumerate(chunks, 1):
            if ctx := chunk.web or chunk.retrieved_context:
                parts.append(f"[{i}] [{ctx.title}]({ctx.uri})\n")

    display(Markdown("".join(parts)))


# Use the function (requires `client`, `search_tool`, and
# `GenerateContentConfig` from the earlier snippets).
response = client.models.generate_content(
    model="gemini-2.0-flash-001",
    contents="What is our refund policy?",
    config=GenerateContentConfig(tools=[search_tool]),
)
print_grounding_data(response)
from vertexai.evaluation import EvalTask

# Define the evaluation task with grounding-focused metrics.
eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[
        "groundedness",        # How well grounded is the response
        "citation_recall",     # Are relevant sources cited
        "citation_precision",  # Are citations accurate
    ],
)

# Run the evaluation and report aggregate scores.
eval_result = eval_task.evaluate(
    model=model,
    prompt_template=prompt_template,
)
print(eval_result.summary_metrics)