Skip to main content

Retrieval-Augmented Generation (RAG)

Retrieval-Augmented Generation (RAG) enhances AI responses by retrieving relevant information from your documents before generating answers. This reduces hallucinations and grounds responses in your actual data.

How RAG Works

RAG follows a two-step process:
  1. Index: Convert documents to embeddings and store them in a vector database
  2. Retrieve: Find relevant documents and use them as context for generation
User Query → Embed Query → Find Similar Documents → Generate Answer with Context

Basic RAG Flow

Here’s a complete RAG implementation:
import (
    "github.com/firebase/genkit/go/ai"
    "github.com/firebase/genkit/go/genkit"
    "github.com/firebase/genkit/go/plugins/googlegenai"
    "github.com/firebase/genkit/go/plugins/localvec"
)

// main wires up a minimal RAG pipeline: it initializes Genkit with the
// Google AI plugin, defines a local vector store retriever, and registers
// a flow that indexes a few facts, retrieves the most relevant ones for a
// question, and generates a grounded answer.
func main() {
    ctx := context.Background()
    g := genkit.Init(ctx, genkit.WithPlugins(&googlegenai.GoogleAI{}))

    // Embedder used for both indexing and query embedding.
    embedder := googlegenai.GoogleAIEmbedder(g, "embedding-001")

    // Local file-backed vector store — development/testing only.
    localvec.Init()
    docStore, retriever, err := localvec.DefineRetriever(g, "simpleQa",
        localvec.Config{Embedder: embedder},
        &ai.RetrieverOptions{Label: "simpleQa"})
    if err != nil {
        log.Fatalf("defining retriever: %v", err)
    }

    // Define the RAG flow: index → retrieve → generate.
    genkit.DefineFlow(g, "simpleQaFlow",
        func(ctx context.Context, input *simpleQaInput) (string, error) {
            // 1. Index documents. NOTE(review): indexing per request is for
            // demonstration only — in production, index once at startup.
            docs := []*ai.Document{
                ai.DocumentFromText("Paris is the capital of France", nil),
                ai.DocumentFromText("USA is the largest importer of coffee", nil),
                ai.DocumentFromText("Water exists in 3 states - solid, liquid and gas", nil),
            }
            if err := localvec.Index(ctx, docs, docStore); err != nil {
                return "", err
            }

            // 2. Retrieve the two most similar documents to the question.
            dRequest := ai.DocumentFromText(input.Question, nil)
            response, err := genkit.Retrieve(ctx, g,
                ai.WithRetriever(retriever),
                ai.WithDocs(dRequest),
                ai.WithConfig(&localvec.RetrieverOptions{K: 2}),
            )
            if err != nil {
                return "", err
            }

            // 3. Concatenate retrieved document text into a single context
            // string. Named ctxText so it does not shadow the context package.
            var ctxText strings.Builder
            for _, d := range response.Documents {
                ctxText.WriteString(d.Content[0].Text)
                ctxText.WriteByte('\n')
            }

            // 4. Generate the answer grounded in the retrieved context.
            return genkit.GenerateText(ctx, g,
                ai.WithModelName("googleai/gemini-2.5-flash"),
                ai.WithPrompt("Answer the question based on this context:\n\nContext: %s\n\nQuestion: %s",
                    ctxText.String(), input.Question),
            )
        })
}

Embedders

Embedders convert text into vector representations. Genkit supports multiple embedder providers:
// Google AI embedder
embed := googlegenai.GoogleAIEmbedder(g, "embedding-001")

// Vertex AI embedder
embed := googlegenai.VertexAIEmbedder(g, "textembedding-gecko")

Vector Stores

Genkit provides plugins for various vector databases:

Local Vector Store (Development)

Perfect for development and testing:
import "github.com/firebase/genkit/go/plugins/localvec"

// Initialize the plugin, then define a retriever backed by a local index
// labeled "myIndex", using the configured embedder.
// NOTE(review): the error return is discarded here for brevity — check it
// in real code.
localvec.Init()
docStore, retriever, _ := localvec.DefineRetriever(g, "myIndex", 
    localvec.Config{Embedder: embedder},
    &ai.RetrieverOptions{Label: "myIndex"})

Pinecone (JavaScript/TypeScript example)

// Pinecone plugin (JavaScript/TypeScript).
import { pineconeRetrieverRef, pineconeIndexerRef } from 'genkitx-pinecone';

// Reference an existing Pinecone index for retrieval...
const retriever = pineconeRetrieverRef({
  indexId: 'my-index',
  displayName: 'Pinecone retriever',
});

// ...and reference the same index for writing documents into it.
const indexer = pineconeIndexerRef({
  indexId: 'my-index',
  displayName: 'Pinecone indexer',
});

PostgreSQL (pgvector)

import "github.com/firebase/genkit/go/plugins/postgresql"

// Connect to Postgres (pgvector) and obtain named retriever/indexer handles.
// NOTE(review): the error from New is discarded for brevity — check it in real code.
pg, _ := postgresql.New(ctx, cfg, embedder)
retriever := pg.Retriever("myRetriever")
indexer := pg.Indexer("myIndexer")

Weaviate

import "github.com/firebase/genkit/go/plugins/weaviate"

// Define a retriever/indexer pair backed by a Weaviate class.
retriever, indexer := weaviate.DefineWeaviateRetriever(
    ctx, g, "myClass", weaviateConfig)

Indexing Documents

Add documents to your vector store:
// Build in-memory documents from raw text (nil = no metadata).
docs := []*ai.Document{
    ai.DocumentFromText("The Godfather is a 1972 crime film.", nil),
    ai.DocumentFromText("The Matrix is a 1999 sci-fi film.", nil),
}

// Embed the documents and store them in the local vector store.
err := localvec.Index(ctx, docs, docStore)

Retrieving Documents

Find relevant documents based on a query:
// Wrap the query text as a document so it can be embedded.
dRequest := ai.DocumentFromText("What's a good sci-fi movie?", nil)
// K: 3 requests the three nearest matches.
// NOTE(review): the error is discarded for brevity — check it in real code.
response, _ := genkit.Retrieve(ctx, g,
    ai.WithRetriever(retriever),
    ai.WithDocs(dRequest),
    ai.WithConfig(&localvec.RetrieverOptions{K: 3}),
)

// Print the text of each retrieved document.
for _, doc := range response.Documents {
    fmt.Println(doc.Content[0].Text)
}

Multimodal RAG

RAG also works with images and videos. The following example is JavaScript/TypeScript:
import { Document } from 'genkit';

// Index video with metadata.
// videoSegmentConfig splits the first two minutes of the video into
// 15-second segments; each segment is indexed separately so retrieval
// can return the most relevant clip.
const videoDocs = [
  Document.fromMedia(
    'gs://cloud-samples-data/video/pixel8.mp4',
    'video/mp4',
    {
      videoSegmentConfig: {
        startOffsetSec: 0,
        endOffsetSec: 120,
        intervalSec: 15,
      },
    }
  ),
];

// Embed and store the segments via the configured indexer.
await ai.index({
  indexer: videoIndexer,
  documents: videoDocs,
});

// Retrieve and query video segments
// Flow that answers a free-text question about previously indexed video.
const videoQA = ai.defineFlow(
  { name: 'videoQuestions', inputSchema: z.string() },
  async (query) => {
    // Fetch the single most relevant video segment for the query.
    const retrieved = await ai.retrieve({
      retriever: videoRetriever,
      query,
      options: { k: 1 },
    });

    // Attach the segment's media to the prompt and generate the answer.
    const result = await ai.generate({
      model: googleAI.model('gemini-2.5-flash'),
      prompt: `Answer based on this video segment: ${query}`,
      media: retrieved[0].media,
    });

    return result.text;
  }
);

Document Metadata

Add metadata to documents for filtering and context:
// Attach metadata (second argument) for later filtering and provenance.
doc := ai.DocumentFromText("Paris is the capital of France", map[string]any{
    "source": "geography_facts.txt",
    "category": "geography",
    "date": "2024-01-15",
})

Best Practices

Chunk Documents Appropriately

Break large documents into smaller, focused chunks:
  • Too small: Lacks context
  • Too large: Contains irrelevant information
  • Recommended: 200-500 words per chunk

Use Meaningful Metadata

Add metadata to help with filtering and ranking:
Go
// Example metadata fields that help rank and filter retrieved chunks.
metadata := map[string]any{
    "title": "Product Guide",
    "category": "electronics",
    "date": "2024-01-15",
    "version": "2.0",
}

Optimize Retrieval Parameters

Adjust k based on your use case:
  • Simple Q&A: k=1-3
  • Comprehensive answers: k=5-10
  • Research/summarization: k=10-20

Use System Prompts

Guide how the model uses retrieved context:
Go
ai.WithSystem("You are a helpful assistant. Answer questions based only on the provided context. If the context doesn't contain the answer, say 'I don't know'.")

Complete RAG Example

Here’s a production-ready RAG flow:
// Prompt template; {{query}} and {{context}} are filled from
// simpleQaPromptInput when the prompt is executed.
const qaPromptTemplate = `
You're a helpful agent that answers questions based on the context provided.

Here is the user's query: {{query}}

Here is the context you should use: {{context}}

Please provide the best answer you can.
`

// simpleQaInput is the input payload for the simpleQaFlow flow.
type simpleQaInput struct {
    Question string `json:"question"`
}

// simpleQaPromptInput carries the template variables for qaPromptTemplate.
type simpleQaPromptInput struct {
    Query   string `json:"query"`
    Context string `json:"context"`
}

// Define a reusable prompt bound to a model and a typed input.
qaPrompt := genkit.DefinePrompt(g, "simpleQaPrompt",
    ai.WithModelName("googleai/gemini-2.5-flash"),
    ai.WithPrompt(qaPromptTemplate),
    ai.WithInputType(simpleQaPromptInput{}),
)

// simpleQaFlow: retrieve the most relevant documents for the question,
// build a context string from them, and execute the QA prompt.
genkit.DefineFlow(g, "simpleQaFlow", 
    func(ctx context.Context, input *simpleQaInput) (string, error) {
        // Retrieve the two most similar documents to the question.
        dRequest := ai.DocumentFromText(input.Question, nil)
        response, err := genkit.Retrieve(ctx, g,
            ai.WithRetriever(retriever),
            ai.WithDocs(dRequest),
            ai.WithConfig(&localvec.RetrieverOptions{K: 2}),
        )
        if err != nil {
            return "", err
        }

        // Concatenate retrieved document text into a single context string.
        // Named ctxText so it does not shadow the context package.
        var ctxText strings.Builder
        for _, d := range response.Documents {
            ctxText.WriteString(d.Content[0].Text)
            ctxText.WriteByte('\n')
        }

        // Render the prompt with the question and context, then generate.
        resp, err := qaPrompt.Execute(ctx, ai.WithInput(&simpleQaPromptInput{
            Query:   input.Question,
            Context: ctxText.String(),
        }))
        if err != nil {
            return "", err
        }

        return resp.Text(), nil
    })

Next Steps

  • Explore Multimodal for image and video RAG
  • Learn about Evaluation to test RAG quality
  • Check out Flows for production deployment

Build docs developers (and LLMs) love