Skip to main content
Build production-ready RAG pipelines using Haystack’s Docling extension with document-native chunking and grounding.

Overview

This example demonstrates:
  • Using DoclingConverter for document conversion
  • Document chunking with HybridChunker
  • Vector storage with Milvus
  • Building indexing and RAG pipelines
  • Document-level grounding with page numbers and bounding boxes

Installation

pip install docling-haystack haystack-ai docling \
    "pymilvus[milvus-lite]" milvus-haystack sentence-transformers

Configuration

# Configuration for the Docling + Haystack RAG example.
import os
from pathlib import Path
from tempfile import mkdtemp

from docling_haystack.converter import ExportType

# Hugging Face API token for the remote generation model; may be None,
# in which case the inference API is called anonymously.
HF_TOKEN = os.getenv("HF_TOKEN")
PATHS = ["https://arxiv.org/pdf/2408.09869"]  # Docling Technical Report
# Local sentence-transformers model used for both document and query embeddings.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
# Remote model used for answer generation via the HF serverless inference API.
GENERATION_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
EXPORT_TYPE = ExportType.DOC_CHUNKS  # or ExportType.MARKDOWN
QUESTION = "Which are the main AI models in Docling?"
TOP_K = 3  # number of documents retrieved per query
# Milvus Lite database file placed in a fresh temporary directory.
MILVUS_URI = str(Path(mkdtemp()) / "docling.db")

Indexing Pipeline

1. **Create Document Store** — Initialize the Milvus document store for vector storage.
2. **Build Pipeline** — Assemble the converter, embedder, and writer components.
3. **Connect Components** — Wire up the pipeline based on the chosen export type.
4. **Run Indexing** — Process the documents and store them in the vector database.
from docling_haystack.converter import DoclingConverter
from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from milvus_haystack import MilvusDocumentStore

from docling.chunking import HybridChunker

# Milvus Lite store backing the vector index.
document_store = MilvusDocumentStore(
    connection_args={"uri": MILVUS_URI},
    drop_old=True,
    text_field="txt",  # prevents conflict with metadata field
)

# Indexing pipeline: convert -> (optionally split) -> embed -> write.
indexing_pipeline = Pipeline()
doc_converter = DoclingConverter(
    export_type=EXPORT_TYPE,
    chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
)
indexing_pipeline.add_component("converter", doc_converter)
indexing_pipeline.add_component(
    "embedder", SentenceTransformersDocumentEmbedder(model=EMBED_MODEL_ID)
)
indexing_pipeline.add_component("writer", DocumentWriter(document_store=document_store))

# Wiring depends on what the converter emits.
if EXPORT_TYPE == ExportType.DOC_CHUNKS:
    # Docling-native chunks go straight to the embedder.
    indexing_pipeline.connect("converter", "embedder")
elif EXPORT_TYPE == ExportType.MARKDOWN:
    # Markdown export needs a generic sentence splitter before embedding.
    indexing_pipeline.add_component(
        "splitter", DocumentSplitter(split_by="sentence", split_length=1)
    )
    indexing_pipeline.connect("converter.documents", "splitter.documents")
    indexing_pipeline.connect("splitter.documents", "embedder.documents")

indexing_pipeline.connect("embedder", "writer")

# Convert, embed, and store the source documents.
indexing_pipeline.run({"converter": {"paths": PATHS}})

RAG Pipeline

from haystack.components.builders import AnswerBuilder, PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.utils import Secret
from milvus_haystack import MilvusEmbeddingRetriever

# Jinja template: inline the retrieved documents, then ask the question.
RAG_PROMPT = """
    Given these documents, answer the question.
    Documents:
    {% for doc in documents %}
        {{ doc.content }}
    {% endfor %}
    Question: {{query}}
    Answer:
    """

# RAG pipeline: embed query -> retrieve -> build prompt -> generate -> assemble.
rag_pipeline = Pipeline()
rag_pipeline.add_component(
    "embedder", SentenceTransformersTextEmbedder(model=EMBED_MODEL_ID)
)
rag_pipeline.add_component(
    "retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=TOP_K)
)
rag_pipeline.add_component("prompt_builder", PromptBuilder(template=RAG_PROMPT))
rag_pipeline.add_component(
    "llm",
    HuggingFaceAPIGenerator(
        api_type="serverless_inference_api",
        api_params={"model": GENERATION_MODEL_ID},
        # Only wrap the token when one is configured; None means anonymous access.
        token=Secret.from_token(HF_TOKEN) if HF_TOKEN else None,
    ),
)
rag_pipeline.add_component("answer_builder", AnswerBuilder())

# Wire the components together.
rag_pipeline.connect("embedder.embedding", "retriever")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm.replies", "answer_builder.replies")
rag_pipeline.connect("llm.meta", "answer_builder.meta")
rag_pipeline.connect("retriever", "answer_builder.documents")

# Run the query; rag_res holds each component's outputs keyed by component name.
rag_res = rag_pipeline.run(
    {
        "embedder": {"text": QUESTION},
        "prompt_builder": {"query": QUESTION},
        "answer_builder": {"query": QUESTION},
    }
)

Display Results

from docling.chunking import DocChunk

# Print the answer followed by per-source grounding information.
print(f"Question: {QUESTION}\n")
print(f"Answer: {rag_res['answer_builder']['answers'][0].data.strip()}\n")
print("Sources:")

sources = rag_res["answer_builder"]["answers"][0].documents
for source in sources:
    if EXPORT_TYPE == ExportType.DOC_CHUNKS:
        # Recover the rich Docling chunk metadata stored alongside each document.
        doc_chunk = DocChunk.model_validate(source.meta["dl_meta"])
        print(f"- Text: {doc_chunk.text[:150]}...")

        if doc_chunk.meta.origin:
            print(f"  File: {doc_chunk.meta.origin.filename}")

        if doc_chunk.meta.headings:
            print(f"  Section: {' / '.join(doc_chunk.meta.headings)}")

        # Guard against chunks that carry no provenance entries instead of
        # raising IndexError on doc_items[0].prov[0].
        if doc_chunk.meta.doc_items and doc_chunk.meta.doc_items[0].prov:
            prov = doc_chunk.meta.doc_items[0].prov[0]
            bbox = prov.bbox
            print(f"  Page: {prov.page_no}")
            print(f"  Bbox: [{int(bbox.l)}, {int(bbox.t)}, {int(bbox.r)}, {int(bbox.b)}]")

    elif EXPORT_TYPE == ExportType.MARKDOWN:
        print(f"- {source.content[:200]}...")

Document-Level Grounding

When using ExportType.DOC_CHUNKS, each source includes:
  • File name: Original document filename
  • Section headings: Hierarchical section path
  • Page number: Exact page where content appears
  • Bounding box: Coordinates on the page (left, top, right, bottom)

Complete Example

import os
from pathlib import Path
from tempfile import mkdtemp

from docling.chunking import DocChunk, HybridChunker
from docling_haystack.converter import DoclingConverter, ExportType
from haystack import Pipeline
from haystack.components.builders import AnswerBuilder, PromptBuilder
from haystack.components.embedders import (
    SentenceTransformersDocumentEmbedder,
    SentenceTransformersTextEmbedder,
)
from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.components.writers import DocumentWriter
from haystack.utils import Secret
from milvus_haystack import MilvusDocumentStore, MilvusEmbeddingRetriever

# --- Configuration -------------------------------------------------------
HF_TOKEN = os.getenv("HF_TOKEN")
PATHS = ["https://arxiv.org/pdf/2408.09869"]
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
GENERATION_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
QUESTION = "Which are the main AI models in Docling?"
TOP_K = 3

# --- Indexing ------------------------------------------------------------
# Milvus Lite database in a throwaway temp dir; "txt" avoids a metadata clash.
doc_store = MilvusDocumentStore(
    connection_args={"uri": str(Path(mkdtemp()) / "docling.db")},
    drop_old=True,
    text_field="txt",
)

# Convert -> embed -> write.
index_pipeline = Pipeline()
index_pipeline.add_component(
    "converter",
    DoclingConverter(
        export_type=ExportType.DOC_CHUNKS,
        chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
    ),
)
index_pipeline.add_component(
    "embedder", SentenceTransformersDocumentEmbedder(model=EMBED_MODEL_ID)
)
index_pipeline.add_component("writer", DocumentWriter(document_store=doc_store))
index_pipeline.connect("converter", "embedder")
index_pipeline.connect("embedder", "writer")
index_pipeline.run({"converter": {"paths": PATHS}})

# --- RAG -----------------------------------------------------------------
# Embed query -> retrieve -> build prompt -> generate -> assemble answer.
query_pipeline = Pipeline()
query_pipeline.add_component(
    "embedder", SentenceTransformersTextEmbedder(model=EMBED_MODEL_ID)
)
query_pipeline.add_component(
    "retriever", MilvusEmbeddingRetriever(document_store=doc_store, top_k=TOP_K)
)
query_pipeline.add_component(
    "prompt_builder",
    PromptBuilder(template="Context: {% for doc in documents %}{{doc.content}}{% endfor %}\nQuestion: {{query}}\nAnswer:")
)
query_pipeline.add_component(
    "llm",
    HuggingFaceAPIGenerator(
        api_type="serverless_inference_api",
        api_params={"model": GENERATION_MODEL_ID},
        token=Secret.from_token(HF_TOKEN) if HF_TOKEN else None,
    ),
)
query_pipeline.add_component("answer_builder", AnswerBuilder())
query_pipeline.connect("embedder.embedding", "retriever")
query_pipeline.connect("retriever", "prompt_builder.documents")
query_pipeline.connect("prompt_builder", "llm")
query_pipeline.connect("llm.replies", "answer_builder.replies")
query_pipeline.connect("llm.meta", "answer_builder.meta")
query_pipeline.connect("retriever", "answer_builder.documents")

rag_result = query_pipeline.run(
    {
        "embedder": {"text": QUESTION},
        "prompt_builder": {"query": QUESTION},
        "answer_builder": {"query": QUESTION},
    }
)

print(f"Q: {QUESTION}")
print(f"A: {rag_result['answer_builder']['answers'][0].data.strip()}")

Export Types

  • ExportType.DOC_CHUNKS (recommended): Document-native chunking with rich metadata
  • ExportType.MARKDOWN: Markdown export with standard text splitting

Tech Stack

| Component    | Technology                           | Execution |
|--------------|--------------------------------------|-----------|
| Embedding    | Hugging Face / Sentence Transformers | Local     |
| Vector store | Milvus                               | Local     |
| Gen AI       | Hugging Face Inference API           | Remote    |
For best conversion speed, use GPU acceleration when available.

Build docs developers (and LLMs) love