Skip to main content
Build production-ready RAG pipelines using LlamaIndex’s Docling integration with document-native chunking and grounding.

Overview

This example demonstrates:
  • Using DoclingReader for document loading
  • Using DoclingNodeParser for document-native chunking
  • Vector storage with Milvus
  • Retrieval-augmented generation with HuggingFace models
  • Rich metadata including page numbers and bounding boxes

Installation

pip install llama-index-core llama-index-readers-docling \
    llama-index-node-parser-docling llama-index-embeddings-huggingface \
    llama-index-llms-huggingface-api llama-index-vector-stores-milvus

Configuration

"""Shared configuration: embedding/generation models, Milvus URI, source, query."""
import os
from pathlib import Path
from tempfile import mkdtemp
from warnings import filterwarnings

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Silence known-noisy third-party warnings and stop the HF tokenizers
# library from forking worker threads.
for warn_category, warn_module in ((UserWarning, "pydantic"), (FutureWarning, "easyocr")):
    filterwarnings(action="ignore", category=warn_category, module=warn_module)
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Embeddings run locally; generation goes through the HF Inference API.
EMBED_MODEL = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
GEN_MODEL = HuggingFaceInferenceAPI(
    token=os.getenv("HF_TOKEN"),
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
)

# Milvus Lite database file inside a throwaway temp directory.
MILVUS_URI = str(Path(mkdtemp()) / "docling.db")

SOURCE = "https://arxiv.org/pdf/2408.09869"  # Docling Technical Report
QUERY = "Which are the main AI models in Docling?"

# Embed a probe string once to discover the model's output dimensionality.
embed_dim = len(EMBED_MODEL.get_text_embedding("hi"))

Using Markdown Export

1. **Create Reader** — Use DoclingReader with default Markdown export.
2. **Parse Documents** — Use standard MarkdownNodeParser for chunking.
3. **Build Index** — Create vector store index with embeddings.
4. **Query** — Ask questions and retrieve answers with sources.
"""RAG pipeline over SOURCE using DoclingReader's default Markdown export."""
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# The reader emits Markdown by default; chunk it with the stock parser.
reader = DoclingReader()
node_parser = MarkdownNodeParser()

# Fresh Milvus collection sized to the embedding model's output dimension.
vector_store = MilvusVectorStore(uri=MILVUS_URI, dim=embed_dim, overwrite=True)

# Load, chunk, embed, and index in one call.
index = VectorStoreIndex.from_documents(
    documents=reader.load_data(SOURCE),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)

# Answer the query and show the retrieved evidence.
result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")

for node in result.source_nodes:
    print(f"\nSource: {node.text[:200]}...")
    print(f"Metadata: {node.metadata}")
"""RAG pipeline over SOURCE using Docling's native JSON export for rich grounding."""
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# JSON export keeps Docling's document structure, which DoclingNodeParser
# converts into chunks carrying page/bbox provenance metadata.
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
node_parser = DoclingNodeParser()

vector_store = MilvusVectorStore(uri=MILVUS_URI, dim=embed_dim, overwrite=True)

index = VectorStoreIndex.from_documents(
    documents=reader.load_data(SOURCE),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)

result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")

Document-Level Grounding

When using DoclingNodeParser, sources include rich metadata:
# Walk the retrieved chunks and print the provenance metadata that
# DoclingNodeParser attaches to each one.
for node in result.source_nodes:
    print(f"Text: {node.text[:150]}...")

    meta = node.metadata

    # Originating file name.
    print(f"File: {meta.get('origin', {}).get('filename')}")

    # Breadcrumb of section headings leading to this chunk.
    print(f"Section: {' / '.join(meta.get('headings', []))}")

    # First provenance entry (when present) carries page number and bbox.
    doc_items = meta.get('doc_items', [])
    prov = doc_items[0].get('prov', []) if doc_items else []
    if prov:
        bbox = prov[0].get('bbox', {})
        print(f"Page: {prov[0].get('page_no')}")
        print(f"Bbox: [l={bbox.get('l')}, t={bbox.get('t')}, "
              f"r={bbox.get('r')}, b={bbox.get('b')}]")

With SimpleDirectoryReader

You can also use DoclingReader with LlamaIndex’s SimpleDirectoryReader:
"""Index SOURCE through SimpleDirectoryReader with DoclingReader as the PDF extractor."""
from pathlib import Path
from tempfile import mkdtemp
import requests

from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# Fetch the source PDF into a scratch directory.
tmp_dir_path = Path(mkdtemp())
response = requests.get(SOURCE)
(tmp_dir_path / "document.pdf").write_bytes(response.content)

# Docling (JSON export) handles every .pdf the directory reader finds.
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
node_parser = DoclingNodeParser()
dir_reader = SimpleDirectoryReader(
    input_dir=tmp_dir_path,
    file_extractor={".pdf": reader},
)

# Fresh Milvus Lite collection for this example.
vector_store = MilvusVectorStore(
    uri=str(Path(mkdtemp()) / "docling.db"),
    dim=embed_dim,
    overwrite=True,
)

index = VectorStoreIndex.from_documents(
    documents=dir_reader.load_data(),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)

result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")

Complete Example

"""Complete example: Docling-powered RAG pipeline with grounded sources.

Loads the Docling technical report, chunks it with DoclingNodeParser
(preserving page/bbox provenance), indexes it in a local Milvus Lite
database, and answers QUERY with a remote HF Inference API model.
"""
import os
from pathlib import Path
from tempfile import mkdtemp

from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# Configuration
SOURCE = "https://arxiv.org/pdf/2408.09869"  # Docling Technical Report
QUERY = "Which are the main AI models in Docling?"


def _page_of(metadata):
    """Return the page number of a node's first provenance entry, or 'N/A'.

    Guards against a missing 'doc_items' key, an empty 'doc_items' list,
    and an empty 'prov' list. (The previous chained
    ``.get('doc_items', [{}])[0].get('prov', [{}])[0]`` raised IndexError
    whenever either list existed but was empty.)
    """
    for item in metadata.get('doc_items', []):
        for prov_entry in item.get('prov', []):
            return prov_entry.get('page_no', 'N/A')
    return 'N/A'


# Setup models: local embeddings, remote generation via HF Inference API.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
gen_model = HuggingFaceInferenceAPI(
    token=os.getenv("HF_TOKEN"),
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
)
# Probe the embedding model once for its output dimensionality.
embed_dim = len(embed_model.get_text_embedding("hi"))

# Load and parse documents with Docling's structure-preserving JSON export.
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
node_parser = DoclingNodeParser()

# Create a fresh Milvus Lite collection in a throwaway temp directory.
vector_store = MilvusVectorStore(
    uri=str(Path(mkdtemp()) / "docling.db"),
    dim=embed_dim,
    overwrite=True,
)

# Build index: load, chunk, embed, and store in one call.
index = VectorStoreIndex.from_documents(
    documents=reader.load_data(SOURCE),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=embed_model,
)

# Query and show the answer with grounded sources.
result = index.as_query_engine(llm=gen_model).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")
print("\nSources:")
for i, n in enumerate(result.source_nodes, 1):
    print(f"{i}. {n.text[:150]}...")
    print(f"   Page {_page_of(n.metadata)}")

Export Types

  • DoclingReader.ExportType.JSON: Exports to Docling’s native format with rich metadata (recommended for grounding)
  • Default (Markdown): Exports to Markdown format for simple use cases

Tech Stack

| Component    | Technology                           | Execution |
|--------------|--------------------------------------|-----------|
| Embedding    | Hugging Face / Sentence Transformers | Local     |
| Vector store | Milvus                               | Local     |
| Gen AI       | Hugging Face Inference API           | Remote    |
For best conversion speed, use GPU acceleration when available.

Build docs developers (and LLMs) love