Build production-ready RAG pipelines using LlamaIndex’s Docling integration with document-native chunking and grounding.
## Overview
This example demonstrates:

- Using `DoclingReader` for document loading
- Using `DoclingNodeParser` for document-native chunking
- Vector storage with Milvus
- Retrieval-augmented generation with Hugging Face models
- Rich metadata including page numbers and bounding boxes
## Installation
```bash
pip install llama-index-core llama-index-readers-docling \
  llama-index-node-parser-docling llama-index-embeddings-huggingface \
  llama-index-llms-huggingface-api llama-index-vector-stores-milvus
```
## Configuration
```python
import os
from pathlib import Path
from tempfile import mkdtemp
from warnings import filterwarnings

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

filterwarnings(action="ignore", category=UserWarning, module="pydantic")
filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

EMBED_MODEL = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Milvus Lite stores the collection in a local file
MILVUS_URI = str(Path(mkdtemp()) / "docling.db")

# Remote generation model; requires the HF_TOKEN environment variable
GEN_MODEL = HuggingFaceInferenceAPI(
    token=os.getenv("HF_TOKEN"),
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
)

SOURCE = "https://arxiv.org/pdf/2408.09869"  # Docling Technical Report
QUERY = "Which are the main AI models in Docling?"

# Probe the embedding model once to determine the vector dimension
embed_dim = len(EMBED_MODEL.get_text_embedding("hi"))
```
## Using Markdown Export

The default workflow exports each document to Markdown and chunks it with the standard Markdown parser:

1. **Create reader**: use `DoclingReader` with its default Markdown export.
2. **Parse documents**: use the standard `MarkdownNodeParser` for chunking.
3. **Build index**: create a vector store index with embeddings.
4. **Query**: ask questions and retrieve answers with sources.
```python
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# Create reader (default Markdown export) and parser
reader = DoclingReader()
node_parser = MarkdownNodeParser()

# Create vector store
vector_store = MilvusVectorStore(
    uri=MILVUS_URI,
    dim=embed_dim,
    overwrite=True,
)

# Build index
index = VectorStoreIndex.from_documents(
    documents=reader.load_data(SOURCE),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)

# Query
result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")

# Show sources
for n in result.source_nodes:
    print(f"\nSource: {n.text[:200]}...")
    print(f"Metadata: {n.metadata}")
```
## Using Docling Format

To preserve Docling's native document structure and enable rich grounding metadata, switch the reader to JSON export and chunk with `DoclingNodeParser`:

```python
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# Create reader with JSON (Docling-native) export and the matching parser
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
node_parser = DoclingNodeParser()

# Create vector store
vector_store = MilvusVectorStore(
    uri=MILVUS_URI,
    dim=embed_dim,
    overwrite=True,
)

# Build index
index = VectorStoreIndex.from_documents(
    documents=reader.load_data(SOURCE),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)

# Query
result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")
```
## Document-Level Grounding

When using `DoclingNodeParser`, source nodes carry rich metadata that lets you trace each answer back to its exact location in the source document:
```python
for n in result.source_nodes:
    print(f"Text: {n.text[:150]}...")
    meta = n.metadata

    # Document origin
    origin = meta.get("origin", {})
    print(f"File: {origin.get('filename')}")

    # Section headings
    headings = meta.get("headings", [])
    print(f"Section: {' / '.join(headings)}")

    # Page number and bounding box of the first provenance item
    doc_items = meta.get("doc_items", [])
    if doc_items:
        prov = doc_items[0].get("prov", [])
        if prov:
            bbox = prov[0].get("bbox", {})
            print(f"Page: {prov[0].get('page_no')}")
            print(
                f"Bbox: [l={bbox.get('l')}, t={bbox.get('t')}, "
                f"r={bbox.get('r')}, b={bbox.get('b')}]"
            )
```
## With SimpleDirectoryReader

You can also plug `DoclingReader` into LlamaIndex's `SimpleDirectoryReader` as a file extractor:
```python
from pathlib import Path
from tempfile import mkdtemp

import requests
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# Download the document to a temp directory
tmp_dir_path = Path(mkdtemp())
r = requests.get(SOURCE)
with open(tmp_dir_path / "document.pdf", "wb") as out_file:
    out_file.write(r.content)

# Create reader and parser
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
node_parser = DoclingNodeParser()

# Route .pdf files through DoclingReader
dir_reader = SimpleDirectoryReader(
    input_dir=tmp_dir_path,
    file_extractor={".pdf": reader},
)

# Build index
vector_store = MilvusVectorStore(
    uri=str(Path(mkdtemp()) / "docling.db"),
    dim=embed_dim,
    overwrite=True,
)
index = VectorStoreIndex.from_documents(
    documents=dir_reader.load_data(),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)

result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")
```
## Complete Example
```python
import os
from pathlib import Path
from tempfile import mkdtemp

from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore

# Configuration
SOURCE = "https://arxiv.org/pdf/2408.09869"  # Docling Technical Report
QUERY = "Which are the main AI models in Docling?"

# Set up models
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
gen_model = HuggingFaceInferenceAPI(
    token=os.getenv("HF_TOKEN"),
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
)
embed_dim = len(embed_model.get_text_embedding("hi"))

# Load and parse documents with Docling-native (JSON) export
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
node_parser = DoclingNodeParser()

# Create vector store
vector_store = MilvusVectorStore(
    uri=str(Path(mkdtemp()) / "docling.db"),
    dim=embed_dim,
    overwrite=True,
)

# Build index
index = VectorStoreIndex.from_documents(
    documents=reader.load_data(SOURCE),
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=embed_model,
)

# Query
result = index.as_query_engine(llm=gen_model).query(QUERY)
print(f"Q: {QUERY}")
print(f"A: {result.response.strip()}")

# Show sources with page-level grounding; guard against empty provenance
print("\nSources:")
for i, n in enumerate(result.source_nodes, 1):
    print(f"{i}. {n.text[:150]}...")
    prov = n.metadata.get("doc_items", [{}])[0].get("prov", [{}])
    page = prov[0].get("page_no", "N/A") if prov else "N/A"
    print(f"   Page {page}")
```
## Export Types

`DoclingReader` supports two export types:

- `DoclingReader.ExportType.JSON`: exports to Docling's native format with rich metadata (recommended for grounding)
- `DoclingReader.ExportType.MARKDOWN` (default): exports to Markdown format for simple use cases
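The short sketch below shows both configurations side by side, each paired with the node parser used for it in the sections above:

```python
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader

# Markdown export (the default): pair with a Markdown-aware parser
md_reader = DoclingReader()  # same as export_type=DoclingReader.ExportType.MARKDOWN
md_parser = MarkdownNodeParser()

# JSON export: pair with DoclingNodeParser to keep page/bbox metadata
json_reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
json_parser = DoclingNodeParser()
```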
## Tech Stack
| Component | Technology | Execution |
|---|---|---|
| Embedding | Hugging Face / Sentence Transformers | Local |
| Vector store | Milvus | Local |
| Gen AI | Hugging Face Inference API | Remote |
For best conversion speed, use GPU acceleration when available.
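One way to do that is to pass a pre-configured Docling `DocumentConverter` into the reader. The sketch below uses Docling's documented accelerator options and assumes your installed version of `DoclingReader` accepts a `doc_converter` argument; verify both against your installed versions:

```python
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    AcceleratorDevice,
    AcceleratorOptions,
    PdfPipelineOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from llama_index.readers.docling import DoclingReader

# Configure Docling's PDF pipeline to run its models on CUDA
pipeline_options = PdfPipelineOptions()
pipeline_options.accelerator_options = AcceleratorOptions(
    num_threads=8,
    device=AcceleratorDevice.CUDA,  # or AcceleratorDevice.AUTO / .MPS / .CPU
)

converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
)

# Hand the pre-configured converter to the reader (assumes the
# doc_converter parameter is available in your reader version)
reader = DoclingReader(
    export_type=DoclingReader.ExportType.JSON,
    doc_converter=converter,
)
```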