Build production-ready RAG pipelines using LangChain’s Docling integration with document-native chunking.
Overview
This example demonstrates:
- Using DoclingLoader with LangChain
- Document chunking with HybridChunker
- Vector storage with Milvus
- Retrieval-augmented generation with Hugging Face models
- Document-native grounding with metadata
Quick Start
1. Install Dependencies: install LangChain with the Docling integration and required packages.
2. Load Documents: use DoclingLoader to convert and chunk documents.
3. Create Vector Store: embed and store document chunks in Milvus.
4. Build RAG Chain: create a retrieval-augmented generation pipeline.
Installation
pip install langchain-docling langchain-core langchain-huggingface \
    langchain-milvus langchain python-dotenv
Configuration
import os
from pathlib import Path
from tempfile import mkdtemp

from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_docling.loader import ExportType

load_dotenv()

# Configuration
HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, for increased quota
FILE_PATH = ["https://arxiv.org/pdf/2408.09869"]  # Docling Technical Report
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
GEN_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
EXPORT_TYPE = ExportType.DOC_CHUNKS  # or ExportType.MARKDOWN
QUESTION = "Which are the main AI models in Docling?"
TOP_K = 3
MILVUS_URI = str(Path(mkdtemp()) / "docling.db")

PROMPT = PromptTemplate.from_template(
    "Context information is below.\n"
    "---------------------\n{context}\n---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\nQuery: {input}\nAnswer:\n"
)
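The template takes two variables: {context}, which the stuff-documents chain built below fills with the retrieved chunks, and {input}, the user query. Rendering it with placeholder values makes the mapping explicit:

# Preview the rendered prompt (placeholder values, for illustration only)
print(PROMPT.format(context="<retrieved chunks>", input="<user question>"))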
Document Loading
With Doc Chunks (Recommended)
from langchain_docling import DoclingLoader
from docling.chunking import HybridChunker

# Load and chunk documents in one step
loader = DoclingLoader(
    file_path=FILE_PATH,
    export_type=ExportType.DOC_CHUNKS,
    chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
)
docs = loader.load()
splits = docs  # Already chunked
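With Markdown Export
With EXPORT_TYPE = ExportType.MARKDOWN, each input file becomes a single Markdown document that still needs splitting. A minimal sketch pairing the loader with LangChain's MarkdownHeaderTextSplitter (the header metadata keys are arbitrary names):

from langchain_docling import DoclingLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter

# Load each input as one Markdown document
loader = DoclingLoader(
    file_path=FILE_PATH,
    export_type=ExportType.MARKDOWN,
)
docs = loader.load()

# Split on Markdown headings to produce retrieval-sized chunks
splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[
        ("#", "Header_1"),
        ("##", "Header_2"),
        ("###", "Header_3"),
    ],
)
splits = [split for doc in docs for split in splitter.split_text(doc.page_content)]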
Vector Store and Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_milvus import Milvus

# Create embeddings
embedding = HuggingFaceEmbeddings(model_name=EMBED_MODEL_ID)

# Create vector store
vectorstore = Milvus.from_documents(
    documents=splits,
    embedding=embedding,
    collection_name="docling_demo",
    connection_args={"uri": MILVUS_URI},
    index_params={"index_type": "FLAT"},
    drop_old=True,
)
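Before wiring the full chain, a direct similarity search is a quick sanity check that the chunks were embedded and stored as expected:

# Retrieval smoke test: should print TOP_K chunks related to the question
for doc in vectorstore.similarity_search(QUESTION, k=TOP_K):
    print(doc.page_content[:120], "...")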
RAG Pipeline
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_huggingface import HuggingFaceEndpoint

# Create retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

# Create LLM
llm = HuggingFaceEndpoint(
    repo_id=GEN_MODEL_ID,
    huggingfacehub_api_token=HF_TOKEN,
)

# Create RAG chain
question_answer_chain = create_stuff_documents_chain(llm, PROMPT)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Query
resp_dict = rag_chain.invoke({"input": QUESTION})
print(f"Question: {resp_dict['input']}")
print(f"Answer: {resp_dict['answer'][:200]}...")

# Show sources
for i, doc in enumerate(resp_dict["context"]):
    print(f"\nSource {i + 1}:")
    print(f"  Text: {doc.page_content[:200]}...")
    print(f"  Metadata: {doc.metadata.get('dl_meta', {}).get('headings', [])}")
Document-Native Grounding
When using ExportType.DOC_CHUNKS, sources include rich metadata:
# Access chunk metadata
for doc in resp_dict["context"]:
    dl_meta = doc.metadata.get("dl_meta", {})

    # Document origin
    origin = dl_meta.get("origin", {})
    print(f"File: {origin.get('filename')}")

    # Section headings
    headings = dl_meta.get("headings", [])
    print(f"Section: {' > '.join(headings)}")

    # Document items with provenance
    doc_items = dl_meta.get("doc_items", [])
    for item in doc_items:
        prov = item.get("prov", [])
        if prov:
            print(f"Page: {prov[0].get('page_no')}")
            bbox = prov[0].get("bbox", {})
            print(
                f"Bbox: [{bbox.get('l')}, {bbox.get('t')}, "
                f"{bbox.get('r')}, {bbox.get('b')}]"
            )
Complete Example
import os
from pathlib import Path
from tempfile import mkdtemp

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import PromptTemplate
from langchain_docling import DoclingLoader
from langchain_docling.loader import ExportType
from langchain_huggingface import HuggingFaceEndpoint
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_milvus import Milvus
from docling.chunking import HybridChunker

# Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
FILE_PATH = ["https://arxiv.org/pdf/2408.09869"]
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
GEN_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
QUESTION = "Which are the main AI models in Docling?"
TOP_K = 3

# Load documents with chunking
loader = DoclingLoader(
    file_path=FILE_PATH,
    export_type=ExportType.DOC_CHUNKS,
    chunker=HybridChunker(tokenizer=EMBED_MODEL_ID),
)
docs = loader.load()

# Create vector store
embedding = HuggingFaceEmbeddings(model_name=EMBED_MODEL_ID)
vectorstore = Milvus.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="docling_demo",
    connection_args={"uri": str(Path(mkdtemp()) / "docling.db")},
    index_params={"index_type": "FLAT"},
    drop_old=True,
)

# Create RAG chain
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
llm = HuggingFaceEndpoint(
    repo_id=GEN_MODEL_ID,
    huggingfacehub_api_token=HF_TOKEN,
)
prompt = PromptTemplate.from_template(
    "Context: {context}\nQuestion: {input}\nAnswer:"
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Query
result = rag_chain.invoke({"input": QUESTION})
print(f"Q: {QUESTION}")
print(f"A: {result['answer']}")
Export Types
- ExportType.DOC_CHUNKS (recommended): each chunk becomes a separate LangChain document with rich metadata, including page numbers, bounding boxes, and section headings (see the abridged example below).
- ExportType.MARKDOWN: each input document becomes a single LangChain document in Markdown format.
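For orientation, the dl_meta attached to a DOC_CHUNKS document looks roughly like this (abridged, with made-up values; the exact schema depends on the Docling version):

# Abridged, illustrative dl_meta for one chunk (values are invented)
{
    "headings": ["3 Processing Pipeline"],
    "origin": {"mimetype": "application/pdf", "filename": "2408.09869"},
    "doc_items": [
        {"prov": [{"page_no": 3, "bbox": {"l": 108.0, "t": 405.3, "r": 504.0, "b": 330.8}}]},
    ],
}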
Tech Stack
| Component | Technology | Execution |
| --- | --- | --- |
| Embedding | Hugging Face / Sentence Transformers | Local |
| Vector store | Milvus | Local |
| Gen AI | Hugging Face Inference API | Remote |
For best conversion speed, use GPU acceleration when available (e.g., Colab GPU runtime).
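If a GPU is present, Docling can be pointed at it through its accelerator options. A sketch under stated assumptions (the option classes have moved between Docling releases, and DoclingLoader's converter parameter is assumed here):

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    AcceleratorDevice,
    AcceleratorOptions,
    PdfPipelineOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from langchain_docling import DoclingLoader

# Ask Docling to run its layout/OCR models on CUDA
pipeline_options = PdfPipelineOptions(
    accelerator_options=AcceleratorOptions(device=AcceleratorDevice.CUDA)
)
converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
)
loader = DoclingLoader(file_path=FILE_PATH, converter=converter)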