Qdrant is a high-performance vector database optimized for production RAG pipelines. It features scalar and binary quantization for memory efficiency, payload-based multi-tenancy with tenant optimization, and Maximal Marginal Relevance (MMR) for diverse results.
Key features
Quantization: Scalar (4x) or binary (32x) memory reduction
MMR diversity: Balance relevance with result diversity
Tenant optimization: Efficient filtering for high-cardinality tenants (Qdrant 1.16+)
Named vectors: Multiple vector spaces per collection
gRPC protocol: Lower latency than HTTP
Hybrid search: RRF fusion of dense and sparse vectors
Installation
pip install qdrant-client
Connection
Qdrant Cloud
from vectordb.databases.qdrant import QdrantVectorDB
db = QdrantVectorDB(
config = {
"qdrant" : {
"url" : "https://cloud.qdrant.io" ,
"api_key" : "your-api-key" ,
"collection_name" : "documents"
}
}
)
Self-hosted Qdrant
db = QdrantVectorDB(
config = {
"qdrant" : {
"url" : "http://localhost:6333" ,
"collection_name" : "my_collection" ,
"prefer_grpc" : True # Use gRPC for better performance
}
}
)
From config file
db = QdrantVectorDB( config_path = "configs/qdrant.yaml" )
# configs/qdrant.yaml
qdrant:
  url: ${QDRANT_URL}
  api_key: ${QDRANT_API_KEY}
  collection_name: "haystack_collection"
  timeout: 60.0
  prefer_grpc: true
  dense_vector_name: "dense"
  sparse_vector_name: "sparse"
  quantization:
    type: "scalar"  # or "binary"
    quantile: 0.99
    always_ram: true
Collection creation
Basic collection
db.create_collection(
dimension = 768 ,
distance = "Cosine" # or "Euclidean", "Dot"
)
With sparse vectors for hybrid search
db.create_collection(
dimension = 768 ,
use_sparse = True , # Enables named vectors
distance = "Cosine"
)
With scalar quantization
Reduces memory usage by 4x with minimal accuracy loss:
db = QdrantVectorDB(
config = {
"qdrant" : {
"url" : "http://localhost:6333" ,
"collection_name" : "quantized_docs" ,
"quantization" : {
"type" : "scalar" ,
"quantile" : 0.99 , # Keep 99th percentile precision
"always_ram" : True # Keep quantized vectors in RAM
}
}
}
)
db.create_collection( dimension = 768 )
With binary quantization
Reduces memory usage by 32x (best for high-dimensional vectors):
db = QdrantVectorDB(
config = {
"qdrant" : {
"collection_name" : "binary_docs" ,
"quantization" : {
"type" : "binary" ,
"always_ram" : True
}
}
}
)
db.create_collection( dimension = 768 )
Recreate collection
db.create_collection(
dimension = 768 ,
recreate = True # Deletes existing collection first
)
Payload indexing
Basic payload index
db.create_payload_index(
field_name = "category" ,
field_schema = "keyword" # or "text", "integer", "float", "bool"
)
Tenant optimization index
Critical for multi-tenant performance (Qdrant 1.16+):
db.create_namespace_index(
namespace_field = "tenant_id"
)
# Or manually:
db.create_payload_index(
field_name = "tenant_id" ,
field_schema = "keyword" ,
is_tenant = True # Enables tenant optimization
)
The is_tenant=True flag enables specialized indexing for high-cardinality tenant filtering, dramatically improving query performance in multi-tenant scenarios.
Indexing documents
From Haystack documents
from haystack import Document
documents = [
Document(
content = "Qdrant supports quantization" ,
embedding = [ 0.1 , 0.2 , ... ],
meta = { "category" : "database" , "priority" : 1 }
)
]
db.index_documents(
documents = documents,
scope = "tenant_1" ,
batch_size = 100
)
With sparse embeddings
from haystack.dataclasses import SparseEmbedding
doc = Document(
content = "Hybrid search example" ,
embedding = [ 0.1 , 0.2 , ... ], # Dense
sparse_embedding = SparseEmbedding(
indices = [ 100 , 250 , 500 ],
values = [ 0.5 , 0.3 , 0.2 ]
)
)
db.index_documents([doc], scope = "tenant_1" )
Searching
Dense vector search
results = db.search(
query_vector = [ 0.1 , 0.2 , ... ],
top_k = 10 ,
scope = "tenant_1" ,
include_vectors = False
)
for doc in results:
    print(f"Score: {doc.score}, Content: {doc.content}")
Hybrid search with RRF
results = db.search(
query_vector = {
"dense" : dense_embedding,
"sparse" : sparse_embedding
},
search_type = "hybrid" ,
top_k = 10 ,
scope = "tenant_1"
)
MMR for diverse results
Maximal Marginal Relevance balances relevance with diversity:
results = db.search(
query_vector = embedding,
search_type = "mmr" ,
mmr_diversity = 0.5 , # 0.0 = pure relevance, 1.0 = pure diversity
top_k = 10
)
MMR is ideal for:
Summarization tasks (avoid redundant content)
Exploratory search (cover multiple aspects)
Recommendation diversity
Metadata filtering
# Simple equality
results = db.search(
query_vector = vec,
filters = { "category" : "technology" },
top_k = 10
)
# Range queries
results = db.search(
query_vector = vec,
filters = { "priority" : { "$gte" : 5 }},
top_k = 10
)
# Multiple conditions
results = db.search(
query_vector = vec,
filters = {
"category" : "tech" ,
"score" : { "$gt" : 0.8 }
}
)
# Set membership
results = db.search(
query_vector = vec,
filters = {
"status" : { "$in" : [ "active" , "pending" ]}
}
)
# Negation
results = db.search(
query_vector = vec,
filters = {
"status" : { "$ne" : "archived" }
}
)
Supported filter operators
$eq: Equal to
$ne: Not equal to
$gt: Greater than
$gte: Greater than or equal
$lt: Less than
$lte: Less than or equal
$in: Value in list
$nin: Value not in list
Multi-tenancy
Setup tenant isolation
# Create collection
db.create_collection( dimension = 768 )
# Create tenant index (critical for performance)
db.create_namespace_index( namespace_field = "tenant_id" )
# Index with tenant
db.index_documents(documents, scope = "tenant_1" )
# Search within tenant
results = db.search(
query_vector = vec,
scope = "tenant_1" ,
top_k = 10
)
Qdrant’s payload-based filtering allows flexible tenant strategies:
# Small tenants: Use payload filters
db.index_documents(small_tenant_docs, scope = "small_tenant_123" )
# Large tenants: Promote to dedicated collection
db_large = QdrantVectorDB( config = { "qdrant" : { "collection_name" : "tenant_large" }})
db_large.create_collection( dimension = 768 )
db_large.index_documents(large_tenant_docs)
Deleting documents
Delete by tenant
db.delete_documents( scope = "tenant_old" )
Delete by filter
db.delete_documents(
scope = "tenant_1" ,
filters = { "status" : "archived" }
)
Always provide a scope or filter to avoid accidentally deleting all documents.
Building filters
# Build filter programmatically
filter_obj = db.build_filter({
"category" : "tech" ,
"priority" : { "$gt" : 5 }
})
# Use in custom queries
from qdrant_client import QdrantClient
client = db.client
results = client.search(
collection_name = db.collection_name,
query_vector = vec,
query_filter = filter_obj,
limit = 10
)
Advanced features
Named vectors
Store multiple vector types in one collection:
# Configured automatically with use_sparse=True
db.create_collection( dimension = 768 , use_sparse = True )
# Named vectors: "dense" and "sparse"
db.index_documents(docs_with_both_embeddings)
# Search using specific vector
results = db.search(
query_vector = {db.dense_vector_name: embedding},
top_k = 10
)
Custom vector names
db = QdrantVectorDB(
config = {
"qdrant" : {
"collection_name" : "custom" ,
"dense_vector_name" : "semantic" ,
"sparse_vector_name" : "lexical"
}
}
)
db.create_collection( dimension = 768 , use_sparse = True )
Best practices
Choose the right quantization
Balance memory reduction with accuracy requirements:
# Scalar quantization: 4x reduction, <1% accuracy loss
config = {
"quantization" : {
"type" : "scalar" ,
"quantile" : 0.99 , # Higher = better accuracy
"always_ram" : True
}
}
# Binary quantization: 32x reduction, ~5% accuracy loss
# Best for: High dimensions (>512), large datasets, memory-critical
config = {
"quantization" : {
"type" : "binary" ,
"always_ram" : True
}
}
Always create a tenant index before multi-tenant workloads:
# MUST do this for good performance
db.create_namespace_index()
# Then index and search
db.index_documents(docs, scope = "tenant_1" )
db.search(vec, scope = "tenant_1" )
gRPC provides better throughput than HTTP:
db = QdrantVectorDB(
config = {
"qdrant" : {
"url" : "http://localhost:6333" ,
"prefer_grpc" : True # Default
}
}
)
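Adjust lambda (passed as mmr_diversity in the examples below) based on use case. Note: the MMR search example earlier on this page describes mmr_diversity as 0.0 = pure relevance and 1.0 = pure diversity, which is the opposite direction from the lambda ranges shown here; confirm the direction against the search() implementation before relying on either description.
# High relevance (lambda=0.8-1.0)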
# Use for: Precise question answering
results = db.search(vec, search_type = "mmr" , mmr_diversity = 0.9 )
# Balanced (lambda=0.4-0.6)
# Use for: General search
results = db.search(vec, search_type = "mmr" , mmr_diversity = 0.5 )
# High diversity (lambda=0.0-0.2)
# Use for: Summarization, exploratory search
results = db.search(vec, search_type = "mmr" , mmr_diversity = 0.2 )
Error handling
try:
    db.create_collection(dimension=768)
    db.index_documents(documents)
except ValueError as e:
    print(f"Configuration error: {e}")
except ConnectionError as e:
    print(f"Qdrant connection failed: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
Source reference
Implementation: src/vectordb/databases/qdrant.py
Key classes and methods:
QdrantVectorDB.__init__(): src/vectordb/databases/qdrant.py:114
create_collection(): src/vectordb/databases/qdrant.py:185
create_payload_index(): src/vectordb/databases/qdrant.py:288
create_namespace_index(): src/vectordb/databases/qdrant.py:347
index_documents(): src/vectordb/databases/qdrant.py:390
search(): src/vectordb/databases/qdrant.py:467
_mmr_rerank(): src/vectordb/databases/qdrant.py:662