Skip to main content
Qdrant is a high-performance vector database optimized for production RAG pipelines. It features scalar and binary quantization for memory efficiency, payload-based multi-tenancy with tenant optimization, and Maximal Marginal Relevance (MMR) for diverse results.

Key features

  • Quantization: Scalar (4x) or binary (32x) memory reduction
  • MMR diversity: Balance relevance with result diversity
  • Tenant optimization: Efficient filtering for high-cardinality tenants (Qdrant 1.16+)
  • Named vectors: Multiple vector spaces per collection
  • gRPC protocol: Lower latency than HTTP
  • Hybrid search: RRF fusion of dense and sparse vectors

Installation

pip install qdrant-client

Connection

Qdrant Cloud

from vectordb.databases.qdrant import QdrantVectorDB

db = QdrantVectorDB(
    config={
        "qdrant": {
            "url": "https://cloud.qdrant.io",
            "api_key": "your-api-key",
            "collection_name": "documents"
        }
    }
)

Self-hosted Qdrant

db = QdrantVectorDB(
    config={
        "qdrant": {
            "url": "http://localhost:6333",
            "collection_name": "my_collection",
            "prefer_grpc": True  # Use gRPC for better performance
        }
    }
)

From config file

db = QdrantVectorDB(config_path="configs/qdrant.yaml")
configs/qdrant.yaml
qdrant:
  url: ${QDRANT_URL}
  api_key: ${QDRANT_API_KEY}
  collection_name: "haystack_collection"
  timeout: 60.0
  prefer_grpc: true
  dense_vector_name: "dense"
  sparse_vector_name: "sparse"
  quantization:
    type: "scalar"  # or "binary"
    quantile: 0.99
    always_ram: true

Collection creation

Basic collection

db.create_collection(
    dimension=768,
    distance="Cosine"  # or "Euclidean", "Dot"
)

With sparse vectors (enables hybrid search)

db.create_collection(
    dimension=768,
    use_sparse=True,  # Enables named vectors
    distance="Cosine"
)

With scalar quantization

Reduces memory usage by 4x with minimal accuracy loss:
db = QdrantVectorDB(
    config={
        "qdrant": {
            "url": "http://localhost:6333",
            "collection_name": "quantized_docs",
            "quantization": {
                "type": "scalar",
                "quantile": 0.99,  # Keep 99th percentile precision
                "always_ram": True  # Keep quantized vectors in RAM
            }
        }
    }
)

db.create_collection(dimension=768)

With binary quantization

Reduces memory usage by 32x (best for high-dimensional vectors):
db = QdrantVectorDB(
    config={
        "qdrant": {
            "collection_name": "binary_docs",
            "quantization": {
                "type": "binary",
                "always_ram": True
            }
        }
    }
)

db.create_collection(dimension=768)

Recreate collection

db.create_collection(
    dimension=768,
    recreate=True  # Deletes existing collection first
)

Payload indexing

Basic payload index

db.create_payload_index(
    field_name="category",
    field_schema="keyword"  # or "text", "integer", "float", "bool"
)

Tenant optimization index

Critical for multi-tenant performance (Qdrant 1.16+):
db.create_namespace_index(
    namespace_field="tenant_id"
)

# Or manually:
db.create_payload_index(
    field_name="tenant_id",
    field_schema="keyword",
    is_tenant=True  # Enables tenant optimization
)
The is_tenant=True flag enables specialized indexing for high-cardinality tenant filtering, dramatically improving query performance in multi-tenant scenarios.

Indexing documents

From Haystack documents

from haystack import Document

documents = [
    Document(
        content="Qdrant supports quantization",
        embedding=[0.1, 0.2, ...],
        meta={"category": "database", "priority": 1}
    )
]

db.index_documents(
    documents=documents,
    scope="tenant_1",
    batch_size=100
)

With sparse embeddings

from haystack.dataclasses import SparseEmbedding

doc = Document(
    content="Hybrid search example",
    embedding=[0.1, 0.2, ...],  # Dense
    sparse_embedding=SparseEmbedding(
        indices=[100, 250, 500],
        values=[0.5, 0.3, 0.2]
    )
)

db.index_documents([doc], scope="tenant_1")

Searching

results = db.search(
    query_vector=[0.1, 0.2, ...],
    top_k=10,
    scope="tenant_1",
    include_vectors=False
)

for doc in results:
    print(f"Score: {doc.score}, Content: {doc.content}")

Hybrid search with RRF

results = db.search(
    query_vector={
        "dense": dense_embedding,
        "sparse": sparse_embedding
    },
    search_type="hybrid",
    top_k=10,
    scope="tenant_1"
)

MMR for diverse results

Maximal Marginal Relevance balances relevance with diversity:
results = db.search(
    query_vector=embedding,
    search_type="mmr",
    mmr_diversity=0.5,  # 0.0 = pure relevance, 1.0 = pure diversity
    top_k=10
)
MMR is ideal for:
  • Summarization tasks (avoid redundant content)
  • Exploratory search (cover multiple aspects)
  • Recommendation diversity

Metadata filtering

# Simple equality
results = db.search(
    query_vector=vec,
    filters={"category": "technology"},
    top_k=10
)

# Range queries
results = db.search(
    query_vector=vec,
    filters={"priority": {"$gte": 5}},
    top_k=10
)

# Multiple conditions
results = db.search(
    query_vector=vec,
    filters={
        "category": "tech",
        "score": {"$gt": 0.8}
    }
)

# Set membership
results = db.search(
    query_vector=vec,
    filters={
        "status": {"$in": ["active", "pending"]}
    }
)

# Negation
results = db.search(
    query_vector=vec,
    filters={
        "status": {"$ne": "archived"}
    }
)

Supported filter operators

  • $eq: Equal to
  • $ne: Not equal to
  • $gt: Greater than
  • $gte: Greater than or equal
  • $lt: Less than
  • $lte: Less than or equal
  • $in: Value in list
  • $nin: Value not in list

Multi-tenancy

Setup tenant isolation

# Create collection
db.create_collection(dimension=768)

# Create tenant index (critical for performance)
db.create_namespace_index(namespace_field="tenant_id")

# Index with tenant
db.index_documents(documents, scope="tenant_1")

# Search within tenant
results = db.search(
    query_vector=vec,
    scope="tenant_1",
    top_k=10
)

Tiered tenant promotion

Qdrant’s payload-based filtering allows flexible tenant strategies:
# Small tenants: Use payload filters
db.index_documents(small_tenant_docs, scope="small_tenant_123")

# Large tenants: Promote to dedicated collection
db_large = QdrantVectorDB(config={"qdrant": {"collection_name": "tenant_large"}})
db_large.create_collection(dimension=768)
db_large.index_documents(large_tenant_docs)

Deleting documents

Delete by tenant

db.delete_documents(scope="tenant_old")

Delete by filter

db.delete_documents(
    scope="tenant_1",
    filters={"status": "archived"}
)
Always provide a scope or filter to avoid accidentally deleting all documents.

Building filters

# Build filter programmatically
filter_obj = db.build_filter({
    "category": "tech",
    "priority": {"$gt": 5}
})

# Use in custom queries
from qdrant_client import QdrantClient

client = db.client
results = client.search(
    collection_name=db.collection_name,
    query_vector=vec,
    query_filter=filter_obj,
    limit=10
)

Advanced features

Named vectors

Store multiple vector types in one collection:
# Configured automatically with use_sparse=True
db.create_collection(dimension=768, use_sparse=True)

# Named vectors: "dense" and "sparse"
db.index_documents(docs_with_both_embeddings)

# Search using specific vector
results = db.search(
    query_vector={db.dense_vector_name: embedding},
    top_k=10
)

Custom vector names

db = QdrantVectorDB(
    config={
        "qdrant": {
            "collection_name": "custom",
            "dense_vector_name": "semantic",
            "sparse_vector_name": "lexical"
        }
    }
)

db.create_collection(dimension=768, use_sparse=True)

Best practices

Balance memory reduction with accuracy requirements:
# Scalar quantization: 4x reduction, <1% accuracy loss
config = {
    "quantization": {
        "type": "scalar",
        "quantile": 0.99,  # Higher = better accuracy
        "always_ram": True
    }
}

# Binary quantization: 32x reduction, ~5% accuracy loss
# Best for: High dimensions (>512), large datasets, memory-critical
config = {
    "quantization": {
        "type": "binary",
        "always_ram": True
    }
}
Always create a tenant index before multi-tenant workloads:
# MUST do this for good performance
db.create_namespace_index(namespace_field="tenant_id")

# Then index and search
db.index_documents(docs, scope="tenant_1")
db.search(vec, scope="tenant_1")
gRPC provides better throughput than HTTP:
db = QdrantVectorDB(
    config={
        "qdrant": {
            "url": "http://localhost:6333",
            "prefer_grpc": True  # Default
        }
    }
)
Adjust mmr_diversity based on use case. Note that mmr_diversity is the inverse of the classic MMR lambda (lambda = 1 - mmr_diversity): 0.0 means pure relevance, 1.0 means pure diversity.
# High relevance (mmr_diversity=0.0-0.2)
# Use for: Precise question answering
results = db.search(vec, search_type="mmr", mmr_diversity=0.1)

# Balanced (mmr_diversity=0.4-0.6)
# Use for: General search
results = db.search(vec, search_type="mmr", mmr_diversity=0.5)

# High diversity (mmr_diversity=0.8-1.0)
# Use for: Summarization, exploratory search
results = db.search(vec, search_type="mmr", mmr_diversity=0.9)

Error handling

try:
    db.create_collection(dimension=768)
    db.index_documents(documents)
except ValueError as e:
    print(f"Configuration error: {e}")
except ConnectionError as e:
    print(f"Qdrant connection failed: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")

Source reference

Implementation: src/vectordb/databases/qdrant.py Key classes and methods:
  • QdrantVectorDB.__init__(): src/vectordb/databases/qdrant.py:114
  • create_collection(): src/vectordb/databases/qdrant.py:185
  • create_payload_index(): src/vectordb/databases/qdrant.py:288
  • create_namespace_index(): src/vectordb/databases/qdrant.py:347
  • index_documents(): src/vectordb/databases/qdrant.py:390
  • search(): src/vectordb/databases/qdrant.py:467
  • _mmr_rerank(): src/vectordb/databases/qdrant.py:662

Build docs developers (and LLMs) love