Milvus is a cloud-native vector database designed for billion-scale vector search. It supports both Zilliz Cloud (managed) and self-hosted deployments with advanced features like partition-key multi-tenancy and JSON path filtering.
Key features
Partition-key isolation: Physical data separation for millions of tenants
Hybrid search: RRF or weighted fusion of dense and sparse vectors
JSON path indexing: Efficient filtering on nested metadata
HNSW indexing: Fast approximate nearest neighbor search
Zilliz Cloud: Fully managed service with auto-scaling
Self-hosted: Open-source deployment with Docker or Kubernetes
Installation
Connection
Zilliz Cloud
from vectordb.databases.milvus import MilvusVectorDB
db = MilvusVectorDB(
uri = "https://in03-xxxxxxxx.api.gcp-us-west1.zillizcloud.com" ,
token = "your-api-token" ,
collection_name = "documents"
)
Self-hosted Milvus
db = MilvusVectorDB(
uri = "http://localhost:19530" ,
token = "" , # Empty for local
collection_name = "my_collection"
)
Alternatively, connect with an explicit host and port:
db = MilvusVectorDB(
host = "localhost" ,
port = "19530" ,
collection_name = "documents"
)
Collection creation
Basic collection
db.create_collection(
collection_name = "articles" ,
dimension = 768 ,
description = "Article embeddings"
)
With sparse vectors for hybrid search
db.create_collection(
collection_name = "hybrid_docs" ,
dimension = 768 ,
use_sparse = True # Enables sparse_embedding field
)
With partition keys for multi-tenancy
db.create_collection(
collection_name = "tenant_docs" ,
dimension = 768 ,
use_sparse = True ,
use_partition_key = True ,
partition_key_field = "tenant_id"
)
Recreate existing collection
db.create_collection(
collection_name = "articles" ,
dimension = 768 ,
recreate = True # Drops existing collection first
)
JSON path indexing
Create indexes on nested metadata fields for fast filtering:
# Index a category field
db.create_json_index(
collection_name = "articles" ,
json_path = 'metadata["category"]' ,
cast_type = "VARCHAR"
)
# Index a numeric priority field
db.create_json_index(
json_path = 'metadata["priority"]' ,
cast_type = "DOUBLE"
)
# Index nested fields
db.create_json_index(
json_path = 'metadata["attributes"]["priority"]' ,
cast_type = "DOUBLE"
)
Supported cast types:
VARCHAR: String values
DOUBLE: Numeric values
BOOL: Boolean values
Inserting documents
From Haystack documents
from haystack import Document
documents = [
Document(
content = "Milvus supports partition keys" ,
embedding = [ 0.1 , 0.2 , ... ], # Dense vector
meta = { "category" : "database" , "priority" : 1 }
)
]
db.insert_documents(
documents = documents,
namespace = "tenant_1"
)
With sparse embeddings
from haystack.dataclasses import SparseEmbedding
doc = Document(
content = "Hybrid search example" ,
embedding = [ 0.1 , 0.2 , ... ], # Dense
sparse_embedding = SparseEmbedding(
indices = [ 100 , 250 , 500 ],
values = [ 0.5 , 0.3 , 0.2 ]
),
meta = { "category" : "example" }
)
db.insert_documents([doc])
Searching
Dense vector search
results = db.search(
query_embedding = [ 0.1 , 0.2 , ... ],
top_k = 10 ,
scope = "tenant_1" , # Partition key filter
include_vectors = False
)
for doc in results:
    print(f"Score: {doc.score}, Content: {doc.content}")
Hybrid search with RRF
Reciprocal Rank Fusion (default, parameter-free):
from haystack.dataclasses import SparseEmbedding
results = db.search(
query_embedding = dense_vec,
query_sparse_embedding = SparseEmbedding(
indices = [ 100 , 250 ],
values = [ 0.5 , 0.3 ]
),
top_k = 10 ,
ranker_type = "rrf"
)
Hybrid search with weighted ranker
Explicit control over dense vs. sparse balance:
results = db.search(
query_embedding = dense_vec,
query_sparse_embedding = sparse_vec,
top_k = 10 ,
ranker_type = "weighted" ,
weights = [ 0.7 , 0.3 ] # [dense_weight, sparse_weight]
)
Metadata filtering
Pass a filters dictionary to restrict results by metadata:
# Simple equality
results = db.search(
query_embedding = vec,
filters = { "category" : "technology" },
top_k = 10
)
# Range queries
results = db.search(
query_embedding = vec,
filters = { "priority" : { "$gt" : 5 }},
top_k = 10
)
# Multiple conditions (AND)
results = db.search(
query_embedding = vec,
filters = {
"category" : { "$eq" : "tech" },
"priority" : { "$gt" : 3 }
}
)
# Set membership
results = db.search(
query_embedding = vec,
filters = {
"status" : { "$in" : [ "active" , "pending" ]}
}
)
# JSON contains
results = db.search(
query_embedding = vec,
filters = {
"tags" : { "$contains" : "important" }
}
)
Supported filter operators
$eq: Equal to
$gt: Greater than
$lt: Less than
$in: Value in list
$contains: JSON array contains value
Sparse-only search
results = db.search(
query_sparse_embedding = sparse_vec,
top_k = 10
)
Filter-only search (no query vector):
results = db.search(
filters = { "category" : "tech" },
top_k = 10
)
Multi-tenancy with partition keys
Setup partition-based isolation
db.create_collection(
"tenant_data" ,
dimension = 768 ,
use_partition_key = True ,
partition_key_field = "tenant_id"
)
# Insert with namespace
db.insert_documents(
documents,
namespace = "tenant_1"
)
# Search within tenant
results = db.search(
query_embedding = vec,
scope = "tenant_1" , # or namespace="tenant_1"
top_k = 10
)
Partition keys provide physical data isolation at the storage layer, making multi-tenant queries significantly faster than post-filtering. Milvus supports millions of tenants efficiently.
Deleting documents
Delete by IDs
db.delete_documents(
ids = [ 1 , 2 , 3 ],
collection_name = "articles"
)
Delete by filter
filter_expr = db.build_filter_expression(
{ "status" : "archived" }
)
db.delete_documents(
filter_expr = filter_expr,
collection_name = "articles"
)
Delete namespace
filter_expr = 'tenant_id == "tenant_old"'
db.delete_documents( filter_expr = filter_expr)
Building filter expressions
# Build expression from dict
filters = {
"category" : "tech" ,
"priority" : { "$gt" : 5 }
}
expr = db.build_filter_expression(filters)
print (expr) # 'metadata["category"] == "tech" and metadata["priority"] > 5'
# Use in search
results = db.search(
query_embedding = vec,
filters = filters
)
Dropping collections
db.drop_collection( "old_collection" )
Warning: This permanently deletes all data in the collection and cannot be undone.
Advanced configuration
HNSW index parameters
Milvus creates HNSW indexes automatically with these defaults:
# Default parameters (in create_collection)
index_params = {
"M" : 16 , # Max connections per layer
"efConstruction" : 500 # Build-time effort
}
To customize, modify src/vectordb/databases/milvus.py:253.
Sparse index configuration
Sparse vectors use SPARSE_INVERTED_INDEX with inner product metric:
# Automatically configured when use_sparse=True
index_params.add_index(
field_name = "sparse_embedding" ,
index_type = "SPARSE_INVERTED_INDEX" ,
metric_type = "IP" # Inner Product
)
Best practices
Partition keys for large-scale multi-tenancy
Use partition keys when serving thousands to millions of tenants:
# Efficient: Physical isolation
db.create_collection(
"tenant_data" ,
use_partition_key = True ,
partition_key_field = "tenant_id"
)
db.insert_documents(docs, namespace = "tenant_1" )
db.search(vec, scope = "tenant_1" ) # Fast pre-filtered search
# Inefficient: Post-filtering
db.search(vec, filters = { "tenant_id" : "tenant_1" }) # Slower
JSON path indexes for complex filtering
Create indexes on frequently filtered fields:
# Before querying on category
db.create_json_index(
json_path = 'metadata["category"]' ,
cast_type = "VARCHAR"
)
# Now filtering is fast
results = db.search(
vec,
filters = { "category" : "technology" }
)
Choose the right ranker for hybrid search
RRF: Best for most use cases, no tuning needed
Weighted: Best when you know the optimal dense/sparse balance
# RRF: Automatic, robust
results = db.search(
dense_vec, sparse_vec,
ranker_type = "rrf"
)
# Weighted: Explicit control
results = db.search(
dense_vec, sparse_vec,
ranker_type = "weighted" ,
weights = [ 0.7 , 0.3 ] # Favor semantic over keyword
)
Milvus loads collections into memory for fast search, so plan for memory usage:
Monitor memory usage with large collections
Use drop_collection() to free memory
Consider Qdrant with quantization for memory-constrained environments
Error handling
try:
    db.create_collection("articles", dimension=768)
    db.insert_documents(documents)
except ValueError as e:
    print(f"Configuration error: {e}")
except Exception as e:
    print(f"Milvus error: {e}")
Source reference
Implementation: src/vectordb/databases/milvus.py
Key classes and methods:
MilvusVectorDB.__init__(): src/vectordb/databases/milvus.py:103
create_collection(): src/vectordb/databases/milvus.py:150
create_json_index(): src/vectordb/databases/milvus.py:272
insert_documents(): src/vectordb/databases/milvus.py:332
search(): src/vectordb/databases/milvus.py:398
delete_documents(): src/vectordb/databases/milvus.py:742