Pinecone is a fully managed vector database optimized for production RAG pipelines. It provides native hybrid search combining dense and sparse vectors without requiring external fusion logic.
Key features
Serverless deployment : Auto-scaling with pay-per-use pricing
Native hybrid search : Built-in sparse-dense vector fusion
Namespace isolation : Logical partitioning for multi-tenancy
Metadata filtering : Automatic flattening for nested structures
Distance metrics : Cosine, Euclidean, dotproduct
Pod-based deployment : Dedicated infrastructure for consistent performance
Installation
The gRPC client is recommended for production use. Quote the package spec so the extras brackets survive shell globbing:
pip install "pinecone-client[grpc]"
Connection
Cloud configuration
Direct initialization
Environment variables
Config file
from vectordb.databases.pinecone import PineconeVectorDB
db = PineconeVectorDB(
api_key = "pc-xxx" ,
index_name = "my-index"
)
Configuration options
pinecone :
api_key : ${PINECONE_API_KEY}
index_name : "my-index"
host : null # Optional custom host
proxy_url : null # Optional proxy
ssl_verify : true
pool_threads : 1
Index creation
Serverless index
Recommended for most use cases with automatic scaling:
from pinecone import ServerlessSpec
db.create_index(
dimension = 768 ,
metric = "cosine" , # or "euclidean", "dotproduct"
spec = ServerlessSpec(
cloud = "aws" , # or "gcp", "azure"
region = "us-east-1"
)
)
Pod-based index
For consistent performance and advanced features:
from pinecone import PodSpec
db.create_index(
dimension = 768 ,
metric = "cosine" ,
spec = PodSpec(
environment = "us-east-1-aws" ,
pod_type = "p1.x1" ,
pods = 1
)
)
Recreate existing index
db.create_index(
dimension = 768 ,
metric = "cosine" ,
recreate = True # Deletes existing index first
)
Setting recreate=True permanently deletes all data in the existing index. Use with caution in production.
Upserting documents
From Haystack documents
from haystack import Document
documents = [
Document(
content = "Pinecone is a managed vector database" ,
embedding = [ 0.1 , 0.2 , ... ], # 768-dim vector
meta = { "category" : "database" , "priority" : 1 }
)
]
db.upsert(
data = documents,
namespace = "production" ,
batch_size = 100 ,
show_progress = True
)
From raw dictionaries
data = [
{
"id" : "doc-1" ,
"values" : [ 0.1 , 0.2 , ... ],
"metadata" : { "category" : "tech" }
}
]
db.upsert( data = data, namespace = "production" )
Pinecone metadata values must be strings, numbers, booleans, or lists of strings. Nested dictionaries are automatically flattened:
doc = Document(
content = "Example" ,
meta = {
"user" : { "id" : 123 , "name" : "Alice" },
"tags" : [ "tech" , "ai" ]
}
)
# Automatically flattened to:
# {"user_id": 123, "user_name": "Alice", "tags": ["tech", "ai"]}
db.upsert([doc])
Querying
Dense vector search
results = db.query(
vector = [ 0.1 , 0.2 , ... ],
top_k = 10 ,
namespace = "production" ,
include_metadata = True ,
include_vectors = False
)
# Returns List[Document] with scores
for doc in results:
print ( f "Score: { doc.score } , Content: { doc.content } " )
Hybrid search
Combine dense and sparse vectors for semantic + keyword matching:
from haystack.dataclasses import SparseEmbedding
# Option 1: Using SparseEmbedding
sparse = SparseEmbedding(
indices = [ 100 , 250 , 500 ],
values = [ 0.5 , 0.3 , 0.2 ]
)
results = db.query_with_sparse(
vector = dense_embedding,
sparse_vector = sparse,
top_k = 10 ,
namespace = "production"
)
# Option 2: Using dict format
results = db.query_with_sparse(
vector = dense_embedding,
sparse_vector = { "indices" : [ 100 , 250 ], "values" : [ 0.5 , 0.3 ]},
top_k = 10
)
# Option 3: Using hybrid_search convenience method
results = db.hybrid_search(
query_embedding = dense_embedding,
query_sparse_embedding = sparse,
top_k = 10 ,
namespace = "production"
)
Filter results using Pinecone’s query syntax:
# Build simple filter
filter_dict = db.build_filter( "category" , "$eq" , "technology" )
results = db.query(
vector = embedding,
filter = filter_dict,
top_k = 10
)
# Build compound filter
f1 = db.build_filter( "category" , "$eq" , "science" )
f2 = db.build_filter( "year" , "$gte" , 2020 )
compound = db.build_compound_filter([f1, f2], logic = "AND" )
results = db.query( vector = embedding, filter = compound)
Supported filter operators
# Equality
db.build_filter( "status" , "$eq" , "active" )
# Inequality
db.build_filter( "status" , "$ne" , "archived" )
# Comparison
db.build_filter( "priority" , "$gt" , 5 )
db.build_filter( "score" , "$gte" , 0.8 )
db.build_filter( "age" , "$lt" , 30 )
db.build_filter( "rating" , "$lte" , 4.5 )
# Set membership
db.build_filter( "category" , "$in" , [ "tech" , "science" ])
db.build_filter( "status" , "$nin" , [ "draft" , "deleted" ])
Multi-tenancy with namespaces
Create namespace-isolated data
# Upsert to different namespaces
db.upsert(tenant_a_docs, namespace = "tenant_a" )
db.upsert(tenant_b_docs, namespace = "tenant_b" )
# Query specific namespace
results = db.query(
vector = embedding,
namespace = "tenant_a" ,
top_k = 10
)
List namespaces
namespaces = db.list_namespaces()
print (namespaces) # ["tenant_a", "tenant_b", ""] — "" is the default namespace
Delete namespace
db.delete_namespace( "tenant_old" )
Pinecone supports up to 100,000 namespaces per index. For more tenants, use payload filtering with Qdrant or partition keys with Milvus.
Fetching documents
Retrieve documents by ID without vector search:
result = db.fetch(
ids = [ "doc-1" , "doc-2" , "doc-3" ],
namespace = "production"
)
print (result[ "vectors" ][ "doc-1" ])
Deleting documents
Delete by IDs
db.delete(
ids = [ "doc-1" , "doc-2" ],
namespace = "production"
)
Delete all in namespace
db.delete(
delete_all = True ,
namespace = "tenant_old"
)
Index statistics
stats = db.describe_index_stats()
print ( f "Total vectors: { stats[ 'total_vector_count' ] } " )
print ( f "Dimension: { stats[ 'dimension' ] } " )
print ( f "Index fullness: { stats[ 'index_fullness' ] } " )
# Per-namespace counts
for ns, info in stats[ 'namespaces' ].items():
print ( f "Namespace ' { ns } ': { info[ 'vector_count' ] } vectors" )
Advanced features
Estimate match count
# Returns approximate count of vectors matching filter
count = db.estimate_match_count(
filter = { "category" : { "$eq" : "technology" }},
namespace = "production"
)
Pinecone does not return exact filtered counts. This method returns the total namespace vector count as a fallback.
Wait for index readiness
db.create_index( dimension = 768 )
db.wait_for_index_ready( timeout = 300 ) # Wait up to 5 minutes
Best practices
Batch upserts for efficiency
Use larger batch sizes (100-500) for faster ingestion:
db.upsert(
data = large_document_list,
batch_size = 500 ,
show_progress = True
)
Namespace strategy for multi-tenancy
Use namespaces for logical isolation, not physical isolation:
Good : Separating dev/staging/prod environments
Good : Isolating 100-10,000 tenants
Avoid : Millions of namespaces (use Milvus partition keys instead)
Choose the right metric for your embeddings:
Cosine : Normalized vectors (most common)
Euclidean : Unnormalized vectors with absolute distances
Dotproduct : Maximum inner product search
Error handling
try :
db.create_index( dimension = 768 )
except ValueError as e:
print ( f "Configuration error: { e } " )
except TimeoutError as e:
print ( f "Index creation timeout: { e } " )
except Exception as e:
print ( f "Unexpected error: { e } " )
Source reference
Implementation: src/vectordb/databases/pinecone.py
Key classes and methods:
PineconeVectorDB.__init__(): src/vectordb/databases/pinecone.py:71
create_index(): src/vectordb/databases/pinecone.py:167
upsert(): src/vectordb/databases/pinecone.py:267
query(): src/vectordb/databases/pinecone.py:334
query_with_sparse(): src/vectordb/databases/pinecone.py:377
hybrid_search(): src/vectordb/databases/pinecone.py:424