Python SDK Guide
The VecLabs Python SDK provides a Pinecone-compatible API for storing and querying vector embeddings on Solana with cryptographic verification.
Installation
Import the client
from solvec import SolVec
Quick Start
Basic Usage
With Wallet
Custom RPC
from solvec import SolVec

# Initialize client
sv = SolVec(network="devnet")

# Create a collection (equivalent to Pinecone index)
col = sv.collection("agent-memory", dimensions=1536)

# Upsert vectors
col.upsert([
    {
        "id": "mem_001",
        "values": [...],  # 1536-dim embedding
        "metadata": {"text": "User prefers dark mode"}
    }
])

# Query for nearest neighbors
results = col.query(vector=[...], top_k=5)
for match in results.matches:
    print(match.id, match.score)
Client Configuration
Network Options
sv = SolVec(network="devnet")
The SDK automatically connects to default RPC endpoints for each network. Use rpc_url to override with your own endpoint (Helius, QuickNode, etc.).
Distance Metrics
from solvec import SolVec, DistanceMetric

col = sv.collection(
    "embeddings",
    dimensions=1536,
    metric=DistanceMetric.COSINE  # or "cosine"
)

# Available metrics:
# - DistanceMetric.COSINE (default)
# - DistanceMetric.EUCLIDEAN
# - DistanceMetric.DOT
Collections
Collections are equivalent to Pinecone indexes. Each collection has a fixed dimension and distance metric.
Creating Collections
col = sv.collection(
    "agent-memory",
    dimensions=1536,  # OpenAI text-embedding-3-small
    metric="cosine"   # 'cosine', 'euclidean', or 'dot'
)
Default dimension is 1536 (OpenAI embeddings). Default metric is cosine.
Listing Collections
collections = sv.list_collections()
print(collections)  # ['agent-memory', 'user-profiles', ...]
Upserting Vectors
Single Vector
col.upsert([{
    "id": "mem_001",
    "values": [0.1, 0.2, 0.3, ...],  # Must match collection dimensions
    "metadata": {"text": "User is Alex", "timestamp": 1678901234}
}])
Batch Upsert
vectors = [
    {"id": "vec_1", "values": [...], "metadata": {"category": "product"}},
    {"id": "vec_2", "values": [...], "metadata": {"category": "review"}},
    {"id": "vec_3", "values": [...], "metadata": {"category": "product"}},
]
response = col.upsert(vectors)
print(f"Upserted {response.upserted_count} vectors")
All vectors must match the collection’s dimension. Mismatched dimensions raise a ValueError.
Upsert with OpenAI Embeddings
from openai import OpenAI
from solvec import SolVec

openai_client = OpenAI()
sv = SolVec(network="devnet")
col = sv.collection("documents", dimensions=1536)

text = "VecLabs is a decentralized vector database on Solana"

# Embed the document text, then store the embedding with its metadata
response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=text
)
embedding = response.data[0].embedding

col.upsert([{
    "id": "doc_001",
    "values": embedding,
    "metadata": {"text": text, "source": "docs"}
}])
Using Dataclasses
from solvec.types import UpsertRecord

vec = UpsertRecord(
    id="mem_001",
    values=[0.1, 0.2, 0.3],
    metadata={"text": "hello"}
)
col.upsert([vec])
Idempotency
# First upsert
col.upsert([{"id": "a", "values": [1.0, 0.0, 0.0]}])

# Second upsert with same ID updates the vector
col.upsert([{"id": "a", "values": [0.0, 1.0, 0.0]}])

stats = col.describe_index_stats()
print(stats.vector_count)  # 1 (not 2)
Upsert is idempotent. Inserting a vector with an existing ID updates the vector instead of creating a duplicate.
Querying Vectors
Basic Query
results = col.query(
    vector=query_embedding,
    top_k=5
)
for match in results.matches:
    print(match.id, match.score, match.metadata)
Query Options
results = col.query(
    vector=[...],
    top_k=10,
    include_metadata=True,  # default: True
    include_values=False    # default: False (saves bandwidth)
)
for match in results.matches:
    print(match.id)        # str
    print(match.score)     # float (0.0 to 1.0)
    print(match.metadata)  # dict
    print(match.values)    # None (not included)
Set include_values=True only when you need the raw vectors. This significantly increases response size.
# Upsert vectors with metadata
col.upsert([
    {"id": "a", "values": [1.0, 0.0, 0.0], "metadata": {"type": "memory"}},
    {"id": "b", "values": [0.9, 0.1, 0.0], "metadata": {"type": "fact"}},
])

# Query with filter
results = col.query(
    vector=[1.0, 0.0, 0.0],
    top_k=5,
    filter={"type": "memory"}  # Only return vectors with type="memory"
)
for match in results.matches:
    assert match.metadata["type"] == "memory"
Semantic Search Example
from openai import OpenAI
from solvec import SolVec

openai_client = OpenAI()
sv = SolVec(network="devnet")
col = sv.collection("knowledge-base", dimensions=1536)

# User query
user_query = "How do I verify vector integrity?"

# Generate embedding
response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=user_query
)
query_vector = response.data[0].embedding

# Search
results = col.query(vector=query_vector, top_k=3)
for i, match in enumerate(results.matches, 1):
    print(f"{i}. {match.metadata['text']} (score: {match.score:.3f})")
Score Interpretation
Cosine Similarity — Range: 0.0 to 1.0; 1.0 = identical direction, 0.0 = orthogonal.
Dot Product — Range: unbounded; higher = more similar. Works best with normalized vectors.
Euclidean — Range: 0.0 to 1.0 (inverted); 1.0 = closest, 0.0 = farthest.
Fetching Vectors
result = col.fetch(["vec_1", "vec_2"])

for vec_id, vec_data in result["vectors"].items():
    print(vec_id)
    print(vec_data["values"])    # Raw vector
    print(vec_data["metadata"])  # Metadata
Deleting Vectors
Delete by ID
col.delete(["mem_001", "mem_002"])
Verify Deletion
col.delete(["vec_1"])

# Verify it's gone
results = col.query(vector=[1.0, 0.0, 0.0], top_k=100)
assert not any(m.id == "vec_1" for m in results.matches)
Collection Statistics
stats = col.describe_index_stats()

print(stats.vector_count)  # 1542
print(stats.dimension)     # 1536
print(stats.metric)        # DistanceMetric.COSINE
print(stats.name)          # 'agent-memory'
print(stats.merkle_root)   # 'a3f2c1...'
print(stats.last_updated)  # 1678901234
print(stats.is_frozen)     # False
Type Definitions
UpsertRecord
@dataclass
class UpsertRecord:
    """A single vector record to upsert into a collection."""

    id: str              # Unique vector ID; re-upserting an existing ID updates in place
    values: list[float]  # Embedding values; length must match the collection dimension
    metadata: dict[str, Any] = field(default_factory=dict)  # Optional key/value payload
QueryMatch
@dataclass
class QueryMatch:
    """One nearest-neighbor result returned by a query."""

    id: str       # ID of the matched vector
    score: float  # Similarity score; interpretation depends on the collection metric
    metadata: dict[str, Any] = field(default_factory=dict)  # Empty unless include_metadata
    values: Optional[list[float]] = None  # Populated only when include_values=True
QueryResponse
@dataclass
class QueryResponse:
    """Result set returned by Collection.query()."""

    matches: list[QueryMatch]  # Results sorted by score descending
    namespace: str             # Collection name
CollectionStats
@dataclass
class CollectionStats:
    """Snapshot of a collection returned by describe_index_stats()."""

    vector_count: int       # Number of vectors currently stored
    dimension: int          # Fixed embedding dimension of the collection
    metric: DistanceMetric  # Distance metric used for queries
    name: str               # Collection name
    merkle_root: str        # Current Merkle root (hex string, e.g. 'a3f2c1...')
    last_updated: int       # Unix timestamp of the last update
    is_frozen: bool         # Whether the collection is frozen
Error Handling
try:
    col.upsert([{"id": "vec", "values": [1, 2]}])
except ValueError as e:
    if "Dimension mismatch" in str(e):
        print("Vector dimension does not match collection")
Dimension Mismatch
Query Dimension Mismatch
# Collection expects 1536 dimensions
col = sv.collection("test", dimensions=1536)

# This raises ValueError
col.upsert([{"id": "bad", "values": [1, 2, 3]}])
# ValueError: Dimension mismatch for id 'bad': expected 1536, got 3
Working with NumPy
import numpy as np
from solvec import SolVec

sv = SolVec(network="devnet")
col = sv.collection("numpy-vecs", dimensions=128)

# NumPy arrays work seamlessly
vector = np.random.rand(128)
col.upsert([{
    "id": "np_vec_1",
    "values": vector.tolist(),  # Convert to list
    "metadata": {"type": "random"}
}])

# Query with NumPy
query_vec = np.random.rand(128)
results = col.query(vector=query_vec.tolist(), top_k=5)
Best Practices
Use meaningful IDs
Use descriptive IDs like doc_001, user_alex_mem_5 instead of random UUIDs. This helps with debugging.
Batch upserts
Upsert multiple vectors at once instead of one-by-one for better performance.
Keep metadata small
Store only essential metadata. Large metadata increases storage costs and query latency.
Use dataclasses for type safety
Use UpsertRecord and other dataclasses for better IDE autocomplete and type checking.
Handle errors gracefully
Always validate vector dimensions before upserting to avoid runtime errors.
Testing
import pytest
from solvec import SolVec


@pytest.fixture
def sv():
    """Fresh devnet client for each test."""
    return SolVec(network="devnet")


def test_upsert_and_query(sv):
    """Upserted vectors should be returned by a query, nearest first."""
    col = sv.collection("test", dimensions=4)
    col.upsert([
        {"id": "a", "values": [1.0, 0.0, 0.0, 0.0], "metadata": {"text": "alpha"}},
        {"id": "b", "values": [0.9, 0.1, 0.0, 0.0], "metadata": {"text": "beta"}},
    ])
    results = col.query(vector=[1.0, 0.0, 0.0, 0.0], top_k=2)
    assert len(results.matches) == 2
    assert results.matches[0].id == "a"
    assert results.matches[0].score == pytest.approx(1.0, abs=1e-3)
Next Steps
TypeScript SDK Learn how to use VecLabs with TypeScript
Verification Verify your vectors on-chain with Merkle proofs
Performance Tuning Optimize query speed and recall
Collections Advanced collection management