Skip to main content

Python SDK Guide

The VecLabs Python SDK provides a Pinecone-compatible API for storing and querying vector embeddings on Solana with cryptographic verification.

Installation

1

Install the SDK

pip install solvec
2

Import the client

from solvec import SolVec

Quick Start

from solvec import SolVec

# Initialize client
sv = SolVec(network="devnet")

# Create a collection (equivalent to Pinecone index)
col = sv.collection("agent-memory", dimensions=1536)

# Upsert vectors
col.upsert([
    {
        "id": "mem_001",
        "values": [...],  # 1536-dim embedding
        "metadata": {"text": "User prefers dark mode"}
    }
])

# Query for nearest neighbors
results = col.query(vector=[...], top_k=5)
for match in results.matches:
    print(match.id, match.score)

Client Configuration

Network Options

sv = SolVec(network="devnet")
The SDK automatically connects to a default RPC endpoint for each network. Use rpc_url to override it with your own endpoint (Helius, QuickNode, etc.).

Distance Metrics

from solvec import SolVec, DistanceMetric

col = sv.collection(
    "embeddings",
    dimensions=1536,
    metric=DistanceMetric.COSINE  # or "cosine"
)

# Available metrics:
# - DistanceMetric.COSINE (default)
# - DistanceMetric.EUCLIDEAN
# - DistanceMetric.DOT

Collections

Collections are equivalent to Pinecone indexes. Each collection has a fixed dimension and distance metric.

Creating Collections

col = sv.collection(
    "agent-memory",
    dimensions=1536,  # OpenAI text-embedding-3-small
    metric="cosine"   # 'cosine', 'euclidean', or 'dot'
)
Default dimension is 1536 (OpenAI embeddings). Default metric is cosine.

Listing Collections

collections = sv.list_collections()
print(collections)  # ['agent-memory', 'user-profiles', ...]

Upserting Vectors

Single Vector

col.upsert([{
    "id": "mem_001",
    "values": [0.1, 0.2, 0.3, ...],  # Must match collection dimensions
    "metadata": {"text": "User is Alex", "timestamp": 1678901234}
}])

Batch Upsert

vectors = [
    {"id": "vec_1", "values": [...], "metadata": {"category": "product"}},
    {"id": "vec_2", "values": [...], "metadata": {"category": "review"}},
    {"id": "vec_3", "values": [...], "metadata": {"category": "product"}},
]

response = col.upsert(vectors)
print(f"Upserted {response.upserted_count} vectors")
All vectors must match the collection’s dimension. Mismatched dimensions raise a ValueError.

Upsert with OpenAI Embeddings

from openai import OpenAI
from solvec import SolVec

openai_client = OpenAI()
sv = SolVec(network="devnet")
col = sv.collection("documents", dimensions=1536)

text = "VecLabs is a decentralized vector database on Solana"

response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=text
)

embedding = response.data[0].embedding

col.upsert([{
    "id": "doc_001",
    "values": embedding,
    "metadata": {"text": text, "source": "docs"}
}])

Using Dataclasses

from solvec.types import UpsertRecord

vec = UpsertRecord(
    id="mem_001",
    values=[0.1, 0.2, 0.3],
    metadata={"text": "hello"}
)

col.upsert([vec])

Idempotency

# First upsert
col.upsert([{"id": "a", "values": [1.0, 0.0, 0.0]}])

# Second upsert with same ID updates the vector
col.upsert([{"id": "a", "values": [0.0, 1.0, 0.0]}])

stats = col.describe_index_stats()
print(stats.vector_count)  # 1 (not 2)
Upsert is idempotent. Inserting a vector with an existing ID updates the vector instead of creating a duplicate.

Querying Vectors

Basic Query

results = col.query(
    vector=query_embedding,
    top_k=5
)

for match in results.matches:
    print(match.id, match.score, match.metadata)

Query Options

results = col.query(
    vector=[...],
    top_k=10,
    include_metadata=True,  # default: True
    include_values=False    # default: False (saves bandwidth)
)

for match in results.matches:
    print(match.id)         # str
    print(match.score)      # float (0.0 to 1.0)
    print(match.metadata)   # dict
    print(match.values)     # None (not included)
Set include_values=True only when you need the raw vectors. Including values significantly increases response size.

Metadata Filtering

# Upsert vectors with metadata
col.upsert([
    {"id": "a", "values": [1.0, 0.0, 0.0], "metadata": {"type": "memory"}},
    {"id": "b", "values": [0.9, 0.1, 0.0], "metadata": {"type": "fact"}},
])

# Query with filter
results = col.query(
    vector=[1.0, 0.0, 0.0],
    top_k=5,
    filter={"type": "memory"}  # Only return vectors with type="memory"
)

for match in results.matches:
    assert match.metadata["type"] == "memory"

Semantic Search Example

from openai import OpenAI
from solvec import SolVec

openai_client = OpenAI()
sv = SolVec(network="devnet")
col = sv.collection("knowledge-base", dimensions=1536)

# User query
user_query = "How do I verify vector integrity?"

# Generate embedding
response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=user_query
)

query_vector = response.data[0].embedding

# Search
results = col.query(vector=query_vector, top_k=3)

for i, match in enumerate(results.matches, 1):
    print(f"{i}. {match.metadata['text']} (score: {match.score:.3f})")

Score Interpretation

Cosine Similarity

Range: 0.0 to 1.0. 1.0 = identical direction; 0.0 = orthogonal.

Dot Product

Range: unbounded. Higher = more similar. Works best with normalized vectors.

Euclidean

Range: 0.0 to 1.0 (inverted). 1.0 = closest; 0.0 = farthest.

Fetching Vectors

result = col.fetch(["vec_1", "vec_2"])

for vec_id, vec_data in result["vectors"].items():
    print(vec_id)
    print(vec_data["values"])    # Raw vector
    print(vec_data["metadata"])  # Metadata

Deleting Vectors

Delete by ID

col.delete(["mem_001", "mem_002"])

Verify Deletion

col.delete(["vec_1"])

# Verify it's gone
results = col.query(vector=[1.0, 0.0, 0.0], top_k=100)
assert not any(m.id == "vec_1" for m in results.matches)

Collection Statistics

stats = col.describe_index_stats()

print(stats.vector_count)   # 1542
print(stats.dimension)      # 1536
print(stats.metric)         # DistanceMetric.COSINE
print(stats.name)           # 'agent-memory'
print(stats.merkle_root)    # 'a3f2c1...'
print(stats.last_updated)   # 1678901234
print(stats.is_frozen)      # False

Type Definitions

UpsertRecord

@dataclass
class UpsertRecord:
    id: str
    values: list[float]
    metadata: dict[str, Any] = field(default_factory=dict)

QueryMatch

@dataclass
class QueryMatch:
    id: str
    score: float
    metadata: dict[str, Any] = field(default_factory=dict)
    values: Optional[list[float]] = None

QueryResponse

@dataclass
class QueryResponse:
    matches: list[QueryMatch]  # Results sorted by score descending
    namespace: str             # Collection name

CollectionStats

@dataclass
class CollectionStats:
    vector_count: int
    dimension: int
    metric: DistanceMetric
    name: str
    merkle_root: str
    last_updated: int
    is_frozen: bool

Error Handling

try:
    col.upsert([{"id": "vec", "values": [1, 2]}])
except ValueError as e:
    if "Dimension mismatch" in str(e):
        print("Vector dimension does not match collection")
# Collection expects 1536 dimensions
col = sv.collection("test", dimensions=1536)

# This raises ValueError
col.upsert([{"id": "bad", "values": [1, 2, 3]}])
# ValueError: Dimension mismatch for id 'bad': expected 1536, got 3

Working with NumPy

import numpy as np
from solvec import SolVec

sv = SolVec(network="devnet")
col = sv.collection("numpy-vecs", dimensions=128)

# NumPy arrays work seamlessly
vector = np.random.rand(128)

col.upsert([{
    "id": "np_vec_1",
    "values": vector.tolist(),  # Convert to list
    "metadata": {"type": "random"}
}])

# Query with NumPy
query_vec = np.random.rand(128)
results = col.query(vector=query_vec.tolist(), top_k=5)

Best Practices

1

Use meaningful IDs

Use descriptive IDs like doc_001, user_alex_mem_5 instead of random UUIDs. This helps with debugging.
2

Batch upserts

Upsert multiple vectors at once instead of one-by-one for better performance.
3

Keep metadata small

Store only essential metadata. Large metadata increases storage costs and query latency.
4

Use dataclasses for type safety

Use UpsertRecord and other dataclasses for better IDE autocomplete and type checking.
5

Handle errors gracefully

Always validate vector dimensions before upserting to avoid runtime errors.

Testing

import pytest
from solvec import SolVec

@pytest.fixture
def sv():
    return SolVec(network="devnet")

def test_upsert_and_query(sv):
    col = sv.collection("test", dimensions=4)
    col.upsert([
        {"id": "a", "values": [1.0, 0.0, 0.0, 0.0], "metadata": {"text": "alpha"}},
        {"id": "b", "values": [0.9, 0.1, 0.0, 0.0], "metadata": {"text": "beta"}},
    ])
    
    results = col.query(vector=[1.0, 0.0, 0.0, 0.0], top_k=2)
    assert len(results.matches) == 2
    assert results.matches[0].id == "a"
    assert results.matches[0].score == pytest.approx(1.0, abs=1e-3)

Next Steps

TypeScript SDK

Learn how to use VecLabs with TypeScript

Verification

Verify your vectors on-chain with Merkle proofs

Performance Tuning

Optimize query speed and recall

Collections

Advanced collection management

Build docs developers (and LLMs) love