Skip to main content
The query module provides functions to build expressive search queries combining semantic similarity, keyword matching, and filters.

Overview

TopK queries are built using a fluent API:
from topk_sdk.query import select, field, filter, fn

results = client.collection("books").query(
    select(
        "title", "year",
        similarity=fn.semantic_similarity("title", "classic novel")
    )
    .filter(field("year") > 1900)
    .topk(field("similarity"), 10)
)

Query Stages

select()

Create a query with a select stage, which specifies the fields to return and any additional expressions to compute.
from topk_sdk.query import select, field, fn

# Select specific fields
query = select("title", "author")

# Select fields and compute expressions
query = select(
    "title",
    year=field("published_year"),
    similarity=fn.semantic_similarity("title", "animal")
)
*args
str
Field names to include in the results.
**kwargs
LogicalExpr | FunctionExpr
Named expressions to compute. The name becomes the field name in results.
return
Query
A new query with the select stage.

filter()

Create a query with a filter stage. Only documents matching the filter are returned.
from topk_sdk.query import filter, field

# Filter by field value
query = filter(field("published_year") > 1980)

# Combine filters
query = filter(
    (field("year") >= 1900) & (field("year") <= 2000)
)
expr
LogicalExpr | TextExpr
required
The filter expression. Documents must satisfy this expression to be included.
return
Query
A new query with the filter stage.

Query.filter()

Add a filter stage to an existing query.
query = select("title").filter(field("year") > 2000)
expr
LogicalExpr | TextExpr
required
The filter expression.
return
Query
The query with the filter stage added.

Query.sort()

Add a sort stage to sort results by an expression.
from topk_sdk.query import select, field

# Sort ascending (default)
query = select("title", "year").sort(field("year"))

# Sort descending
query = select("title", "year").sort(field("year"), asc=False)
expr
LogicalExpr
required
The expression to sort by.
asc
bool
default:"true"
Sort in ascending order if True, descending if False.
return
Query
The query with the sort stage added.

Query.topk()

Add a top-k stage to return the top k results by a scoring expression.
from topk_sdk.query import select, field, fn

query = select(
    "title",
    score=fn.semantic_similarity("title", "dystopian")
).topk(field("score"), 10)
expr
LogicalExpr
required
The scoring expression. Results are ranked by this value.
k
int
required
The number of top results to return.
asc
bool
default:"false"
If True, return the k results with the lowest scores. If False (default), return the k results with the highest scores.
return
Query
The query with the top-k stage added.

Query.limit()

Add a limit stage to restrict the number of results.
query = select("title").limit(10)
k
int
required
The maximum number of results to return.
return
Query
The query with the limit stage added.

Query.count()

Add a count stage to return the count of matching documents instead of the documents themselves.
count_query = filter(field("year") > 2000).count()
return
Query
The query with the count stage added.

Query.rerank()

Add a rerank stage to re-score results using a reranking model.
query = select("title", "description").rerank(
    model="cohere/rerank-english-v3.0",
    query="best science fiction books",
    fields=["title", "description"],
    topk_multiple=3
)
model
str
The reranking model to use.
query
str
The query text to rerank against.
fields
Sequence[str]
default:"[]"
Fields to consider during reranking.
topk_multiple
int
Number of candidates to retrieve before reranking, expressed as a multiple of k (e.g. topk_multiple=3 retrieves 3 × k candidates).
return
Query
The query with the rerank stage added.

Field References

field()

Reference a field from the document.
from topk_sdk.query import field, filter, select, fn

# Use in filters
filter(field("year") > 2000)

# Use in select expressions
select("title", publication_year=field("year"))

# Use in topk
(
    select("title", score=fn.semantic_similarity("title", "novel"))
    .topk(field("score"), 10)
)
name
str
required
The name of the field to reference.
return
LogicalExpr
An expression referencing the field.

literal()

Create a literal expression from a value.
from topk_sdk.query import literal, field, filter

# Usually not needed, values are auto-converted
filter(field("year").eq(literal(2000)))

# This is equivalent:
filter(field("year") == 2000)
value
Any
required
The literal value.
return
LogicalExpr
A literal expression.

Logical Expressions

LogicalExpr objects support rich operations for building filters and computed fields.

Comparison Operators

from topk_sdk.query import field

# Equality
field("genre").eq("fiction")      # or field("genre") == "fiction"
field("genre").ne("non-fiction")  # or field("genre") != "non-fiction"

# Ordering
field("year").lt(2000)   # or field("year") < 2000
field("year").lte(2000)  # or field("year") <= 2000
field("year").gt(2000)   # or field("year") > 2000
field("year").gte(2000)  # or field("year") >= 2000

Arithmetic Operators

from topk_sdk.query import field, select

# Addition, subtraction, multiplication, division
select(
    "title",
    discounted_price=field("price") * 0.8,
    price_diff=field("price") - field("cost"),
    total=field("price") + field("tax")
)

Logical Operators

from topk_sdk.query import field, filter

# AND
filter(field("year").gt(1900) & field("year").lt(2000))

# OR
filter(field("genre").eq("fiction") | field("genre").eq("mystery"))

String Operators

from topk_sdk.query import field, filter

# Check if string starts with prefix
filter(field("title").starts_with("The"))

# Check if list/string contains value
filter(field("tags").contains("bestseller"))

# Check if value is in list
filter(field("genre").in_(["fiction", "mystery", "thriller"]))

# Regular expression matching
filter(field("title").regexp_match(r"^The.*", flags="i"))

Keyword Matching

from topk_sdk.query import field, filter

# Match any term (OR)
filter(field("title").match_any(["science", "fiction"]))

# Match all terms (AND)
filter(field("title").match_all(["science", "fiction"]))

Null Handling

from topk_sdk.query import field, filter, select

# Check for null
filter(field("description").is_null())
filter(field("description").is_not_null())

# Coalesce null values
select("title", rating=field("rating").coalesce(0.0))

Math Functions

from topk_sdk.query import field, select

select(
    "title",
    abs_diff=field("price").sub(field("cost")).abs(),
    sqrt_price=field("price").sqrt(),
    squared=field("rating").square(),
    log_price=field("price").ln(),
    exp_value=field("value").exp()
)

Conditional Expressions

from topk_sdk.query import field, select, fn

# Choose between two values based on condition
select(
    "title",
    status=field("in_stock").choose("Available", "Out of Stock")
)

# Boost scoring based on condition
select(
    "title",
    score=fn.semantic_similarity("title", "novel").boost(
        field("is_bestseller"),
        2.0  # 2x boost for bestsellers
    )
)

# Min/max
select(
    "title",
    min_value=field("price").min(field("msrp")),
    max_value=field("rating").max(4.5)
)

match()

Perform keyword search for documents containing specific keywords or phrases. Use with fields that have a keyword_index().
from topk_sdk.query import match, field, filter, select, fn

# Simple keyword match
query = filter(match("science"))

# Match on specific field
query = filter(match("fiction", field="genre"))

# Match with weight
query = filter(match("bestseller", weight=2.0))

# Match all terms (AND)
query = filter(match("science fiction", all=True))

# Combine with BM25 scoring
query = select(
    "title",
    text_score=fn.bm25_score()
).filter(
    match("dystopian") | match("future")
).topk(field("text_score"), 10)
token
str
required
The keyword or phrase to search for.
field
str
The specific field to search in. If not provided, searches all keyword-indexed fields.
weight
float
default:"1.0"
Weight for this term in scoring.
all
bool
default:"false"
If True, all tokens must match (AND). If False, any token can match (OR).
return
LogicalExpr
A keyword match expression.

Logical Combinators

all()

Create a logical AND expression combining multiple conditions.
from topk_sdk.query import field, all, filter

query = filter(all([
    field("published_year") >= 1900,
    field("published_year") <= 2000,
    field("title").is_not_null()
]))
exprs
Sequence[LogicalExpr]
required
List of expressions to combine with AND.
return
LogicalExpr
A logical AND expression.

any()

Create a logical OR expression combining multiple conditions.
from topk_sdk.query import field, any, filter

query = filter(any([
    field("genre") == "fiction",
    field("genre") == "mystery",
    field("genre") == "thriller"
]))
exprs
Sequence[LogicalExpr]
required
List of expressions to combine with OR.
return
LogicalExpr
A logical OR expression.

not_()

Negate a logical expression.
from topk_sdk.query import field, not_, filter

query = filter(not_(field("title").contains("Catcher")))
expr
LogicalExpr
required
The expression to negate.
return
LogicalExpr
The negated expression.

Function Expressions

The fn class provides scoring functions for semantic similarity, vector and multi-vector distance, and BM25 keyword scoring.

fn.semantic_similarity()

Calculate semantic similarity between a field and a query string. Requires a semantic_index() on the field.
from topk_sdk.query import select, field, fn

results = client.collection("books").query(
    select(
        "title",
        similarity=fn.semantic_similarity("title", "animal story")
    ).topk(field("similarity"), 10)
)
field
str
required
The field name with a semantic index.
query
str
required
The query text to compare against.
return
FunctionExpr
A semantic similarity scoring function.

fn.vector_distance()

Calculate vector distance between a field and a query vector. Requires a vector_index() on the field.
from topk_sdk.query import select, field, fn

results = client.collection("books").query(
    select(
        "title",
        distance=fn.vector_distance(
            "title_embedding",
            [0.1, 0.2, 0.3, ...]  # Your embedding vector
        )
    ).topk(field("distance"), 10)
)
field
str
required
The field name containing vectors with a vector index.
vector
list[int] | list[float] | dict[int, float] | dict[int, int] | SparseVector | List
required
The query vector. Can be:
  • Dense vector: [0.1, 0.2, 0.3, ...]
  • Sparse vector: {0: 0.5, 10: 0.8, 50: 0.3}
  • SparseVector or List instance from topk_sdk.data
skip_refine
bool
default:"false"
Skip the refinement step for approximate search.
return
FunctionExpr
A vector distance scoring function.

fn.multi_vector_distance()

Calculate multi-vector distance between a matrix field and a query matrix. Requires a multi_vector_index() on the field.
from topk_sdk.query import select, field, fn

results = client.collection("documents").query(
    select(
        "title",
        distance=fn.multi_vector_distance(
            "colbert_embeddings",
            [[0.1, 0.2, ...], [0.3, 0.4, ...], ...],
            candidates=100
        )
    ).topk(field("distance"), 10)
)
field
str
required
The field name containing matrices with a multi-vector index.
matrix
Matrix | ndarray | list[list[float]] | list[list[int]]
required
The query matrix. Can be:
  • List of lists: [[0.1, 0.2], [0.3, 0.4]]
  • Numpy array: np.array([[0.1, 0.2], [0.3, 0.4]])
  • Matrix instance from topk_sdk.data
candidates
int
Limit the number of candidate vectors considered during search.
return
FunctionExpr
A multi-vector distance scoring function.

fn.bm25_score()

Calculate BM25 score for keyword search. Use with match() filters.
from topk_sdk.query import select, field, fn, match

results = client.collection("books").query(
    select(
        "title",
        text_score=fn.bm25_score()
    )
    .filter(match("dystopian") | match("future"))
    .topk(field("text_score"), 10)
)
return
FunctionExpr
A BM25 scoring function.

Helper Functions

min()

Compute the minimum of two expressions.
from topk_sdk.query import field, min, select

query = select(
    "title",
    best_price=min(field("price"), field("sale_price"))
)
left
int | float | str | LogicalExpr
required
First value to compare.
right
int | float | str | LogicalExpr
required
Second value to compare.
return
LogicalExpr
The minimum value.

max()

Compute the maximum of two expressions.
from topk_sdk.query import field, max, select

query = select(
    "title",
    max_rating=max(field("user_rating"), field("critic_rating"))
)
left
int | float | str | LogicalExpr
required
First value to compare.
right
int | float | str | LogicalExpr
required
Second value to compare.
return
LogicalExpr
The maximum value.

abs()

Compute the absolute value of an expression.
from topk_sdk.query import field, abs, filter

query = filter(abs(field("temperature")) > 30)
expr
LogicalExpr
required
The expression to compute the absolute value of.
return
LogicalExpr
The absolute value.

Complete Examples

Semantic Search with Filters

from topk_sdk.query import select, field, fn

results = client.collection("books").query(
    select(
        "title", "author", "year",
        similarity=fn.semantic_similarity("title", "space exploration")
    )
    .filter(field("year") > 1950)
    .topk(field("similarity"), 10)
)

Hybrid Search (Semantic + Keyword)

from topk_sdk.query import select, field, fn, match

results = client.collection("books").query(
    select(
        "title",
        semantic_score=fn.semantic_similarity("title", "artificial intelligence"),
        keyword_score=fn.bm25_score(),
        hybrid_score=fn.semantic_similarity("title", "AI") + fn.bm25_score()
    )
    .filter(match("technology") | match("computer"))
    .topk(field("hybrid_score"), 10)
)

Vector Search with Filters

from topk_sdk.query import select, field, fn

results = client.collection("products").query(
    select(
        "name", "price", "category",
        similarity=fn.vector_distance("image_embedding", user_query_vector)
    )
    .filter(
        (field("price") < 100) & 
        (field("in_stock") == True) &
        field("category").in_(["electronics", "gadgets"])
    )
    .topk(field("similarity"), 20)
)

Multi-Vector Search (ColBERT)

from topk_sdk.query import select, field, fn
import numpy as np

query_matrix = np.random.rand(10, 128).astype(np.float32)

results = client.collection("documents").query(
    select(
        "title", "content",
        relevance=fn.multi_vector_distance(
            "colbert_embeddings",
            query_matrix,
            candidates=500
        )
    ).topk(field("relevance"), 10)
)

Build docs developers (and LLMs) love