Learn how to use semantic operators for LLM-powered data transformations
Semantic operators are Fenic’s core capability for LLM-powered data transformations. They allow you to apply natural language instructions to transform, extract, classify, and process data at scale.
Semantic operators offload inference work from your agent’s context window to Fenic’s execution layer. Your agent receives structured results without consuming tokens for the transformation itself.
from typing import List

# Schema for a single knowledge-graph triple (subject, predicate, object).
class Triple(BaseModel):
    subject: str = Field(description="The subject of the triple")
    predicate: str = Field(description="The predicate or relation")
    object: str = Field(description="The object of the triple")

# Top-level extraction result: all triples plus a flat entity list.
class KGResult(BaseModel):
    triples: List[Triple] = Field(description="List of extracted knowledge graph triples")
    entities: list[str] = Field(description="Flat list of all detected named entities")

# Extract structured knowledge-graph data from each "blurb", then flatten:
# unnest the struct column and explode the triples list into one row per triple.
df.select(
    fc.semantic.extract("blurb", KGResult).alias("kg")
).unnest("kg").explode("triples")
# Summarize with template
# Jinja-style placeholders ({{ name }}, {{ details }}) are filled per row
# from the keyword-bound columns.
df.select(
    fc.semantic.map(
        "Write a compelling one-line description for {{ name }}: {{ details }}",
        name=fc.col("name"),
        details=fc.col("details")
    ).alias("description")
)
from fenic.core.types import MapExampleCollection, MapExample

# Few-shot examples steer the model: each example pairs template inputs
# with the desired output text.
examples = MapExampleCollection()
examples.create_example(MapExample(
    input={"name": "GlowMate", "details": "A rechargeable bedside lamp with adjustable color temperatures"},
    output="The modern touch-controlled lamp for better sleep and style."
))
examples.create_example(MapExample(
    input={"name": "AquaPure", "details": "A compact water filter that attaches to your faucet"},
    output="Clean, great-tasting water straight from your tap."
))

# Apply the same template as before, now guided by the examples.
df.select(
    fc.semantic.map(
        "Write a compelling one-line description for {{ name }}: {{ details }}",
        name=fc.col("name"),
        details=fc.col("details"),
        examples=examples
    ).alias("tagline")
)
from textwrap import dedent

# Filter products using semantic reasoning
# semantic.predicate evaluates a natural-language yes/no question per row;
# dedent keeps the multi-line template readable in source.
wireless_products = df.filter(
    fc.semantic.predicate(
        dedent('''\
            Product: {{ description }}
            Is this product wireless or battery-powered?'''),
        description=fc.col("product_description")
    )
)
from fenic.core.types import PredicateExampleCollection, PredicateExample

# Few-shot examples for a boolean predicate: each pairs template inputs
# with the expected True/False label.
examples = PredicateExampleCollection()
examples.create_example(PredicateExample(
    input={"ticket": "I was charged twice for my subscription"},
    output=True
))
examples.create_example(PredicateExample(
    input={"ticket": "How do I reset my password?"},
    output=False
))

# Keep only rows the model judges to be billing-related.
billing_tickets = df.filter(
    fc.semantic.predicate(
        "Ticket: {{ ticket }}\nThis ticket is about billing.",
        ticket=fc.col("ticket_text"),
        examples=examples
    )
)
Aggregate multiple texts into a single summary or synthesis.
# Group and reduce
df.group_by("category").agg(
    fc.semantic.reduce(
        "Summarize these documents in 2-3 sentences",
        fc.col("document_text")
    ).alias("summary")
)

# With ordering
# order_by controls the sequence in which rows are fed to the model.
df.group_by("conversation_id").agg(
    fc.semantic.reduce(
        "Summarize this conversation chronologically",
        fc.col("message"),
        order_by=[fc.col("timestamp")]
    ).alias("summary")
)

# With group context
# group_context exposes grouping-key columns to the instruction template.
df.group_by("department", "region").agg(
    fc.semantic.reduce(
        "Summarize these {{department}} reports from {{region}}",
        fc.col("document_text"),
        group_context={
            "department": fc.col("department"),
            "region": fc.col("region")
        }
    ).alias("summary")
)
# Match users to content
# Semantic join: pairs rows from the two DataFrames when the model judges
# the natural-language predicate true for the (left_on, right_on) values.
user_article_matches = users_df.semantic.join(
    articles_df,
    predicate="A person with interests '{{left_on}}' would be interested in '{{right_on}}'",
    left_on=fc.col("interests"),
    right_on=fc.col("description")
)
# Use specific model
# model_alias accepts a plain string naming a configured model.
fc.semantic.extract(
    "text",
    MySchema,
    model_alias="gpt-4o-mini"
)

# Use model profiles
# ModelAlias selects both a model and a named profile (e.g. reasoning effort).
from fenic.core.types.semantic import ModelAlias

fc.semantic.map(
    "Analyze: {{ text }}",
    text=fc.col("text"),
    model_alias=ModelAlias(name="o4", profile="thorough")
)