By default, mo.sql() uses DuckDB, a fast in-process SQL database:
import marimo as moimport pandas as pd# Create a DataFramedf = pd.DataFrame({ "name": ["Alice", "Bob", "Charlie"], "age": [25, 30, 35], "city": ["NYC", "SF", "LA"]})# Query the DataFrame directlyresult = mo.sql(""" SELECT name, age FROM df WHERE age >= 30 ORDER BY age DESC""")
DuckDB can directly query DataFrames in your Python namespace - just reference them by variable name!
import marimo as moimport ibis# Connect to any Ibis-supported backendconn = ibis.connect("duckdb://my_database.db")result = mo.sql( "SELECT product, SUM(sales) as total FROM orders GROUP BY product", engine=conn)
import marimo as moimport pandas as pdsales = pd.read_csv("sales.csv")products = pd.read_csv("products.csv")# Join DataFrames with SQLresult = mo.sql(""" SELECT p.name, SUM(s.amount) as total_sales FROM sales s JOIN products p ON s.product_id = p.id GROUP BY p.name ORDER BY total_sales DESC""")
import marimo as moimport polars as pl# Returns polars.LazyFrame - no data loaded yetresult = mo.sql("SELECT * FROM 'huge_file.parquet'")type(result) # polars.LazyFrame# Data is only loaded when neededfiltered = result.filter(pl.col("value") > 100).collect()
Load DuckDB extensions for additional functionality:
import duckdbimport marimo as moconn = duckdb.connect()# Install and load extensionsconn.execute("INSTALL httpfs")conn.execute("LOAD httpfs")conn.execute("INSTALL spatial")conn.execute("LOAD spatial")# Now you can query S3 and use spatial functionsmo.sql(""" SELECT ST_AsText(geometry) FROM 's3://bucket/geo_data.parquet'""", engine=conn)
import marimo as moresult = mo.sql(""" WITH high_value_customers AS ( SELECT customer_id, SUM(amount) as total FROM orders GROUP BY customer_id HAVING total > 1000 ) SELECT c.name, hvc.total FROM high_value_customers hvc JOIN customers c ON hvc.customer_id = c.id ORDER BY hvc.total DESC""")
import marimo as moresult = mo.sql(""" SELECT date, revenue, SUM(revenue) OVER ( ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW ) as rolling_7day FROM daily_sales ORDER BY date""")
import marimo as moimport pandas as pd# 1. Load datasales = pd.read_csv("sales.csv")customers = pd.read_csv("customers.csv")# 2. Transform with SQLsummary = mo.sql(""" SELECT c.segment, COUNT(DISTINCT s.customer_id) as customer_count, SUM(s.amount) as total_revenue, AVG(s.amount) as avg_order_value FROM sales s JOIN customers c ON s.customer_id = c.id WHERE s.date >= '2024-01-01' GROUP BY c.segment ORDER BY total_revenue DESC""")# 3. Visualize resultsimport altair as altchart = ( alt.Chart(summary) .mark_bar() .encode( x="segment", y="total_revenue", color="segment" ))mo.ui.altair_chart(chart)
This workflow demonstrates loading data, transforming it with SQL, and visualizing the results - all with reactive updates.