Overview
SELECT is used to query data in Materialize. It can be used interactively for ad hoc queries or embedded in view definitions.
Syntax
[WITH <cte> [, ...]]
SELECT [DISTINCT | ALL] <select_expr> [, ...]
FROM <table_expr> [, ...]
[WHERE <condition>]
[GROUP BY <expr> [, ...]]
[HAVING <condition>]
[ORDER BY <expr> [ASC | DESC] [NULLS {FIRST | LAST}] [, ...]]
[LIMIT <count>]
[OFFSET <start>]
[OPTIONS (<option> = <value> [, ...])];
Key Concepts
Query Execution Modes
Materialize executes SELECT queries in different ways:
Reading from Indexes (Fastest)
-- Create indexed view
CREATE VIEW user_stats AS
SELECT user_id, COUNT(*) as order_count
FROM orders
GROUP BY user_id;
CREATE INDEX user_stats_idx ON user_stats (user_id);
-- Fast lookup from index
SELECT * FROM user_stats WHERE user_id = 123;
Reading from Materialized Views (Fast)
CREATE MATERIALIZED VIEW daily_totals AS
SELECT date, SUM(amount) as total
FROM orders
GROUP BY date;
-- Read from durable storage
SELECT * FROM daily_totals;
Ad Hoc Queries (Computes on demand)
-- Creates ephemeral dataflow
SELECT product_id, AVG(price)
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY product_id;
Basic Queries
Simple Selection
SELECT id, name, email
FROM users;
With WHERE Clause
SELECT *
FROM orders
WHERE amount > 100
AND status = 'completed';
With ORDER BY
SELECT name, total_spent
FROM customers
ORDER BY total_spent DESC
LIMIT 10;
With DISTINCT
SELECT DISTINCT country
FROM customers
ORDER BY country;
Aggregations
Basic Aggregation
SELECT
COUNT(*) as order_count,
SUM(amount) as total_revenue,
AVG(amount) as avg_order_value,
MIN(amount) as min_order,
MAX(amount) as max_order
FROM orders;
GROUP BY
SELECT
product_id,
COUNT(*) as times_ordered,
SUM(quantity) as total_quantity,
AVG(price) as avg_price
FROM order_items
GROUP BY product_id;
HAVING Clause
SELECT
customer_id,
COUNT(*) as order_count
FROM orders
GROUP BY customer_id
HAVING COUNT(*) > 10;
Joins
Inner Join
SELECT
c.name,
o.order_id,
o.amount
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id;
Left Join
SELECT
c.name,
COUNT(o.order_id) as order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.name;
Multiple Joins
SELECT
c.name as customer_name,
p.name as product_name,
oi.quantity,
oi.price
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id;
Window Functions
ROW_NUMBER
SELECT
name,
amount,
ROW_NUMBER() OVER (ORDER BY amount DESC) as rank
FROM orders;
RANK and DENSE_RANK
SELECT
product_id,
revenue,
RANK() OVER (ORDER BY revenue DESC) as rank,
DENSE_RANK() OVER (ORDER BY revenue DESC) as dense_rank
FROM product_revenue;
Partition By
SELECT
category,
product_name,
sales,
RANK() OVER (PARTITION BY category ORDER BY sales DESC) as category_rank
FROM products;
LAG and LEAD
SELECT
date,
revenue,
LAG(revenue) OVER (ORDER BY date) as prev_day_revenue,
LEAD(revenue) OVER (ORDER BY date) as next_day_revenue
FROM daily_revenue;
Common Table Expressions (CTEs)
Basic CTE
WITH high_value_customers AS (
SELECT customer_id, SUM(amount) as total_spent
FROM orders
GROUP BY customer_id
HAVING SUM(amount) > 10000
)
SELECT
c.name,
hvc.total_spent
FROM high_value_customers hvc
JOIN customers c ON hvc.customer_id = c.customer_id;
Multiple CTEs
WITH
monthly_sales AS (
SELECT
date_trunc('month', order_date) as month,
SUM(amount) as total
FROM orders
GROUP BY month
),
avg_monthly AS (
SELECT AVG(total) as avg_total
FROM monthly_sales
)
SELECT
ms.month,
ms.total,
ms.total - am.avg_total as diff_from_avg
FROM monthly_sales ms
CROSS JOIN avg_monthly am;
Recursive CTEs
See Recursive CTEs for detailed documentation.
Subqueries
Scalar Subquery
SELECT
name,
amount,
amount - (SELECT AVG(amount) FROM orders) as diff_from_avg
FROM orders;
IN Subquery
SELECT *
FROM products
WHERE category_id IN (
SELECT category_id
FROM categories
WHERE active = true
);
EXISTS Subquery
SELECT *
FROM customers c
WHERE EXISTS (
SELECT 1
FROM orders o
WHERE o.customer_id = c.customer_id
AND o.order_date >= '2024-01-01'
);
Query Hints
Optimize query execution with hints:
SELECT
user_id,
COUNT(*) as event_count
FROM events
GROUP BY user_id
OPTIONS (AGGREGATE INPUT GROUP SIZE = 100);
AGGREGATE INPUT GROUP SIZE
Expected rows per group key in aggregations. Helps optimize MIN and MAX expressions.
DISTINCT ON INPUT GROUP SIZE
Expected rows per group in DISTINCT ON. Optimizes Top-K patterns.
Expected rows per group for LIMIT. Optimizes Top-K patterns.
Set Operations
UNION
SELECT email FROM customers
UNION
SELECT email FROM prospects;
UNION ALL
SELECT 'customer' as type, email FROM customers
UNION ALL
SELECT 'prospect' as type, email FROM prospects;
INTERSECT
SELECT email FROM customers
INTERSECT
SELECT email FROM newsletter_subscribers;
EXCEPT
SELECT email FROM customers
EXCEPT
SELECT email FROM unsubscribed;
Advanced Examples
Top N Per Group
WITH ranked AS (
SELECT
category,
product_name,
sales,
ROW_NUMBER() OVER (
PARTITION BY category
ORDER BY sales DESC
) as rank
FROM products
)
SELECT category, product_name, sales
FROM ranked
WHERE rank <= 5;
Moving Average
SELECT
date,
revenue,
AVG(revenue) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg_7_days
FROM daily_revenue;
Cumulative Sum
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) as cumulative_revenue
FROM orders;
Pivot Table (Manual)
SELECT
product_id,
SUM(CASE WHEN region = 'North' THEN sales ELSE 0 END) as north_sales,
SUM(CASE WHEN region = 'South' THEN sales ELSE 0 END) as south_sales,
SUM(CASE WHEN region = 'East' THEN sales ELSE 0 END) as east_sales,
SUM(CASE WHEN region = 'West' THEN sales ELSE 0 END) as west_sales
FROM regional_sales
GROUP BY product_id;
Use Indexes
-- Create indexed view for frequent queries
CREATE VIEW user_orders AS
SELECT user_id, COUNT(*) as order_count
FROM orders
GROUP BY user_id;
CREATE INDEX user_orders_idx ON user_orders (user_id);
Use Materialized Views
-- For expensive computations
CREATE MATERIALIZED VIEW product_stats AS
SELECT
p.product_id,
p.name,
COUNT(oi.order_id) as times_ordered,
SUM(oi.quantity) as total_quantity
FROM products p
LEFT JOIN order_items oi ON p.product_id = oi.product_id
GROUP BY p.product_id, p.name;
Filter Early
-- Good: Filter before join
SELECT *
FROM (
SELECT * FROM orders WHERE order_date >= '2024-01-01'
) recent_orders
JOIN customers ON recent_orders.customer_id = customers.customer_id;
Best Practices
-
Create Indexes for Frequent Queries
CREATE INDEX ON my_view (frequently_filtered_column);
-
Use Materialized Views for Complex Queries
CREATE MATERIALIZED VIEW complex_report AS
SELECT ... -- complex query
-
Use CTEs for Readability
WITH step1 AS (...), step2 AS (...)
SELECT ... FROM step2;
-
**Avoid SELECT ***
-- Better: specify columns
SELECT id, name, email FROM users;
-
Use Query Hints for Optimization
OPTIONS (AGGREGATE INPUT GROUP SIZE = 1000)
Related Pages