USE REFERENCE dev IN nessie;-- Create a table in dev branchCREATE TABLE nessie.db.experiments ( id bigint, data string, created_at timestamp) USING iceberg;-- Insert dataINSERT INTO nessie.db.experiments VALUES (1, 'test data', current_timestamp());-- Changes are isolated to dev branch
Nessie enables loosely coupled multi-table transactions using branches:
-- Create a branch for the transactionCREATE BRANCH etl_job IN nessie FROM main;USE REFERENCE etl_job IN nessie;-- Perform multiple operationsINSERT INTO nessie.warehouse.inventory SELECT * FROM staging.new_inventory;UPDATE nessie.warehouse.products SET stock = stock - sold_quantity;INSERT INTO nessie.analytics.sales_summarySELECT product_id, SUM(quantity), SUM(revenue)FROM nessie.warehouse.salesGROUP BY product_id;-- Atomically merge all changesMERGE BRANCH etl_job INTO main IN nessie;
While this provides atomic visibility of changes, each operation is still a separate Iceberg transaction. This is different from true ACID multi-table transactions.
Test schema changes or partition evolution safely:
-- Create experiment branchCREATE BRANCH partition_experiment IN nessie FROM main;USE REFERENCE partition_experiment IN nessie;-- Test partition evolutionALTER TABLE nessie.db.events SET PARTITION SPEC (days(created_at));-- Run performance testsSELECT COUNT(*) FROM nessie.db.events WHERE created_at >= current_date() - interval 7 days;-- If successful, merge; otherwise drop the branchMERGE BRANCH partition_experiment INTO main IN nessie;-- ORDROP BRANCH partition_experiment IN nessie;
-- List all commitsSHOW LOG IN nessie;-- Use a specific commit hashUSE REFERENCE '0123456789abcdef' IN nessie;-- Query all tables at that point in timeSELECT * FROM nessie.db.table1;SELECT * FROM nessie.db.table2;
Nessie provides additional SQL commands for repository management:
-- Show current referenceSHOW CURRENT REFERENCE IN nessie;-- Show commit logSHOW LOG IN nessie;-- Show log for specific referenceSHOW LOG dev IN nessie;-- List all references (branches and tags)LIST REFERENCES IN nessie;-- Create reference from specific hashCREATE TAG snapshot_v1 IN nessie FROM main AT '0123456789abcdef';-- Assign reference to specific hashASSIGN BRANCH main TO '0123456789abcdef' IN nessie;
Create a branch for each pipeline run, validate results, and merge only if tests pass:
CREATE BRANCH pipeline_run_123 IN nessie FROM main;-- Run pipeline transformations-- Run data quality checks-- If passed: MERGE BRANCH pipeline_run_123 INTO main IN nessie;
Multi-table ETL
Update fact and dimension tables atomically:
CREATE BRANCH etl_2024_03_15 IN nessie FROM main;-- Update dim_customer, dim_product, fact_salesMERGE BRANCH etl_2024_03_15 INTO main IN nessie;
Environment Isolation
Maintain separate dev, staging, and prod branches:
CREATE BRANCH dev IN nessie FROM main;CREATE BRANCH staging IN nessie FROM main;-- Promote: MERGE BRANCH dev INTO staging-- Release: MERGE BRANCH staging INTO main
Compliance & Auditing
Tag snapshots for regulatory requirements:
CREATE TAG quarter_end_2024_q1 IN nessie FROM main;-- Access data later for auditUSE REFERENCE quarter_end_2024_q1 IN nessie;