INSERT

Overview

INSERT adds new rows to user-defined tables. It supports inserting literal values or results from SELECT queries.

Syntax

INSERT INTO <table_name> [(<column> [, ...])]
  { VALUES (<value> [, ...]) [, ...] | SELECT ... };

Parameters

table_name
identifier
required
Name of the table to insert into
column
identifier
Column names for the insert. If omitted, a value must be supplied for every column of the table, in the table's declared column order.
value
expression
required
Values to insert. Can be literals, expressions, or NULL.

Basic INSERT

Insert Single Row

CREATE TABLE users (
  id integer,
  name text,
  email text
);

INSERT INTO users VALUES (1, 'Alice', '[email protected]');

Insert Multiple Rows

INSERT INTO users VALUES
  (2, 'Bob', '[email protected]'),
  (3, 'Charlie', '[email protected]'),
  (4, 'Diana', '[email protected]');

Column Specification

Specify Columns

-- Specify column order
INSERT INTO users (name, id, email)
VALUES ('Eve', 5, '[email protected]');

Partial Columns

CREATE TABLE products (
  id integer NOT NULL,
  name text NOT NULL,
  description text,
  price numeric
);

-- Only provide required columns
INSERT INTO products (id, name)
VALUES (1, 'Widget');
-- description and price will be NULL

INSERT with NULL Values

CREATE TABLE events (
  id integer NOT NULL,
  event_type text NOT NULL,
  metadata jsonb
);

INSERT INTO events VALUES
  (1, 'login', '{"ip": "192.168.1.1"}'::jsonb),
  (2, 'logout', NULL);  -- NULL metadata

INSERT with DEFAULT Values

CREATE TABLE logs (
  id integer NOT NULL,
  message text NOT NULL,
  created_at timestamp DEFAULT now()
);

-- created_at will use DEFAULT value
INSERT INTO logs (id, message)
VALUES (1, 'Application started');

-- Explicitly use DEFAULT
INSERT INTO logs VALUES (2, 'User logged in', DEFAULT);

INSERT … SELECT

Insert results from a query:
CREATE TABLE customers (
  id integer,
  name text,
  email text,
  last_order_date date
);

CREATE TABLE archived_customers (
  id integer,
  name text,
  email text,
  archived_date date
);

-- Insert from SELECT
INSERT INTO archived_customers (id, name, email)
SELECT id, name, email
FROM customers
WHERE last_order_date < '2020-01-01';

INSERT with Transformation

CREATE TABLE user_summary (
  user_id integer,
  order_count integer,
  total_spent numeric
);

INSERT INTO user_summary
SELECT
  customer_id,
  COUNT(*) as order_count,
  SUM(amount) as total_spent
FROM orders
GROUP BY customer_id;

Examples

Load Dimensional Data

CREATE TABLE product_categories (
  category_id integer NOT NULL,
  category_name text NOT NULL,
  description text
);

INSERT INTO product_categories VALUES
  (1, 'Electronics', 'Electronic devices and accessories'),
  (2, 'Clothing', 'Apparel and fashion'),
  (3, 'Home & Garden', 'Home improvement and garden supplies'),
  (4, 'Books', 'Physical and digital books');

Insert Complex Types

CREATE TABLE user_profiles (
  user_id integer NOT NULL,
  preferences jsonb,
  tags text[]
);

INSERT INTO user_profiles VALUES
  (
    1,
    '{"theme": "dark", "language": "en", "notifications": true}'::jsonb,
    ARRAY['premium', 'verified', 'early-adopter']
  );

Insert from Another Table

CREATE TABLE daily_metrics (
  date date,
  metric_name text,
  value numeric
);

CREATE TABLE monthly_metrics (
  month date,
  metric_name text,
  total_value numeric
);

-- Aggregate and insert
INSERT INTO monthly_metrics
SELECT
  date_trunc('month', date) as month,
  metric_name,
  SUM(value) as total_value
FROM daily_metrics
GROUP BY month, metric_name;

Bulk Load from External Data

-- Stage the external data in a table first;
-- INSERT ... SELECT cannot read from a source directly (see Known Limitations)
CREATE TABLE csv_import (
  field1 text,
  field2 text,
  field3 text
);

INSERT INTO target_table (col1, col2, col3)
SELECT
  field1,
  field2,
  field3
FROM csv_import;

Known Limitations

Performance Limitations:
  • INSERT has low performance compared to sources
  • Blocks other INSERT, UPDATE, DELETE operations
  • Not optimized for high-throughput writes

INSERT … SELECT Restrictions

-- Cannot reference sources directly
INSERT INTO my_table
SELECT * FROM kafka_source;  -- ERROR

-- Can only reference tables
INSERT INTO my_table
SELECT * FROM other_table;  -- OK

No Transaction Support

-- This will fail
BEGIN;
INSERT INTO users VALUES (1, 'Alice', '[email protected]');
COMMIT;  -- ERROR: INSERT cannot be used in transactions

Best Practices

  1. Use for Dimensional Data
    -- Good: Small reference tables
    INSERT INTO countries VALUES
      ('US', 'United States'),
      ('CA', 'Canada'),
      ('MX', 'Mexico');
    
  2. Batch Multiple Rows
    -- Better: Single INSERT with multiple rows
    INSERT INTO logs VALUES
      (1, 'Message 1'),
      (2, 'Message 2'),
      (3, 'Message 3');
    
    -- Avoid: Multiple single-row INSERTs
    INSERT INTO logs VALUES (1, 'Message 1');
    INSERT INTO logs VALUES (2, 'Message 2');
    INSERT INTO logs VALUES (3, 'Message 3');
    
  3. Specify Columns
    -- Good: Explicit columns
    INSERT INTO users (id, name) VALUES (1, 'Alice');
    
    -- Risky: Relying on column order
    INSERT INTO users VALUES (1, 'Alice', NULL, NULL);
    
  4. Use Sources for High Volume
    -- For high-throughput data, use sources instead
    CREATE SOURCE events
      FROM KAFKA CONNECTION kafka_conn (TOPIC 'events')
      FORMAT JSON;
    
  5. Check Constraints
    -- Ensure every NOT NULL column receives a value
    INSERT INTO users (id, name, email)
    VALUES (1, 'Alice', '[email protected]');
    

Alternatives for High-Volume Data

For high-volume data ingestion, use sources:
-- Instead of many INSERTs, use a source
CREATE SOURCE orders_stream
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'orders')
  FORMAT JSON;

CREATE MATERIALIZED VIEW orders AS
  SELECT
    (data->>'order_id')::integer as order_id,
    (data->>'customer_id')::integer as customer_id,
    (data->>'amount')::numeric as amount
  FROM orders_stream;

Build docs developers (and LLMs) love