What are Common Table Expressions (CTEs)?

Answer

Common Table Expressions (CTEs) are temporary named result sets that exist only within the scope of a single SQL statement. They provide a way to write more readable and maintainable complex queries.

Basic Syntax

WITH cte_name (column1, column2, ...) AS (
    -- CTE query definition
    SELECT column1, column2, ...
    FROM table_name
    WHERE condition
)
-- Main query using the CTE
SELECT * FROM cte_name WHERE condition;

Simple CTE Example

-- Without CTE (subquery in FROM clause)
SELECT dept_name, avg_salary
FROM (
    SELECT department_id, AVG(salary) as avg_salary
    FROM employees
    GROUP BY department_id
) emp_avg
JOIN departments d ON emp_avg.department_id = d.department_id
WHERE avg_salary > 50000;

-- With CTE (more readable)
WITH department_averages AS (
    SELECT department_id, AVG(salary) as avg_salary
    FROM employees
    GROUP BY department_id
)
SELECT d.dept_name, da.avg_salary
FROM department_averages da
JOIN departments d ON da.department_id = d.department_id
WHERE da.avg_salary > 50000;

Multiple CTEs

WITH 
high_earners AS (
    SELECT employee_id, name, salary, department_id
    FROM employees
    WHERE salary > 75000
),
department_stats AS (
    SELECT 
        department_id,
        COUNT(*) as total_employees,
        AVG(salary) as avg_salary
    FROM employees
    GROUP BY department_id
)
SELECT 
    he.name,
    he.salary,
    ds.avg_salary,
    ds.total_employees
FROM high_earners he
JOIN department_stats ds ON he.department_id = ds.department_id;

Recursive CTEs

Purpose: Handle hierarchical or tree-structured data.

Employee Hierarchy Example

-- Sample hierarchical data
CREATE TABLE employees (
    employee_id INT PRIMARY KEY,
    name VARCHAR(100),
    manager_id INT,
    salary DECIMAL(10,2)
);

-- Recursive CTE to find all subordinates
WITH RECURSIVE employee_hierarchy AS (
    -- Anchor: Start with top-level managers
    SELECT 
        employee_id,
        name,
        manager_id,
        salary,
        0 as level,
        CAST(name AS VARCHAR(1000)) as hierarchy_path
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- Recursive: Find direct reports
    SELECT 
        e.employee_id,
        e.name,
        e.manager_id,
        e.salary,
        eh.level + 1,
        CONCAT(eh.hierarchy_path, ' -> ', e.name)
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
)
SELECT 
    employee_id,
    name,
    level,
    hierarchy_path
FROM employee_hierarchy
ORDER BY level, name;

Organizational Chart

WITH RECURSIVE org_chart AS (
    -- Find CEO (no manager)
    SELECT 
        employee_id,
        name,
        manager_id,
        1 as level,
        name as path
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- Find all subordinates recursively
    SELECT 
        e.employee_id,
        e.name,
        e.manager_id,
        oc.level + 1,
        CONCAT(oc.path, ' > ', e.name)
    FROM employees e
    JOIN org_chart oc ON e.manager_id = oc.employee_id
    WHERE oc.level < 10  -- Prevent infinite recursion
)
SELECT * FROM org_chart ORDER BY level, name;

Practical Business Examples

Sales Analysis

WITH 
monthly_sales AS (
    SELECT 
        DATE_FORMAT(order_date, '%Y-%m') as month,
        SUM(total_amount) as monthly_total
    FROM orders
    WHERE order_date >= '2024-01-01'
    GROUP BY DATE_FORMAT(order_date, '%Y-%m')
),
sales_with_growth AS (
    SELECT 
        month,
        monthly_total,
        LAG(monthly_total) OVER (ORDER BY month) as prev_month_total,
        monthly_total - LAG(monthly_total) OVER (ORDER BY month) as growth
    FROM monthly_sales
)
SELECT 
    month,
    monthly_total,
    prev_month_total,
    growth,
    ROUND((growth / prev_month_total) * 100, 2) as growth_percentage
FROM sales_with_growth
WHERE prev_month_total IS NOT NULL
ORDER BY month;

Customer Segmentation

WITH 
customer_metrics AS (
    SELECT 
        c.customer_id,
        c.customer_name,
        COUNT(o.order_id) as order_count,
        SUM(o.total_amount) as total_spent,
        AVG(o.total_amount) as avg_order_value,
        MAX(o.order_date) as last_order_date
    FROM customers c
    LEFT JOIN orders o ON c.customer_id = o.customer_id
    GROUP BY c.customer_id, c.customer_name
),
customer_segments AS (
    SELECT 
        *,
        CASE 
            WHEN total_spent > 10000 AND order_count > 20 THEN 'VIP'
            WHEN total_spent > 5000 AND order_count > 10 THEN 'Premium'
            WHEN total_spent > 1000 AND order_count > 5 THEN 'Regular'
            WHEN order_count > 0 THEN 'Occasional'
            ELSE 'Inactive'
        END as segment
    FROM customer_metrics
)
SELECT 
    segment,
    COUNT(*) as customer_count,
    AVG(total_spent) as avg_total_spent,
    AVG(order_count) as avg_order_count
FROM customer_segments
GROUP BY segment
ORDER BY avg_total_spent DESC;

CTE vs Subqueries vs Temporary Tables

CTE Advantages:

  • Readability: More readable than nested subqueries
  • Reusability: Can reference the same CTE multiple times
  • Recursion: Supports recursive operations
  • Maintainability: Easier to debug and modify

CTE vs Subquery

-- Complex subquery (hard to read)
SELECT *
FROM (
    SELECT department_id, AVG(salary) as avg_sal
    FROM employees
    GROUP BY department_id
) dept_avg
WHERE dept_avg.avg_sal > (
    SELECT AVG(salary) * 1.1
    FROM employees
);

-- CTE version (more readable)
WITH 
company_avg AS (
    SELECT AVG(salary) * 1.1 as threshold
    FROM employees
),
dept_averages AS (
    SELECT department_id, AVG(salary) as avg_sal
    FROM employees
    GROUP BY department_id
)
SELECT *
FROM dept_averages da
CROSS JOIN company_avg ca
WHERE da.avg_sal > ca.threshold;

CTE vs Temporary Table

-- Temporary table approach
CREATE TEMPORARY TABLE temp_dept_stats AS
SELECT department_id, AVG(salary) as avg_salary
FROM employees
GROUP BY department_id;

SELECT * FROM temp_dept_stats WHERE avg_salary > 50000;
DROP TEMPORARY TABLE temp_dept_stats;

-- CTE approach (simpler)
WITH dept_stats AS (
    SELECT department_id, AVG(salary) as avg_salary
    FROM employees
    GROUP BY department_id
)
SELECT * FROM dept_stats WHERE avg_salary > 50000;

Performance Considerations

CTE Materialization

-- CTE may be materialized (stored temporarily)
WITH expensive_cte AS (
    SELECT customer_id, SUM(order_amount) as total
    FROM large_orders_table
    GROUP BY customer_id
)
SELECT c1.customer_id, c1.total
FROM expensive_cte c1
JOIN expensive_cte c2 ON c1.total = c2.total  -- CTE used twice
WHERE c1.customer_id != c2.customer_id;

Optimization Tips

-- Use indexes on CTE source tables
WITH recent_orders AS (
    SELECT customer_id, order_date, total_amount
    FROM orders
    WHERE order_date >= '2024-01-01'  -- Make sure there's an index on order_date
)
SELECT * FROM recent_orders WHERE customer_id = 12345;

Advanced CTE Patterns

Running Totals

WITH daily_sales AS (
    SELECT 
        order_date,
        SUM(total_amount) as daily_total
    FROM orders
    GROUP BY order_date
),
running_totals AS (
    SELECT 
        order_date,
        daily_total,
        SUM(daily_total) OVER (ORDER BY order_date) as running_total
    FROM daily_sales
)
SELECT * FROM running_totals ORDER BY order_date;

Data Validation

WITH data_quality_checks AS (
    SELECT 
        'Missing emails' as check_name,
        COUNT(*) as issue_count
    FROM customers
    WHERE email IS NULL OR email = ''
    
    UNION ALL
    
    SELECT 
        'Negative prices' as check_name,
        COUNT(*) as issue_count
    FROM products
    WHERE price < 0
    
    UNION ALL
    
    SELECT 
        'Future order dates' as check_name,
        COUNT(*) as issue_count
    FROM orders
    WHERE order_date > CURRENT_DATE
)
SELECT * FROM data_quality_checks WHERE issue_count > 0;

Interview Tips

  • Understand that CTEs improve query readability and maintainability
  • Know the difference between regular and recursive CTEs
  • Practice writing hierarchical queries with recursive CTEs
  • Understand when to use CTEs vs subqueries vs temporary tables
  • Be familiar with CTE performance characteristics and limitations

Test Your Knowledge

Take a quick quiz to test your understanding of this topic.

Test Your SQL Knowledge

Ready to put your skills to the test? Take our interactive SQL quiz and get instant feedback on your answers.