What are subqueries and how do they work?
Answer
A subquery (also called an inner query or nested query) is a query embedded within another SQL statement. The subquery executes first, and its result is used by the outer query.
Basic Syntax
SELECT column1, column2
FROM table1
WHERE column1 = (SELECT column1 FROM table2 WHERE condition);
Types of Subqueries
1. Scalar Subquery
Returns a single value (one row, one column).
-- Find employees with salary higher than average
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
-- Get employee with highest salary in each department
SELECT name, department, salary
FROM employees e1
WHERE salary = (
SELECT MAX(salary)
FROM employees e2
WHERE e2.department = e1.department
);
2. Row Subquery
Returns a single row with multiple columns.
-- Find employee with same department and salary as John Doe
SELECT name, department, salary
FROM employees
WHERE (department, salary) = (
SELECT department, salary
FROM employees
WHERE name = 'John Doe'
);
3. Table Subquery
Returns multiple rows and columns.
-- Find employees in departments with more than 5 employees
SELECT name, department
FROM employees
WHERE department IN (
SELECT department
FROM employees
GROUP BY department
HAVING COUNT(*) > 5
);
Subquery Locations
1. WHERE Clause
-- Employees earning more than department average
SELECT name, salary, department
FROM employees e1
WHERE salary > (
SELECT AVG(salary)
FROM employees e2
WHERE e2.department = e1.department
);
2. FROM Clause (Derived Table)
-- Department statistics
SELECT dept_stats.department, dept_stats.avg_salary
FROM (
SELECT department, AVG(salary) as avg_salary
FROM employees
GROUP BY department
) AS dept_stats
WHERE dept_stats.avg_salary > 50000;
3. SELECT Clause
-- Employee details with department average
SELECT
name,
salary,
department,
(SELECT AVG(salary) FROM employees e2 WHERE e2.department = e1.department) as dept_avg
FROM employees e1;
4. HAVING Clause
-- Departments with average salary higher than company average
SELECT department, AVG(salary) as dept_avg
FROM employees
GROUP BY department
HAVING AVG(salary) > (SELECT AVG(salary) FROM employees);
Common Subquery Operators
EXISTS / NOT EXISTS
-- Customers who have placed orders
SELECT customer_name
FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
);
-- Customers who haven't placed orders
SELECT customer_name
FROM customers c
WHERE NOT EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
);
IN / NOT IN
-- Products in specific categories
SELECT product_name, price
FROM products
WHERE category_id IN (
SELECT category_id
FROM categories
WHERE category_name IN ('Electronics', 'Books')
);
-- Handle NULL values carefully with NOT IN
SELECT product_name
FROM products
WHERE category_id NOT IN (
SELECT category_id
FROM categories
WHERE category_id IS NOT NULL -- Important!
);
ANY / SOME
-- Products more expensive than ANY book
SELECT product_name, price
FROM products
WHERE price > ANY (
SELECT price
FROM products
WHERE category = 'Books'
);
ALL
-- Products more expensive than ALL books
SELECT product_name, price
FROM products
WHERE price > ALL (
SELECT price
FROM products
WHERE category = 'Books'
);
Practical Examples
E-commerce Scenarios
-- Sample tables
CREATE TABLE customers (
customer_id INT PRIMARY KEY,
customer_name VARCHAR(100),
city VARCHAR(50)
);
CREATE TABLE orders (
order_id INT PRIMARY KEY,
customer_id INT,
order_date DATE,
total_amount DECIMAL(10,2)
);
CREATE TABLE products (
product_id INT PRIMARY KEY,
product_name VARCHAR(100),
category VARCHAR(50),
price DECIMAL(10,2)
);
Complex Business Queries
-- 1. Find customers who spent more than average in the last year
SELECT customer_name
FROM customers c
WHERE (
SELECT SUM(total_amount)
FROM orders o
WHERE o.customer_id = c.customer_id
AND o.order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)
) > (
SELECT AVG(yearly_total)
FROM (
SELECT SUM(total_amount) as yearly_total
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)
GROUP BY customer_id
) as customer_totals
);
-- 2. Products never ordered
SELECT product_name
FROM products p
WHERE NOT EXISTS (
SELECT 1
FROM order_items oi
WHERE oi.product_id = p.product_id
);
-- 3. Top 3 customers by order value in each city
SELECT customer_name, city, total_orders
FROM (
SELECT
c.customer_name,
c.city,
SUM(o.total_amount) as total_orders,
ROW_NUMBER() OVER (PARTITION BY c.city ORDER BY SUM(o.total_amount) DESC) as rn
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.city
) ranked
WHERE rn <= 3;
Performance Considerations
Correlated vs Non-Correlated Subqueries
-- Non-correlated (executes once)
SELECT name FROM employees
WHERE department_id IN (SELECT id FROM departments WHERE location = 'NYC');
-- Correlated (executes for each outer row)
SELECT name FROM employees e1
WHERE salary > (SELECT AVG(salary) FROM employees e2 WHERE e2.dept_id = e1.dept_id);
Optimization Tips
-- Instead of correlated subquery
SELECT e1.name, e1.salary
FROM employees e1
WHERE e1.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department = e1.department
);
-- Use window function (often faster)
SELECT name, salary
FROM (
SELECT
name,
salary,
department,
AVG(salary) OVER (PARTITION BY department) as dept_avg
FROM employees
) t
WHERE salary > dept_avg;
Common Pitfalls
1. NULL Handling with NOT IN
-- Dangerous: Returns no results if subquery contains NULL
SELECT * FROM products
WHERE category_id NOT IN (SELECT category_id FROM categories);
-- Safe: Filter out NULLs
SELECT * FROM products
WHERE category_id NOT IN (
SELECT category_id FROM categories WHERE category_id IS NOT NULL
);
2. Multiple Row Subquery with Scalar Operator
-- Error: Subquery returns multiple rows
SELECT * FROM employees
WHERE salary = (SELECT salary FROM employees WHERE department = 'IT');
-- Correct: Use IN or ANY
SELECT * FROM employees
WHERE salary IN (SELECT salary FROM employees WHERE department = 'IT');
Subquery vs JOIN Performance
-- Subquery approach
SELECT customer_name
FROM customers
WHERE customer_id IN (SELECT customer_id FROM orders);
-- JOIN approach (often faster)
SELECT DISTINCT c.customer_name
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id;
Interview Tips
- Understand the difference between correlated and non-correlated subqueries
- Know when to use EXISTS vs IN
- Be aware of NULL handling issues with NOT IN
- Practice converting subqueries to JOINs and vice versa
- Understand performance implications of different subquery types
Test Your Knowledge
Take a quick quiz to test your understanding of this topic.