Let's create a realistic company database with employees and sales data:
-- Create our company database
CREATE TABLE employees (
employee_id INT PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50),
department VARCHAR(50),
salary DECIMAL(10, 2),
hire_date DATE,
manager_id INT
);
CREATE TABLE sales (
sale_id INT PRIMARY KEY,
employee_id INT,
sale_date DATE,
amount DECIMAL(10, 2),
product_category VARCHAR(50),
FOREIGN KEY (employee_id) REFERENCES employees(employee_id)
);
-- Insert sample data
INSERT INTO employees VALUES
(1, 'Sarah', 'Johnson', 'Sales', 75000, '2020-01-15', NULL),
(2, 'Mike', 'Chen', 'Sales', 82000, '2019-03-22', 1),
(3, 'Emily', 'Davis', 'Sales', 68000, '2021-06-10', 1),
(4, 'James', 'Wilson', 'Marketing', 65000, '2020-11-05', NULL),
(5, 'Lisa', 'Anderson', 'Sales', 95000, '2018-07-20', 1),
(6, 'Tom', 'Brown', 'IT', 78000, '2019-09-15', NULL),
(7, 'Jessica', 'Taylor', 'Sales', 71000, '2021-02-28', 1),
(8, 'David', 'Lee', 'Marketing', 62000, '2022-01-10', 4);
INSERT INTO sales VALUES
(1, 2, '2024-01-15', 15000, 'Electronics'),
(2, 2, '2024-01-20', 22000, 'Electronics'),
(3, 3, '2024-01-18', 8000, 'Accessories'),
(4, 5, '2024-01-10', 35000, 'Electronics'),
(5, 5, '2024-01-25', 28000, 'Software'),
(6, 7, '2024-01-22', 12000, 'Accessories'),
(7, 2, '2024-02-05', 18000, 'Software'),
(8, 3, '2024-02-10', 9500, 'Electronics'),
(9, 5, '2024-02-15', 42000, 'Electronics'),
(10, 7, '2024-02-20', 16000, 'Software');
Let's start with our first detective mission:
-- First, let's find the average salary
SELECT AVG(salary) as avg_salary FROM employees;
-- Result: 74375.00
-- Now, find employees earning above average using a subquery
SELECT
first_name,
last_name,
department,
salary,
ROUND(salary - (SELECT AVG(salary) FROM employees), 2) as above_average_by
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees)
ORDER BY salary DESC;
These return exactly one value:
-- Find the highest-paid employee
SELECT
first_name,
last_name,
salary
FROM employees
WHERE salary = (SELECT MAX(salary) FROM employees);
-- Find employees in the same department as Lisa Anderson
SELECT
first_name,
last_name,
department
FROM employees
WHERE department = (
SELECT department
FROM employees
WHERE first_name = 'Lisa' AND last_name = 'Anderson'
)
AND NOT (first_name = 'Lisa' AND last_name = 'Anderson');
These return multiple values and use operators like IN, ANY, or ALL:
-- Find employees who have made sales
SELECT
first_name,
last_name,
department
FROM employees
WHERE employee_id IN (
SELECT DISTINCT employee_id
FROM sales
);
-- Find employees earning more than ALL marketing employees
SELECT
first_name,
last_name,
department,
salary
FROM employees
WHERE salary > ALL (
SELECT salary
FROM employees
WHERE department = 'Marketing'
);
Let's identify our sales superstars:
-- Find top 3 sales performers by total sales amount
SELECT
e.first_name,
e.last_name,
(SELECT SUM(amount)
FROM sales s
WHERE s.employee_id = e.employee_id) as total_sales
FROM employees e
WHERE e.department = 'Sales'
AND EXISTS (
SELECT 1
FROM sales s
WHERE s.employee_id = e.employee_id
)
ORDER BY total_sales DESC
LIMIT 3;
Let's find unusual patterns in our data:
-- Find sales that are significantly above average
WITH sales_stats AS (
SELECT
AVG(amount) as avg_sale,
STDDEV(amount) as stddev_sale
FROM sales
)
SELECT
s.sale_id,
e.first_name,
e.last_name,
s.amount,
s.product_category,
ROUND((s.amount - ss.avg_sale) / ss.stddev_sale, 2) as z_score
FROM sales s
JOIN employees e ON s.employee_id = e.employee_id
CROSS JOIN sales_stats ss
WHERE ABS((s.amount - ss.avg_sale) / ss.stddev_sale) > 2
ORDER BY z_score DESC;
Execute once and provide the same result for all rows:
-- Find employees hired after the average hire date
SELECT
first_name,
last_name,
hire_date
FROM employees
WHERE hire_date > (
SELECT AVG(hire_date)
FROM employees
);
Execute once for each row in the outer query:
-- Find employees earning more than their department average
SELECT
e1.first_name,
e1.last_name,
e1.department,
e1.salary,
(SELECT AVG(salary)
FROM employees e2
WHERE e2.department = e1.department) as dept_avg
FROM employees e1
WHERE e1.salary > (
SELECT AVG(salary)
FROM employees e2
WHERE e2.department = e1.department
);
-- Comprehensive sales performance analysis
WITH performance_metrics AS (
SELECT
e.employee_id,
e.first_name,
e.last_name,
COUNT(s.sale_id) as total_transactions,
SUM(s.amount) as total_sales,
AVG(s.amount) as avg_sale_size,
MAX(s.amount) as largest_sale
FROM employees e
LEFT JOIN sales s ON e.employee_id = s.employee_id
WHERE e.department = 'Sales'
GROUP BY e.employee_id, e.first_name, e.last_name
)
SELECT
*,
CASE
WHEN total_sales > (SELECT AVG(total_sales) FROM performance_metrics) * 1.5
THEN 'Star Performer'
WHEN total_sales > (SELECT AVG(total_sales) FROM performance_metrics)
THEN 'Above Average'
WHEN total_sales > 0
THEN 'Active'
ELSE 'No Sales'
END as performance_category
FROM performance_metrics
ORDER BY total_sales DESC;
-- Identify employees with improving sales performance
WITH monthly_sales AS (
SELECT
employee_id,
DATE_FORMAT(sale_date, '%Y-%m') as month,
SUM(amount) as monthly_total
FROM sales
GROUP BY employee_id, DATE_FORMAT(sale_date, '%Y-%m')
)
SELECT DISTINCT
e.first_name,
e.last_name,
(SELECT monthly_total
FROM monthly_sales ms1
WHERE ms1.employee_id = e.employee_id
AND ms1.month = '2024-02') as feb_sales,
(SELECT monthly_total
FROM monthly_sales ms2
WHERE ms2.employee_id = e.employee_id
AND ms2.month = '2024-01') as jan_sales,
ROUND(
((SELECT monthly_total FROM monthly_sales ms1 WHERE ms1.employee_id = e.employee_id AND ms1.month = '2024-02') -
(SELECT monthly_total FROM monthly_sales ms2 WHERE ms2.employee_id = e.employee_id AND ms2.month = '2024-01')) /
(SELECT monthly_total FROM monthly_sales ms2 WHERE ms2.employee_id = e.employee_id AND ms2.month = '2024-01') * 100,
2) as growth_percentage
FROM employees e
WHERE e.employee_id IN (SELECT DISTINCT employee_id FROM sales)
AND EXISTS (
SELECT 1 FROM monthly_sales
WHERE employee_id = e.employee_id
AND month = '2024-02'
);
-- ❌ Less efficient
SELECT * FROM employees
WHERE employee_id IN (SELECT employee_id FROM sales);
-- ✅ More efficient
SELECT * FROM employees e
WHERE EXISTS (SELECT 1 FROM sales s WHERE s.employee_id = e.employee_id);
-- Using subquery
SELECT * FROM employees
WHERE department = (SELECT department FROM employees WHERE employee_id = 5);
-- Using JOIN (often faster)
SELECT e1.* FROM employees e1
JOIN employees e2 ON e1.department = e2.department
WHERE e2.employee_id = 5;