Welcome, Data Analyst! The finance team needs your help building a powerful expense tracking dashboard. You'll analyze spending patterns, identify budget overruns, and help employees understand their expenses better.
Before we start analyzing expenses, let's set up our expense tracking database!
-- Create expense categories table
CREATE TABLE categories (
category_id INTEGER PRIMARY KEY,
category_name TEXT NOT NULL,
budget_limit DECIMAL(10,2),
category_type TEXT CHECK(category_type IN ('Essential', 'Discretionary'))
);
-- Create employees table
CREATE TABLE employees (
employee_id INTEGER PRIMARY KEY,
first_name TEXT NOT NULL,
last_name TEXT NOT NULL,
department TEXT,
email TEXT UNIQUE,
monthly_budget DECIMAL(10,2) DEFAULT 1000.00
);
-- Create expenses table
CREATE TABLE expenses (
expense_id INTEGER PRIMARY KEY,
employee_id INTEGER,
category_id INTEGER,
expense_date DATE NOT NULL,
amount DECIMAL(10,2) NOT NULL,
description TEXT,
vendor TEXT,
payment_method TEXT CHECK(payment_method IN ('Credit Card', 'Cash', 'Bank Transfer', 'Company Card')),
status TEXT DEFAULT 'Pending' CHECK(status IN ('Pending', 'Approved', 'Rejected', 'Reimbursed')),
FOREIGN KEY (employee_id) REFERENCES employees(employee_id),
FOREIGN KEY (category_id) REFERENCES categories(category_id)
);
-- Insert expense categories
INSERT INTO categories (category_name, budget_limit, category_type) VALUES
('Travel', 2000.00, 'Discretionary'),
('Meals', 500.00, 'Discretionary'),
('Office Supplies', 300.00, 'Essential'),
('Software', 1000.00, 'Essential'),
('Training', 1500.00, 'Discretionary'),
('Transportation', 400.00, 'Essential'),
('Entertainment', 200.00, 'Discretionary');
-- Insert employees
INSERT INTO employees (first_name, last_name, department, email) VALUES
('Sarah', 'Johnson', 'Marketing', 'sarah.j@company.com'),
('Mike', 'Chen', 'IT', 'mike.c@company.com'),
('Emily', 'Davis', 'Sales', 'emily.d@company.com'),
('David', 'Wilson', 'Finance', 'david.w@company.com'),
('Lisa', 'Anderson', 'HR', 'lisa.a@company.com');
-- Insert sample expenses
INSERT INTO expenses (employee_id, category_id, expense_date, amount, description, vendor, payment_method, status) VALUES
(1, 1, '2024-01-15', 450.00, 'Client meeting in NYC', 'Delta Airlines', 'Company Card', 'Approved'),
(1, 2, '2024-01-15', 85.50, 'Business lunch with client', 'The Capital Grille', 'Company Card', 'Approved'),
(2, 4, '2024-01-10', 129.99, 'IntelliJ IDEA license', 'JetBrains', 'Credit Card', 'Reimbursed'),
(3, 1, '2024-01-20', 325.00, 'Sales conference travel', 'Southwest Airlines', 'Company Card', 'Approved'),
(2, 3, '2024-01-12', 67.45, 'Ergonomic keyboard', 'Amazon', 'Credit Card', 'Pending'),
(4, 3, '2024-01-08', 234.00, 'Office furniture', 'Office Depot', 'Company Card', 'Approved'),
(5, 5, '2024-01-22', 899.00, 'HR certification course', 'SHRM', 'Bank Transfer', 'Approved'),
(3, 2, '2024-01-18', 156.75, 'Team dinner', 'Olive Garden', 'Cash', 'Pending'),
(1, 6, '2024-01-16', 45.00, 'Uber to client office', 'Uber', 'Credit Card', 'Reimbursed'),
(2, 4, '2024-01-25', 49.99, 'GitHub subscription', 'GitHub', 'Credit Card', 'Approved');
Think of SELECT as your control panel for data exploration. Here's how it works:
SELECT column1, column2, ... -- Choose what to see
FROM table_name -- Choose where to look
WHERE condition -- Filter what you find
ORDER BY column -- Sort your results
LIMIT number; -- Control how much you see
Try retrieving all expenses:
SELECT * FROM expenses;
:emoji: Challenge: Modify this to show only the amount and description columns!
The finance team needs you to investigate different expense patterns. Let's use WHERE clauses to filter our data!
-- Find expenses exactly $100
SELECT * FROM expenses WHERE amount = 100.00;
-- Find expenses over $200
SELECT * FROM expenses WHERE amount > 200;
-- Find expenses $100 or less
SELECT * FROM expenses WHERE amount <= 100;
-- Find expenses between $50 and $150
SELECT * FROM expenses WHERE amount BETWEEN 50 AND 150;
-- Find specific payment methods
SELECT * FROM expenses WHERE payment_method IN ('Company Card', 'Credit Card');
-- Exclude certain statuses
SELECT * FROM expenses WHERE status NOT IN ('Rejected', 'Pending');
Challenge 1: Find all expenses over $500
-- Your query here!
Challenge 2: Find all approved expenses
-- Your query here!
Challenge 3: Find expenses between $100 and $300
-- Your query here!
Now let's handle more complex queries that the finance team needs!
-- Find large approved travel expenses
SELECT * FROM expenses
WHERE category_id = 1 -- Travel category
AND amount > 300
AND status = 'Approved';
-- Find IT department software expenses
SELECT e.first_name, e.last_name, ex.amount, ex.description
FROM expenses ex
JOIN employees e ON ex.employee_id = e.employee_id
WHERE e.department = 'IT'
AND ex.category_id = 4 -- Software category
AND ex.amount > 50;
-- Find expenses paid by company card OR bank transfer
SELECT * FROM expenses
WHERE payment_method = 'Company Card'
OR payment_method = 'Bank Transfer';
-- Find high-priority expenses (Travel OR Training over $500)
SELECT * FROM expenses
WHERE (category_id = 1 OR category_id = 5)
AND amount > 500;
-- Find all non-pending expenses
SELECT * FROM expenses
WHERE NOT status = 'Pending';
-- Find expenses NOT in Travel or Entertainment
SELECT * FROM expenses
WHERE NOT (category_id = 1 OR category_id = 7);
Mission: The CFO wants to audit high-risk expenses. Find:
-- Build your query here!
SELECT * FROM expenses
WHERE (amount > 200 AND status = 'Pending')
OR status = 'Rejected';
The audit team suspects fraudulent expenses. Use pattern matching to investigate!
%
- Any number of characters (like * in file searches)_
- Exactly one character (like ? in file searches)-- Find all airline expenses
SELECT * FROM expenses
WHERE vendor LIKE '%Airlines%';
-- Find Uber or Lyft rides
SELECT * FROM expenses
WHERE vendor LIKE 'Uber%' OR vendor LIKE 'Lyft%';
-- Find expenses with "client" in description
SELECT * FROM expenses
WHERE description LIKE '%client%';
-- Find Amazon purchases
SELECT * FROM expenses
WHERE vendor LIKE 'Amazon%';
-- Find employees with company email
SELECT * FROM employees
WHERE email LIKE '%@company.com';
-- Find 3-letter department codes
SELECT * FROM employees
WHERE department LIKE '___';
-- Find suspiciously round amounts (like $100.00, $200.00)
SELECT * FROM expenses
WHERE CAST(amount AS TEXT) LIKE '%.00';
-- Find weekend expenses (potential personal use)
SELECT * FROM expenses
WHERE strftime('%w', expense_date) IN ('0', '6'); -- 0=Sunday, 6=Saturday
-- Find expenses with generic descriptions
SELECT * FROM expenses
WHERE description LIKE '%misc%'
OR description LIKE '%other%'
OR description LIKE '%various%';
Mission: Find all meal-related expenses by searching for common restaurant patterns:
-- Find expenses at restaurants (hint: look for "Grill", "Cafe", "Restaurant", etc.)
-- Your query here!
The HR team needs to identify incomplete expense records and missing information!
-- Find expenses missing descriptions
SELECT * FROM expenses
WHERE description IS NULL;
-- Find expenses WITH descriptions
SELECT * FROM expenses
WHERE description IS NOT NULL;
-- ⚠️ Common Mistake - These DON'T work!
SELECT * FROM expenses WHERE description = NULL; -- Wrong!
SELECT * FROM expenses WHERE description != NULL; -- Wrong!
-- Show "No description" for NULL descriptions
SELECT expense_id,
COALESCE(description, 'No description provided') as description,
amount
FROM expenses;
-- Calculate totals, treating NULL as 0
SELECT employee_id,
SUM(COALESCE(amount, 0)) as total_spent
FROM expenses
GROUP BY employee_id;
-- Find incomplete expense records
SELECT * FROM expenses
WHERE description IS NULL
OR vendor IS NULL;
Mission: Create a data quality report showing:
-- Your query here!
The executives need organized expense reports. Let's master sorting!
-- Show expenses from highest to lowest
SELECT * FROM expenses
ORDER BY amount DESC;
-- Show expenses from oldest to newest
SELECT * FROM expenses
ORDER BY expense_date ASC; -- ASC is optional (default)
-- Show employees alphabetically
SELECT * FROM employees
ORDER BY last_name, first_name;
-- Sort by status priority, then by amount
SELECT * FROM expenses
ORDER BY status ASC, amount DESC;
-- Create a spending leaderboard
SELECT e.first_name, e.last_name, SUM(ex.amount) as total_spent
FROM employees e
JOIN expenses ex ON e.employee_id = ex.employee_id
GROUP BY e.employee_id
ORDER BY total_spent DESC;
-- Sort by month, then by category
SELECT strftime('%Y-%m', expense_date) as month,
category_id,
SUM(amount) as monthly_total
FROM expenses
GROUP BY month, category_id
ORDER BY month DESC, monthly_total DESC;
-- Priority-based sorting for approval queue
SELECT * FROM expenses
ORDER BY
CASE
WHEN status = 'Pending' AND amount > 500 THEN 1 -- High priority
WHEN status = 'Pending' AND amount > 200 THEN 2 -- Medium priority
WHEN status = 'Pending' THEN 3 -- Low priority
ELSE 4 -- Already processed
END,
expense_date ASC;
-- Sort by vendor name length (fraud detection)
SELECT vendor, COUNT(*) as frequency
FROM expenses
GROUP BY vendor
ORDER BY LENGTH(vendor) ASC, frequency DESC;
Mission: Create an executive summary showing:
-- Your query here!
The mobile app team needs paginated results for the expense tracker app!
-- Show top 5 biggest expenses
SELECT * FROM expenses
ORDER BY amount DESC
LIMIT 5;
-- Show 3 most recent expenses
SELECT * FROM expenses
ORDER BY expense_date DESC
LIMIT 3;
-- Top spending employees
SELECT e.first_name, e.last_name, SUM(ex.amount) as total
FROM employees e
JOIN expenses ex ON e.employee_id = ex.employee_id
GROUP BY e.employee_id
ORDER BY total DESC
LIMIT 3;
-- Page 1: First 5 expenses
SELECT * FROM expenses
ORDER BY expense_date DESC
LIMIT 5 OFFSET 0;
-- Page 2: Next 5 expenses
SELECT * FROM expenses
ORDER BY expense_date DESC
LIMIT 5 OFFSET 5;
-- Page 3: Skip 10, show next 5
SELECT * FROM expenses
ORDER BY expense_date DESC
LIMIT 5 OFFSET 10;
-- Quick stats for dashboard header
SELECT
(SELECT COUNT(*) FROM expenses WHERE status = 'Pending') as pending_count,
(SELECT SUM(amount) FROM expenses WHERE status = 'Pending') as pending_total,
(SELECT COUNT(*) FROM expenses WHERE expense_date >= date('now', '-7 days')) as week_count;
-- Recent activity feed (last 10 transactions)
SELECT e.first_name, ex.amount, ex.description, ex.expense_date
FROM expenses ex
JOIN employees e ON ex.employee_id = e.employee_id
ORDER BY ex.expense_date DESC
LIMIT 10;
Mission: Create a paginated expense report with:
-- Your query here!
Now it's time to combine everything you've learned! The CEO wants a comprehensive expense analytics dashboard.
-- Total expenses by status
SELECT status,
COUNT(*) as count,
SUM(amount) as total
FROM expenses
GROUP BY status;
-- Monthly spending trend
SELECT strftime('%Y-%m', expense_date) as month,
SUM(amount) as monthly_total,
COUNT(*) as expense_count
FROM expenses
GROUP BY month
ORDER BY month DESC
LIMIT 6;
-- Employee spending rankings
SELECT e.first_name || ' ' || e.last_name as employee_name,
e.department,
COUNT(ex.expense_id) as expense_count,
SUM(ex.amount) as total_spent,
AVG(ex.amount) as avg_expense
FROM employees e
LEFT JOIN expenses ex ON e.employee_id = ex.employee_id
GROUP BY e.employee_id
ORDER BY total_spent DESC;
-- Spending by category with budget comparison
SELECT c.category_name,
c.category_type,
c.budget_limit,
SUM(e.amount) as actual_spent,
c.budget_limit - SUM(e.amount) as remaining_budget,
CASE
WHEN SUM(e.amount) > c.budget_limit THEN 'OVER BUDGET'
WHEN SUM(e.amount) > c.budget_limit * 0.8 THEN 'Warning'
ELSE 'On Track'
END as budget_status
FROM categories c
LEFT JOIN expenses e ON c.category_id = e.category_id
GROUP BY c.category_id
ORDER BY actual_spent DESC;
-- Suspicious patterns
-- Large pending expenses
SELECT 'Large Pending Expense' as alert_type,
e.first_name || ' ' || e.last_name as employee,
ex.amount,
ex.description
FROM expenses ex
JOIN employees e ON ex.employee_id = e.employee_id
WHERE ex.status = 'Pending' AND ex.amount > 500
UNION ALL
-- Weekend expenses
SELECT 'Weekend Expense' as alert_type,
e.first_name || ' ' || e.last_name as employee,
ex.amount,
ex.description
FROM expenses ex
JOIN employees e ON ex.employee_id = e.employee_id
WHERE strftime('%w', ex.expense_date) IN ('0', '6')
UNION ALL
-- Duplicate expenses (same amount, same day)
SELECT 'Potential Duplicate' as alert_type,
e.first_name || ' ' || e.last_name as employee,
ex.amount,
'Multiple expenses on ' || ex.expense_date
FROM expenses ex
JOIN employees e ON ex.employee_id = e.employee_id
GROUP BY ex.employee_id, ex.expense_date, ex.amount
HAVING COUNT(*) > 1;
Create a single query that shows:
-- Build your executive summary query here!
-- Hint: You might need to use subqueries or CTEs
-- Create indexes for common searches
CREATE INDEX idx_expense_date ON expenses(expense_date);
CREATE INDEX idx_expense_status ON expenses(status);
CREATE INDEX idx_expense_employee ON expenses(employee_id);
-- Composite index for complex queries
CREATE INDEX idx_expense_status_amount ON expenses(status, amount);
-- Weekly comparison
WITH this_week AS (
SELECT SUM(amount) as total
FROM expenses
WHERE expense_date >= date('now', '-7 days')
),
last_week AS (
SELECT SUM(amount) as total
FROM expenses
WHERE expense_date BETWEEN date('now', '-14 days') AND date('now', '-8 days')
)
SELECT
tw.total as this_week_total,
lw.total as last_week_total,
ROUND(((tw.total - lw.total) / lw.total) * 100, 2) as percent_change
FROM this_week tw, last_week lw;
-- Department budget utilization
SELECT
e.department,
COUNT(DISTINCT e.employee_id) as employee_count,
SUM(ex.amount) as total_spent,
SUM(e.monthly_budget) as total_budget,
ROUND((SUM(ex.amount) / SUM(e.monthly_budget)) * 100, 2) as utilization_percent
FROM employees e
LEFT JOIN expenses ex ON e.employee_id = ex.employee_id
GROUP BY e.department
ORDER BY utilization_percent DESC;
You've mastered the SELECT statement and built a powerful expense tracking dashboard!
In the next lesson, you'll learn about data aggregation and grouping to create even more powerful analytics!
-- Essential SELECT syntax
SELECT columns -- What to show
FROM table -- Where to look
WHERE conditions -- Filter rows
ORDER BY column -- Sort results
LIMIT n OFFSET m; -- Pagination
-- Key operators
=, !=, <, >, <=, >= -- Comparisons
AND, OR, NOT -- Logic
LIKE '%pattern%' -- Pattern matching
IS NULL, IS NOT NULL -- NULL handling
IN (list) -- List matching
BETWEEN x AND y -- Range check
Keep practicing with your expense data to become a SQL master! :rocket: