Practice and reinforce the concepts from Lesson 8
The Online Bookstore app is in crisis! With 500,000 books in the database, customers are leaving because searches take 10+ seconds. Your mission: optimize the database to make every search lightning fast (under 100ms)!
<iframehttps://codesandbox.io/embed/database-optimization-challenge-starter?fontsize=14&hidenavigation=1&theme=dark&view=editor&module=%2Fsrc%2Findex.js" style={ width: '100%', height: '600px', border: 0, borderRadius: '8px', overflow: 'hidden' } title="Database Optimization Challenge" allow="accelerometer; ambient-light-sensor; camera; encrypted-media; geolocation; gyroscope; hid; microphone; midi; payment; usb; vr; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-presentation allow-same-origin allow-scripts"
-- Check current performance baseline
CALL test_all_queries();
-- View existing indexes
SELECT table_name, index_name, column_name
FROM information_schema.statistics
WHERE table_schema = 'bookstore';
-- Monitor query execution
SET profiling = 1;
-- Run your query here
SHOW PROFILES;
Current: 8,234ms -> Target: < 50ms
-- Slow query that needs optimization
SELECT * FROM books
WHERE title LIKE '%Harry%'
ORDER BY publication_date DESC;
Current: 12,567ms -> Target: < 100ms
-- Find top 10 books by a specific author
SELECT b.*, SUM(s.quantity) as total_sales
FROM books b
JOIN book_authors ba ON b.id = ba.book_id
JOIN authors a ON ba.author_id = a.id
JOIN sales s ON b.id = s.book_id
WHERE a.name = 'Stephen King'
GROUP BY b.id
ORDER BY total_sales DESC
LIMIT 10;
Current: 15,892ms -> Target: < 200ms
-- Monthly sales by genre
SELECT
DATE_FORMAT(s.sale_date, '%Y-%m') as month,
b.genre,
SUM(s.quantity * s.price) as revenue
FROM sales s
JOIN books b ON s.book_id = b.id
WHERE s.sale_date >= DATE_SUB(NOW(), INTERVAL 1 YEAR)
GROUP BY month, b.genre
ORDER BY month DESC, revenue DESC;
Enable fast search across title, author name, AND book description simultaneously!
Use EXPLAIN to understand why queries are slow:
EXPLAIN SELECT * FROM books WHERE title LIKE '%Harry%';
What to look for:
Try These Diagnostic Queries:
-- Analyze table statistics
ANALYZE TABLE books;
-- Check table size
SELECT
table_name,
ROUND(data_length/1024/1024, 2) AS data_mb,
ROUND(index_length/1024/1024, 2) AS index_mb
FROM information_schema.tables
WHERE table_schema = 'bookstore';
-- Find missing indexes
SELECT
s.sql_text,
s.execution_count,
s.total_latency
FROM performance_schema.statements_summary_by_digest s
WHERE s.sql_text LIKE '%books%'
ORDER BY s.total_latency DESC;
-- Example index creation
CREATE INDEX idx_books_title ON books(title);
-- But wait! Will this help with LIKE '%Harry%'? ๐ค
Index Strategy Tips:
Progressive Index Building:
-- Step 1: Basic single-column index
CREATE INDEX idx_isbn ON books(isbn);
-- Step 2: Composite index for common queries
CREATE INDEX idx_books_genre_year ON books(genre, publication_year);
-- Step 3: Covering index to eliminate lookups
CREATE INDEX idx_sales_covering
ON sales(book_id, sale_date, quantity, price);
-- Step 4: Full-text for flexible search
ALTER TABLE books ADD FULLTEXT(title, description);
After each index:
-- Check your indexes
SHOW INDEXES FROM books;
-- Test insert performance
INSERT INTO books (title, isbn, genre)
VALUES ('Test Book', '9781234567890', 'Fiction');
-- Measure index impact
SELECT
index_name,
stat_value AS cardinality,
ROUND((stat_value / (SELECT COUNT(*) FROM books)) * 100, 2) AS selectivity_pct
FROM mysql.innodb_index_stats
WHERE database_name = 'bookstore'
AND table_name = 'books';
-- Use optimizer hints
SELECT /*+ INDEX(books idx_genre) */ *
FROM books
WHERE genre = 'Science Fiction';
-- Force index usage
SELECT * FROM books FORCE INDEX (idx_isbn)
WHERE isbn = '9781234567890';
-- Analyze query execution plan
EXPLAIN ANALYZE
SELECT b.title, COUNT(s.id) as sales_count
FROM books b
LEFT JOIN sales s ON b.id = s.book_id
GROUP BY b.id;
-- Create full-text index
ALTER TABLE books ADD FULLTEXT(title);
-- Use MATCH AGAINST for searches
SELECT * FROM books
WHERE MATCH(title) AGAINST('Harry' IN NATURAL LANGUAGE MODE);
-- Optimize the author-book relationship
CREATE INDEX idx_book_authors_composite
ON book_authors(author_id, book_id);
-- Include all needed columns in the index
CREATE INDEX idx_sales_analytics
ON sales(sale_date, book_id, quantity, price);
-- Mystery Query: Why is this so slow?
SELECT
b.title,
a.name as author,
COUNT(DISTINCT s.customer_id) as unique_buyers
FROM books b
JOIN book_authors ba ON b.id = ba.book_id
JOIN authors a ON ba.author_id = a.id
LEFT JOIN sales s ON b.id = s.book_id
WHERE b.publication_year = 2024
AND a.country = 'USA'
GROUP BY b.id, a.id
HAVING unique_buyers > 100
ORDER BY unique_buyers DESC
LIMIT 20;
-- Your mission: Create the perfect index strategy!
-- Hint: Think about join order and filtering columns
-- This table gets 1000 inserts/minute but only 10 reads/minute
CREATE TABLE activity_logs (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT,
action VARCHAR(50),
timestamp DATETIME,
ip_address VARCHAR(45),
user_agent TEXT
);
-- Common queries:
-- 1. SELECT * FROM activity_logs WHERE user_id = ? AND timestamp > ?
-- 2. SELECT COUNT(*) FROM activity_logs WHERE action = ? AND DATE(timestamp) = ?
-- Question: Which indexes would you create? Why?
-- Current slow query (3.5 seconds)
SELECT * FROM books
WHERE title LIKE '%wizard%'
OR description LIKE '%magic%'
OR title LIKE '%sorcerer%'
ORDER BY publication_date DESC
LIMIT 50;
-- Transform this into a sub-100ms query using full-text search
-- Bonus: Add relevance scoring!
Your optimizations will be tested with 10x normal traffic. Will they hold up?
Simulation:
-- Simulate high-concurrency reads
DELIMITER $
CREATE PROCEDURE simulate_black_friday()
BEGIN
DECLARE i INT DEFAULT 0;
WHILE i < 1000 DO
SELECT * FROM books WHERE genre = 'Fiction' ORDER BY RAND() LIMIT 10;
SET i = i + 1;
END WHILE;
END$
DELIMITER ;
-- Monitor performance under load
SHOW PROCESSLIST;
SHOW ENGINE INNODB STATUS;
Thousands of new books added daily. How's your insert performance?
Test Your Index Impact:
-- Measure insert performance
SET @start_time = NOW(6);
INSERT INTO books (title, isbn, genre, publication_year)
SELECT
CONCAT('Book ', n),
CONCAT('978-', LPAD(n, 10, '0')),
ELT(1 + FLOOR(RAND() * 5), 'Fiction', 'Mystery', 'Romance', 'Sci-Fi', 'History'),
2024
FROM (
SELECT a.N + b.N * 10 + c.N * 100 AS n
FROM (SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) a,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) b,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) c
) numbers;
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, NOW(6)) / 1000 AS insert_time_ms;
Need sub-100ms response for ALL queries. Can you achieve it?
Mobile-Optimized Queries:
-- Autocomplete search (must be < 50ms)
SELECT title, author_names
FROM book_search_view
WHERE title LIKE 'Harry P%'
LIMIT 10;
-- Category browse (must be < 100ms)
SELECT id, title, cover_image_url, price
FROM books
WHERE genre = 'Mystery'
AND in_stock = 1
ORDER BY popularity_score DESC
LIMIT 20 OFFSET 40;
-- Personalized recommendations (must be < 150ms)
SELECT b.*,
COUNT(DISTINCT s2.customer_id) as also_bought_count
FROM books b
JOIN sales s1 ON b.id = s1.book_id
JOIN sales s2 ON s1.customer_id = s2.customer_id AND s2.book_id != b.id
WHERE s1.book_id = 12345
GROUP BY b.id
ORDER BY also_bought_count DESC
LIMIT 10;
Track your progress with the built-in performance monitor:
Issue 1: Index Not Being Used
-- Check why MySQL ignores your index
EXPLAIN FORMAT=JSON
SELECT * FROM books WHERE title LIKE '%Harry%';
-- Solutions:
-- 1. Use full-text search for wildcards
-- 2. Check index selectivity
-- 3. Update table statistics
ANALYZE TABLE books;
Issue 2: Slow Despite Index
-- Check if index is fragmented
SELECT
table_name,
index_name,
stat_name,
stat_value
FROM mysql.innodb_index_stats
WHERE table_name = 'books';
-- Solution: Rebuild index
ALTER TABLE books DROP INDEX idx_title, ADD INDEX idx_title(title);
Issue 3: Insert Performance Degraded
-- Monitor index overhead
SHOW PROFILE FOR QUERY 1;
-- Solutions:
-- 1. Batch inserts
-- 2. Disable keys temporarily
-- 3. Use INSERT DELAYED
Which indexes did you create first?
How did you handle the trade-offs?
What surprised you most?
Post your before/after EXPLAIN results. What changed?
Example Discussion Post:
Before optimization:
- Type: ALL (full table scan)
- Rows examined: 500,000
- Extra: Using filesort
After adding idx_genre_year:
- Type: range
- Rows examined: 1,247
- Extra: Using index condition
Result: 98% reduction in rows examined!
One. When would you NOT want an index?
2. How do you handle queries with OR conditions?
-- Problem: OR prevents index usage
WHERE genre = 'Fiction' OR author_id = 123
-- Solution 1: Use UNION
(SELECT * FROM books WHERE genre = 'Fiction')
UNION
(SELECT * FROM books WHERE author_id = 123)
-- Solution 2: Use IN clause when possible
WHERE genre IN ('Fiction', 'Mystery')
3. What about indexing JSON columns?
-- Create virtual column for JSON field
ALTER TABLE books
ADD COLUMN metadata_language VARCHAR(10)
GENERATED ALWAYS AS (JSON_UNQUOTE(JSON_EXTRACT(metadata, '$.language'))) STORED;
-- Index the virtual column
CREATE INDEX idx_language ON books(metadata_language);
Share your experience! What was your biggest optimization win?
Think about a database you've worked with:
-- ๐งช Experiment Zone: Test your optimization skills!
-- Challenge 1: Fix the slow ISBN lookup
-- Current: 5,432ms
SELECT * FROM books WHERE isbn = '9781234567890';
-- Hint: What type of index would help here?
-- Challenge 2: Optimize date range queries
-- Current: 8,765ms
SELECT * FROM sales
WHERE sale_date BETWEEN '2024-01-01' AND '2024-12-31'
ORDER BY sale_date DESC;
-- Hint: Think about the WHERE and ORDER BY clauses
-- Challenge 3: Speed up the author search
-- Current: 6,234ms
SELECT DISTINCT a.* FROM authors a
JOIN book_authors ba ON a.id = ba.author_id
JOIN books b ON ba.book_id = b.id
WHERE b.genre = 'Science Fiction';
-- Hint: Which table should drive this query?
-- ๐ See the difference indexes make!
-- Before Index (Full Table Scan):
-- Books table: 500,000 rows
-- Time: 8,234ms
-- Rows examined: 500,000
-- CPU usage: 87%
-- After Index (Index Seek):
-- Time: 45ms (183x faster!)
-- Rows examined: 127
-- CPU usage: 3%
-- Try it yourself:
SHOW INDEX FROM books;
ANALYZE TABLE books;
-- Step 1: Create full-text index for flexible searching
ALTER TABLE books ADD FULLTEXT ft_title (title);
-- Step 2: Rewrite query to use full-text search
SELECT *, MATCH(title) AGAINST('Harry' IN NATURAL LANGUAGE MODE) as relevance
FROM books
WHERE MATCH(title) AGAINST('Harry' IN NATURAL LANGUAGE MODE)
ORDER BY relevance DESC, publication_date DESC;
-- Result: 8,234ms โ 42ms โ
-- Step 1: Create composite index for the join
CREATE INDEX idx_book_authors_lookup ON book_authors(author_id, book_id);
-- Step 2: Create covering index for sales aggregation
CREATE INDEX idx_sales_aggregation ON sales(book_id, quantity, price);
-- Step 3: Add index on author name for quick lookup
CREATE INDEX idx_authors_name ON authors(name);
-- Result: 12,567ms โ 89ms โ
-- Step 1: Create composite index for date filtering and grouping
CREATE INDEX idx_sales_date_analysis
ON sales(sale_date, book_id, quantity, price);
-- Step 2: Add genre index for efficient joins
CREATE INDEX idx_books_genre ON books(genre);
-- Result: 15,892ms โ 156ms โ
-- Create a combined full-text index
ALTER TABLE books
ADD FULLTEXT ft_search (title, description);
-- Create a materialized view for author names
CREATE VIEW book_search_view AS
SELECT b.*, GROUP_CONCAT(a.name) as author_names
FROM books b
LEFT JOIN book_authors ba ON b.id = ba.book_id
LEFT JOIN authors a ON ba.author_id = a.id
GROUP BY b.id;
-- Ultra-fast multi-field search
SELECT * FROM book_search_view
WHERE MATCH(title, description) AGAINST('wizard magic' IN BOOLEAN MODE)
OR author_names LIKE '%Rowling%'
ORDER BY relevance DESC;
-- Result: Complex search in 127ms! ๐
Query Type | Best Index Type | Example |
---|---|---|
Exact match | B-Tree | WHERE id = 123 |
Range queries | B-Tree | WHERE date > '2024-01-01'` |
Text search | Full-Text | WHERE title LIKE '%harry%' |
Sorting | Covering | ORDER BY date, amount |
Joins | Composite | JOIN ON a.id = b.a_id |
-- Partition sales by year for faster date queries
ALTER TABLE sales
PARTITION BY RANGE (YEAR(sale_date)) (
PARTITION p2022 VALUES LESS THAN (2023),
PARTITION p2023 VALUES LESS THAN (2024),
PARTITION p2024 VALUES LESS THAN (2025)
);
-- Force MySQL to use specific index
SELECT * FROM books USE INDEX (idx_books_genre)
WHERE genre = 'Mystery' AND year > 2020;
-- Pre-compute and index complex calculations
ALTER TABLE sales
ADD COLUMN month_year VARCHAR(7)
GENERATED ALWAYS AS (DATE_FORMAT(sale_date, '%Y-%m')) STORED;
CREATE INDEX idx_month_year ON sales(month_year);
Did you achieve all the performance targets? Here's how you did:
Ready to become a database performance wizard? Try these:
Congratulations on completing the Speed Up the Bookstore Search challenge! You've transformed those sluggish queries into lightning-fast searches! ๐