Practice and reinforce the concepts from Lesson 10
You're working on ProfilePlus, a user profile system handling 10,000 daily updates. The perfectly normalized database is creating performance bottlenecks when loading user profiles - each profile requires 6 table joins, causing 2-3 second load times.
The Challenge:
Your Mission: Apply strategic denormalization to create a fast user profile cache while maintaining data integrity.
What You'll Learn:
# Connect to your Docker PostgreSQL instance
docker exec -it web4_postgres psql -U student -d web4_dev
# Enable query timing
\timing on
-- Original normalized structure (slow but consistent)
CREATE TABLE users (
user_id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE user_profiles (
profile_id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(user_id),
display_name VARCHAR(100),
bio TEXT,
avatar_url VARCHAR(500)
);
CREATE TABLE user_stats (
user_id INTEGER REFERENCES users(user_id) PRIMARY KEY,
post_count INTEGER DEFAULT 0,
follower_count INTEGER DEFAULT 0,
following_count INTEGER DEFAULT 0,
last_active TIMESTAMPTZ
);
CREATE TABLE user_preferences (
user_id INTEGER REFERENCES users(user_id) PRIMARY KEY,
theme VARCHAR(20) DEFAULT 'light',
notifications_enabled BOOLEAN DEFAULT true,
privacy_level VARCHAR(20) DEFAULT 'public'
);
-- Insert sample users and related data
INSERT INTO users (username, email)
SELECT 'user_' || generate_series, 'user' || generate_series || '@profile.app'
FROM generate_series(1, 1000);
INSERT INTO user_profiles (user_id, display_name, bio, avatar_url)
SELECT user_id, 'User ' || user_id, 'Bio for user ' || user_id, 'avatar' || user_id || '.jpg'
FROM users;
INSERT INTO user_stats (user_id, post_count, follower_count, following_count, last_active)
SELECT user_id, (random() * 100)::INT, (random() * 500)::INT, (random() * 200)::INT, NOW() - (random() * 30)::INT * INTERVAL '1 day'
FROM users;
INSERT INTO user_preferences (user_id, theme, notifications_enabled, privacy_level)
SELECT user_id, 'light', true, 'public'
FROM users;
-- This normalized query is SLOW (4+ table joins)
EXPLAIN ANALYZE
SELECT
u.user_id,
u.username,
u.email,
p.display_name,
p.bio,
p.avatar_url,
s.post_count,
s.follower_count,
s.following_count,
s.last_active,
pr.theme,
pr.notifications_enabled
FROM users u
JOIN user_profiles p ON u.user_id = p.user_id
JOIN user_stats s ON u.user_id = s.user_id
JOIN user_preferences pr ON u.user_id = pr.user_id
WHERE u.user_id = 100;
-- Record execution time: _____ ms
-- Create denormalized table for fast profile loading
CREATE TABLE user_profile_cache (
user_id INTEGER PRIMARY KEY,
username VARCHAR(50), -- From users table
email VARCHAR(100), -- From users table
display_name VARCHAR(100), -- From user_profiles
bio TEXT, -- From user_profiles
avatar_url VARCHAR(500), -- From user_profiles
post_count INTEGER, -- From user_stats
follower_count INTEGER, -- From user_stats
following_count INTEGER, -- From user_stats
last_active TIMESTAMPTZ, -- From user_stats
theme VARCHAR(20), -- From user_preferences
notifications_enabled BOOLEAN, -- From user_preferences
cache_updated TIMESTAMPTZ DEFAULT NOW()
);
-- Populate the cache from normalized tables
INSERT INTO user_profile_cache (
user_id, username, email, display_name, bio, avatar_url,
post_count, follower_count, following_count, last_active,
theme, notifications_enabled
)
SELECT
u.user_id,
u.username,
u.email,
p.display_name,
p.bio,
p.avatar_url,
s.post_count,
s.follower_count,
s.following_count,
s.last_active,
pr.theme,
pr.notifications_enabled
FROM users u
JOIN user_profiles p ON u.user_id = p.user_id
JOIN user_stats s ON u.user_id = s.user_id
JOIN user_preferences pr ON u.user_id = pr.user_id;
-- NEW: Super fast single-table query
EXPLAIN ANALYZE
SELECT
user_id,
username,
display_name,
bio,
avatar_url,
post_count,
follower_count,
theme
FROM user_profile_cache
WHERE user_id = 100;
-- Record the new execution time: _____ ms
-- Performance improvement: _____ x faster
-- Compare query performance
\timing on
-- Test normalized query (multiple joins)
SELECT 'Normalized Query Test';
SELECT COUNT(*) FROM (
SELECT u.username, p.display_name, s.post_count
FROM users u
JOIN user_profiles p ON u.user_id = p.user_id
JOIN user_stats s ON u.user_id = s.user_id
WHERE u.user_id BETWEEN 1 AND 100
) results;
-- Test denormalized query (single table)
SELECT 'Denormalized Query Test';
SELECT COUNT(*) FROM (
SELECT username, display_name, post_count
FROM user_profile_cache
WHERE user_id BETWEEN 1 AND 100
) results;
-- Function to sync cache when profile data changes
CREATE OR REPLACE FUNCTION sync_user_profile_cache()
RETURNS TRIGGER AS $
BEGIN
-- Update the cache when any profile data changes
UPDATE user_profile_cache
SET
display_name = (SELECT display_name FROM user_profiles WHERE user_id = NEW.user_id),
bio = (SELECT bio FROM user_profiles WHERE user_id = NEW.user_id),
avatar_url = (SELECT avatar_url FROM user_profiles WHERE user_id = NEW.user_id),
cache_updated = NOW()
WHERE user_id = NEW.user_id;
RETURN NEW;
END;
$ LANGUAGE plpgsql;
-- Trigger to keep cache synchronized
CREATE TRIGGER profile_cache_sync
AFTER UPDATE ON user_profiles
FOR EACH ROW
EXECUTE FUNCTION sync_user_profile_cache();
-- Update a user profile
UPDATE user_profiles
SET display_name = 'Updated Display Name'
WHERE user_id = 100;
-- Verify cache was updated
SELECT
user_id,
display_name,
cache_updated
FROM user_profile_cache
WHERE user_id = 100;
-- Check if timestamp shows recent update
SELECT
'Cache sync test' as test,
CASE
WHEN cache_updated > NOW() - INTERVAL '1 minute'
THEN 'PASSED - Cache updated recently'
ELSE 'FAILED - Cache not synchronized'
END as result
FROM user_profile_cache
WHERE user_id = 100;
-- Check table sizes
SELECT
tablename,
pg_size_pretty(pg_total_relation_size(tablename)) as size
FROM pg_tables
WHERE tablename IN ('users', 'user_profiles', 'user_stats', 'user_preferences', 'user_profile_cache')
ORDER BY pg_total_relation_size(tablename) DESC;
-- Calculate storage trade-off
WITH storage_analysis AS (
SELECT
SUM(pg_total_relation_size(tablename)) as normalized_size
FROM pg_tables
WHERE tablename IN ('users', 'user_profiles', 'user_stats', 'user_preferences')
UNION ALL
SELECT pg_total_relation_size('user_profile_cache') as cache_size
)
SELECT
pg_size_pretty(normalized_size) as original_tables_size,
pg_size_pretty(cache_size) as cache_size,
ROUND((cache_size::NUMERIC / normalized_size * 100), 1) as cache_overhead_percent
FROM (SELECT * FROM storage_analysis LIMIT 1) normalized,
(SELECT * FROM storage_analysis OFFSET 1) cache;
-- 1. Performance improvement verification
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_profile_cache WHERE user_id = 50;
-- 2. Data synchronization verification
UPDATE user_profiles SET bio = 'Test sync' WHERE user_id = 50;
SELECT bio, cache_updated FROM user_profile_cache WHERE user_id = 50;
-- 3. Storage analysis
SELECT
'Storage overhead acceptable' as metric,
pg_size_pretty(pg_total_relation_size('user_profile_cache')) as cache_size,
'vs better query performance' as trade_off;
✅ Success Criteria:
Read-Heavy Workloads: When you query data much more than you update it
Performance Bottlenecks: When normalization creates unacceptable delays
Selective Denormalization: Only denormalize the problematic queries
Cache-Based Approach: Treat denormalized data as a cache layer
Benefits: Faster queries, reduced server load, better user experience Costs: More storage space, complex synchronization, potential inconsistency Decision factors: Query frequency, data update frequency, performance requirements
/* Reference links removed - files don't exist **Reference Links:** - [Database Denormalization Patterns](./Reference/denormalization-patterns.md) - [Performance Optimization Strategies](./Reference/performance-optimization.md) - [PostgreSQL Performance Tuning](./Reference/postgresql-tuning.md) */Complete this activity and submit your work through the Activity Submission Form