SQL (Structured Query Language) is the standard language for working with relational databases. Whether you're using PostgreSQL, MySQL, SQLite, or any other SQL database, understanding SELECT statements, JOINs, and GROUP BY operations is fundamental for data retrieval and analysis. This tutorial covers these core concepts with practical examples.
Database Setup for Examples
Let's start with sample tables to demonstrate SQL concepts:
sql
-- Users table
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
first_name VARCHAR(50),
last_name VARCHAR(50),
age INTEGER,
city VARCHAR(50),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Categories table
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(50) NOT NULL,
description TEXT
);
-- Posts table
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
title VARCHAR(200) NOT NULL,
content TEXT,
user_id INTEGER REFERENCES users(id),
category_id INTEGER REFERENCES categories(id),
views INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
published BOOLEAN DEFAULT false
);
-- Comments table
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
content TEXT NOT NULL,
user_id INTEGER REFERENCES users(id),
post_id INTEGER REFERENCES posts(id),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Sample data insertion
INSERT INTO users (username, email, first_name, last_name, age, city) VALUES
('john_doe', 'john@example.com', 'John', 'Doe', 28, 'New York'),
('jane_smith', 'jane@example.com', 'Jane', 'Smith', 32, 'Los Angeles'),
('bob_wilson', 'bob@example.com', 'Bob', 'Wilson', 25, 'Chicago'),
('alice_brown', 'alice@example.com', 'Alice', 'Brown', 29, 'New York');
INSERT INTO categories (name, description) VALUES
('Technology', 'Posts about technology and programming'),
('Travel', 'Travel experiences and tips'),
('Food', 'Recipes and restaurant reviews');
INSERT INTO posts (title, content, user_id, category_id, views, published) VALUES
('Getting Started with SQL', 'SQL basics for beginners', 1, 1, 150, true),
('Best Pizza in NYC', 'My favorite pizza places', 2, 3, 89, true),
('Learning PostgreSQL', 'Advanced PostgreSQL features', 1, 1, 203, true),
('Trip to Japan', 'Amazing experience in Tokyo', 3, 2, 67, false);
SELECT Statement Fundamentals
Basic SELECT Operations
sql
-- Select all columns from a table
SELECT * FROM users;
-- Select specific columns
SELECT username, email, age FROM users;
-- Select with column aliases
SELECT
username AS user_name,
email AS email_address,
CONCAT(first_name, ' ', last_name) AS full_name
FROM users;
-- Select with expressions and functions
SELECT
title,
LENGTH(title) AS title_length,
UPPER(title) AS title_uppercase,
created_at,
EXTRACT(YEAR FROM created_at) AS creation_year
FROM posts;
-- Select distinct values
SELECT DISTINCT city FROM users;
SELECT DISTINCT user_id, category_id FROM posts;
WHERE Clause and Filtering
sql
-- Basic WHERE conditions
SELECT * FROM users WHERE age > 30;
SELECT * FROM users WHERE city = 'New York';
SELECT * FROM posts WHERE published = true;
-- Multiple conditions with AND/OR
SELECT * FROM users
WHERE age > 25 AND city = 'New York';
SELECT * FROM posts
WHERE category_id = 1 OR views > 100;
-- IN and NOT IN operators
SELECT * FROM users
WHERE city IN ('New York', 'Los Angeles');
SELECT * FROM posts
WHERE category_id NOT IN (1, 2);
-- BETWEEN for ranges
SELECT * FROM users
WHERE age BETWEEN 25 AND 30;
-- LIKE for pattern matching
SELECT * FROM users
WHERE username LIKE 'j%'; -- Starts with 'j'
SELECT * FROM users
WHERE email LIKE '%@example.com'; -- Ends with '@example.com'
SELECT * FROM posts
WHERE title LIKE '%SQL%'; -- Contains 'SQL'
-- NULL checks
SELECT * FROM posts WHERE content IS NOT NULL;
SELECT * FROM users WHERE last_name IS NULL;
-- Complex conditions with parentheses
SELECT * FROM users
WHERE (age > 30 OR city = 'Chicago')
AND username LIKE '%_smith';
Sorting and Limiting Results
sql
-- ORDER BY for sorting SELECT * FROM users ORDER BY age; -- Ascending (default) SELECT * FROM users ORDER BY age DESC; -- Descending -- Multiple column sorting SELECT username, age, city FROM users ORDER BY city, age DESC; -- Sorting with expressions SELECT title, views, LENGTH(title) as title_length FROM posts ORDER BY LENGTH(title) DESC, views DESC; -- LIMIT and OFFSET for pagination SELECT * FROM posts ORDER BY views DESC LIMIT 5; -- Top 5 posts by views -- Pagination: Skip first 5 records, take next 5 SELECT * FROM users ORDER BY created_at DESC LIMIT 5 OFFSET 5; -- PostgreSQL alternative syntax SELECT * FROM users ORDER BY age LIMIT 3 OFFSET 2;
JOIN Operations
INNER JOIN
sql
-- Basic INNER JOIN
SELECT
users.username,
posts.title,
posts.views
FROM users
INNER JOIN posts ON users.id = posts.user_id;
-- Using table aliases for cleaner code
SELECT
u.username,
p.title,
p.views,
p.created_at
FROM users u
INNER JOIN posts p ON u.id = p.user_id
WHERE p.published = true;
-- Multiple JOINs
SELECT
u.username,
p.title,
c.name AS category_name,
p.views
FROM users u
INNER JOIN posts p ON u.id = p.user_id
INNER JOIN categories c ON p.category_id = c.id
ORDER BY p.views DESC;
-- Self JOIN example (users from same city)
SELECT
u1.username AS user1,
u2.username AS user2,
u1.city
FROM users u1
INNER JOIN users u2 ON u1.city = u2.city
WHERE u1.id < u2.id; -- Avoid duplicate pairs
LEFT JOIN (LEFT OUTER JOIN)
sql
-- LEFT JOIN - includes all users, even without posts
SELECT
u.username,
u.email,
p.title,
p.created_at
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
ORDER BY u.username;
-- Find users who haven't written any posts
SELECT
u.username,
u.email
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
WHERE p.id IS NULL;
-- Count posts per user (including users with 0 posts)
SELECT
u.username,
COUNT(p.id) AS post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
ORDER BY post_count DESC;
RIGHT JOIN and FULL OUTER JOIN
sql
-- RIGHT JOIN - includes all posts, even if user doesn't exist
SELECT
u.username,
p.title,
p.views
FROM users u
RIGHT JOIN posts p ON u.id = p.user_id;
-- FULL OUTER JOIN - includes all records from both tables
SELECT
u.username,
p.title
FROM users u
FULL OUTER JOIN posts p ON u.id = p.user_id;
-- Find orphaned records
SELECT
u.username,
p.title
FROM users u
FULL OUTER JOIN posts p ON u.id = p.user_id
WHERE u.id IS NULL OR p.id IS NULL;
Complex JOIN Examples
sql
-- Posts with their authors and comment counts
SELECT
p.title,
u.username AS author,
c.name AS category,
COUNT(cm.id) AS comment_count,
p.views
FROM posts p
INNER JOIN users u ON p.user_id = u.id
INNER JOIN categories c ON p.category_id = c.id
LEFT JOIN comments cm ON p.id = cm.post_id
WHERE p.published = true
GROUP BY p.id, p.title, u.username, c.name, p.views
ORDER BY comment_count DESC, p.views DESC;
-- Users with their post and comment activity
SELECT
u.username,
u.city,
COUNT(DISTINCT p.id) AS posts_written,
COUNT(DISTINCT cm.id) AS comments_made,
COALESCE(AVG(p.views), 0) AS avg_post_views
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
LEFT JOIN comments cm ON u.id = cm.user_id
GROUP BY u.id, u.username, u.city
ORDER BY posts_written DESC, comments_made DESC;
GROUP BY and Aggregation Functions
Basic GROUP BY Operations
sql
-- Count users by city
SELECT
city,
COUNT(*) AS user_count
FROM users
GROUP BY city
ORDER BY user_count DESC;
-- Count posts by category
SELECT
c.name AS category_name,
COUNT(p.id) AS post_count
FROM categories c
LEFT JOIN posts p ON c.id = p.category_id
GROUP BY c.id, c.name
ORDER BY post_count DESC;
-- Average age by city
SELECT
city,
COUNT(*) AS user_count,
AVG(age) AS average_age,
MIN(age) AS youngest,
MAX(age) AS oldest
FROM users
GROUP BY city;
Advanced Aggregation Functions
sql
-- Post statistics by user
SELECT
u.username,
COUNT(p.id) AS total_posts,
SUM(p.views) AS total_views,
AVG(p.views) AS avg_views,
MAX(p.views) AS max_views,
MIN(p.created_at) AS first_post_date,
MAX(p.created_at) AS last_post_date
FROM users u
INNER JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
HAVING COUNT(p.id) > 1 -- Only users with more than 1 post
ORDER BY total_views DESC;
-- Monthly post creation statistics
SELECT
EXTRACT(YEAR FROM created_at) AS year,
EXTRACT(MONTH FROM created_at) AS month,
COUNT(*) AS posts_created,
AVG(views) AS avg_views
FROM posts
GROUP BY EXTRACT(YEAR FROM created_at), EXTRACT(MONTH FROM created_at)
ORDER BY year, month;
-- String aggregation (PostgreSQL specific)
SELECT
c.name AS category,
STRING_AGG(p.title, ', ') AS post_titles,
COUNT(p.id) AS post_count
FROM categories c
INNER JOIN posts p ON c.id = p.category_id
GROUP BY c.id, c.name;
HAVING Clause
sql
-- HAVING vs WHERE: WHERE filters before grouping, HAVING filters after
SELECT
city,
COUNT(*) AS user_count,
AVG(age) AS avg_age
FROM users
WHERE age >= 25 -- Filter individual records first
GROUP BY city
HAVING COUNT(*) > 1 -- Then filter grouped results
ORDER BY user_count DESC;
-- Categories with high-view posts
SELECT
c.name AS category,
COUNT(p.id) AS post_count,
AVG(p.views) AS avg_views,
SUM(p.views) AS total_views
FROM categories c
INNER JOIN posts p ON c.id = p.category_id
GROUP BY c.id, c.name
HAVING AVG(p.views) > 100
ORDER BY avg_views DESC;
-- Users who are above-average posters
SELECT
u.username,
COUNT(p.id) AS post_count
FROM users u
INNER JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
HAVING COUNT(p.id) > (
SELECT AVG(post_count)
FROM (
SELECT COUNT(p2.id) AS post_count
FROM users u2
INNER JOIN posts p2 ON u2.id = p2.user_id
GROUP BY u2.id
) AS subquery
);
Subqueries and Common Table Expressions
Subqueries in SELECT and WHERE
sql
-- Scalar subquery in SELECT
SELECT
username,
email,
(SELECT COUNT(*) FROM posts WHERE user_id = users.id) AS post_count
FROM users;
-- Subquery in WHERE clause
SELECT title, views
FROM posts
WHERE views > (SELECT AVG(views) FROM posts);
-- EXISTS subquery
SELECT u.username, u.email
FROM users u
WHERE EXISTS (
SELECT 1 FROM posts p
WHERE p.user_id = u.id AND p.published = true
);
-- NOT EXISTS for users without posts
SELECT u.username, u.email
FROM users u
WHERE NOT EXISTS (
SELECT 1 FROM posts p WHERE p.user_id = u.id
);
Common Table Expressions (CTEs)
sql
-- Basic CTE
WITH user_stats AS (
SELECT
u.id,
u.username,
COUNT(p.id) AS post_count,
COALESCE(SUM(p.views), 0) AS total_views
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
)
SELECT
username,
post_count,
total_views,
CASE
WHEN total_views > 200 THEN 'High'
WHEN total_views > 50 THEN 'Medium'
ELSE 'Low'
END AS engagement_level
FROM user_stats
ORDER BY total_views DESC;
-- Multiple CTEs
WITH category_stats AS (
SELECT
c.id,
c.name,
COUNT(p.id) AS post_count,
AVG(p.views) AS avg_views
FROM categories c
LEFT JOIN posts p ON c.id = p.category_id
GROUP BY c.id, c.name
),
top_categories AS (
SELECT * FROM category_stats
WHERE post_count > 0
ORDER BY avg_views DESC
LIMIT 2
)
SELECT
tc.name,
tc.post_count,
ROUND(tc.avg_views, 2) AS avg_views
FROM top_categories tc;
Window Functions (Advanced)
sql
-- Row numbering and ranking
SELECT
username,
title,
views,
ROW_NUMBER() OVER (ORDER BY views DESC) AS row_num,
RANK() OVER (ORDER BY views DESC) AS rank,
DENSE_RANK() OVER (ORDER BY views DESC) AS dense_rank
FROM posts p
INNER JOIN users u ON p.user_id = u.id
WHERE p.published = true;
-- Partition by category
SELECT
c.name AS category,
p.title,
p.views,
ROW_NUMBER() OVER (PARTITION BY c.name ORDER BY p.views DESC) AS rank_in_category
FROM posts p
INNER JOIN categories c ON p.category_id = c.id
WHERE p.published = true
ORDER BY c.name, rank_in_category;
-- Running totals and moving averages
SELECT
created_at::DATE AS date,
views,
SUM(views) OVER (ORDER BY created_at) AS running_total,
AVG(views) OVER (ORDER BY created_at ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM posts
WHERE published = true
ORDER BY created_at;
Performance Tips and Best Practices
Query Optimization
sql
-- Use indexes for frequently queried columns CREATE INDEX idx_posts_user_id ON posts(user_id); CREATE INDEX idx_posts_category_published ON posts(category_id, published); CREATE INDEX idx_users_city ON users(city); -- Efficient WHERE clauses -- Good: Uses index SELECT * FROM posts WHERE user_id = 1; -- Less efficient: Function on indexed column SELECT * FROM users WHERE UPPER(username) = 'JOHN_DOE'; -- Better: Store normalized data or use functional index SELECT * FROM users WHERE username = 'john_doe'; -- Use LIMIT with large result sets SELECT p.title, u.username FROM posts p INNER JOIN users u ON p.user_id = u.id ORDER BY p.created_at DESC LIMIT 20; -- Avoid SELECT * in production -- Instead of: SELECT * FROM posts; SELECT id, title, user_id, created_at FROM posts;
Query Analysis
sql
-- Use EXPLAIN to analyze query performance EXPLAIN ANALYZE SELECT p.title, u.username, c.name FROM posts p INNER JOIN users u ON p.user_id = u.id INNER JOIN categories c ON p.category_id = c.id WHERE p.views > 100; -- Check query execution plan EXPLAIN (ANALYZE, BUFFERS) SELECT COUNT(*) FROM posts p INNER JOIN users u ON p.user_id = u.id WHERE u.city = 'New York';
Conclusion
Mastering SELECT, JOINs, and GROUP BY operations forms the foundation of SQL proficiency. These concepts enable you to retrieve, combine, and analyze data effectively from relational databases. Practice with different JOIN types to understand when to use each one, leverage GROUP BY for data aggregation and analysis, and always consider query performance when working with large datasets.
Remember that SQL skills build upon each other - understanding these fundamentals will prepare you for more advanced topics like stored procedures, triggers, and database optimization.