SELECT * FROM netflix_titles LIMIT 5;
PRAGMA table_info(netflix_titles);Assume netflix_titles has:
title_id, title, genre, rating, imdb_score, release_year, duration_mins,
is_original, is_documentary, is_canceled, country, language
-- What genres exist?
SELECT DISTINCT genre
FROM netflix_titles;
-- What countries produce content?
SELECT DISTINCT country
FROM netflix_titles;
-- What rating categories are used?
SELECT DISTINCT rating
FROM netflix_titles
ORDER BY rating;-- Unique genre + country combinations
SELECT DISTINCT genre, country
FROM netflix_titles
ORDER BY genre, country;
-- Unique genre + language pairs
SELECT DISTINCT genre, language
FROM netflix_titles;DISTINCT applies to the combination of all selected columns, not just the first one.
-- How many unique genres are there?
SELECT COUNT(DISTINCT genre) AS unique_genres
FROM netflix_titles;
-- How many unique countries produce content?
SELECT COUNT(DISTINCT country) AS producing_countries
FROM netflix_titles;
-- How many unique release years?
SELECT COUNT(DISTINCT release_year) AS years_on_platform
FROM netflix_titles;-- These produce the same result
SELECT DISTINCT genre FROM netflix_titles;
SELECT genre FROM netflix_titles GROUP BY genre;
-- Use DISTINCT for simple deduplication
-- Use GROUP BY when you also need aggregates (count, avg, etc.)-- Count titles per genre
SELECT
genre,
COUNT(*) AS title_count
FROM netflix_titles
GROUP BY genre;-- Genre breakdown with multiple metrics
SELECT
genre,
COUNT(*) AS total_titles,
ROUND(AVG(imdb_score), 2) AS avg_score,
ROUND(AVG(duration_mins), 0) AS avg_duration,
MIN(release_year) AS earliest,
MAX(release_year) AS latest
FROM netflix_titles
GROUP BY genre;-- Count titles per genre AND per country
SELECT
genre,
country,
COUNT(*) AS title_count
FROM netflix_titles
GROUP BY genre, country
ORDER BY genre, title_count DESC;
-- Count per release year AND is_original flag
SELECT
release_year,
is_original,
COUNT(*) AS title_count
FROM netflix_titles
GROUP BY release_year, is_original
ORDER BY release_year DESC;Every column in SELECT that is NOT an aggregate must appear in GROUP BY.
-- ❌ Wrong — genre not in GROUP BY
SELECT genre, country, COUNT(*)
FROM netflix_titles
GROUP BY country;
-- ✅ Correct
SELECT genre, country, COUNT(*)
FROM netflix_titles
GROUP BY genre, country;-- Count titles by score tier
SELECT
CASE
WHEN imdb_score >= 8.5 THEN 'Must Watch'
WHEN imdb_score >= 7.0 THEN 'Worth Watching'
WHEN imdb_score >= 5.5 THEN 'Average'
ELSE 'Skip It'
END AS tier,
COUNT(*) AS title_count
FROM netflix_titles
GROUP BY
CASE
WHEN imdb_score >= 8.5 THEN 'Must Watch'
WHEN imdb_score >= 7.0 THEN 'Worth Watching'
WHEN imdb_score >= 5.5 THEN 'Average'
ELSE 'Skip It'
END;SQLite requires repeating the full CASE in GROUP BY. You cannot use the alias
tierin GROUP BY.
-- Only genres with more than 20 titles
SELECT
genre,
COUNT(*) AS title_count
FROM netflix_titles
GROUP BY genre
HAVING COUNT(*) > 20;
-- Genres where average IMDb score is above 7.5
SELECT
genre,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
HAVING AVG(imdb_score) > 7.5;
-- Genres with more than 10 titles AND avg score above 7.0
SELECT
genre,
COUNT(*) AS total,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
HAVING COUNT(*) > 10
AND AVG(imdb_score) > 7.0;-- WHERE filters rows BEFORE grouping
-- HAVING filters groups AFTER grouping
-- Step 1: filter to originals only (WHERE)
-- Step 2: group by genre
-- Step 3: keep genres with 5+ originals (HAVING)
SELECT
genre,
COUNT(*) AS original_count
FROM netflix_titles
WHERE is_original = 'Y' -- ← filters rows first
GROUP BY genre
HAVING COUNT(*) >= 5 -- ← filters groups after
ORDER BY original_count DESC;Execution order:
1. FROM → load table
2. WHERE → filter individual rows
3. GROUP BY → group remaining rows
4. HAVING → filter groups
5. SELECT → compute output columns
6. ORDER BY → sort
7. LIMIT → cut to N rows
-- Highest rated titles first
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score DESC;
-- Oldest titles first
SELECT title, release_year
FROM netflix_titles
ORDER BY release_year ASC; -- ASC is default, can be omitted
-- Alphabetical by title
SELECT title, genre
FROM netflix_titles
ORDER BY title;-- Sort by genre A→Z, then by score high→low within each genre
SELECT title, genre, imdb_score
FROM netflix_titles
ORDER BY genre ASC, imdb_score DESC;
-- Sort by release year newest first, then alphabetically within same year
SELECT title, release_year, genre
FROM netflix_titles
ORDER BY release_year DESC, title ASC;
-- Sort by country, then genre, then score
SELECT title, country, genre, imdb_score
FROM netflix_titles
ORDER BY country, genre, imdb_score DESC;-- Genres by title count, most popular first
SELECT
genre,
COUNT(*) AS title_count,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
ORDER BY title_count DESC;
-- Genres by average score, highest first
SELECT
genre,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
ORDER BY avg_score DESC;
-- In SQLite you can ORDER BY an aggregate not in SELECT
SELECT genre
FROM netflix_titles
GROUP BY genre
ORDER BY COUNT(*) DESC;-- Custom priority: Originals first, then by score
SELECT title, genre, is_original, imdb_score
FROM netflix_titles
ORDER BY
CASE is_original
WHEN 'Y' THEN 1
ELSE 2
END,
imdb_score DESC;
-- Custom genre order
SELECT title, genre
FROM netflix_titles
ORDER BY
CASE genre
WHEN 'Drama' THEN 1
WHEN 'Comedy' THEN 2
WHEN 'Documentary' THEN 3
ELSE 4
END,
title ASC;-- NULLs sort LAST in ASC (SQLite default)
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score ASC; -- NULLs appear at bottom
-- NULLs sort FIRST in DESC
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score DESC; -- NULLs appear at top
-- Force NULLs to always sort last regardless of direction
SELECT title, imdb_score
FROM netflix_titles
ORDER BY
CASE WHEN imdb_score IS NULL THEN 1 ELSE 0 END,
imdb_score DESC;-- Top 10 highest rated titles
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score DESC
LIMIT 10;
-- 5 most recent releases
SELECT title, release_year
FROM netflix_titles
ORDER BY release_year DESC
LIMIT 5;
-- Any 1 row (quick sanity check)
SELECT * FROM netflix_titles LIMIT 1;-- Page 1: rows 1–10
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score DESC
LIMIT 10 OFFSET 0;
-- Page 2: rows 11–20
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score DESC
LIMIT 10 OFFSET 10;
-- Page 3: rows 21–30
SELECT title, imdb_score
FROM netflix_titles
ORDER BY imdb_score DESC
LIMIT 10 OFFSET 20;Formula:
OFFSET = (page_number - 1) * page_size
-- Top 5 genres by title count
SELECT
genre,
COUNT(*) AS title_count
FROM netflix_titles
GROUP BY genre
ORDER BY title_count DESC
LIMIT 5;
-- Top 3 genres by average score
SELECT
genre,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
ORDER BY avg_score DESC
LIMIT 3;-- Top 2 titles per genre by IMDb score
SELECT title, genre, imdb_score
FROM (
SELECT
title,
genre,
imdb_score,
ROW_NUMBER() OVER (PARTITION BY genre ORDER BY imdb_score DESC) AS rn
FROM netflix_titles
)
WHERE rn <= 2
ORDER BY genre, imdb_score DESC;LIMIT alone can't do "top N per group" — you need a window function. LIMIT applies to the whole result, not within each group.
-- Top 5 genres (with 10+ titles) ranked by avg score
-- showing count, avg score, and % that are originals
SELECT
genre,
COUNT(*) AS total_titles,
ROUND(AVG(imdb_score), 2) AS avg_score,
SUM(CASE WHEN is_original = 'Y' THEN 1 ELSE 0 END) AS originals,
ROUND(
SUM(CASE WHEN is_original = 'Y' THEN 1 ELSE 0 END) * 100.0
/ COUNT(*), 1
) AS pct_original
FROM netflix_titles
WHERE is_canceled = 'N' -- exclude canceled titles
GROUP BY genre
HAVING COUNT(*) >= 10 -- only genres with 10+ titles
ORDER BY avg_score DESC -- best rated genres first
LIMIT 5; -- top 5 only-- Unique countries, count of titles each produced, sorted
SELECT
country,
COUNT(DISTINCT genre) AS genres_produced,
COUNT(*) AS total_titles,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
WHERE country IS NOT NULL
GROUP BY country
ORDER BY total_titles DESC
LIMIT 10;| Clause | Purpose | Key Rule |
|---|---|---|
DISTINCT |
Remove duplicate rows | Applies to all selected columns together |
COUNT(DISTINCT col) |
Count unique values | Skips NULLs |
GROUP BY col |
Group rows for aggregation | Every non-aggregate SELECT col must be here |
GROUP BY col1, col2 |
Group by combination | Both columns define the group |
HAVING condition |
Filter groups | Use aggregates here, not aliases |
ORDER BY col DESC |
Sort results | Default is ASC |
ORDER BY col1, col2 |
Sort by multiple | Applied left to right |
LIMIT N |
Keep top N rows | Always pair with ORDER BY |
LIMIT N OFFSET M |
Pagination | Skip M rows, return next N |
Write order → Execution order
───────────────────────────────────────────────────────
SELECT 5 ← computed after everything else
FROM 1 ← load the table
WHERE 2 ← filter individual rows
GROUP BY 3 ← bundle rows into groups
HAVING 4 ← filter groups
ORDER BY 6 ← sort the output
LIMIT 7 ← cut to N rows
Consequence:
WHERE → can't use aggregate functions (grouping hasn't happened yet)
HAVING → use aggregate functions here instead
ORDER BY → can reference aliases from SELECT (happens after SELECT)
WHERE → cannot reference SELECT aliases (happens before SELECT)
DISTINCT vs GROUP BY:
DISTINCT → just deduplicate, no aggregation needed
GROUP BY → deduplicate AND compute aggregates
LIMIT without ORDER BY:
Returns arbitrary rows — no guarantee which N you get
Always pair LIMIT with ORDER BY for meaningful results
Top N per group:
LIMIT alone → top N from entire table
ROW_NUMBER() → top N within each group (use a subquery)