SELECT * FROM netflix_titles LIMIT 5;
SELECT * FROM netflix_directors LIMIT 5;
SELECT * FROM netflix_reviews LIMIT 5;
SELECT * FROM netflix_awards LIMIT 5;Assume the following tables:
netflix_titles
title_id, title, genre, rating, imdb_score,
release_year, duration_mins, is_original,
is_canceled, country, language
netflix_directors
director_id, title_id, director_name, nationality
netflix_reviews
review_id, title_id, reviewer, score, review_date
netflix_awards
award_id, title_id, award_name, award_year, won
A subquery is a SELECT statement nested inside another query.
It runs first, and its result is used by the outer query.
Three places a subquery can live:
WHERE → filters rows based on subquery result
FROM → acts as a temporary table (inline view)
SELECT → computes a value per row (scalar subquery)
-- Titles with above-average IMDb score
SELECT title, genre, imdb_score
FROM netflix_titles
WHERE imdb_score > (SELECT AVG(imdb_score) FROM netflix_titles)
ORDER BY imdb_score DESC;Inner query runs first:
SELECT AVG(imdb_score) FROM netflix_titles → 7.43
Outer query becomes:
WHERE imdb_score > 7.43
-- Titles longer than the average duration
SELECT title, genre, duration_mins
FROM netflix_titles
WHERE duration_mins > (SELECT AVG(duration_mins) FROM netflix_titles)
ORDER BY duration_mins DESC;
-- Titles released after the median release year
SELECT title, release_year
FROM netflix_titles
WHERE release_year > (
SELECT AVG(release_year)
FROM netflix_titles
)
ORDER BY release_year;-- Titles that have won at least one award
SELECT title, genre, imdb_score
FROM netflix_titles
WHERE title_id IN (
SELECT title_id
FROM netflix_awards
WHERE won = 'Y'
)
ORDER BY imdb_score DESC;
-- Titles that have at least one review
SELECT title, genre
FROM netflix_titles
WHERE title_id IN (
SELECT DISTINCT title_id
FROM netflix_reviews
);-- Titles that have NEVER won an award
SELECT title, genre, imdb_score
FROM netflix_titles
WHERE title_id NOT IN (
SELECT title_id
FROM netflix_awards
WHERE won = 'Y'
)
ORDER BY imdb_score DESC;
-- Titles with no reviews at all
SELECT title, genre
FROM netflix_titles
WHERE title_id NOT IN (
SELECT DISTINCT title_id
FROM netflix_reviews
);
⚠️ NOT IN fails silently when the subquery contains NULLs. Iftitle_idcan be NULL in the subquery result, use NOT EXISTS instead.
-- ✅ Safer alternative using NOT EXISTS
SELECT title, genre
FROM netflix_titles t
WHERE NOT EXISTS (
SELECT 1
FROM netflix_reviews r
WHERE r.title_id = t.title_id
);-- Titles that have at least one 5-star review
SELECT t.title, t.genre
FROM netflix_titles t
WHERE EXISTS (
SELECT 1
FROM netflix_reviews r
WHERE r.title_id = t.title_id
AND r.score = 5.0
);
-- Titles with no director on record
SELECT t.title, t.genre
FROM netflix_titles t
WHERE NOT EXISTS (
SELECT 1
FROM netflix_directors d
WHERE d.title_id = t.title_id
);EXISTS returns TRUE if the subquery produces at least one row.
SELECT 1 is a convention — the actual value doesn't matter,
only whether a row exists.
-- Average score per genre, then filter in outer query
SELECT genre, avg_score
FROM (
SELECT
genre,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
) AS genre_stats
WHERE avg_score > 7.5
ORDER BY avg_score DESC;Inner query builds a temporary table called genre_stats.
Outer query treats it exactly like a real table.
The alias (genre_stats) is required in SQLite.
-- Count how many genres have an avg score above 7.0
SELECT COUNT(*) AS qualifying_genres
FROM (
SELECT genre, AVG(imdb_score) AS avg_score
FROM netflix_titles
GROUP BY genre
HAVING AVG(imdb_score) > 7.0
) AS high_rated_genres;-- Top 2 titles per genre by IMDb score
SELECT title, genre, imdb_score, rn
FROM (
SELECT
title,
genre,
imdb_score,
ROW_NUMBER() OVER (PARTITION BY genre ORDER BY imdb_score DESC) AS rn
FROM netflix_titles
) AS ranked_titles
WHERE rn <= 2
ORDER BY genre, imdb_score DESC;-- Show each title's score vs the overall average
SELECT
title,
genre,
imdb_score,
(SELECT ROUND(AVG(imdb_score), 2) FROM netflix_titles) AS overall_avg,
ROUND(imdb_score - (SELECT AVG(imdb_score) FROM netflix_titles), 2) AS diff_from_avg
FROM netflix_titles
ORDER BY diff_from_avg DESC;-- Each title's score vs the average score for its genre
SELECT
t.title,
t.genre,
t.imdb_score,
(
SELECT ROUND(AVG(imdb_score), 2)
FROM netflix_titles
WHERE genre = t.genre -- ← references outer query alias t
) AS genre_avg
FROM netflix_titles t
ORDER BY t.genre, t.imdb_score DESC;A correlated subquery runs once per row in the outer query.
It references the outer table (t.genre) to compute a row-specific result.
This is powerful but slower on large tables — CTEs or window functions
are often more efficient for this pattern.
A CTE is a named, temporary result set defined at the top of a query.
It works like a subquery but is declared once and reused.
Syntax:
WITH cte_name AS (
SELECT ...
)
SELECT ... FROM cte_name;
-- Genres with average score above 7.5
WITH genre_stats AS (
SELECT
genre,
COUNT(*) AS total_titles,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
)
SELECT *
FROM genre_stats
WHERE avg_score > 7.5
ORDER BY avg_score DESC;-- Step 1: genre stats
-- Step 2: award counts per genre via title join
-- Step 3: combine
WITH genre_stats AS (
SELECT
genre,
COUNT(*) AS total_titles,
ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
),
award_counts AS (
SELECT
t.genre,
COUNT(a.award_id) AS nominations,
SUM(CASE WHEN a.won = 'Y' THEN 1 ELSE 0 END) AS wins
FROM netflix_titles t
LEFT JOIN netflix_awards a ON t.title_id = a.title_id
GROUP BY t.genre
)
SELECT
g.genre,
g.total_titles,
g.avg_score,
a.nominations,
a.wins
FROM genre_stats g
JOIN award_counts a ON g.genre = a.genre
ORDER BY g.avg_score DESC;-- Top 3 titles per genre by IMDb score
WITH ranked AS (
SELECT
title,
genre,
imdb_score,
ROW_NUMBER() OVER (PARTITION BY genre ORDER BY imdb_score DESC) AS rn
FROM netflix_titles
)
SELECT title, genre, imdb_score
FROM ranked
WHERE rn <= 3
ORDER BY genre, imdb_score DESC;-- Without CTE — repeat the CASE expression twice
SELECT
CASE
WHEN imdb_score >= 8.5 THEN 'Must Watch'
WHEN imdb_score >= 7.0 THEN 'Worth Watching'
ELSE 'Other'
END AS tier,
COUNT(*) AS cnt
FROM netflix_titles
GROUP BY
CASE
WHEN imdb_score >= 8.5 THEN 'Must Watch'
WHEN imdb_score >= 7.0 THEN 'Worth Watching'
ELSE 'Other'
END;
-- ✅ With CTE — define once, reference cleanly
WITH scored AS (
SELECT
title,
imdb_score,
CASE
WHEN imdb_score >= 8.5 THEN 'Must Watch'
WHEN imdb_score >= 7.0 THEN 'Worth Watching'
ELSE 'Other'
END AS tier
FROM netflix_titles
)
SELECT tier, COUNT(*) AS cnt
FROM scored
GROUP BY tier
ORDER BY cnt DESC;-- Correlated subquery version (slower)
SELECT
t.title,
t.genre,
t.imdb_score,
(SELECT AVG(imdb_score) FROM netflix_titles WHERE genre = t.genre) AS genre_avg
FROM netflix_titles t;
-- ✅ CTE version (cleaner, faster)
WITH genre_avgs AS (
SELECT genre, ROUND(AVG(imdb_score), 2) AS genre_avg
FROM netflix_titles
GROUP BY genre
)
SELECT
t.title,
t.genre,
t.imdb_score,
g.genre_avg,
ROUND(t.imdb_score - g.genre_avg, 2) AS diff_from_genre_avg
FROM netflix_titles t
JOIN genre_avgs g ON t.genre = g.genre
ORDER BY diff_from_genre_avg DESC;-- Build a full title report step by step
WITH base AS (
SELECT title_id, title, genre, imdb_score, is_original
FROM netflix_titles
WHERE is_canceled = 'N'
),
review_summary AS (
SELECT
title_id,
COUNT(*) AS review_count,
ROUND(AVG(score), 2) AS avg_review_score
FROM netflix_reviews
GROUP BY title_id
),
award_summary AS (
SELECT
title_id,
COUNT(*) AS nominations,
SUM(CASE WHEN won = 'Y' THEN 1 ELSE 0 END) AS wins
FROM netflix_awards
GROUP BY title_id
)
SELECT
b.title,
b.genre,
b.imdb_score,
b.is_original,
COALESCE(r.review_count, 0) AS reviews,
COALESCE(r.avg_review_score, 0) AS avg_review,
COALESCE(a.nominations, 0) AS nominations,
COALESCE(a.wins, 0) AS wins
FROM base b
LEFT JOIN review_summary r ON b.title_id = r.title_id
LEFT JOIN award_summary a ON b.title_id = a.title_id
ORDER BY b.imdb_score DESC;-- Goal: find genres where avg score > overall avg score
-- Style 1: Nested subquery
SELECT genre, ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
HAVING AVG(imdb_score) > (SELECT AVG(imdb_score) FROM netflix_titles);
-- Style 2: FROM subquery
SELECT genre, avg_score
FROM (
SELECT genre, ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
) AS stats
WHERE avg_score > (SELECT AVG(imdb_score) FROM netflix_titles);
-- Style 3: CTE (most readable)
WITH overall AS (
SELECT ROUND(AVG(imdb_score), 2) AS overall_avg
FROM netflix_titles
),
genre_stats AS (
SELECT genre, ROUND(AVG(imdb_score), 2) AS avg_score
FROM netflix_titles
GROUP BY genre
)
SELECT g.genre, g.avg_score, o.overall_avg
FROM genre_stats g, overall o
WHERE g.avg_score > o.overall_avg
ORDER BY g.avg_score DESC;| Pattern | Syntax | Use When |
|---|---|---|
| Scalar WHERE subquery | WHERE col > (SELECT AVG(...)) |
Compare to a single computed value |
| IN subquery | WHERE id IN (SELECT id FROM ...) |
Filter against a list from another table |
| NOT IN subquery | WHERE id NOT IN (SELECT id FROM ...) |
Find rows with no match — watch for NULLs |
| EXISTS | WHERE EXISTS (SELECT 1 FROM ... WHERE ...) |
Check if any related row exists |
| NOT EXISTS | WHERE NOT EXISTS (SELECT 1 ...) |
Safer alternative to NOT IN with NULLs |
| FROM subquery | FROM (SELECT ...) AS alias |
Use a derived table — alias required |
| SELECT subquery | SELECT (SELECT AVG(...)) |
Add a single computed value per row |
| Correlated subquery | Inner query references outer alias | Row-by-row computation — use CTE instead |
| Basic CTE | WITH name AS (SELECT ...) SELECT ... |
Name a subquery for readability |
| Multiple CTEs | WITH a AS (...), b AS (...) SELECT ... |
Break a complex query into steps |
Subquery vs CTE — choose by situation:
Subquery in WHERE → filter based on another query's result
Subquery in FROM → use a derived table (must alias it)
Subquery in SELECT → add one computed value per row
CTE → name a subquery and reuse it cleanly
When to prefer CTE over subquery:
✅ Logic is reused more than once
✅ Query has multiple steps that build on each other
✅ Correlated subquery is slowing things down
✅ Readability matters (debugging, collaboration)
NOT IN vs NOT EXISTS:
NOT IN → breaks silently if subquery has NULLs
NOT EXISTS → safe with NULLs — use this by default
CTE execution order:
WITH cte1 AS (...) ← defined first
WITH cte2 AS (...) ← can reference cte1
SELECT FROM cte1 ← final query uses both
JOIN cte2 ...