# zip pairs up elements from multiple iterables position by position
# and returns an iterator of tuples
names = ['Alice', 'Bob', 'Carol']
scores = [95, 82, 78]
list(zip(names, scores))
# → [('Alice', 95), ('Bob', 82), ('Carol', 78)]

Think of zip like a physical zipper:
names → A B C
↕ ↕ ↕
scores → 95 82 78
result → (A,95) (B,82) (C,78)
titles = ['Squid Game', 'Ozark', 'Roma']
ratings = [8.0, 8.4, 7.7]
for title, rating in zip(titles, ratings):
    print(f"{title}: {rating}")
# Squid Game: 8.0
# Ozark: 8.4
# Roma: 7.7

titles = ['Squid Game', 'Ozark', 'Roma']
ratings = [8.0, 8.4, 7.7]
genres = ['Thriller', 'Crime', 'Drama']
years = [2021, 2017, 2018]
for title, rating, genre, year in zip(titles, ratings, genres, years):
    print(f"{year} | {title} ({genre}) — {rating}")
# 2021 | Squid Game (Thriller) — 8.0
# 2017 | Ozark (Crime) — 8.4
# 2018 | Roma (Drama) — 7.7

long = [1, 2, 3, 4, 5]
short = ['a', 'b', 'c']
list(zip(long, short))
# → [(1,'a'), (2,'b'), (3,'c')] ← 4 and 5 dropped silently
⚠️ If the lengths differ, the extra elements are silently dropped. Use itertools.zip_longest() if you need all elements, or pass strict=True (Python 3.10+) to make zip raise a ValueError on a length mismatch.
from itertools import zip_longest
list(zip_longest(long, short, fillvalue='N/A'))
# → [(1,'a'), (2,'b'), (3,'c'), (4,'N/A'), (5,'N/A')]

# Common pattern — much cleaner than a manual loop
columns = ['title', 'genre', 'score', 'year']
values = ['Squid Game', 'Thriller', 8.0, 2021]
record = dict(zip(columns, values))
# → {'title': 'Squid Game', 'genre': 'Thriller', 'score': 8.0, 'year': 2021}
# Build lookup table
titles = ['Squid Game', 'Ozark', 'Roma']
scores = [8.0, 8.4, 7.7]
score_lookup = dict(zip(titles, scores))
# → {'Squid Game': 8.0, 'Ozark': 8.4, 'Roma': 7.7}
score_lookup['Ozark'] # → 8.4

headers = ['title', 'genre', 'score']
rows = [
['Squid Game', 'Thriller', 8.0],
['Ozark', 'Crime', 8.4],
['Roma', 'Drama', 7.7],
]
# Convert each row into a dict using zip
records = [dict(zip(headers, row)) for row in rows]
# → [{'title': 'Squid Game', 'genre': 'Thriller', 'score': 8.0},
# {'title': 'Ozark', 'genre': 'Crime', 'score': 8.4},
# {'title': 'Roma', 'genre': 'Drama', 'score': 7.7}]

predicted = [8.0, 7.5, 9.0, 6.5]
actual = [7.8, 7.9, 8.8, 6.0]
# Calculate error per prediction
errors = [abs(p - a) for p, a in zip(predicted, actual)]
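# → approximately [0.2, 0.4, 0.2, 0.5] (float subtraction leaves tiny rounding errors, e.g. 0.20000000000000018; wrap in round(..., 2) for clean output)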
# Flag where prediction was too high
overestimates = [(p, a) for p, a in zip(predicted, actual) if p > a]
# → [(8.0, 7.8), (9.0, 8.8), (6.5, 6.0)]
# Find improvements between two time periods
last_week = [6.5, 7.0, 8.0, 5.5]
this_week = [7.0, 6.8, 8.2, 5.5]
improved = [t > l for l, t in zip(last_week, this_week)]
# → [True, False, True, False]

import pandas as pd
df = pd.DataFrame({
'title': ['Squid Game', 'Ozark', 'Roma'],
'score': [8.0, 8.4, 7.7]
})
# zip column names with their data type for reporting
for col, dtype in zip(df.columns, df.dtypes):
    print(f"{col:15} → {dtype}")
# title           → object
# score           → float64

prices = [9.99, 14.99, 4.99, 19.99]
quantities = [3, 1, 5, 2]
# Total cost per item
totals = [p * q for p, q in zip(prices, quantities)]
# → [29.97, 14.99, 24.95, 39.98] (shown rounded to 2 decimals; raw floats may carry tiny rounding errors)
grand_total = sum(totals)
# → 109.89 (rounded to 2 decimals)
# Same with NumPy — zip not needed (vectorized)
import numpy as np
(np.array(prices) * np.array(quantities)).sum() # → 109.89

matrix = [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9]
]
# Unpack rows and zip them together → columns become rows
transposed = list(zip(*matrix))
# → [(1, 4, 7),
# (2, 5, 8),
# (3, 6, 9)]
# Convert to list of lists
transposed = [list(row) for row in zip(*matrix)]

scores = [6.0, 7.5, 8.0, 7.0, 9.0, 8.5]
# Consecutive pairs — zip list with itself offset by 1
pairs = list(zip(scores, scores[1:]))
# → [(6.0,7.5), (7.5,8.0), (8.0,7.0), (7.0,9.0), (9.0,8.5)]
# Calculate change between consecutive scores
changes = [b - a for a, b in zip(scores, scores[1:])]
# → [1.5, 0.5, -1.0, 2.0, -0.5]
# Find where score dropped
drops = [(i+1, a, b) for i, (a, b) in enumerate(zip(scores, scores[1:])) if b < a]
# → [(3, 8.0, 7.0), (5, 9.0, 8.5)]
# each tuple is (index, from, to)

titles = ['Squid Game', 'Ozark', 'Roma', 'Bird Box']
# With enumerate (more common for just indexing)
for i, title in enumerate(titles, start=1):
    print(f"{i}. {title}")
# With zip — when you have a SEPARATE rank/label list
ranks = ['#1', '#2', '#3', '#4']
for rank, title in zip(ranks, titles):
    print(f"{rank} {title}")
# Custom labels
labels = ['Gold', 'Silver', 'Bronze', 'Honorable Mention']
for label, title in zip(labels, titles):
    print(f"{label}: {title}")

titles = ['Squid Game', 'Ozark', 'Roma']
ratings = [8.0, 8.4, 7.7]
for i, (title, rating) in enumerate(zip(titles, ratings), start=1):
    print(f"Rank {i}: {title} — {rating}")
# Rank 1: Squid Game — 8.0
# Rank 2: Ozark — 8.4
# Rank 3: Roma — 7.7

import pandas as pd
titles = ['Squid Game', 'Ozark', 'Roma', 'Bird Box']
genres = ['Thriller', 'Crime', 'Drama', 'Sci-Fi']
scores = [8.0, 8.4, 7.7, 6.6]
years = [2021, 2017, 2018, 2018]
df = pd.DataFrame(zip(titles, genres, scores, years),
                  columns=['title', 'genre', 'score', 'year'])

Result:
| title | genre | score | year |
|---|---|---|---|
| Squid Game | Thriller | 8.0 | 2021 |
| Ozark | Crime | 8.4 | 2017 |
| Roma | Drama | 7.7 | 2018 |
| Bird Box | Sci-Fi | 6.6 | 2018 |
pairs = [('Squid Game', 8.0), ('Ozark', 8.4), ('Roma', 7.7)]
# Unzip using * (splat) operator
titles, scores = zip(*pairs)
print(titles) # → ('Squid Game', 'Ozark', 'Roma')
print(scores) # → (8.0, 8.4, 7.7)
# Convert to lists
titles = list(titles)
scores = list(scores)

df = pd.DataFrame({'title': ['Squid Game', 'Ozark', 'Roma']})
new_cols = ['genre', 'score', 'year']
new_data = [
['Thriller', 'Crime', 'Drama'], # genres
[8.0, 8.4, 7.7], # scores
[2021, 2017, 2018] # years
]
for col, data in zip(new_cols, new_data):
    df[col] = data

import numpy as np
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
for x, y in zip(a, b):
    print(x, y)
# 1 4
# 2 5
# 3 6
# ⚠️ Prefer vectorized operations over zip for large NumPy arrays
result_zip = [x + y for x, y in zip(a, b)] # slow — Python loop
result_vec = a + b # fast — vectorized

df = pd.DataFrame({
'title': ['Squid Game', 'Ozark', 'Roma'],
'score': [8.0, 8.4, 7.7],
'year': [2021, 2017, 2018]
})
# Iterate two columns together
for title, score in zip(df['title'], df['score']):
    print(f"{title}: {score}")
# Build a new column combining two existing columns
df['label'] = [f"{t} ({y})" for t, y in zip(df['title'], df['year'])]
# → ['Squid Game (2021)', 'Ozark (2017)', 'Roma (2018)']

# ✅ Iterating two lists in lockstep
for name, score in zip(names, scores): ...
# ✅ Building dicts from parallel lists
dict(zip(keys, values))
# ✅ Comparing lists element by element
[a == b for a, b in zip(list1, list2)]
# ✅ Transposing a matrix of lists
list(zip(*matrix))
# ✅ Sliding window / consecutive pairs
zip(data, data[1:])
# ✅ Creating a DataFrame from separate lists
pd.DataFrame(zip(col1, col2, col3), columns=[...])
# ✅ Unzipping list of tuples
col1, col2 = zip(*list_of_pairs)

# ❌ Large NumPy arrays — use vectorized operations
[x * y for x, y in zip(a, b)] # slow
a * b # fast ✅
# ❌ Large pandas columns — use vectorized ops
[f(x, y) for x, y in zip(df['a'], df['b'])] # slow
df['a'] + df['b'] # fast ✅
df.apply(lambda row: ..., axis=1) # ⚠️ runs the function row by row in Python — not a vectorized speed-up over zip
# ❌ When lengths may differ and you need all elements
list(zip(long, short)) # silently drops elements
list(zip_longest(long, short)) # ✅ use this instead
# ❌ You only need one list — use a regular for loop or enumerate
for i, name in enumerate(names): ... # cleaner than zip(range(...), names)

| Goal | Pattern |
|---|---|
| Iterate two lists together | for a, b in zip(list1, list2) |
| Three or more lists | for a, b, c in zip(l1, l2, l3) |
| Dict from two lists | dict(zip(keys, values)) |
| Rows of data → dicts | [dict(zip(headers, row)) for row in rows] |
| Compare element-wise | [a == b for a, b in zip(l1, l2)] |
| Consecutive pairs | zip(data, data[1:]) |
| Transpose matrix | list(zip(*matrix)) |
| Unzip pairs | col1, col2 = zip(*pairs) |
| With index | enumerate(zip(l1, l2)) |
| Handle unequal lengths | zip_longest(l1, l2, fillvalue=...) |
| DataFrame from lists | pd.DataFrame(zip(l1,l2,l3), columns=[...]) |
zip() → pairs up elements by position across multiple iterables
returns an ITERATOR — wrap in list() to see values
stops at the SHORTEST iterable — silent truncation
zip(*iterable) → the UNZIP pattern
pairs → separate lists (transpose)
When zip shines:
Parallel lists that belong together conceptually
Building dicts on the fly
Comparing or combining two sequences
Sliding windows with zip(data, data[1:])
When to skip zip:
NumPy arrays → use vectorized math instead
Pandas columns → use vectorized ops (apply() with axis=1 is still a row-by-row Python loop)
Unequal lengths → use zip_longest()
One list only → use enumerate() or plain for loop
zip is lazy — it produces tuples one at a time.
Wrap in list() to materialize, or use directly in for loops.