import numpy as np

# Sample arrays used throughout this reference.
arr = np.array([1, 2, 3, 4, 5])
arr_2d = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])
floats = np.array([1.5, 2.7, 3.2, 4.8, 5.1])
mixed = np.array([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])

# --- np.sum — total of all elements ---
np.sum(arr)               # → 15
arr.sum()                 # → 15 (method form — same result)

# Sum of floats
np.sum(floats)            # → 17.3

# Sum of 2D array — all elements
np.sum(arr_2d)            # → 45

# axis=0 → sum DOWN each column (collapse rows)
np.sum(arr_2d, axis=0)    # → [12, 15, 18]
# col 0: 1+4+7=12
# col 1: 2+5+8=15
# col 2: 3+6+9=18

# axis=1 → sum ACROSS each row (collapse columns)
np.sum(arr_2d, axis=1)    # → [6, 15, 24]
# row 0: 1+2+3=6
# row 1: 4+5+6=15
# row 2: 7+8+9=24

# Axis memory trick:
# axis=0 → collapse ROWS    → result has the shape of a single row
# axis=1 → collapse COLUMNS → result has one value per row (a single column)
# --- np.cumsum — running total ---
np.cumsum(arr)            # → [1, 3, 6, 10, 15]
arr.cumsum()              # → [1, 3, 6, 10, 15]

# Running total across rows of 2D array
np.cumsum(arr_2d, axis=1)
# [[1,  3,  6],
#  [4,  9, 15],
#  [7, 15, 24]]

# --- np.mean — average ---
np.mean(arr)              # → 3.0
arr.mean()                # → 3.0
np.mean(floats)           # → 3.46

# Mean of entire 2D array
np.mean(arr_2d)           # → 5.0
# Mean per column
np.mean(arr_2d, axis=0)   # → [4., 5., 6.]
# Mean per row
np.mean(arr_2d, axis=1)   # → [2., 5., 8.]

# --- np.average — weighted mean ---
scores = np.array([85, 90, 78, 92])
weights = np.array([0.2, 0.3, 0.2, 0.3])  # must sum to 1.0
np.average(scores, weights=weights)       # → 87.2  (85·0.2 + 90·0.3 + 78·0.2 + 92·0.3)

# Without weights — same as np.mean
np.average(scores)        # → 86.25

# np.mean() treats all values equally; np.average() allows weighted
# importance per element.
# --- Moving average via convolution ---
data = np.array([2, 4, 6, 8, 10, 12])
window = 3
moving_avg = np.convolve(data, np.ones(window) / window, mode='valid')
# → [4., 6., 8., 10.] (3-element rolling average)

# --- np.prod — product of all elements ---
np.prod(arr)              # → 120 (1×2×3×4×5)
arr.prod()                # → 120
np.prod(floats)           # → 1.5 × 2.7 × 3.2 × 4.8 × 5.1 → 317.26...

# Product along axis
np.prod(arr_2d, axis=0)   # → [28, 80, 162] (product down each column)
np.prod(arr_2d, axis=1)   # → [6, 120, 504] (product across each row)

# --- np.cumprod — running product ---
np.cumprod(arr)           # → [1, 2, 6, 24, 120]
arr.cumprod()             # → [1, 2, 6, 24, 120]

# --- np.min / np.max — extremes ---
np.min(arr)               # → 1
np.max(arr)               # → 5
arr.min()                 # → 1
arr.max()                 # → 5

# Per column
np.min(arr_2d, axis=0)    # → [1, 2, 3]
np.max(arr_2d, axis=0)    # → [7, 8, 9]
# Per row
np.min(arr_2d, axis=1)    # → [1, 4, 7]
np.max(arr_2d, axis=1)    # → [3, 6, 9]

# --- np.argmin / np.argmax — index of the extreme, not the value ---
np.argmin(arr)            # → 0 (index of value 1)
np.argmax(arr)            # → 4 (index of value 5)
np.argmin(mixed)          # → 1 (first occurrence of min)
np.argmax(mixed)          # → 5 (index of value 9)

# Per column in 2D
np.argmax(arr_2d, axis=0) # → [2, 2, 2] (row index of max per column)
np.argmin(arr_2d, axis=1) # → [0, 0, 0] (col index of min per row)

# --- np.ptp — peak-to-peak range ---
np.ptp(arr)               # → 4 (max - min = 5 - 1)
np.ptp(mixed)             # → 8 (9 - 1)
# Per axis
np.ptp(arr_2d, axis=0)    # → [6, 6, 6] (range per column)
np.ptp(arr_2d, axis=1)    # → [2, 2, 2] (range per row)

# --- Rounding functions ---
vals = np.array([1.2, 2.5, 3.7, -1.5, -2.8])
np.round(vals)            # → [ 1.,  2.,  4., -2., -3.]  banker's rounding
np.round(vals, 1)         # → [ 1.2,  2.5,  3.7, -1.5, -2.8]
np.round(floats, 1)       # → [1.5, 2.7, 3.2, 4.8, 5.1]
np.floor(vals)            # → [ 1.,  2.,  3., -2., -3.]  always round DOWN
np.ceil(vals)             # → [ 2.,  3.,  4., -1., -2.]  always round UP
np.trunc(vals)            # → [ 1.,  2.,  3., -1., -2.]  always toward zero

# round → nearest (ties go to even — "banker's rounding")
# floor → always DOWN (-1.2 → -2)
# ceil  → always UP   (-1.2 → -1)
# trunc → toward zero (-1.8 → -1)
# --- np.median — middle value ---
np.median(arr)            # → 3.0
np.median(mixed)          # → 3.5  (10 values: mean of 5th and 6th sorted → (3+4)/2)

# Median per column
np.median(arr_2d, axis=0) # → [4., 5., 6.]

# --- np.std / np.var — spread ---
np.std(arr)               # → 1.4142...  population std dev
np.var(arr)               # → 2.0        population variance
# Sample std dev (ddof=1 for Bessel's correction)
np.std(arr, ddof=1)       # → 1.5811...
np.var(arr, ddof=1)       # → 2.5
# Per column
np.std(arr_2d, axis=0)    # → [2.449, 2.449, 2.449]
np.std(arr_2d, axis=1)    # → [0.816, 0.816, 0.816]

# ddof=0 → population std dev (default — use when you have all data)
# ddof=1 → sample std dev (use when data is a sample of a population)
# --- np.percentile / np.quantile ---
np.percentile(arr, 50)    # → 3.0 (median)
np.percentile(arr, 25)    # → 2.0 (Q1)
np.percentile(arr, 75)    # → 4.0 (Q3)
np.percentile(arr, [25, 50, 75])  # → [2., 3., 4.]

# IQR — interquartile range
q75, q25 = np.percentile(mixed, [75, 25])
iqr = q75 - q25           # → 2.75  (5.0 - 2.25 with default linear interpolation)

# quantile — same as percentile but uses 0–1 scale
np.quantile(arr, 0.5)     # → 3.0
np.quantile(arr, [0.25, 0.75])    # → [2., 4.]

# --- Correlation & covariance ---
a = np.array([1, 2, 3, 4, 5])
b = np.array([2, 4, 5, 4, 5])

# Correlation coefficient matrix
np.corrcoef(a, b)
# [[1.    , 0.7746]
#  [0.7746, 1.    ]]

# Extract the single correlation value
np.corrcoef(a, b)[0, 1]   # → 0.7745...

# Covariance matrix (ddof=1 by default)
np.cov(a, b)
# [[2.5, 1.5]
#  [1.5, 1.5]]

neg = np.array([-3, -1, 0, 2, -5])
# --- np.abs — absolute value ---
np.abs(neg)               # → [3, 1, 0, 2, 5]
np.absolute(neg)          # → same (np.abs is an alias)

# Distance from mean
np.abs(arr - np.mean(arr))   # → [2., 1., 0., 1., 2.]

# --- Powers, roots, logs ---
np.sqrt(arr)              # → [1., 1.414, 1.732, 2., 2.236]
np.square(arr)            # → [ 1,  4,  9, 16, 25]
np.power(arr, 3)          # → [  1,   8,  27,  64, 125]
np.exp(arr)               # → [ 2.71,  7.38, 20.08, 54.6, 148.4]
np.log(arr)               # → [0., 0.693, 1.098, 1.386, 1.609]  natural log
np.log2(arr)              # → [0., 1., 1.584, 2., 2.321]
np.log10(arr)             # → [0., 0.301, 0.477, 0.602, 0.698]

# --- np.clip — cap values to a range ---
scores = np.array([45, 72, 88, 101, -5, 95])
np.clip(scores, 0, 100)   # → [45, 72, 88, 100, 0, 95]
# values below 0 → 0, values above 100 → 100

# Cap GPA between 0.0 and 4.0
gpa = np.array([3.9, 4.2, -0.1, 3.5])
np.clip(gpa, 0.0, 4.0)    # → [3.9, 4.0, 0.0, 3.5]

vals = np.array([-5, -1, 0, 3, 8])
# --- np.sign — element-wise sign (-1, 0, or +1) ---
np.sign(vals)             # → [-1, -1, 0, 1, 1]

# --- np.unique — distinct values (sorted) ---
np.unique(mixed)          # → [1, 2, 3, 4, 5, 6, 9]
# With counts
values, counts = np.unique(mixed, return_counts=True)
# values → [1, 2, 3, 4, 5, 6, 9]
# counts → [2, 1, 2, 1, 2, 1, 1]
# With indices of first occurrence in the original array
values, indices = np.unique(mixed, return_index=True)

# --- NaN-safe aggregations ---
data_with_nan = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
np.sum(data_with_nan)       # → nan (NaN contaminates result)
np.nansum(data_with_nan)    # → 12.0 (ignores NaN)
np.mean(data_with_nan)      # → nan
np.nanmean(data_with_nan)   # → 3.0
np.min(data_with_nan)       # → nan
np.nanmin(data_with_nan)    # → 1.0
np.nanmax(data_with_nan)    # → 5.0
np.nanstd(data_with_nan)    # → 1.5811...  (population std of [1, 2, 4, 5])
np.nanmedian(data_with_nan) # → 3.0

# Always use nan* variants when your data might contain NaN values.
# One NaN poisons the entire result without them.
| Function | Purpose | Example |
|---|---|---|
| `np.sum(a)` | Total of all elements | `np.sum([1,2,3])` → 6 |
| `np.cumsum(a)` | Running total | `[1, 3, 6, ...]` |
| `np.mean(a)` | Average | `np.mean([1,2,3])` → 2.0 |
| `np.average(a, weights=w)` | Weighted average | scores × weights |
| `np.prod(a)` | Product of all elements | `np.prod([1,2,3])` → 6 |
| `np.cumprod(a)` | Running product | `[1, 2, 6, 24, ...]` |
| `np.min(a)` | Smallest value | |
| `np.max(a)` | Largest value | |
| `np.argmin(a)` | Index of smallest | |
| `np.argmax(a)` | Index of largest | |
| `np.ptp(a)` | Range (max − min) | |
| `np.median(a)` | Middle value | |
| `np.std(a)` | Standard deviation | `ddof=1` for sample |
| `np.var(a)` | Variance | `ddof=1` for sample |
| `np.percentile(a, q)` | Percentile (0–100 scale) | |
| `np.quantile(a, q)` | Quantile (0–1 scale) | |
| `np.round(a, n)` | Round to n decimals | |
| `np.floor(a)` | Round down | |
| `np.ceil(a)` | Round up | |
| `np.trunc(a)` | Truncate toward zero | |
| `np.abs(a)` | Absolute value | |
| `np.sqrt(a)` | Square root | |
| `np.power(a, n)` | Raise to power n | |
| `np.clip(a, lo, hi)` | Cap values in range | |
| `np.sign(a)` | −1, 0, or +1 | |
| `np.unique(a)` | Unique values | |
| `np.nansum(a)` | Sum ignoring NaN | |
| `np.nanmean(a)` | Mean ignoring NaN | |
Axis summary, for arr_2d = [[1, 2, 3],
                            [4, 5, 6],
                            [7, 8, 9]]:

axis=0    → operate DOWN columns → result is one row:    [12, 15, 18]
axis=1    → operate ACROSS rows  → result is one value per row: [6, 15, 24]
axis=None → flatten everything   → one scalar: 45

Think of axis as "which dimension gets collapsed":
- axis=0 collapses the row dimension (rows disappear)
- axis=1 collapses the column dimension (columns disappear)

NaN safety:
- np.sum  → contaminated by NaN → use np.nansum
- np.mean → contaminated by NaN → use np.nanmean
- Always use nan* variants when data may have missing values

ddof for std/var:
- ddof=0 → population (you have ALL the data)
- ddof=1 → sample (your data is a subset)