Skip to content

Instantly share code, notes, and snippets.

@LxYuan0420
Created December 16, 2025 10:54
Show Gist options
  • Select an option

  • Save LxYuan0420/1cfe75637dbb811c6b88fef605ac88e4 to your computer and use it in GitHub Desktop.

Select an option

Save LxYuan0420/1cfe75637dbb811c6b88fef605ac88e4 to your computer and use it in GitHub Desktop.
Postgres bigint PK vs UUIDv4 PK: locality + buffers demo (psql script)
-- Compare bigint PK vs UUIDv4 PK behavior in Postgres.
--
-- What this script tries to show (toy but reproducible):
-- 1) UUIDv4 PK indexes are bigger (uuid is 16 bytes vs bigint 8 bytes).
-- 2) UUIDv4 insertion order doesn't correlate with PK order, so "keyset pagination"
-- style access patterns touch far more heap pages (worse locality => more buffers).
--
-- It creates two tables with identical payloads:
-- - bigint_pk: sequential IDENTITY primary key
-- - uuidv4_pk: random gen_random_uuid() primary key
--
-- Then it prints:
-- - table + PK index sizes
-- - pg_stats correlation for id (1.0 = strongly ordered on disk, ~0 = random)
-- - EXPLAIN (ANALYZE, BUFFERS) for a few read/write patterns
--
-- Note on "Index Only Scan" + Heap Fetches:
-- - If the visibility map isn't set, Postgres still has to visit the heap to check
-- tuple visibility, so you may see Heap Fetches even for an index-only plan.
-- - Optional: pass `-v vacuum=1` to VACUUM (ANALYZE) first (often reduces Heap Fetches).
--
-- Run this in a scratch database:
-- $ createdb uuid_pk_bench
-- $ psql -d uuid_pk_bench -f scripts/uuid_pk_bench.sql
--
-- Optional knobs:
-- $ psql -d uuid_pk_bench -f scripts/uuid_pk_bench.sql -v nrows=200000 -v limit_rows=20000
--
-- Output:
-- - table + index sizes
-- - pg_stats correlation (how well key order matches physical order)
-- - EXPLAIN (ANALYZE, BUFFERS) for a few queries
\set ON_ERROR_STOP on
\timing on
\pset pager off
\if :{?nrows}
\else
\set nrows 200000
\endif
\if :{?limit_rows}
\else
\set limit_rows 20000
\endif
\echo 'nrows =' :nrows
\echo 'limit_rows =' :limit_rows
\echo ''
\echo 'This measures: index size, correlation, and buffers for ORDER BY id LIMIT patterns.'
\echo 'Tip: add `-v vacuum=1` to VACUUM first (often reduces Heap Fetches for Index Only Scan).'
\echo ''
CREATE EXTENSION IF NOT EXISTS pgcrypto;
DROP SCHEMA IF EXISTS uuid_pk_bench CASCADE;
CREATE SCHEMA uuid_pk_bench;
SET search_path TO uuid_pk_bench;
CREATE TABLE bigint_pk (
id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
payload text NOT NULL
);
CREATE TABLE uuidv4_pk (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
payload text NOT NULL
);
\echo '== Insert bigint_pk =='
INSERT INTO bigint_pk (payload)
SELECT repeat('x', 100)
FROM generate_series(1, :nrows);
\echo ''
\echo '== Insert uuidv4_pk =='
INSERT INTO uuidv4_pk (payload)
SELECT repeat('x', 100)
FROM generate_series(1, :nrows);
ANALYZE bigint_pk;
ANALYZE uuidv4_pk;
\if :{?vacuum}
\echo ''
\echo '== VACUUM (ANALYZE) to help index-only scans avoid heap visibility checks =='
VACUUM (ANALYZE) bigint_pk;
VACUUM (ANALYZE) uuidv4_pk;
\endif
\echo ''
\echo '== Sizes (table + pkey index) =='
SELECT
c.relname,
pg_size_pretty(pg_relation_size(c.oid)) AS size
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'uuid_pk_bench'
AND c.relname IN (
'bigint_pk',
'uuidv4_pk',
'bigint_pk_pkey',
'uuidv4_pk_pkey'
)
ORDER BY pg_relation_size(c.oid) DESC;
\echo ''
\echo '== Correlation (how well physical order matches key order) =='
SELECT
tablename,
attname,
correlation
FROM pg_stats
WHERE schemaname = 'uuid_pk_bench'
AND tablename IN ('bigint_pk', 'uuidv4_pk')
AND attname = 'id';
\echo ''
\echo '== Index-only scan (id only) =='
\echo '-- bigint_pk'
EXPLAIN (ANALYZE, BUFFERS)
SELECT id
FROM bigint_pk
ORDER BY id
LIMIT :limit_rows;
\echo ''
\echo '-- uuidv4_pk'
EXPLAIN (ANALYZE, BUFFERS)
SELECT id
FROM uuidv4_pk
ORDER BY id
LIMIT :limit_rows;
\echo ''
\echo '== Index scan + heap fetch (payload) =='
\echo '-- bigint_pk'
EXPLAIN (ANALYZE, BUFFERS)
SELECT payload
FROM bigint_pk
ORDER BY id
LIMIT :limit_rows;
\echo ''
\echo '-- uuidv4_pk'
EXPLAIN (ANALYZE, BUFFERS)
SELECT payload
FROM uuidv4_pk
ORDER BY id
LIMIT :limit_rows;
\echo ''
\echo '== Update the same LIMIT set (forces heap writes) =='
\echo '-- bigint_pk'
EXPLAIN (ANALYZE, BUFFERS)
WITH target AS (
SELECT id
FROM bigint_pk
ORDER BY id
LIMIT :limit_rows
)
UPDATE bigint_pk t
SET payload = md5(t.payload)
FROM target
WHERE t.id = target.id;
\echo ''
\echo '-- uuidv4_pk'
EXPLAIN (ANALYZE, BUFFERS)
WITH target AS (
SELECT id
FROM uuidv4_pk
ORDER BY id
LIMIT :limit_rows
)
UPDATE uuidv4_pk t
SET payload = md5(t.payload)
FROM target
WHERE t.id = target.id;
\echo ''
\echo '== Done =='
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment