Created
December 16, 2025 10:54
-
-
Save LxYuan0420/1cfe75637dbb811c6b88fef605ac88e4 to your computer and use it in GitHub Desktop.
Postgres bigint PK vs UUIDv4 PK: locality + buffers demo (psql script)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Compare bigint PK vs UUIDv4 PK behavior in Postgres. | |
| -- | |
| -- What this script tries to show (toy but reproducible): | |
| -- 1) UUIDv4 PK indexes are bigger (uuid is 16 bytes vs bigint 8 bytes). | |
| -- 2) UUIDv4 insertion order doesn't correlate with PK order, so "keyset pagination" | |
| -- style access patterns touch far more heap pages (worse locality => more buffers). | |
| -- | |
| -- It creates two tables with identical payloads: | |
| -- - bigint_pk: sequential IDENTITY primary key | |
| -- - uuidv4_pk: random gen_random_uuid() primary key | |
| -- | |
| -- Then it prints: | |
| -- - table + PK index sizes | |
| -- - pg_stats correlation for id (1.0 = strongly ordered on disk, ~0 = random) | |
| -- - EXPLAIN (ANALYZE, BUFFERS) for a few read/write patterns | |
| -- | |
| -- Note on "Index Only Scan" + Heap Fetches: | |
| -- - If the visibility map isn't set, Postgres still has to visit the heap to check | |
| -- tuple visibility, so you may see Heap Fetches even for an index-only plan. | |
| -- - Optional: pass `-v vacuum=1` to VACUUM (ANALYZE) first (often reduces Heap Fetches). | |
| -- | |
| -- Run this in a scratch database: | |
| -- $ createdb uuid_pk_bench | |
| -- $ psql -d uuid_pk_bench -f scripts/uuid_pk_bench.sql | |
| -- | |
| -- Optional knobs: | |
| -- $ psql -d uuid_pk_bench -f scripts/uuid_pk_bench.sql -v nrows=200000 -v limit_rows=20000 | |
| -- | |
| -- Output: | |
| -- - table + index sizes | |
| -- - pg_stats correlation (how well key order matches physical order) | |
| -- - EXPLAIN (ANALYZE, BUFFERS) for a few queries | |
| \set ON_ERROR_STOP on | |
| \timing on | |
| \pset pager off | |
| \if :{?nrows} | |
| \else | |
| \set nrows 200000 | |
| \endif | |
| \if :{?limit_rows} | |
| \else | |
| \set limit_rows 20000 | |
| \endif | |
| \echo 'nrows =' :nrows | |
| \echo 'limit_rows =' :limit_rows | |
| \echo '' | |
| \echo 'This measures: index size, correlation, and buffers for ORDER BY id LIMIT patterns.' | |
| \echo 'Tip: add `-v vacuum=1` to VACUUM first (often reduces Heap Fetches for Index Only Scan).' | |
| \echo '' | |
| CREATE EXTENSION IF NOT EXISTS pgcrypto; | |
| DROP SCHEMA IF EXISTS uuid_pk_bench CASCADE; | |
| CREATE SCHEMA uuid_pk_bench; | |
| SET search_path TO uuid_pk_bench; | |
| CREATE TABLE bigint_pk ( | |
| id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, | |
| payload text NOT NULL | |
| ); | |
| CREATE TABLE uuidv4_pk ( | |
| id uuid PRIMARY KEY DEFAULT gen_random_uuid(), | |
| payload text NOT NULL | |
| ); | |
| \echo '== Insert bigint_pk ==' | |
| INSERT INTO bigint_pk (payload) | |
| SELECT repeat('x', 100) | |
| FROM generate_series(1, :nrows); | |
| \echo '' | |
| \echo '== Insert uuidv4_pk ==' | |
| INSERT INTO uuidv4_pk (payload) | |
| SELECT repeat('x', 100) | |
| FROM generate_series(1, :nrows); | |
| ANALYZE bigint_pk; | |
| ANALYZE uuidv4_pk; | |
| \if :{?vacuum} | |
| \echo '' | |
| \echo '== VACUUM (ANALYZE) to help index-only scans avoid heap visibility checks ==' | |
| VACUUM (ANALYZE) bigint_pk; | |
| VACUUM (ANALYZE) uuidv4_pk; | |
| \endif | |
| \echo '' | |
| \echo '== Sizes (table + pkey index) ==' | |
| SELECT | |
| c.relname, | |
| pg_size_pretty(pg_relation_size(c.oid)) AS size | |
| FROM pg_class c | |
| JOIN pg_namespace n ON n.oid = c.relnamespace | |
| WHERE n.nspname = 'uuid_pk_bench' | |
| AND c.relname IN ( | |
| 'bigint_pk', | |
| 'uuidv4_pk', | |
| 'bigint_pk_pkey', | |
| 'uuidv4_pk_pkey' | |
| ) | |
| ORDER BY pg_relation_size(c.oid) DESC; | |
| \echo '' | |
| \echo '== Correlation (how well physical order matches key order) ==' | |
| SELECT | |
| tablename, | |
| attname, | |
| correlation | |
| FROM pg_stats | |
| WHERE schemaname = 'uuid_pk_bench' | |
| AND tablename IN ('bigint_pk', 'uuidv4_pk') | |
| AND attname = 'id'; | |
| \echo '' | |
| \echo '== Index-only scan (id only) ==' | |
| \echo '-- bigint_pk' | |
| EXPLAIN (ANALYZE, BUFFERS) | |
| SELECT id | |
| FROM bigint_pk | |
| ORDER BY id | |
| LIMIT :limit_rows; | |
| \echo '' | |
| \echo '-- uuidv4_pk' | |
| EXPLAIN (ANALYZE, BUFFERS) | |
| SELECT id | |
| FROM uuidv4_pk | |
| ORDER BY id | |
| LIMIT :limit_rows; | |
| \echo '' | |
| \echo '== Index scan + heap fetch (payload) ==' | |
| \echo '-- bigint_pk' | |
| EXPLAIN (ANALYZE, BUFFERS) | |
| SELECT payload | |
| FROM bigint_pk | |
| ORDER BY id | |
| LIMIT :limit_rows; | |
| \echo '' | |
| \echo '-- uuidv4_pk' | |
| EXPLAIN (ANALYZE, BUFFERS) | |
| SELECT payload | |
| FROM uuidv4_pk | |
| ORDER BY id | |
| LIMIT :limit_rows; | |
| \echo '' | |
| \echo '== Update the same LIMIT set (forces heap writes) ==' | |
| \echo '-- bigint_pk' | |
| EXPLAIN (ANALYZE, BUFFERS) | |
| WITH target AS ( | |
| SELECT id | |
| FROM bigint_pk | |
| ORDER BY id | |
| LIMIT :limit_rows | |
| ) | |
| UPDATE bigint_pk t | |
| SET payload = md5(t.payload) | |
| FROM target | |
| WHERE t.id = target.id; | |
| \echo '' | |
| \echo '-- uuidv4_pk' | |
| EXPLAIN (ANALYZE, BUFFERS) | |
| WITH target AS ( | |
| SELECT id | |
| FROM uuidv4_pk | |
| ORDER BY id | |
| LIMIT :limit_rows | |
| ) | |
| UPDATE uuidv4_pk t | |
| SET payload = md5(t.payload) | |
| FROM target | |
| WHERE t.id = target.id; | |
| \echo '' | |
| \echo '== Done ==' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment