Skip to content

Instantly share code, notes, and snippets.

@tdfirth
Last active April 8, 2024 03:27
Show Gist options
  • Select an option

  • Save tdfirth/49e56c1043f8a4ed1d931e7225a30f62 to your computer and use it in GitHub Desktop.

Select an option

Save tdfirth/49e56c1043f8a4ed1d931e7225a30f62 to your computer and use it in GitHub Desktop.

Revisions

  1. tdfirth revised this gist Feb 22, 2024. 1 changed file with 38 additions and 0 deletions.
    38 changes: 38 additions & 0 deletions 8_asof_plan.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,38 @@
    ┌─────────────────────────────┐
    │┌───────────────────────────┐│
    ││ Physical Plan ││
    │└───────────────────────────┘│
    └─────────────────────────────┘
    ┌───────────────────────────┐
    │ ORDER_BY │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ORDERS: │
    │ o."timestamp" DESC │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id │
    │ order_id │
    │ post_id │
    │ order_time │
    │ post_view_time │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ ASOF_JOIN │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ INNER │
    │ user_id = user_id ├──────────────┐
    │ timestamp >= timestamp │ │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
    │ EC: 23 │ │
    └─────────────┬─────────────┘ │
    ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
    │READ_CSV_AUTO (MULTI-T... ││READ_CSV_AUTO (MULTI-T... │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id ││ user_id │
    │ timestamp ││ timestamp │
    │ id ││ id │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 7 ││ EC: 23 │
    └───────────────────────────┘└───────────────────────────┘
  2. tdfirth revised this gist Feb 22, 2024. 1 changed file with 63 additions and 0 deletions.
    63 changes: 63 additions & 0 deletions 7_window_plan.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@
    ┌─────────────────────────────┐
    │┌───────────────────────────┐│
    ││ Physical Plan ││
    │└───────────────────────────┘│
    └─────────────────────────────┘
    ┌───────────────────────────┐
    │ ORDER_BY │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ORDERS: │
    │ candidates.order_time DESC│
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id │
    │ order_id │
    │ post_id │
    │ order_time │
    │ post_view_time │
    │ rn │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ #0 │
    │ #1 │
    │ #2 │
    │ #4 │
    │ #5 │
    │ #6 │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ FILTER │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ (rn = 1) │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 23 │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ WINDOW │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ROW_NUMBER() OVER │
    │(PARTITION BY id ORDER... │
    │ DESC NULLS LAST) │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ HASH_JOIN │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ INNER │
    │ user_id = user_id ├──────────────┐
    │ timestamp <= timestamp │ │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
    │ EC: 23 │ │
    └─────────────┬─────────────┘ │
    ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
    │READ_CSV_AUTO (MULTI-T... ││READ_CSV_AUTO (MULTI-T... │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id ││ user_id │
    │ timestamp ││ timestamp │
    │ id ││ id │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 23 ││ EC: 7 │
    └───────────────────────────┘└───────────────────────────┘
  3. tdfirth revised this gist Feb 22, 2024. 2 changed files with 0 additions and 101 deletions.
    38 changes: 0 additions & 38 deletions 7_asof_plan.md
    Original file line number Diff line number Diff line change
    @@ -1,38 +0,0 @@
    ┌─────────────────────────────┐
    │┌───────────────────────────┐│
    ││ Physical Plan ││
    │└───────────────────────────┘│
    └─────────────────────────────┘
    ┌───────────────────────────┐
    │ ORDER_BY │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ORDERS: │
    │ o."timestamp" DESC │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id │
    │ order_id │
    │ post_id │
    │ order_time │
    │ post_view_time │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ ASOF_JOIN │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ INNER │
    │ user_id = user_id ├──────────────┐
    │ timestamp >= timestamp │ │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
    │ EC: 23 │ │
    └─────────────┬─────────────┘ │
    ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
    │READ_CSV_AUTO (MULTI-T... ││READ_CSV_AUTO (MULTI-T... │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id ││ user_id │
    │ timestamp ││ timestamp │
    │ id ││ id │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 7 ││ EC: 23 │
    └───────────────────────────┘└───────────────────────────┘
    63 changes: 0 additions & 63 deletions 8_window_plan.md
    Original file line number Diff line number Diff line change
    @@ -1,63 +0,0 @@
    ┌─────────────────────────────┐
    │┌───────────────────────────┐│
    ││ Physical Plan ││
    │└───────────────────────────┘│
    └─────────────────────────────┘
    ┌───────────────────────────┐
    │ ORDER_BY │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ORDERS: │
    │ candidates.order_time DESC│
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id │
    │ order_id │
    │ post_id │
    │ order_time │
    │ post_view_time │
    │ rn │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    #0
    #1
    #2
    #4
    #5
    #6
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ FILTER │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ (rn = 1) │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 23 │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ WINDOW │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ROW_NUMBER() OVER │
    │(PARTITION BY id ORDER... │
    │ DESC NULLS LAST) │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ HASH_JOIN │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ INNER │
    │ user_id = user_id ├──────────────┐
    │ timestamp <= timestamp │ │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
    │ EC: 23 │ │
    └─────────────┬─────────────┘ │
    ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
    │READ_CSV_AUTO (MULTI-T... ││READ_CSV_AUTO (MULTI-T... │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id ││ user_id │
    │ timestamp ││ timestamp │
    │ id ││ id │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 23 ││ EC: 7 │
    └───────────────────────────┘└───────────────────────────┘
  4. tdfirth revised this gist Feb 22, 2024. 2 changed files with 101 additions and 0 deletions.
    38 changes: 38 additions & 0 deletions 7_asof_plan.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,38 @@
    ┌─────────────────────────────┐
    │┌───────────────────────────┐│
    ││ Physical Plan ││
    │└───────────────────────────┘│
    └─────────────────────────────┘
    ┌───────────────────────────┐
    │ ORDER_BY │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ORDERS: │
    │ o."timestamp" DESC │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id │
    │ order_id │
    │ post_id │
    │ order_time │
    │ post_view_time │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ ASOF_JOIN │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ INNER │
    │ user_id = user_id ├──────────────┐
    │ timestamp >= timestamp │ │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
    │ EC: 23 │ │
    └─────────────┬─────────────┘ │
    ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
    │READ_CSV_AUTO (MULTI-T... ││READ_CSV_AUTO (MULTI-T... │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id ││ user_id │
    │ timestamp ││ timestamp │
    │ id ││ id │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 7 ││ EC: 23 │
    └───────────────────────────┘└───────────────────────────┘
    63 changes: 63 additions & 0 deletions 8_window_plan.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@
    ┌─────────────────────────────┐
    │┌───────────────────────────┐│
    ││ Physical Plan ││
    │└───────────────────────────┘│
    └─────────────────────────────┘
    ┌───────────────────────────┐
    │ ORDER_BY │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ORDERS: │
    │ candidates.order_time DESC│
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id │
    │ order_id │
    │ post_id │
    │ order_time │
    │ post_view_time │
    │ rn │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ PROJECTION │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    #0
    #1
    #2
    #4
    #5
    #6
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ FILTER │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ (rn = 1) │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 23 │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ WINDOW │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ ROW_NUMBER() OVER │
    │(PARTITION BY id ORDER... │
    │ DESC NULLS LAST) │
    └─────────────┬─────────────┘
    ┌─────────────┴─────────────┐
    │ HASH_JOIN │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ INNER │
    │ user_id = user_id ├──────────────┐
    │ timestamp <= timestamp │ │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
    │ EC: 23 │ │
    └─────────────┬─────────────┘ │
    ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
    │READ_CSV_AUTO (MULTI-T... ││READ_CSV_AUTO (MULTI-T... │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ user_id ││ user_id │
    │ timestamp ││ timestamp │
    │ id ││ id │
    │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
    │ EC: 23 ││ EC: 7 │
    └───────────────────────────┘└───────────────────────────┘
  5. tdfirth renamed this gist Feb 22, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  6. tdfirth renamed this gist Feb 22, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  7. tdfirth revised this gist Feb 22, 2024. 2 changed files with 1 addition and 5 deletions.
    1 change: 1 addition & 0 deletions Asof Queries in SQL
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    How to think about asof queries.
    5 changes: 0 additions & 5 deletions Asof SQL queries
    Original file line number Diff line number Diff line change
    @@ -1,5 +0,0 @@
    # Asof SQL queries

    I've seen a few people get confused about asof queries recently, and they're starting to land in a couple of mainstream platforms! Snowflake added support very recently (it's still in closed beta) and duckdb added them a little while back.

    They've been around for a while though, the first time I encountered them was with a system called q/kdb+, which is commonly used in hedge funds and other financial organizations. Matching up data on timestamps is extremely common in that domain, so it's no surprise that tooling there has made this first class for some time.
  8. tdfirth revised this gist Feb 22, 2024. 7 changed files with 5 additions and 0 deletions.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    5 changes: 5 additions & 0 deletions Asof SQL queries
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,5 @@
    # Asof SQL queries

    I've seen a few people get confused about asof queries recently, and they're starting to land in a couple of mainstream platforms! Snowflake added support very recently (it's still in closed beta) and duckdb added them a little while back.

    They've been around for a while though, the first time I encountered them was with a system called q/kdb+, which is commonly used in hedge funds and other financial organizations. Matching up data on timestamps is extremely common in that domain, so it's no surprise that tooling there has made this first class for some time.
  9. tdfirth renamed this gist Feb 22, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  10. tdfirth revised this gist Feb 22, 2024. 6 changed files with 0 additions and 0 deletions.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
  11. tdfirth created this gist Feb 22, 2024.
    7 changes: 7 additions & 0 deletions asof_output.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,7 @@
    user_id,order_id,post_id,order_time,post_view_time
    1,1,2,2024-02-21,2024-02-20
    2,3,7,2024-02-20,2024-02-20
    3,4,13,2024-02-19,2024-02-18
    4,6,18,2024-02-18,2024-02-18
    1,2,4,2024-02-17,2024-02-16
    3,5,15,2024-02-15,2024-02-14
    11 changes: 11 additions & 0 deletions asof_query.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,11 @@
    select
    o.user_id as user_id,
    o.id as order_id,
    p.id as post_id,
    o.timestamp as order_time,
    p.timestamp as post_view_time
    from "orders.csv" o
    asof join "post_views.csv" p
    on o.user_id = p.user_id
    and o.timestamp >= p.timestamp
    order by order_time desc;
    7 changes: 7 additions & 0 deletions orders.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,7 @@
    id,user_id,timestamp
    1,1,2/21/2024
    2,1,2/17/2024
    3,2,2/20/2024
    4,3,2/19/2024
    5,3,2/15/2024
    6,4,2/18/2024
    26 changes: 26 additions & 0 deletions post_views.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,26 @@
    id,user_id,post,timestamp
    1,1,1,2/22/2024
    2,1,2,2/20/2024
    3,1,3,2/18/2024
    4,1,4,2/16/2024
    5,1,5,2/14/2024
    6,2,1,2/22/2024
    7,2,2,2/20/2024
    8,2,3,2/18/2024
    9,2,4,2/16/2024
    10,2,5,2/14/2024
    11,3,1,2/22/2024
    12,3,2,2/20/2024
    13,3,3,2/18/2024
    14,3,4,2/16/2024
    15,3,5,2/14/2024
    16,4,1,2/22/2024
    17,4,2,2/20/2024
    18,4,3,2/18/2024
    19,4,4,2/16/2024
    20,4,5,2/14/2024
    21,5,1,2/22/2024
    22,5,2,2/20/2024
    23,5,3,2/18/2024
    24,5,4,2/16/2024
    25,5,5,2/14/2024
    15 changes: 15 additions & 0 deletions window.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,15 @@
    with candidates as (
    select
    o.user_id as user_id,
    o.id as order_id,
    p.id as post_id,
    o.timestamp as order_time,
    p.timestamp as post_view_time,
    row_number() over (partition by o.id order by p.timestamp desc) rn
    from "orders.csv" o
    inner join "post_views.csv" p
    on o.user_id = p.user_id and o.timestamp >= p.timestamp
    )
    select * from candidates
    where rn = 1
    order by order_time desc;
    7 changes: 7 additions & 0 deletions window_output.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,7 @@
    user_id,order_id,post_id,order_time,post_view_time,rn
    1,1,2,2024-02-21,2024-02-20,1
    2,3,7,2024-02-20,2024-02-20,1
    3,4,13,2024-02-19,2024-02-18,1
    4,6,18,2024-02-18,2024-02-18,1
    1,2,4,2024-02-17,2024-02-16,1
    3,5,15,2024-02-15,2024-02-14,1