-- This SQL snippet attributes an interest to traffic source and campaign. -- Any interest event is attributed to previous traffic_source event. -- -- The approach used here, uses 'lag() ignore nulls'. This is supported -- on Redshift, but not in PostgreSQL. This approach yields a shorter, -- more readable, and likely faster query, but it is less portable. with -- Merge traffic_sources and interest events in one event pool. -- * pad with 'null' every field that is not common between these two events -- * add a new field 'type' with event type: interest or traffic_source. merged as ( select 'traffic_source' as type, sent_at, user_id, name as traffic_source_name, campaign, null as interest_id, null as state, null as action, null as contract, null as source_app from web.traffic_source union select 'interest' as type, sent_at, user_id, null as traffic_source_name, null as campaign, interest_id, state, action, contract, source_app from api.interest where source_app in ('TabletApartmentList', 'ApartmentList', 'MobileApartmentList') and sent_at > '2015-06-20' -- traffic sources where not recorded before 06/15, giving couple days buffer ), -- Copy traffic_source campaign and name to all subsequent events. attributed as ( select *, coalesce( campaign, lag(campaign) ignore nulls over ( partition by user_id order by sent_at ) ) as interest_campaign, coalesce( traffic_source_name, lag(traffic_source_name) ignore nulls over ( partition by user_id order by sent_at ) ) as interest_traffic_source from merged ) -- Filter out traffic_source events, and leave only interests select user_id, sent_at, interest_id, action, state, contract, source_app, interest_traffic_source as traffic_source, interest_campaign as campaign from attributed where type = 'interest' ;