So the first part of the problem is to give each thing a grouping cluster, which can be done with CONDITIONAL_TRUE_EVENT
with data(page_name, page_value, page_num, timestamp) as ( select * from values ('subscription_id', '12345', 5, '2020-01-01 13:00'::timestamp), ('cancel', 'no_interest', 6, '2020-01-01 13:05'::timestamp), ('cancel', 'watch_all', 7, '2020-01-01 13:10'::timestamp), ('Survey', 'Clicked', 8, '2020-01-01 13:15'::timestamp), ('Survey', 'Not_clicked', 9, '2020-01-01 13:20'::timestamp), ('subscription_id', '456', 10, '2020-01-01 13:25'::timestamp), ('cancel', 'don''t_like', 11, '2020-01-01 13:30'::timestamp), ('cancel', 'bored', 12, '2020-01-01 13:40'::timestamp), ('subscription_id', '789', 13, '2020-01-01 13:45'::timestamp), ('Survey', 'Not_clicked', 14, '2020-01-01 13:50'::timestamp), ('cancel', 'watch_all', 15, '2020-01-01 13:55'::timestamp), ('Survey', 'Clicked', 16, '2020-01-01 14:15'::timestamp) ) select * ,CONDITIONAL_TRUE_EVENT(page_name='subscription_id')over(order by page_num) as event_grp from data
| PAGE_NAME | PAGE_VALUE | PAGE_NUM | TIMESTAMP | EVENT_GRP |
| subscription_id | 12345 | 5 | 2020-01-01 13:00:00.000 | 1 |
| cancel | no_interest | 6 | 2020-01-01 13:05:00.000 | 1 |
| cancel | watch_all | 7 | 2020-01-01 13:10:00.000 | 1 |
| Survey | Clicked | 8 | 2020-01-01 13:15:00.000 | 1 |
| Survey | Not_clicked | 9 | 2020-01-01 13:20:00.000 | 1 |
| subscription_id | 456 | 10 | 2020-01-01 13:25:00.000 | 2 |
| cancel | don't_like | 11 | 2020-01-01 13:30:00.000 | 2 |
| cancel | bored | 12 | 2020-01-01 13:40:00.000 | 2 |
| subscription_id | 789 | 13 | 2020-01-01 13:45:00.000 | 3 |
| Survey | Not_clicked | 14 | 2020-01-01 13:50:00.000 | 3 |
| cancel | watch_all | 15 | 2020-01-01 13:55:00.000 | 3 |
| Survey | Clicked | 16 | 2020-01-01 14:15:00.000 | 3 |
Then using QUALIFY/ROW_NUMBER on that we can keep the first of each groups... but qualify cannot we used in the same block as a CONDITIONAL_TRUE_EVENT.. so nested it is:
select * from ( select * ,CONDITIONAL_TRUE_EVENT(page_name='subscription_id')over(order by page_num) as event_grp from data ) qualify row_number()over(partition by event_grp, page_name order by timestamp) = 1
| PAGE_NAME | PAGE_VALUE | PAGE_NUM | TIMESTAMP | EVENT_GRP |
| subscription_id | 12345 | 5 | 2020-01-01 13:00:00.000 | 1 |
| cancel | no_interest | 6 | 2020-01-01 13:05:00.000 | 1 |
| Survey | Clicked | 8 | 2020-01-01 13:15:00.000 | 1 |
| subscription_id | 456 | 10 | 2020-01-01 13:25:00.000 | 2 |
| cancel | don't_like | 11 | 2020-01-01 13:30:00.000 | 2 |
| subscription_id | 789 | 13 | 2020-01-01 13:45:00.000 | 3 |
| Survey | Not_clicked | 14 | 2020-01-01 13:50:00.000 | 3 |
| cancel | watch_all | 15 | 2020-01-01 13:55:00.000 | 3 |
Now to do the ORDER of event filter:
with data(session, page_name, page_value, page_num, timestamp) as ( select * from values (1,'subscription_id', '12345', 5, '2020-01-01 13:00'::timestamp), (1,'cancel', 'no_interest', 6, '2020-01-01 13:05'::timestamp), (1,'cancel', 'watch_all', 7, '2020-01-01 13:10'::timestamp), (1,'Survey', 'Clicked', 8, '2020-01-01 13:15'::timestamp), (1,'Survey', 'Not_clicked', 9, '2020-01-01 13:20'::timestamp), (1,'subscription_id', '456', 10, '2020-01-01 13:25'::timestamp), (1,'cancel', 'don''t_like', 11, '2020-01-01 13:30'::timestamp), (1,'cancel', 'bored', 12, '2020-01-01 13:40'::timestamp), (1,'subscription_id', '789', 13, '2020-01-01 13:45'::timestamp), (1,'Survey', 'Not_clicked', 14, '2020-01-01 13:50'::timestamp), (1,'cancel', 'watch_all', 15, '2020-01-01 13:55'::timestamp), (1,'Survey', 'Clicked', 16, '2020-01-01 14:15'::timestamp) ), step_one as ( select * from ( select * ,CONDITIONAL_TRUE_EVENT(page_name='subscription_id')over(partition by session order by page_num) as event_grp from data ) qualify row_number()over(partition by session, event_grp, page_name order by timestamp) = 1 )--, step_two as ( select * ,row_number()over(partition by session, event_grp order by timestamp) as rn from step_one qualify case when page_name='subscription_id' then true when page_name='cancel' and rn = 2 then true when page_name='Survey' then true else false end ;)
gives:
| SESSION | PAGE_NAME | PAGE_VALUE | PAGE_NUM | TIMESTAMP | EVENT_GRP | RN |
| 1 | subscription_id | 12345 | 5 | 2020-01-01 13:00:00.000 | 1 | 1 |
| 1 | cancel | no_interest | 6 | 2020-01-01 13:05:00.000 | 1 | 2 |
| 1 | Survey | Clicked | 8 | 2020-01-01 13:15:00.000 | 1 | 3 |
| 1 | subscription_id | 456 | 10 | 2020-01-01 13:25:00.000 | 2 | 1 |
| 1 | cancel | don't_like | 11 | 2020-01-01 13:30:00.000 | 2 | 2 |
| 1 | subscription_id | 789 | 13 | 2020-01-01 13:45:00.000 | 3 | 1 |
| 1 | Survey | Not_clicked | 14 | 2020-01-01 13:50:00.000 | 3 | 2 |
so now we can unpivot this with MAX and IFF, The iff is used to select the only value we want, and the MAX take the non-null value (which due to the ROW_NUMBER fitlering of the prior step there is only one non-null value per column)
with data(session, page_name, page_value, page_num, timestamp) as ( select * from values (1,'subscription_id', '12345', 5, '2020-01-01 13:00'::timestamp), (1,'cancel', 'no_interest', 6, '2020-01-01 13:05'::timestamp), (1,'cancel', 'watch_all', 7, '2020-01-01 13:10'::timestamp), (1,'Survey', 'Clicked', 8, '2020-01-01 13:15'::timestamp), (1,'Survey', 'Not_clicked', 9, '2020-01-01 13:20'::timestamp), (1,'subscription_id', '456', 10, '2020-01-01 13:25'::timestamp), (1,'cancel', 'don''t_like', 11, '2020-01-01 13:30'::timestamp), (1,'cancel', 'bored', 12, '2020-01-01 13:40'::timestamp), (1,'subscription_id', '789', 13, '2020-01-01 13:45'::timestamp), (1,'Survey', 'Not_clicked', 14, '2020-01-01 13:50'::timestamp), (1,'cancel', 'watch_all', 15, '2020-01-01 13:55'::timestamp), (1,'Survey', 'Clicked', 16, '2020-01-01 14:15'::timestamp) ), step_one as ( select * from ( select * ,CONDITIONAL_TRUE_EVENT(page_name='subscription_id')over(partition by session order by page_num) as event_grp from data ) qualify row_number()over(partition by session, event_grp, page_name order by timestamp) = 1 ), step_two as ( select * ,row_number()over(partition by session, event_grp order by timestamp) as rn from step_one qualify case when page_name='subscription_id' then true when page_name='cancel' and rn = 2 then true when page_name='Survey' then true else false end ) select session ,max(iff(page_name='subscription_id',page_value, null)) as subscription_id ,max(iff(page_name='cancel',page_value, null)) as cancel_reason ,max(iff(page_name='cancel',timestamp, null)) as cancel_timestamp ,max(iff(page_name='Survey',page_value, null)) as Survey_reason ,max(iff(page_name='Survey',timestamp, null)) as Survey_timestamp from step_two group by session, event_grp order by session, event_grp
gives:
| SESSION | SUBSCRIPTION_ID | CANCEL_REASON | CANCEL_TIMESTAMP | SURVEY_REASON | SURVEY_TIMESTAMP |
| 1 | 12345 | no_interest | 2020-01-01 13:05:00.000 | Clicked | 2020-01-01 13:15:00.000 |
| 1 | 456 | don't_like | 2020-01-01 13:30:00.000 | null | null |
| 1 | 789 | null | null | Not_clicked | 2020-01-01 13:50:00.000 |
the Mat Solution:
Not how I would solve this:
with data(session, page_name, page_value, page_num, timestamp) as ( select * from values (1,'subscription_id', '12345', 5, '2020-01-01 13:00'::timestamp), (1,'cancel', 'no_interest', 6, '2020-01-01 13:05'::timestamp), (1,'cancel', 'watch_all', 7, '2020-01-01 13:10'::timestamp), (1,'Survey', 'Clicked', 8, '2020-01-01 13:15'::timestamp), (1,'Survey', 'Not_clicked', 9, '2020-01-01 13:20'::timestamp), (1,'subscription_id', '456', 10, '2020-01-01 13:25'::timestamp), (1,'cancel', 'don''t_like', 11, '2020-01-01 13:30'::timestamp), (1,'cancel', 'bored', 12, '2020-01-01 13:40'::timestamp), (1,'subscription_id', '789', 13, '2020-01-01 13:45'::timestamp), (1,'Survey', 'Not_clicked', 14, '2020-01-01 13:50'::timestamp), (1,'cancel', 'watch_all', 15, '2020-01-01 13:55'::timestamp), (1,'Survey', 'Clicked', 16, '2020-01-01 14:15'::timestamp) ), step_one as ( select * ,CONDITIONAL_TRUE_EVENT(page_name='subscription_id')over(partition by session order by page_num) as event_grp from data ) select session, iff(Survey_timestamp is null OR cancel_timestamp<Survey_timestamp, cancel_reason, null) as cancel_reason, iff(Survey_timestamp is null OR cancel_timestamp<Survey_timestamp, cancel_timestamp, null) as cancel_timestamp, Survey_reason, Survey_timestamp from ( select distinct session, event_grp ,first_value(iff(page_name='subscription_id',page_value, null))ignore nulls over(partition by session, event_grp order by timestamp) as subscription_id ,first_value(iff(page_name='cancel',page_value, null))ignore nulls over(partition by session, event_grp order by timestamp) as cancel_reason ,first_value(iff(page_name='cancel',timestamp, null))ignore nulls over(partition by session, event_grp order by timestamp) as cancel_timestamp ,first_value(iff(page_name='Survey',page_value, null))ignore nulls over(partition by session, event_grp order by timestamp) as Survey_reason ,first_value(iff(page_name='Survey',timestamp, null))ignore nulls over(partition by session, event_grp order by timestamp) as Survey_timestamp from step_one ) order by session, event_grp
subscription_id's are bound to that row?