Skip to content

Instantly share code, notes, and snippets.

@lewish
Last active April 25, 2024 19:41
Show Gist options
  • Save lewish/a37ecb423a45ccf861373d1c942a0ea5 to your computer and use it in GitHub Desktop.
Save lewish/a37ecb423a45ccf861373d1c942a0ea5 to your computer and use it in GitHub Desktop.
Sessionized segment tracks & pages (BigQuery)
/*
This query creates a combined view of tracks and pages from segment data.
Sessions are computed by finding the any session start timestamp denoted by an activity gap of 30 minutes.
Session IDs are assigned to each track or page record using a combination of the session index, user ID, and date.
*/
WITH
with_session_starts AS (
SELECT
*,
COALESCE( (UNIX_MILLIS(timestamp) - UNIX_MILLIS(LAG(timestamp) OVER (PARTITION BY user_id ORDER BY timestamp ASC)))/(1000*60) >= 30,
TRUE ) AS session_start_event
FROM ((
SELECT
timestamp,
user_id,
context_ip,
context_page_url,
context_page_path,
STRUCT(id,
event) AS tracks_info,
STRUCT(NULL AS id,
NULL AS url,
NULL AS referrer,
NULL AS url_hash,
NULL AS title,
NULL as name,
NULL as search,
NULL as path) AS pages_info
FROM
<your_segment_schema>.tracks)
UNION ALL (
SELECT
timestamp,
user_id,
context_ip,
context_page_url,
context_page_path,
STRUCT(NULL AS id,
NULL AS event) AS tracks_info,
STRUCT(id,
url,
referrer,
url_hash,
title,
name,
search,
path) AS pages_info
FROM
<your_segment_schema>.pages))
WHERE NOT user_id is NULL ),
with_session_index AS (
SELECT
*,
SUM(
IF
( session_start_event,
1,
0 ) ) OVER (PARTITION BY user_id ORDER BY timestamp ASC) AS session_index
FROM
with_session_starts ),
with_session_id AS (
SELECT
*,
farm_fingerprint(CONCAT(CAST(session_index AS STRING), "|", CAST(user_id AS STRING), "|", CAST(DATE(timestamp) AS STRING))) AS session_id
FROM
with_session_index)
SELECT
*
FROM
with_session_id
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment