data layer
This commit is contained in:
0
data-layer/infra/clickhouse/.gitkeep
Normal file
0
data-layer/infra/clickhouse/.gitkeep
Normal file
60
data-layer/infra/clickhouse/event_explorer.sql.tmpl
Normal file
60
data-layer/infra/clickhouse/event_explorer.sql.tmpl
Normal file
@@ -0,0 +1,60 @@
|
||||
-- Event Explorer -- filter raw events for one workspace inside a time range.
--
-- Returns one page of rows from a single event table, newest first.
-- The time range is half-open on received_at: [from, to).
--
-- Required parameters (clickhouse.Named):
--   workspace_id : String
--   from         : DateTime64(3,'UTC')
--   to           : DateTime64(3,'UTC')
--   limit        : UInt32
--   offset       : UInt32
--
-- Optional parameters (controlled by template flags):
--   user_id      : String (when .HasUserID)
--   anonymous_id : String (when .HasAnonymousID)
--   event        : String (when .HasEventName, events_track only)
--
-- Template inputs:
--   .Table          : whitelisted enum (events_track | events_identify | events_page | events_group)
--   .HasUserID      : bool
--   .HasAnonymousID : bool
--   .HasEventName   : bool
--
-- NOTE: .Table is spliced into the SQL text as an identifier (it cannot be a
-- bound parameter), so the caller must validate it against the whitelist above
-- before rendering. Every other input is a bound {name:Type} parameter.
SELECT
    workspace_id,
    source_id,
    message_id,
    anonymous_id,
    user_id,
{{- if eq .Table "events_track" }}
    event,
{{- end }}
{{- if eq .Table "events_page" }}
    name,
    category,
    path,
    url,
{{- end }}
{{- if eq .Table "events_group" }}
    group_id,
{{- end }}
    timestamp,
    received_at,
{{- /* identify/group rows carry traits; track/page rows carry properties */}}
{{- if or (eq .Table "events_identify") (eq .Table "events_group") }}
    traits
{{- else }}
    properties
{{- end }}
FROM {{ .Table }}
WHERE workspace_id = {workspace_id:String}
  AND received_at >= {from:DateTime64(3,'UTC')}
  AND received_at < {to:DateTime64(3,'UTC')}
{{- if .HasUserID }}
  AND user_id = {user_id:String}
{{- end }}
{{- if .HasAnonymousID }}
  AND anonymous_id = {anonymous_id:String}
{{- end }}
{{- if .HasEventName }}
  AND event = {event:String}
{{- end }}
-- message_id breaks ties between rows sharing the same received_at so that
-- LIMIT/OFFSET pages are stable (no duplicated or skipped rows across pages).
ORDER BY received_at DESC, message_id DESC
LIMIT {limit:UInt32}
OFFSET {offset:UInt32}
|
||||
35
data-layer/infra/clickhouse/funnel_analysis.sql.tmpl
Normal file
35
data-layer/infra/clickhouse/funnel_analysis.sql.tmpl
Normal file
@@ -0,0 +1,35 @@
|
||||
-- Funnel Analysis -- count users reaching each step in order within window.
--
-- Output: one row per step (1..StepCount) with
--   step            : step number, 1-based
--   reached         : number of users whose funnel level is >= step
--   conversion_rate : reached / users who reached step 1 (1.0 for step 1;
--                     0.0 when nobody reached step 1, instead of NaN from 0/0)
--
-- Required parameters (clickhouse.Named):
--   workspace_id   : String
--   from           : DateTime64(3,'UTC')
--   to             : DateTime64(3,'UTC')
--   window_seconds : UInt32
--   step{i}        : String for i in 0..N-1
--
-- Template inputs:
--   .Steps     : []struct{ Index int; Last bool }
--                Must be non-empty: the line before the range ends with a comma.
--   .StepCount : int
--
-- NOTE(review): rows are selected by received_at but the funnel is ordered by
-- timestamp -- confirm this split is intentional.
SELECT
    step,
    countIf(level >= step) AS reached,
    multiIf(
        step = 1, 1.0,
        -- guard the denominator: users exist but none matched the first step
        countIf(level >= 1) = 0, 0.0,
        countIf(level >= step) / countIf(level >= 1)
    ) AS conversion_rate
FROM (
    -- per-user funnel depth: how many consecutive steps the user completed
    SELECT
        user_id,
        windowFunnel({window_seconds:UInt32})(
            timestamp,
{{- range $i, $s := .Steps }}
            event = {step{{ $s.Index }}:String}{{ if not $s.Last }},{{ end }}
{{- end }}
        ) AS level
    FROM events_track
    WHERE workspace_id = {workspace_id:String}
      AND received_at >= {from:DateTime64(3,'UTC')}
      AND received_at < {to:DateTime64(3,'UTC')}
      AND user_id != ''
    GROUP BY user_id
) AS f
-- fan every user row out to one row per step number so we can count per step
ARRAY JOIN range(1, toUInt32({{ .StepCount }}) + 1) AS step
GROUP BY step
ORDER BY step
|
||||
57
data-layer/infra/clickhouse/profile_timeline.sql.tmpl
Normal file
57
data-layer/infra/clickhouse/profile_timeline.sql.tmpl
Normal file
@@ -0,0 +1,57 @@
|
||||
-- Profile timeline -- merged event stream for one user_id within a workspace.
--
-- Unions the four event tables into a common shape
-- (kind, message_id, name, received_at, payload) and returns one page,
-- newest first. Only track and page rows have a name; identify and group rows
-- use an empty string. payload is properties for track/page and traits for
-- identify/group.
--
-- Required parameters (clickhouse.Named):
--   workspace_id : String
--   user_id      : String
--   limit        : UInt32
--   offset       : UInt32
SELECT
    kind,
    message_id,
    name,
    received_at,
    payload
FROM (
    SELECT
        'track' AS kind,
        message_id,
        event AS name,
        received_at,
        properties AS payload
    FROM events_track
    WHERE workspace_id = {workspace_id:String}
      AND user_id = {user_id:String}

    UNION ALL

    SELECT
        'identify' AS kind,
        message_id,
        '' AS name,
        received_at,
        traits AS payload
    FROM events_identify
    WHERE workspace_id = {workspace_id:String}
      AND user_id = {user_id:String}

    UNION ALL

    SELECT
        'page' AS kind,
        message_id,
        name,
        received_at,
        properties AS payload
    FROM events_page
    WHERE workspace_id = {workspace_id:String}
      AND user_id = {user_id:String}

    UNION ALL

    SELECT
        'group' AS kind,
        message_id,
        '' AS name,
        received_at,
        traits AS payload
    FROM events_group
    WHERE workspace_id = {workspace_id:String}
      AND user_id = {user_id:String}
)
-- message_id and kind break ties on received_at so LIMIT/OFFSET pages are
-- stable (no duplicated or skipped rows between pages).
ORDER BY received_at DESC, message_id DESC, kind
LIMIT {limit:UInt32}
OFFSET {offset:UInt32}
|
||||
41
data-layer/infra/clickhouse/retention_cohort.sql.tmpl
Normal file
41
data-layer/infra/clickhouse/retention_cohort.sql.tmpl
Normal file
@@ -0,0 +1,41 @@
|
||||
-- Retention Cohort -- of users whose first `initial_event` lands on day D,
-- what share triggered `return_event` on day D+k for k in 1..Periods.
--
-- Output: one row per cohort_day with cohort_size and one retained_d{k}
-- column per entry in .Outer.
--
-- Required parameters (clickhouse.Named):
--   workspace_id  : String
--   from          : DateTime64(3,'UTC')
--   to            : DateTime64(3,'UTC')
--   initial_event : String
--   return_event  : String
--
-- Template inputs:
--   .Outer : []{ RIndex int; OffsetDay int; Last bool }
--            One entry per follow-up day. RIndex is the position in the retention()
--            output array; OffsetDay is the day delta from the cohort day.
--            Must be non-empty: the line before each range ends with a comma.
--
-- Structure (innermost first):
--   1. tag every event row with its user's cohort_day using a window aggregate;
--   2. per user, evaluate retention() against that cohort_day;
--   3. per cohort_day, count users for whom each retention slot is set.
-- cohort_day is computed in step 1 rather than next to retention() because an
-- alias of an aggregate (min) may not be referenced inside another aggregate
-- in the same SELECT -- ClickHouse rejects that as a nested aggregate.
SELECT
    cohort_day,
    countIf(arrayElement(r, 1)) AS cohort_size,
{{- range $p := .Outer }}
    countIf(arrayElement(r, {{ $p.RIndex }})) AS retained_d{{ $p.OffsetDay }}{{ if not $p.Last }},{{ end }}
{{- end }}
FROM (
    SELECT
        user_id,
        cohort_day,
        retention(
            event = {initial_event:String} AND toDate(timestamp) = cohort_day,
{{- range $p := .Outer }}
            event = {return_event:String} AND toDate(timestamp) = addDays(cohort_day, {{ $p.OffsetDay }}){{ if not $p.Last }},{{ end }}
{{- end }}
        ) AS r
    FROM (
        SELECT
            user_id,
            event,
            timestamp,
            -- day of the user's first initial_event inside the range;
            -- NULL when the user has no initial_event at all
            toDate(
                min(if(event = {initial_event:String}, timestamp, NULL))
                    OVER (PARTITION BY user_id)
            ) AS cohort_day
        FROM events_track
        WHERE workspace_id = {workspace_id:String}
          AND received_at >= {from:DateTime64(3,'UTC')}
          AND received_at < {to:DateTime64(3,'UTC')}
          AND user_id != ''
          AND event IN ({initial_event:String}, {return_event:String})
    )
    -- drop users who never fired initial_event (they belong to no cohort)
    WHERE cohort_day IS NOT NULL
    GROUP BY user_id, cohort_day
)
GROUP BY cohort_day
ORDER BY cohort_day
|
||||
52
data-layer/infra/clickhouse/session_analysis.sql.tmpl
Normal file
52
data-layer/infra/clickhouse/session_analysis.sql.tmpl
Normal file
@@ -0,0 +1,52 @@
|
||||
-- Session Analysis -- split each user's event stream into sessions based on
-- inactivity gap, then aggregate per session.
--
-- Output: one row per (user_id, session) with start/end time, event count and
-- duration in seconds, newest sessions first.
--
-- Required parameters (clickhouse.Named):
--   workspace_id    : String
--   from            : DateTime64(3,'UTC')
--   to              : DateTime64(3,'UTC')
--   timeout_seconds : UInt32
--   limit           : UInt32
--   offset          : UInt32
--
-- Optional parameters (template-driven):
--   user_id         : String (when .HasUserID)
--
-- Structure (innermost first):
--   1. flag an event as a session start when the gap to the user's previous
--      event exceeds timeout_seconds;
--   2. running sum of those flags per user = session number for each event;
--   3. aggregate events per (user_id, session number).
-- NOTE(review): a user's first event has no previous row; this presumably
-- relies on lagInFrame returning the type's default value there, making the
-- gap exceed the timeout -- confirm.
SELECT
    user_id,
    session_index AS session_id,
    min(timestamp) AS started_at,
    max(timestamp) AS ended_at,
    count() AS events,
    dateDiff('second', min(timestamp), max(timestamp)) AS duration_seconds
FROM (
    SELECT
        user_id,
        timestamp,
        sum(is_new_session) OVER (PARTITION BY user_id ORDER BY timestamp) AS session_index
    FROM (
        SELECT
            user_id,
            timestamp,
            if(
                dateDiff(
                    'second',
                    lagInFrame(timestamp) OVER (PARTITION BY user_id ORDER BY timestamp),
                    timestamp
                ) > {timeout_seconds:UInt32},
                1,
                0
            ) AS is_new_session
        FROM events_track
        WHERE workspace_id = {workspace_id:String}
          AND received_at >= {from:DateTime64(3,'UTC')}
          AND received_at < {to:DateTime64(3,'UTC')}
          AND user_id != ''
{{- if .HasUserID }}
          AND user_id = {user_id:String}
{{- end }}
    )
)
GROUP BY user_id, session_index
-- user_id and session_id break ties on started_at so LIMIT/OFFSET pages are
-- stable (no duplicated or skipped sessions between pages).
ORDER BY started_at DESC, user_id, session_id
LIMIT {limit:UInt32}
OFFSET {offset:UInt32}
|
||||
Reference in New Issue
Block a user