This commit is contained in:
2026-05-25 13:38:20 +07:00
parent 5a1829bc0f
commit b40568dd30
7 changed files with 948 additions and 259 deletions

View File

@@ -1,41 +1,43 @@
-- Retention Cohort -- of users whose first `initial_event` lands on day D,
-- what share triggered `return_event` on day D+k for k in 1..Periods.
--
-- Required parameters (clickhouse.Named):
-- We compute the cohort day in a CTE first, then LEFT JOIN events_track and
-- count distinct returners per (cohort_day, day_offset). Doing it this way
-- avoids ClickHouse's "aggregate inside another aggregate" restriction that
-- the older retention()-based form ran into.
--
-- Required parameters (clickhouse.Named, string-valued):
-- workspace_id : String
-- from : DateTime64(3,'UTC')
-- from : DateTime64(3,'UTC') (formatted as 'YYYY-MM-DD HH:MM:SS.mmm')
-- to : DateTime64(3,'UTC')
-- initial_event : String
-- return_event : String
--
-- Template inputs:
-- .Outer : []{ RIndex int; OffsetDay int; Last bool }
-- One entry per follow-up day. RIndex is the position in the retention()
-- output array; OffsetDay is the day delta from the cohort day.
SELECT
cohort_day,
countIf(arrayElement(r, 1)) AS cohort_size,
{{- range $p := .Outer }}
countIf(arrayElement(r, {{ $p.RIndex }})) AS retained_d{{ $p.OffsetDay }}{{ if not $p.Last }},{{ end }}
{{- end }}
FROM (
-- .Outer : []{ OffsetDay int; Last bool }
WITH cohorts AS (
SELECT
user_id,
toDate(min(if(event = {initial_event:String}, timestamp, NULL))) AS cohort_day,
retention(
event = {initial_event:String} AND toDate(timestamp) = cohort_day,
{{- range $p := .Outer }}
event = {return_event:String} AND toDate(timestamp) = addDays(cohort_day, {{ $p.OffsetDay }}){{ if not $p.Last }},{{ end }}
{{- end }}
) AS r
toDate(min(timestamp)) AS cohort_day
FROM events_track
WHERE workspace_id = {workspace_id:String}
AND received_at >= {from:DateTime64(3,'UTC')}
AND received_at < {to:DateTime64(3,'UTC')}
AND user_id != ''
AND event IN ({initial_event:String}, {return_event:String})
AND event = {initial_event:String}
GROUP BY user_id
HAVING cohort_day IS NOT NULL
)
GROUP BY cohort_day
ORDER BY cohort_day
SELECT
c.cohort_day AS cohort_day,
uniqExact(c.user_id) AS cohort_size,
{{- range $p := .Outer }}
uniqExactIf(c.user_id, e.event = {return_event:String} AND toDate(e.timestamp) = addDays(c.cohort_day, {{ $p.OffsetDay }})) AS retained_d{{ $p.OffsetDay }}{{ if not $p.Last }},{{ end }}
{{- end }}
FROM cohorts AS c
LEFT JOIN events_track AS e
ON e.workspace_id = {workspace_id:String}
AND e.user_id = c.user_id
AND e.received_at >= {from:DateTime64(3,'UTC')}
AND e.received_at < {to:DateTime64(3,'UTC')}
GROUP BY c.cohort_day
ORDER BY c.cohort_day