init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
-- Teardown: drops the five event-store tables.
-- IF EXISTS makes each statement a no-op when its table is already absent,
-- so the script can be re-run safely.

-- Dead-letter queue.
DROP TABLE IF EXISTS events_dlq;

-- Event-family tables.
DROP TABLE IF EXISTS events_group;
DROP TABLE IF EXISTS events_page;
DROP TABLE IF EXISTS events_identify;
DROP TABLE IF EXISTS events_track;

View File

@@ -0,0 +1,117 @@
-- ---------------------------------------------------------------------------
-- ClickHouse event store.
--
-- One wide table per event family (track / identify / page / group).
-- All share the same key columns. Custom fields are flattened into the
-- properties / traits Map.
-- ---------------------------------------------------------------------------
-- events_track: storage for the track event family.
--   * partitioned by calendar month of received_at
--   * sorted by (workspace_id, source_id, received_at, message_id)
--   * rows expire 18 months after received_at
-- Plain MergeTree is used, so repeated message_id values are not collapsed.
CREATE TABLE IF NOT EXISTS events_track
(
    -- Identity columns (also present on the identify / page / group tables).
    workspace_id    String,
    source_id       String,
    message_id      String,
    anonymous_id    String,
    user_id         String,

    -- Track-specific column.
    -- NOTE(review): presumably the event name supplied by the caller - confirm.
    event           String,

    -- Millisecond-precision UTC instants.
    -- NOTE(review): presumably event time, client send time and server
    -- receipt time respectively - confirm against the ingester.
    timestamp       DateTime64(3, 'UTC'),
    sent_at         DateTime64(3, 'UTC'),
    received_at     DateTime64(3, 'UTC'),

    -- Custom fields flattened to string keys and string values.
    properties      Map(String, String),
    context         Map(String, String),

    -- Request / client metadata.
    ip              String,
    user_agent      String,
    library_name    String,
    library_version String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(received_at)
ORDER BY (
    workspace_id,
    source_id,
    received_at,
    message_id
)
-- received_at is cast to DateTime for the TTL expression.
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS
    index_granularity = 8192;
-- events_identify: storage for the identify event family.
--   * partitioned by calendar month of received_at
--   * sorted by (workspace_id, source_id, received_at, message_id)
--   * rows expire 18 months after received_at
-- Plain MergeTree is used, so repeated message_id values are not collapsed.
CREATE TABLE IF NOT EXISTS events_identify
(
    -- Identity columns (also present on the track / page / group tables).
    workspace_id String,
    source_id    String,
    message_id   String,
    anonymous_id String,
    user_id      String,

    -- Millisecond-precision UTC instants.
    -- NOTE(review): presumably event time, client send time and server
    -- receipt time respectively - confirm against the ingester.
    timestamp    DateTime64(3, 'UTC'),
    sent_at      DateTime64(3, 'UTC'),
    received_at  DateTime64(3, 'UTC'),

    -- Custom fields flattened to string keys and string values.
    traits       Map(String, String),
    context      Map(String, String),

    -- Request / client metadata.
    ip           String,
    user_agent   String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(received_at)
ORDER BY (
    workspace_id,
    source_id,
    received_at,
    message_id
)
-- received_at is cast to DateTime for the TTL expression.
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS
    index_granularity = 8192;
-- events_page: storage for the page event family.
--   * partitioned by calendar month of received_at
--   * sorted by (workspace_id, source_id, received_at, message_id)
--   * rows expire 18 months after received_at
-- Plain MergeTree is used, so repeated message_id values are not collapsed.
CREATE TABLE IF NOT EXISTS events_page
(
    -- Identity columns (also present on the track / identify / group tables).
    workspace_id String,
    source_id    String,
    message_id   String,
    anonymous_id String,
    user_id      String,

    -- Page-specific labels.
    name         String,
    category     String,

    -- Millisecond-precision UTC instants.
    -- NOTE(review): presumably event time, client send time and server
    -- receipt time respectively - confirm against the ingester.
    timestamp    DateTime64(3, 'UTC'),
    sent_at      DateTime64(3, 'UTC'),
    received_at  DateTime64(3, 'UTC'),

    -- Custom fields flattened to string keys and string values.
    properties   Map(String, String),
    context      Map(String, String),

    -- Request / client metadata.
    ip           String,
    user_agent   String,

    -- Page-specific location columns, stored as dedicated String columns.
    referrer     String,
    path         String,
    url          String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(received_at)
ORDER BY (
    workspace_id,
    source_id,
    received_at,
    message_id
)
-- received_at is cast to DateTime for the TTL expression.
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS
    index_granularity = 8192;
-- events_group: storage for the group event family.
--   * partitioned by calendar month of received_at
--   * sorted by (workspace_id, source_id, received_at, message_id)
--   * rows expire 18 months after received_at
-- Plain MergeTree is used, so repeated message_id values are not collapsed.
CREATE TABLE IF NOT EXISTS events_group
(
    -- Identity columns (also present on the track / identify / page tables).
    workspace_id String,
    source_id    String,
    message_id   String,
    anonymous_id String,
    user_id      String,

    -- Group-specific column.
    -- NOTE(review): presumably the id of the group the user is attached to - confirm.
    group_id     String,

    -- Millisecond-precision UTC instants.
    -- NOTE(review): presumably event time, client send time and server
    -- receipt time respectively - confirm against the ingester.
    timestamp    DateTime64(3, 'UTC'),
    sent_at      DateTime64(3, 'UTC'),
    received_at  DateTime64(3, 'UTC'),

    -- Custom fields flattened to string keys and string values.
    traits       Map(String, String),
    context      Map(String, String),

    -- Request / client metadata.
    ip           String,
    user_agent   String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(received_at)
ORDER BY (
    workspace_id,
    source_id,
    received_at,
    message_id
)
-- received_at is cast to DateTime for the TTL expression.
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS
    index_granularity = 8192;
-- events_dlq: dead-letter queue for events that failed validation or
-- transformation.
--   * partitioned by calendar month of received_at
--   * sorted by (workspace_id, received_at)
--   * rows expire 30 days after received_at
CREATE TABLE IF NOT EXISTS events_dlq
(
    -- Origin of the rejected event.
    workspace_id String,
    source_id    String,
    message_id   String,

    -- Millisecond-precision UTC instant that drives partitioning and TTL.
    received_at  DateTime64(3, 'UTC'),

    -- Failure details.
    -- NOTE(review): presumably reason is the error text and field names the
    -- offending attribute - confirm against the writer.
    reason       String,
    field        String,

    -- NOTE(review): presumably the unparsed request body - confirm encoding.
    raw_payload  String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(received_at)
ORDER BY (
    workspace_id,
    received_at
)
-- received_at is cast to DateTime for the TTL expression.
TTL toDateTime(received_at) + INTERVAL 30 DAY
SETTINGS
    index_granularity = 8192;