init ingestion
This commit is contained in:
5
ingestion/infra/clickhouse/000001_events.down.sql
Normal file
5
ingestion/infra/clickhouse/000001_events.down.sql
Normal file
@@ -0,0 +1,5 @@
|
||||
-- Rollback for 000001_events.up.sql: removes every ClickHouse event table.
-- The tables are independent of one another; IF EXISTS keeps the rollback
-- re-runnable after a partial failure.
DROP TABLE IF EXISTS events_dlq;

DROP TABLE IF EXISTS events_group;
DROP TABLE IF EXISTS events_page;
DROP TABLE IF EXISTS events_identify;
DROP TABLE IF EXISTS events_track;
|
||||
117
ingestion/infra/clickhouse/000001_events.up.sql
Normal file
117
ingestion/infra/clickhouse/000001_events.up.sql
Normal file
@@ -0,0 +1,117 @@
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- ClickHouse event store.
|
||||
--
|
||||
-- One wide table per event family (track / identify / page / group).
|
||||
-- All share the same key columns. Custom fields are flattened into the
|
||||
-- properties / traits Map.
|
||||
-- ---------------------------------------------------------------------------
|
||||
|
||||
-- events_track: storage for the `track` event family.
CREATE TABLE IF NOT EXISTS events_track
(
    -- key columns
    workspace_id    String,
    source_id       String,
    message_id      String,
    anonymous_id    String,
    user_id         String,

    -- name of the tracked event
    event           String,

    -- millisecond-precision UTC instants
    timestamp       DateTime64(3, 'UTC'),
    sent_at         DateTime64(3, 'UTC'),
    received_at     DateTime64(3, 'UTC'),

    -- custom fields, flattened into string -> string maps
    properties      Map(String, String),
    context         Map(String, String),

    -- request / client metadata
    ip              String,
    user_agent      String,
    library_name    String,
    library_version String
)
ENGINE = MergeTree
-- one partition per calendar month of received_at
PARTITION BY toYYYYMM(received_at)
ORDER BY (workspace_id, source_id, received_at, message_id)
-- rows expire 18 months after received_at
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- events_identify: storage for the `identify` event family.
CREATE TABLE IF NOT EXISTS events_identify
(
    -- key columns
    workspace_id String,
    source_id    String,
    message_id   String,
    anonymous_id String,
    user_id      String,

    -- millisecond-precision UTC instants
    timestamp    DateTime64(3, 'UTC'),
    sent_at      DateTime64(3, 'UTC'),
    received_at  DateTime64(3, 'UTC'),

    -- custom fields, flattened into string -> string maps
    traits       Map(String, String),
    context      Map(String, String),

    -- request metadata
    ip           String,
    user_agent   String
)
ENGINE = MergeTree
-- one partition per calendar month of received_at
PARTITION BY toYYYYMM(received_at)
ORDER BY (workspace_id, source_id, received_at, message_id)
-- rows expire 18 months after received_at
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- events_page: storage for the `page` event family.
CREATE TABLE IF NOT EXISTS events_page
(
    -- key columns
    workspace_id String,
    source_id    String,
    message_id   String,
    anonymous_id String,
    user_id      String,

    -- page name and category
    name         String,
    category     String,

    -- millisecond-precision UTC instants
    timestamp    DateTime64(3, 'UTC'),
    sent_at      DateTime64(3, 'UTC'),
    received_at  DateTime64(3, 'UTC'),

    -- custom fields, flattened into string -> string maps
    properties   Map(String, String),
    context      Map(String, String),

    -- request metadata
    ip           String,
    user_agent   String,

    -- page location columns
    referrer     String,
    path         String,
    url          String
)
ENGINE = MergeTree
-- one partition per calendar month of received_at
PARTITION BY toYYYYMM(received_at)
ORDER BY (workspace_id, source_id, received_at, message_id)
-- rows expire 18 months after received_at
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- events_group: storage for the `group` event family.
CREATE TABLE IF NOT EXISTS events_group
(
    -- key columns
    workspace_id String,
    source_id    String,
    message_id   String,
    anonymous_id String,
    user_id      String,

    -- identifier of the group named by the call
    group_id     String,

    -- millisecond-precision UTC instants
    timestamp    DateTime64(3, 'UTC'),
    sent_at      DateTime64(3, 'UTC'),
    received_at  DateTime64(3, 'UTC'),

    -- custom fields, flattened into string -> string maps
    traits       Map(String, String),
    context      Map(String, String),

    -- request metadata
    ip           String,
    user_agent   String
)
ENGINE = MergeTree
-- one partition per calendar month of received_at
PARTITION BY toYYYYMM(received_at)
ORDER BY (workspace_id, source_id, received_at, message_id)
-- rows expire 18 months after received_at
TTL toDateTime(received_at) + INTERVAL 18 MONTH
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- events_dlq: dead-letter queue for events that failed validation or
-- transformation. Unlike the events_* family tables, the sort key is only
-- (workspace_id, received_at) and rows are kept for 30 days.
CREATE TABLE IF NOT EXISTS events_dlq
(
    workspace_id String,
    source_id    String,
    message_id   String,
    received_at  DateTime64(3, 'UTC'),

    -- failure description
    reason       String,
    field        String,

    -- the rejected payload, stored as a plain string
    raw_payload  String
)
ENGINE = MergeTree
-- one partition per calendar month of received_at
PARTITION BY toYYYYMM(received_at)
ORDER BY (workspace_id, received_at)
-- rows expire 30 days after received_at
TTL toDateTime(received_at) + INTERVAL 30 DAY
SETTINGS index_granularity = 8192;
|
||||
19
ingestion/infra/docker/clickhouse-config.xml
Normal file
19
ingestion/infra/docker/clickhouse-config.xml
Normal file
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0"?>
<clickhouse>
    <!-- Overrides intended for local development only. -->

    <!-- Log at "information" level and mirror log output to the console. -->
    <logger>
        <level>information</level>
        <console>1</console>
    </logger>

    <!-- Bind to all IPv4 interfaces so clients outside the container can connect (dev only). -->
    <listen_host>0.0.0.0</listen_host>

    <!--
        Limits for the "default" settings profile.
        NOTE(review): ClickHouse normally reads <profiles> from the users
        configuration (users.xml / users.d); confirm these values actually
        take effect from the location this file is mounted at.
    -->
    <profiles>
        <default>
            <max_memory_usage>4000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
        </default>
    </profiles>
</clickhouse>
|
||||
113
ingestion/infra/docker/docker-compose.yml
Normal file
113
ingestion/infra/docker/docker-compose.yml
Normal file
@@ -0,0 +1,113 @@
|
||||
version: "3.9"

# ---------------------------------------------------------------------------
# CDP Ingestion - local development infrastructure
#
# Brings up: PostgreSQL, Redis, Redpanda (Kafka), ClickHouse, Redpanda Console
#
# Credentials in this file are throwaway development values.
# ---------------------------------------------------------------------------

services:
  # Control-plane database (configuration, not events).
  postgres:
    image: postgres:16-alpine
    container_name: cdp-postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: cdp
      POSTGRES_PASSWORD: cdp
      POSTGRES_DB: cdp
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U cdp -d cdp"]
      interval: 5s
      timeout: 3s
      retries: 10

  # Redis with append-only persistence plus an RDB snapshot rule ("60 1").
  redis:
    image: redis:7-alpine
    container_name: cdp-redis
    restart: unless-stopped
    command: ["redis-server", "--appendonly", "yes", "--save", "60", "1"]
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 10

  # Single-node Redpanda broker.
  # The "internal" listeners are advertised under the compose service name
  # (redpanda:<port>); the "external" listeners are advertised as localhost.
  redpanda:
    image: redpandadata/redpanda:v24.2.7
    container_name: cdp-redpanda
    restart: unless-stopped
    command:
      - redpanda
      - start
      - --kafka-addr=internal://0.0.0.0:9092,external://0.0.0.0:19092
      - --advertise-kafka-addr=internal://redpanda:9092,external://localhost:19092
      - --pandaproxy-addr=internal://0.0.0.0:8082,external://0.0.0.0:18082
      - --advertise-pandaproxy-addr=internal://redpanda:8082,external://localhost:18082
      - --schema-registry-addr=internal://0.0.0.0:8081,external://0.0.0.0:18081
      - --rpc-addr=0.0.0.0:33145
      - --advertise-rpc-addr=redpanda:33145
      - --smp=1
      - --memory=1G
      - --overprovisioned
      - --node-id=0
      - --check=false
    ports:
      - "9092:9092"
      - "19092:19092"
      - "9644:9644"
    volumes:
      - redpanda_data:/var/lib/redpanda/data
    healthcheck:
      test: ["CMD-SHELL", "rpk cluster health | grep -E 'Healthy:.+true' || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10

  redpanda-console:
    image: redpandadata/console:v2.7.2
    container_name: cdp-redpanda-console
    restart: unless-stopped
    # Wait for the broker's healthcheck to pass, not merely for its container
    # to start, so the console does not come up against an unready broker.
    depends_on:
      redpanda:
        condition: service_healthy
    environment:
      KAFKA_BROKERS: redpanda:9092
    ports:
      - "8080:8080"

  # Event store.
  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    container_name: cdp-clickhouse
    restart: unless-stopped
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    environment:
      CLICKHOUSE_DB: cdp
      CLICKHOUSE_USER: default
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    ports:
      - "8123:8123" # HTTP
      - "9000:9000" # Native
    volumes:
      - clickhouse_data:/var/lib/clickhouse
      - ./clickhouse-config.xml:/etc/clickhouse-server/config.d/local.xml:ro
    healthcheck:
      # Probe 127.0.0.1 rather than "localhost": the mounted config binds the
      # server to 0.0.0.0 (IPv4 only), and "localhost" may resolve to ::1 first.
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8123/ping | grep -q Ok"]
      interval: 5s
      timeout: 3s
      retries: 10

volumes:
  postgres_data:
  redis_data:
  redpanda_data:
  clickhouse_data:
|
||||
12
ingestion/infra/migrations/000001_init.down.sql
Normal file
12
ingestion/infra/migrations/000001_init.down.sql
Normal file
@@ -0,0 +1,12 @@
|
||||
-- Rollback for 000001_init.up.sql.
-- Tables are dropped children-first so that no DROP hits a table that is
-- still referenced by a foreign key.

-- leaf tables
DROP TABLE IF EXISTS audit_log;
DROP TABLE IF EXISTS schema_fields;
DROP TABLE IF EXISTS function_attachments;
DROP TABLE IF EXISTS functions;
DROP TABLE IF EXISTS source_destination_links;
DROP TABLE IF EXISTS destinations;
DROP TABLE IF EXISTS write_keys;
DROP TABLE IF EXISTS sources;
DROP TABLE IF EXISTS workspace_members;

-- root tables
DROP TABLE IF EXISTS users;
DROP TABLE IF EXISTS workspaces;

-- NOTE(review): the up migration uses CREATE EXTENSION IF NOT EXISTS, so this
-- also removes pgcrypto when it was installed before this migration ran.
DROP EXTENSION IF EXISTS "pgcrypto";
|
||||
178
ingestion/infra/migrations/000001_init.up.sql
Normal file
178
ingestion/infra/migrations/000001_init.up.sql
Normal file
@@ -0,0 +1,178 @@
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Initial schema for CDP Ingestion control plane.
|
||||
--
|
||||
-- This database stores configuration, not events. Events live in ClickHouse.
|
||||
-- ---------------------------------------------------------------------------
|
||||
|
||||
-- pgcrypto supplies digest(), which the dev seed migration uses to hash the
-- write key. On PostgreSQL releases before 13 it also supplies gen_random_uuid().
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- workspaces
--   Root record that the other control-plane tables reference via
--   workspace_id. deleted_at is nullable (presumably a soft-delete marker).
-- ---------------------------------------------------------------------------
CREATE TABLE workspaces (
    id          UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    slug        TEXT        NOT NULL UNIQUE,
    name        TEXT        NOT NULL,
    tier        TEXT        NOT NULL DEFAULT 'default'
                            CHECK (tier IN ('default', 'pro', 'enterprise')),
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    deleted_at  TIMESTAMPTZ
);

-- Slug lookups restricted to rows that are not deleted.
-- NOTE(review): the UNIQUE constraint on slug already creates a full index on
-- the same column; check whether this partial index is still needed.
CREATE INDEX idx_workspaces_slug
    ON workspaces (slug)
    WHERE deleted_at IS NULL;
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- users (console operators)
--   email is globally unique; only a password hash is stored.
-- ---------------------------------------------------------------------------
CREATE TABLE users (
    id             UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    email          TEXT        NOT NULL UNIQUE,
    password_hash  TEXT        NOT NULL,
    name           TEXT,                              -- optional
    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at     TIMESTAMPTZ NOT NULL DEFAULT now()
);
|
||||
|
||||
-- workspace_members: user <-> workspace membership with a role.
-- The composite primary key allows at most one membership row per
-- (workspace, user); rows are removed when either parent row is deleted.
CREATE TABLE workspace_members (
    workspace_id  UUID        NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE,
    user_id       UUID        NOT NULL REFERENCES users (id)      ON DELETE CASCADE,
    role          TEXT        NOT NULL DEFAULT 'member'
                              CHECK (role IN ('owner', 'admin', 'member', 'viewer')),
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),

    PRIMARY KEY (workspace_id, user_id)
);
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- sources -- each source is something that pushes events (web, mobile, server)
--   slug is unique within a workspace, not globally.
-- ---------------------------------------------------------------------------
CREATE TABLE sources (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id  UUID        NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE,
    slug          TEXT        NOT NULL,
    name          TEXT        NOT NULL,
    kind          TEXT        NOT NULL
                              CHECK (kind IN ('web', 'mobile', 'server', 'segment', 'webhook')),
    enabled       BOOLEAN     NOT NULL DEFAULT TRUE,
    settings      JSONB       NOT NULL DEFAULT '{}',
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    deleted_at    TIMESTAMPTZ,

    UNIQUE (workspace_id, slug)
);

-- Per-workspace listing of rows whose deleted_at is NULL.
CREATE INDEX idx_sources_workspace
    ON sources (workspace_id)
    WHERE deleted_at IS NULL;
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- write_keys -- API auth tokens, scoped to a source
--   A key counts as active while revoked_at is NULL; both indexes below
--   cover active keys only.
-- ---------------------------------------------------------------------------
CREATE TABLE write_keys (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id  UUID        NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE,
    source_id     UUID        NOT NULL REFERENCES sources (id)    ON DELETE CASCADE,
    key_hash      TEXT        NOT NULL UNIQUE,   -- store hash, never raw
    key_prefix    TEXT        NOT NULL,          -- first ~8 chars for display
    label         TEXT,
    revoked_at    TIMESTAMPTZ,
    last_used_at  TIMESTAMPTZ,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX idx_write_keys_workspace
    ON write_keys (workspace_id)
    WHERE revoked_at IS NULL;

CREATE INDEX idx_write_keys_source
    ON write_keys (source_id)
    WHERE revoked_at IS NULL;
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- destinations -- where events are forwarded (clickhouse, snowflake, bq, s3...)
--   slug is unique within a workspace, not globally.
-- ---------------------------------------------------------------------------
CREATE TABLE destinations (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id  UUID        NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE,
    slug          TEXT        NOT NULL,
    name          TEXT        NOT NULL,
    kind          TEXT        NOT NULL
                              CHECK (kind IN ('clickhouse', 'postgres', 'snowflake', 'bigquery',
                                              'redshift', 's3', 'webhook')),
    enabled       BOOLEAN     NOT NULL DEFAULT TRUE,
    config        JSONB       NOT NULL DEFAULT '{}',  -- credentials encrypted at rest
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    deleted_at    TIMESTAMPTZ,

    UNIQUE (workspace_id, slug)
);

-- Per-workspace listing of rows whose deleted_at is NULL.
CREATE INDEX idx_destinations_workspace
    ON destinations (workspace_id)
    WHERE deleted_at IS NULL;
|
||||
|
||||
-- source -> destination wiring
-- One row per (source, destination) pair; a link disappears automatically
-- when either end is deleted, and can be switched off via `enabled`.
CREATE TABLE source_destination_links (
    source_id       UUID        NOT NULL REFERENCES sources (id)      ON DELETE CASCADE,
    destination_id  UUID        NOT NULL REFERENCES destinations (id) ON DELETE CASCADE,
    enabled         BOOLEAN     NOT NULL DEFAULT TRUE,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),

    PRIMARY KEY (source_id, destination_id)
);
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- functions -- JS transformation code run by rotor
--   slug is unique within a workspace; version starts at 1.
-- ---------------------------------------------------------------------------
CREATE TABLE functions (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id  UUID        NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE,
    slug          TEXT        NOT NULL,
    name          TEXT        NOT NULL,
    code          TEXT        NOT NULL,
    enabled       BOOLEAN     NOT NULL DEFAULT TRUE,
    version       INTEGER     NOT NULL DEFAULT 1,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    deleted_at    TIMESTAMPTZ,

    UNIQUE (workspace_id, slug)
);
|
||||
|
||||
-- function_attachments: binds a function to either a source or a
-- destination, never both and never neither (enforced by the CHECK).
-- NOTE(review): the table has no primary key or uniqueness constraint, so
-- the same function can be attached to the same target more than once;
-- confirm that is intended.
CREATE TABLE function_attachments (
    source_id       UUID    REFERENCES sources (id)      ON DELETE CASCADE,
    destination_id  UUID    REFERENCES destinations (id) ON DELETE CASCADE,
    function_id     UUID    NOT NULL REFERENCES functions (id) ON DELETE CASCADE,
    position        INTEGER NOT NULL DEFAULT 0,
    enabled         BOOLEAN NOT NULL DEFAULT TRUE,

    -- exactly one of the two target columns must be set
    CHECK ((source_id IS NULL) <> (destination_id IS NULL))
);

CREATE INDEX idx_function_attachments_source
    ON function_attachments (source_id);

CREATE INDEX idx_function_attachments_destination
    ON function_attachments (destination_id);
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- schema_fields -- discovered field types per (workspace, event_type, field)
--   One row per field; sample_count starts at 1 and first_seen_at /
--   last_seen_at default to the insert time.
-- ---------------------------------------------------------------------------
CREATE TABLE schema_fields (
    workspace_id   UUID        NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE,
    event_type     TEXT        NOT NULL,
    field          TEXT        NOT NULL,
    data_type      TEXT        NOT NULL
                               CHECK (data_type IN ('string', 'number', 'boolean',
                                                    'object', 'array', 'timestamp', 'null')),
    first_seen_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_seen_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    sample_count   BIGINT      NOT NULL DEFAULT 1,

    PRIMARY KEY (workspace_id, event_type, field)
);

-- No separate index on (workspace_id, event_type): those columns are the
-- leading prefix of the primary-key index, which already serves lookups by
-- workspace and event type. A second index would only add write overhead.
|
||||
|
||||
-- ---------------------------------------------------------------------------
-- audit_log -- security-relevant operations
--   Entries outlive the workspace and the actor they refer to: both foreign
--   keys are nullable and use ON DELETE SET NULL.
-- ---------------------------------------------------------------------------
CREATE TABLE audit_log (
    id            BIGSERIAL   PRIMARY KEY,
    workspace_id  UUID        REFERENCES workspaces (id) ON DELETE SET NULL,
    actor_id      UUID        REFERENCES users (id)      ON DELETE SET NULL,
    action        TEXT        NOT NULL,
    target_type   TEXT,
    target_id     TEXT,
    metadata      JSONB,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Newest-first listing of a workspace's entries.
CREATE INDEX idx_audit_log_workspace
    ON audit_log (workspace_id, created_at DESC);
|
||||
3
ingestion/infra/migrations/000002_seed_dev.down.sql
Normal file
3
ingestion/infra/migrations/000002_seed_dev.down.sql
Normal file
@@ -0,0 +1,3 @@
|
||||
-- Rollback for 000002_seed_dev.up.sql: removes the seeded rows by their
-- fixed ids, child rows first (write key -> source -> workspace).
DELETE FROM write_keys
WHERE id = '00000000-0000-0000-0000-000000000100';

DELETE FROM sources
WHERE id = '00000000-0000-0000-0000-000000000010';

DELETE FROM workspaces
WHERE id = '00000000-0000-0000-0000-000000000001';
|
||||
24
ingestion/infra/migrations/000002_seed_dev.up.sql
Normal file
24
ingestion/infra/migrations/000002_seed_dev.up.sql
Normal file
@@ -0,0 +1,24 @@
|
||||
-- ---------------------------------------------------------------------------
-- Local dev seed -- a default workspace + source + write key.
-- The plaintext write key for development is: cdp_dev_writekey_1234567890
-- key_hash below is the hex-encoded sha256 of that string.
--
-- Every row uses a fixed id, and each INSERT is a no-op when its unique
-- key (named in the ON CONFLICT clause) is already present.
-- ---------------------------------------------------------------------------

-- 1. workspace
INSERT INTO workspaces (id, slug, name, tier)
VALUES (
    '00000000-0000-0000-0000-000000000001',
    'dev',
    'Dev Workspace',
    'default'
)
ON CONFLICT (slug) DO NOTHING;

-- 2. source inside that workspace
INSERT INTO sources (id, workspace_id, slug, name, kind)
VALUES (
    '00000000-0000-0000-0000-000000000010',
    '00000000-0000-0000-0000-000000000001',
    'web',
    'Dev Web Source',
    'web'
)
ON CONFLICT (workspace_id, slug) DO NOTHING;

-- 3. write key for that source; digest() comes from the pgcrypto extension
INSERT INTO write_keys (id, workspace_id, source_id, key_hash, key_prefix, label)
VALUES (
    '00000000-0000-0000-0000-000000000100',
    '00000000-0000-0000-0000-000000000001',
    '00000000-0000-0000-0000-000000000010',
    encode(digest('cdp_dev_writekey_1234567890', 'sha256'), 'hex'),
    'cdp_dev_',
    'dev key'
)
ON CONFLICT (key_hash) DO NOTHING;
|
||||
67
ingestion/infra/scripts/clickhouse_apply.sh
Executable file
67
ingestion/infra/scripts/clickhouse_apply.sh
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env bash
# Apply / drop ClickHouse DDL files in alphabetical order.
#
# Usage:
#   clickhouse_apply.sh up      apply *.up.sql in infra/clickhouse/
#   clickhouse_apply.sh down    apply *.down.sql in REVERSE order
#
# With no argument the mode defaults to "up".
#
# Env:
#   CLICKHOUSE_ADDR      (default localhost:9000; a bare host uses port 9000)
#   CLICKHOUSE_DB        (default cdp)
#   CLICKHOUSE_USER      (default default)
#   CLICKHOUSE_PASSWORD  (default empty)
#
# Exit status: non-zero when clickhouse-client fails (set -e) or when the
# mode argument is not recognised.

set -euo pipefail

# DDL directory: <parent of this script's directory>/clickhouse
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/clickhouse"
ADDR="${CLICKHOUSE_ADDR:-localhost:9000}"
DB="${CLICKHOUSE_DB:-cdp}"
# Kept in CH_USER rather than USER so the shell's own login-name variable
# (and the environment passed to child processes) is left untouched.
CH_USER="${CLICKHOUSE_USER:-default}"
PASS="${CLICKHOUSE_PASSWORD:-}"

MODE="${1:-up}"

# Split "host:port". Without a colon both parameter expansions would return
# the whole string, so fall back to the native-protocol port explicitly.
host="${ADDR%%:*}"
if [[ "$ADDR" == *:* ]]; then
  port="${ADDR##*:}"
else
  port=9000
fi

# Connection flags shared by every clickhouse-client call; --password is
# only passed when a password was actually provided.
client_args=(--host "$host" --port "$port" --user "$CH_USER")
if [[ -n "$PASS" ]]; then
  client_args+=(--password "$PASS")
fi

# Run every statement in the given file against $DB.
run_sql() {
  local file="$1"
  echo ">>> applying $(basename "$file")"
  clickhouse-client "${client_args[@]}" \
    --database "$DB" --multiquery --queries-file "$file"
}

# Create the target database if it does not exist yet.
ensure_db() {
  clickhouse-client "${client_args[@]}" \
    --query "CREATE DATABASE IF NOT EXISTS $DB"
}

# Glob expansion is already sorted and, unlike parsing `ls`, keeps paths
# containing whitespace intact. nullglob turns "no match" into an empty list.
shopt -s nullglob

case "$MODE" in
  up)
    ensure_db
    files=("$DIR"/*.up.sql)
    if (( ${#files[@]} == 0 )); then
      echo "warning: no *.up.sql files found in $DIR" >&2
    fi
    # Index-based loops avoid expanding an empty array, which older bash
    # versions reject under `set -u`.
    for (( i = 0; i < ${#files[@]}; i++ )); do
      run_sql "${files[i]}"
    done
    ;;
  down)
    files=("$DIR"/*.down.sql)
    if (( ${#files[@]} == 0 )); then
      echo "warning: no *.down.sql files found in $DIR" >&2
    fi
    # Walk the sorted list backwards so later migrations are undone first.
    for (( i = ${#files[@]} - 1; i >= 0; i-- )); do
      run_sql "${files[i]}"
    done
    ;;
  *)
    echo "usage: $0 {up|down}" >&2
    exit 1
    ;;
esac

echo "done."
|
||||
Reference in New Issue
Block a user