init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<!--
  ClickHouse server overrides for local development.
  Intended to be dropped into the server's config.d directory.
-->
<clickhouse>
    <!-- Log at "information" level and mirror the log to the console. -->
    <logger>
        <level>information</level>
        <console>1</console>
    </logger>

    <!-- Listen on every IPv4 interface so connections from any host are accepted (dev only). -->
    <listen_host>0.0.0.0</listen_host>

    <!--
      Overrides for the "default" settings profile.
      NOTE(review): ClickHouse normally reads settings profiles from the users
      config (users.xml / users.d), not from config.d; confirm this section
      actually takes effect where this file is mounted.
    -->
    <profiles>
        <default>
            <!-- 4000000000 is about 4 GB, assuming the value is in bytes; verify against the docs. -->
            <max_memory_usage>4000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
        </default>
    </profiles>
</clickhouse>

View File

@@ -0,0 +1,113 @@
# ---------------------------------------------------------------------------
# CDP Ingestion - local development infrastructure
#
# Brings up: PostgreSQL, Redis, Redpanda (Kafka), ClickHouse, Redpanda Console
#
# Credentials and published ports in this file are for local development only.
# The top-level `version` key is intentionally omitted: the Compose
# Specification marks it obsolete and Compose v2 warns when it is present.
# ---------------------------------------------------------------------------
services:
  # PostgreSQL with a single "cdp" database owned by user "cdp".
  postgres:
    image: postgres:16-alpine
    container_name: cdp-postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: cdp
      POSTGRES_PASSWORD: cdp
      POSTGRES_DB: cdp
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      # Healthy once the server accepts connections for user/db "cdp".
      test: ["CMD-SHELL", "pg_isready -U cdp -d cdp"]
      interval: 5s
      timeout: 3s
      retries: 10

  # Redis with append-only persistence plus an RDB snapshot rule ("--save 60 1").
  redis:
    image: redis:7-alpine
    container_name: cdp-redis
    restart: unless-stopped
    command: ["redis-server", "--appendonly", "yes", "--save", "60", "1"]
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 10

  # Single-node Redpanda broker (Kafka API, HTTP Proxy, Schema Registry).
  # Every API has two listeners:
  #   internal - advertised as "redpanda:<port>", for containers on this network
  #   external - advertised as "localhost:<port>", for clients on the host
  redpanda:
    image: redpandadata/redpanda:v24.2.7
    container_name: cdp-redpanda
    restart: unless-stopped
    command:
      - redpanda
      - start
      - --kafka-addr=internal://0.0.0.0:9092,external://0.0.0.0:19092
      - --advertise-kafka-addr=internal://redpanda:9092,external://localhost:19092
      - --pandaproxy-addr=internal://0.0.0.0:8082,external://0.0.0.0:18082
      - --advertise-pandaproxy-addr=internal://redpanda:8082,external://localhost:18082
      - --schema-registry-addr=internal://0.0.0.0:8081,external://0.0.0.0:18081
      - --rpc-addr=0.0.0.0:33145
      - --advertise-rpc-addr=redpanda:33145
      - --smp=1
      - --memory=1G
      - --overprovisioned
      - --node-id=0
      - --check=false
    ports:
      # Internal Kafka listener. It advertises "redpanda:9092", so host clients
      # should use 19092 instead; kept published for backward compatibility.
      - "9092:9092"
      # External Kafka listener (advertised as localhost:19092).
      - "19092:19092"
      # External Schema Registry listener configured in `command` above.
      - "18081:18081"
      # External HTTP Proxy listener (advertised as localhost:18082).
      - "18082:18082"
      # Admin API.
      - "9644:9644"
    volumes:
      - redpanda_data:/var/lib/redpanda/data
    healthcheck:
      # Healthy once `rpk cluster health` reports "Healthy: true".
      test: ["CMD-SHELL", "rpk cluster health | grep -E 'Healthy:.+true' || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10

  # Web UI for the Redpanda broker, served on http://localhost:8080.
  redpanda-console:
    image: redpandadata/console:v2.7.2
    container_name: cdp-redpanda-console
    restart: unless-stopped
    depends_on:
      # Wait for the broker's healthcheck to pass rather than only for the
      # container to start, so the console does not start before the broker
      # reports healthy.
      redpanda:
        condition: service_healthy
    environment:
      # Talks to the broker over the internal listener on the Compose network.
      KAFKA_BROKERS: redpanda:9092
    ports:
      - "8080:8080"

  # ClickHouse server with database "cdp" and local config overrides.
  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    container_name: cdp-clickhouse
    restart: unless-stopped
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    environment:
      CLICKHOUSE_DB: cdp
      CLICKHOUSE_USER: default
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    ports:
      - "8123:8123" # HTTP
      - "9000:9000" # Native
    volumes:
      - clickhouse_data:/var/lib/clickhouse
      # Local overrides, mounted read-only into the server's config.d directory.
      - ./clickhouse-config.xml:/etc/clickhouse-server/config.d/local.xml:ro
    healthcheck:
      # Healthy once the HTTP /ping endpoint answers "Ok".
      test: ["CMD-SHELL", "wget -qO- http://localhost:8123/ping | grep -q Ok"]
      interval: 5s
      timeout: 3s
      retries: 10

# Named volumes so data survives container re-creation.
volumes:
  postgres_data:
  redis_data:
  redpanda_data:
  clickhouse_data: