This commit is contained in:
2026-05-25 10:16:31 +07:00
parent a428170fef
commit c5e980aa52
21 changed files with 6172 additions and 102 deletions

View File

@@ -1,12 +1,15 @@
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Shared infrastructure (matches cdp-ingestion) # Shared infrastructure (remote)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
POSTGRES_DSN=postgres://cdp:cdp@localhost:5432/cdp?sslmode=disable POSTGRES_DSN=postgres://renolation:renolation@103.188.82.191:5432/analytics?sslmode=disable
REDIS_ADDR=localhost:6379 REDIS_ADDR=localhost:6379
CLICKHOUSE_ADDR=localhost:9000
CLICKHOUSE_DB=cdp # ClickHouse (internal, native protocol, plain)
CLICKHOUSE_USER=default CLICKHOUSE_ADDR=192.168.1.60:9000
CLICKHOUSE_PASSWORD= CLICKHOUSE_DB=analytics
CLICKHOUSE_USER=renolation
CLICKHOUSE_PASSWORD=renolation
CLICKHOUSE_SECURE=false
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Analytics API # Analytics API
@@ -19,8 +22,8 @@ ANALYTICS_CACHE_TTL_PROFILE_SECONDS=30s
ANALYTICS_CH_TEMPLATES_DIR=../infra/clickhouse ANALYTICS_CH_TEMPLATES_DIR=../infra/clickhouse
# Custom SQL ClickHouse user — must have SELECT-only grants # Custom SQL ClickHouse user — must have SELECT-only grants
ANALYTICS_CH_SQL_USER=analytics_ro ANALYTICS_CH_SQL_USER=renolation
ANALYTICS_CH_SQL_PASSWORD= ANALYTICS_CH_SQL_PASSWORD=renolation
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Analytics Worker (river) # Analytics Worker (river)

View File

@@ -10,12 +10,50 @@
# Configuration # Configuration
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
POSTGRES_DSN ?= postgres://cdp:cdp@localhost:5432/cdp?sslmode=disable # Auto-load .env if present, then export every variable so child processes
CLICKHOUSE_DSN ?= clickhouse://default:@localhost:9000/cdp # (go run, docker run, the apply script) inherit them.
MIGRATE_BIN ?= migrate ifneq (,$(wildcard ./.env))
include .env
export
endif
# Connection defaults. `?=` only assigns when the variable is not already set
# (for example by the calling environment or by the included .env).
# NOTE(review): these defaults embed a username/password and concrete hosts in
# the Makefile itself -- consider keeping real credentials out of version
# control and defaulting to a local development DSN here.
POSTGRES_DSN ?= postgres://renolation:renolation@103.188.82.191:5432/analytics?sslmode=disable
# DSN passed to the dockerized `migrate` fallback; defaults to POSTGRES_DSN.
POSTGRES_DSN_DOCKER ?= $(POSTGRES_DSN)
CLICKHOUSE_ADDR ?= 192.168.1.60:9000
CLICKHOUSE_DB ?= analytics
CLICKHOUSE_USER ?= renolation
CLICKHOUSE_PASSWORD ?= renolation
CLICKHOUSE_SECURE ?= 0
# Re-export everything the Go services + scripts consume. Explicit list is
# required on GNU Make 3.81 (macOS default), where bare `export` is a no-op.
# NOTE(review): a bare `export` is still present inside the .env include guard
# above; if the statement in this comment holds, that line can be dropped.
export POSTGRES_DSN REDIS_ADDR
export CLICKHOUSE_ADDR CLICKHOUSE_DB CLICKHOUSE_USER CLICKHOUSE_PASSWORD CLICKHOUSE_SECURE
export ANALYTICS_HTTP_ADDR ANALYTICS_LOG_LEVEL ANALYTICS_SHUTDOWN_TIMEOUT_SECONDS
export ANALYTICS_CACHE_TTL_QUERY_SECONDS ANALYTICS_CACHE_TTL_PROFILE_SECONDS ANALYTICS_CH_TEMPLATES_DIR
export ANALYTICS_CH_SQL_USER ANALYTICS_CH_SQL_PASSWORD
export WORKER_HTTP_ADDR WORKER_LOG_LEVEL WORKER_SHUTDOWN_TIMEOUT_SECONDS
export WORKER_MAX_WORKERS WORKER_COMPUTE_TRAITS_EVERY WORKER_REFRESH_SEGMENT_EVERY
# Image used for the `docker run` fallback when no local `migrate` binary is found.
MIGRATE_IMAGE ?= migrate/migrate:v4.17.1
MIGRATIONS_DIR := infra/migrations MIGRATIONS_DIR := infra/migrations
CLICKHOUSE_DIR := infra/clickhouse CLICKHOUSE_DIR := infra/clickhouse
export CLICKHOUSE_ADDR CLICKHOUSE_DB CLICKHOUSE_USER CLICKHOUSE_PASSWORD CLICKHOUSE_SECURE
# `migrate` CLI: prefer local binary, otherwise run the official Docker image.
MIGRATE_BIN ?= $(shell command -v migrate 2>/dev/null)
ifeq ($(MIGRATE_BIN),)
MIGRATE = docker run --rm \
-v $(CURDIR)/$(MIGRATIONS_DIR):/migrations \
$(MIGRATE_IMAGE) -path=/migrations -database "$(POSTGRES_DSN_DOCKER)"
MIGRATE_CREATE = docker run --rm \
-v $(CURDIR)/$(MIGRATIONS_DIR):/migrations \
$(MIGRATE_IMAGE) create -ext sql -dir /migrations -seq
else
MIGRATE = $(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)"
MIGRATE_CREATE = $(MIGRATE_BIN) create -ext sql -dir $(MIGRATIONS_DIR) -seq
endif
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Help # Help
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -47,16 +85,16 @@ help:
migrate/new: migrate/new:
@if [ -z "$(name)" ]; then echo "usage: make migrate/new name=add_xxx"; exit 1; fi @if [ -z "$(name)" ]; then echo "usage: make migrate/new name=add_xxx"; exit 1; fi
$(MIGRATE_BIN) create -ext sql -dir $(MIGRATIONS_DIR) -seq $(name) $(MIGRATE_CREATE) $(name)
migrate/up: migrate/up:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" up $(MIGRATE) up
migrate/down: migrate/down:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" down 1 $(MIGRATE) down 1
migrate/status: migrate/status:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" version $(MIGRATE) version
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ClickHouse DDL # ClickHouse DDL
@@ -79,6 +117,7 @@ run/workers:
cd workers && go run ./cmd/worker cd workers && go run ./cmd/worker
run/console: run/console:
@cd console && [ -d node_modules ] || npm install
cd console && npm run dev cd console && npm run dev
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@@ -60,13 +60,13 @@ func run() error {
} }
defer redisClient.Close() defer redisClient.Close()
chMain, err := repo.NewClickHouse(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseUser, cfg.ClickHousePassword) chMain, err := repo.NewClickHouse(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseUser, cfg.ClickHousePassword, cfg.ClickHouseSecure)
if err != nil { if err != nil {
return err return err
} }
defer func() { _ = chMain.Close() }() defer func() { _ = chMain.Close() }()
chRO, err := repo.NewClickHouseReadOnly(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseSQLUser, cfg.ClickHouseSQLPassword) chRO, err := repo.NewClickHouseReadOnly(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseSQLUser, cfg.ClickHouseSQLPassword, cfg.ClickHouseSecure)
if err != nil { if err != nil {
// Read-only user might not be provisioned in dev. Log + fall back to // Read-only user might not be provisioned in dev. Log + fall back to
// the main connection so /query/sql still works locally; production // the main connection so /query/sql still works locally; production

View File

@@ -36,6 +36,7 @@ type Config struct {
ClickHouseDB string `env:"CLICKHOUSE_DB" envDefault:"cdp"` ClickHouseDB string `env:"CLICKHOUSE_DB" envDefault:"cdp"`
ClickHouseUser string `env:"CLICKHOUSE_USER" envDefault:"default"` ClickHouseUser string `env:"CLICKHOUSE_USER" envDefault:"default"`
ClickHousePassword string `env:"CLICKHOUSE_PASSWORD"` ClickHousePassword string `env:"CLICKHOUSE_PASSWORD"`
ClickHouseSecure bool `env:"CLICKHOUSE_SECURE" envDefault:"false"`
} }
func Load() (*Config, error) { func Load() (*Config, error) {

View File

@@ -2,17 +2,20 @@ package repo
import ( import (
"context" "context"
"crypto/tls"
"fmt" "fmt"
"github.com/ClickHouse/clickhouse-go/v2" "github.com/ClickHouse/clickhouse-go/v2"
"github.com/ClickHouse/clickhouse-go/v2/lib/driver" "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
) )
// NewClickHouse opens a native-protocol ClickHouse connection. The returned // NewClickHouse opens a ClickHouse connection. `secure` enables TLS. The
// driver.Conn is safe for concurrent use. Caller owns Close(). // wire protocol is auto-selected from the port: 8123/8443 (HTTP interface)
func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver.Conn, error) { // use HTTP, the native default otherwise.
conn, err := clickhouse.Open(&clickhouse.Options{ func NewClickHouse(ctx context.Context, addr, db, user, password string, secure bool) (driver.Conn, error) {
opts := &clickhouse.Options{
Addr: []string{addr}, Addr: []string{addr},
Protocol: protocolFromAddr(addr),
Auth: clickhouse.Auth{ Auth: clickhouse.Auth{
Database: db, Database: db,
Username: user, Username: user,
@@ -21,7 +24,11 @@ func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver
Settings: clickhouse.Settings{ Settings: clickhouse.Settings{
"readonly": 0, // analytics queries; per-user read-only enforced for /query/sql separately "readonly": 0, // analytics queries; per-user read-only enforced for /query/sql separately
}, },
}) }
if secure {
opts.TLS = &tls.Config{MinVersion: tls.VersionTLS12}
}
conn, err := clickhouse.Open(opts)
if err != nil { if err != nil {
return nil, fmt.Errorf("open clickhouse: %w", err) return nil, fmt.Errorf("open clickhouse: %w", err)
} }
@@ -35,9 +42,10 @@ func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver
// NewClickHouseReadOnly opens a ClickHouse connection using a SELECT-only // NewClickHouseReadOnly opens a ClickHouse connection using a SELECT-only
// account. Used to back the /query/sql sandbox: DDL/DML are rejected at the DB // account. Used to back the /query/sql sandbox: DDL/DML are rejected at the DB
// level even if the app-level keyword guard is bypassed. // level even if the app-level keyword guard is bypassed.
func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string) (driver.Conn, error) { func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string, secure bool) (driver.Conn, error) {
conn, err := clickhouse.Open(&clickhouse.Options{ opts := &clickhouse.Options{
Addr: []string{addr}, Addr: []string{addr},
Protocol: protocolFromAddr(addr),
Auth: clickhouse.Auth{ Auth: clickhouse.Auth{
Database: db, Database: db,
Username: user, Username: user,
@@ -46,7 +54,11 @@ func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string)
Settings: clickhouse.Settings{ Settings: clickhouse.Settings{
"readonly": 2, // belt-and-braces: server-side enforce read-only "readonly": 2, // belt-and-braces: server-side enforce read-only
}, },
}) }
if secure {
opts.TLS = &tls.Config{MinVersion: tls.VersionTLS12}
}
conn, err := clickhouse.Open(opts)
if err != nil { if err != nil {
return nil, fmt.Errorf("open clickhouse (ro): %w", err) return nil, fmt.Errorf("open clickhouse (ro): %w", err)
} }
@@ -56,3 +68,24 @@ func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string)
} }
return conn, nil return conn, nil
} }
// protocolFromAddr picks the ClickHouse wire protocol from the port in addr:
// the HTTP-interface ports 8123 and 8443 map to clickhouse.HTTP, and every
// other address (including one with no port) maps to clickhouse.Native. This
// lets CLICKHOUSE_ADDR point at either kind of endpoint without a separate
// protocol setting.
func protocolFromAddr(addr string) clickhouse.Protocol {
	if p := portOf(addr); p == "8123" || p == "8443" {
		return clickhouse.HTTP
	}
	return clickhouse.Native
}
// portOf returns the port portion of a "host:port" address, or "" when addr
// carries no port.
//
// Accepted forms:
//   - "host:port" and ":port"   -> "port"
//   - "[v6addr]:port"           -> "port"
//   - "host", "[v6addr]"        -> ""
//   - bare IPv6 such as "::1"   -> "" (its colons separate address groups,
//     not a port, so the tail must not be mistaken for one)
func portOf(addr string) string {
	last := -1  // index of the final ':' in addr
	colons := 0 // total number of ':' seen
	for i := 0; i < len(addr); i++ {
		if addr[i] == ':' {
			colons++
			last = i
		}
	}
	if last < 0 {
		return ""
	}
	// With more than one colon the address is an IPv6 literal; the final colon
	// only introduces a port when it directly follows the closing bracket.
	bracketed := last > 0 && addr[last-1] == ']'
	if colons > 1 && !bracketed {
		return ""
	}
	return addr[last+1:]
}

View File

@@ -5,57 +5,113 @@
# clickhouse_apply.sh up apply *.up.sql in infra/clickhouse/ # clickhouse_apply.sh up apply *.up.sql in infra/clickhouse/
# clickhouse_apply.sh down apply *.down.sql in REVERSE order # clickhouse_apply.sh down apply *.down.sql in REVERSE order
# #
# Talks to the HTTP(S) interface via curl -- no clickhouse-client binary needed.
# The SQL is POSTed as the raw body; ClickHouse parses the body when the URL
# has no ?query= parameter.
#
# Env: # Env:
# CLICKHOUSE_ADDR (default localhost:9000) # CLICKHOUSE_ADDR (default localhost:8123)
# CLICKHOUSE_DB (default cdp) # CLICKHOUSE_DB (default cdp)
# CLICKHOUSE_USER (default default) # CLICKHOUSE_USER (default default)
# CLICKHOUSE_PASSWORD (default empty) # CLICKHOUSE_PASSWORD (default empty)
# CLICKHOUSE_SECURE (default 0; auto-on for port 8443)
set -euo pipefail set -euo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/clickhouse" DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/clickhouse"
ADDR="${CLICKHOUSE_ADDR:-localhost:9000}" ADDR="${CLICKHOUSE_ADDR:-localhost:8123}"
DB="${CLICKHOUSE_DB:-cdp}" DB="${CLICKHOUSE_DB:-cdp}"
USER="${CLICKHOUSE_USER:-default}" USER="${CLICKHOUSE_USER:-default}"
PASS="${CLICKHOUSE_PASSWORD:-}" PASS="${CLICKHOUSE_PASSWORD:-}"
SECURE="${CLICKHOUSE_SECURE:-0}"
MODE="${1:-up}" MODE="${1:-up}"
host="${ADDR%%:*}" host="${ADDR%%:*}"
port="${ADDR##*:}" port="${ADDR##*:}"
run_sql() { case "$SECURE" in
1|true|TRUE|yes) scheme="https" ;;
*) [[ "$port" == "8443" ]] && scheme="https" || scheme="http" ;;
esac
URL_BASE="${scheme}://${host}:${port}/"
# post_sql sends one SQL statement to ClickHouse over HTTP(S).
#   $1  SQL text, sent verbatim as the request body
#   $2  optional query string appended to the base URL (e.g. database=foo)
# The response body goes to stdout, followed by a newline.
post_sql() {
  local statement="$1"
  local query_string="${2:-}"
  local target="$URL_BASE"
  if [[ -n "$query_string" ]]; then
    target="${URL_BASE}?${query_string}"
  fi
  printf '%s' "$statement" \
    | curl -sS --fail-with-body -u "${USER}:${PASS}" --data-binary @- "$target"
  echo
}
post_sql_file() {
# ClickHouse HTTP rejects multi-statements; split the file by `;` and post
# each statement individually. The split is purely textual, so it is only safe
# while our DDL files contain no `;` inside string literals or `--` comments.
local file="$1" local file="$1"
echo ">>> applying $(basename "$file")" local extra_param="${2:-}"
if [[ -n "$PASS" ]]; then local url="$URL_BASE"
clickhouse-client --host "$host" --port "$port" --user "$USER" --password "$PASS" \ [[ -n "$extra_param" ]] && url="${URL_BASE}?${extra_param}"
--database "$DB" --multiquery --queries-file "$file"
else local tmpdir
clickhouse-client --host "$host" --port "$port" --user "$USER" \ tmpdir=$(mktemp -d)
--database "$DB" --multiquery --queries-file "$file"
awk -v dir="$tmpdir" '
BEGIN { RS=";" ; count=0 }
{
stmt=$0
gsub(/^[[:space:]\n\r]+|[[:space:]\n\r]+$/, "", stmt)
if (stmt == "") next
count++
out=sprintf("%s/%04d.sql", dir, count)
print stmt > out
}
' "$file"
local rc=0
local s
for s in "$tmpdir"/*.sql; do
[[ -f "$s" ]] || continue
if ! curl -sS --fail-with-body \
-u "${USER}:${PASS}" \
--data-binary "@${s}" \
"$url"; then
rc=1
break
fi fi
echo
done
rm -rf "$tmpdir"
return "$rc"
} }
ensure_db() { ensure_db() {
if [[ -n "$PASS" ]]; then post_sql "CREATE DATABASE IF NOT EXISTS \`${DB}\`"
clickhouse-client --host "$host" --port "$port" --user "$USER" --password "$PASS" \ }
--query "CREATE DATABASE IF NOT EXISTS $DB"
else run_sql_file() {
clickhouse-client --host "$host" --port "$port" --user "$USER" \ local file="$1"
--query "CREATE DATABASE IF NOT EXISTS $DB" echo ">>> applying $(basename "$file")"
fi post_sql_file "$file" "database=${DB}"
} }
case "$MODE" in case "$MODE" in
up) up)
ensure_db ensure_db
for f in $(ls "$DIR"/*.up.sql 2>/dev/null | sort); do for f in $(ls "$DIR"/*.up.sql 2>/dev/null | sort); do
run_sql "$f" run_sql_file "$f"
done done
;; ;;
down) down)
for f in $(ls "$DIR"/*.down.sql 2>/dev/null | sort -r); do for f in $(ls "$DIR"/*.down.sql 2>/dev/null | sort -r); do
run_sql "$f" run_sql_file "$f"
done done
;; ;;
*) *)

View File

@@ -1,13 +1,16 @@
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Shared infrastructure # Shared infrastructure (remote)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
POSTGRES_DSN=postgres://cdp:cdp@localhost:5432/cdp?sslmode=disable POSTGRES_DSN=postgres://renolation:renolation@103.188.82.191:5432/ingestion?sslmode=disable
REDIS_ADDR=localhost:6379 REDIS_ADDR=localhost:6379
KAFKA_BROKERS=localhost:9092 KAFKA_BROKERS=localhost:19092
CLICKHOUSE_ADDR=localhost:9000
CLICKHOUSE_DB=cdp # ClickHouse (internal, native protocol, plain)
CLICKHOUSE_USER=default CLICKHOUSE_ADDR=192.168.1.60:9000
CLICKHOUSE_PASSWORD= CLICKHOUSE_DB=analytics
CLICKHOUSE_USER=renolation
CLICKHOUSE_PASSWORD=renolation
CLICKHOUSE_SECURE=false
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Ingest service # Ingest service
@@ -18,10 +21,10 @@ INGEST_PAYLOAD_LIMIT_KB=100
INGEST_BATCH_LIMIT_KB=4000 INGEST_BATCH_LIMIT_KB=4000
INGEST_LATE_EVENT_HOURS=24 INGEST_LATE_EVENT_HOURS=24
INGEST_DEDUP_TTL_HOURS=24 INGEST_DEDUP_TTL_HOURS=24
INGEST_WRITE_KEY_CACHE_TTL_SECONDS=45 INGEST_WRITE_KEY_CACHE_TTL_SECONDS=45s
INGEST_LOG_PAYLOAD_ON_SUCCESS=false INGEST_LOG_PAYLOAD_ON_SUCCESS=false
INGEST_LOG_PAYLOAD_ON_ERROR=true INGEST_LOG_PAYLOAD_ON_ERROR=true
INGEST_SHUTDOWN_TIMEOUT_SECONDS=30 INGEST_SHUTDOWN_TIMEOUT_SECONDS=30s
# Kafka topics # Kafka topics
KAFKA_TOPIC_INGEST=events.ingest KAFKA_TOPIC_INGEST=events.ingest
@@ -35,8 +38,8 @@ BULKER_HTTP_ADDR=:3042
BULKER_LOG_LEVEL=info BULKER_LOG_LEVEL=info
BULKER_KAFKA_GROUP=bulker BULKER_KAFKA_GROUP=bulker
BULKER_BATCH_SIZE=1000 BULKER_BATCH_SIZE=1000
BULKER_BATCH_INTERVAL_SECONDS=5 BULKER_BATCH_INTERVAL_SECONDS=5s
BULKER_SHUTDOWN_TIMEOUT_SECONDS=60 BULKER_SHUTDOWN_TIMEOUT_SECONDS=60s
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Rotor (Node.js) # Rotor (Node.js)

View File

@@ -9,12 +9,54 @@
# Configuration # Configuration
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
POSTGRES_DSN ?= postgres://cdp:cdp@localhost:5432/cdp?sslmode=disable # Auto-load .env if present, then export every variable so child processes
CLICKHOUSE_DSN ?= clickhouse://default:@localhost:9000/cdp # (go run, docker run, the apply script) inherit them. GNU Make 3.81 (macOS
MIGRATE_BIN ?= migrate # default) ignores bare `export`, so we list the keys explicitly below after
# all variables have been declared.
# Pull in local overrides from .env when the file exists. The `?=` defaults
# below only take effect for variables that .env (or the calling environment)
# did not already set.
ifneq (,$(wildcard ./.env))
include .env
endif
# NOTE(review): the defaults below embed a username/password and concrete
# hosts in the Makefile itself -- consider keeping real credentials out of
# version control and defaulting to a local development DSN here.
POSTGRES_DSN ?= postgres://renolation:renolation@103.188.82.191:5432/ingestion?sslmode=disable
# For dockerized `migrate`: same DSN works because the target is reachable from
# the container too (it's a public host, not host.docker.internal).
POSTGRES_DSN_DOCKER ?= $(POSTGRES_DSN)
# NOTE(review): this default uses port 8123 (HTTP interface), while the env
# file in this change sets CLICKHOUSE_ADDR to port 9000 -- confirm which port
# each consumer of CLICKHOUSE_ADDR is expected to receive.
CLICKHOUSE_ADDR ?= 192.168.1.60:8123
CLICKHOUSE_DB ?= analytics
CLICKHOUSE_USER ?= renolation
CLICKHOUSE_PASSWORD ?= renolation
CLICKHOUSE_SECURE ?= 0
MIGRATE_IMAGE ?= migrate/migrate:v4.17.1
MIGRATIONS_DIR := infra/migrations MIGRATIONS_DIR := infra/migrations
CLICKHOUSE_DIR := infra/clickhouse CLICKHOUSE_DIR := infra/clickhouse
# Re-export everything that the Go services and the apply script read.
# Listed explicitly so this works on GNU Make 3.81 (macOS default).
export POSTGRES_DSN REDIS_ADDR KAFKA_BROKERS
export CLICKHOUSE_ADDR CLICKHOUSE_DB CLICKHOUSE_USER CLICKHOUSE_PASSWORD CLICKHOUSE_SECURE
export INGEST_HTTP_ADDR INGEST_LOG_LEVEL INGEST_PAYLOAD_LIMIT_KB INGEST_BATCH_LIMIT_KB
export INGEST_LATE_EVENT_HOURS INGEST_DEDUP_TTL_HOURS INGEST_WRITE_KEY_CACHE_TTL_SECONDS
export INGEST_LOG_PAYLOAD_ON_SUCCESS INGEST_LOG_PAYLOAD_ON_ERROR INGEST_SHUTDOWN_TIMEOUT_SECONDS
export KAFKA_TOPIC_INGEST KAFKA_TOPIC_DLQ KAFKA_TOPIC_RETRY
export BULKER_HTTP_ADDR BULKER_LOG_LEVEL BULKER_KAFKA_GROUP BULKER_BATCH_SIZE
export BULKER_BATCH_INTERVAL_SECONDS BULKER_SHUTDOWN_TIMEOUT_SECONDS
export ROTOR_PORT ROTOR_LOG_LEVEL ROTOR_ISOLATE_MEMORY_MB ROTOR_FUNCTION_TIMEOUT_MS
# `migrate` CLI: prefer the locally-installed binary if it exists, otherwise
# run the official Docker image. Set MIGRATE_BIN to override.
MIGRATE_BIN ?= $(shell command -v migrate 2>/dev/null)
ifeq ($(MIGRATE_BIN),)
MIGRATE = docker run --rm \
-v $(CURDIR)/$(MIGRATIONS_DIR):/migrations \
$(MIGRATE_IMAGE) -path=/migrations -database "$(POSTGRES_DSN_DOCKER)"
MIGRATE_CREATE = docker run --rm \
-v $(CURDIR)/$(MIGRATIONS_DIR):/migrations \
$(MIGRATE_IMAGE) create -ext sql -dir /migrations -seq
else
MIGRATE = $(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)"
MIGRATE_CREATE = $(MIGRATE_BIN) create -ext sql -dir $(MIGRATIONS_DIR) -seq
endif
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Help # Help
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -61,16 +103,16 @@ logs:
migrate/new: migrate/new:
@if [ -z "$(name)" ]; then echo "usage: make migrate/new name=add_xxx"; exit 1; fi @if [ -z "$(name)" ]; then echo "usage: make migrate/new name=add_xxx"; exit 1; fi
$(MIGRATE_BIN) create -ext sql -dir $(MIGRATIONS_DIR) -seq $(name) $(MIGRATE_CREATE) $(name)
migrate/up: migrate/up:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" up $(MIGRATE) up
migrate/down: migrate/down:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" down 1 $(MIGRATE) down 1
migrate/status: migrate/status:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" version $(MIGRATE) version
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ClickHouse DDL # ClickHouse DDL
@@ -90,12 +132,22 @@ run/ingest:
cd ingest && go run ./cmd/server cd ingest && go run ./cmd/server
run/bulker: run/bulker:
@echo ">>> CLICKHOUSE_ADDR=$$CLICKHOUSE_ADDR CLICKHOUSE_SECURE=$$CLICKHOUSE_SECURE"
cd bulker && go run ./cmd/server cd bulker && go run ./cmd/server
# debug/env prints the ClickHouse/Postgres settings as the recipe shell sees
# them, then every CLICKHOUSE/POSTGRES/KAFKA/INGEST/BULKER variable present in
# the child-process environment, sorted.
# NOTE(review): the output includes POSTGRES_DSN and CLICKHOUSE_PASSWORD in
# clear text -- avoid running this where output is captured or shared.
debug/env:
@echo "CLICKHOUSE_ADDR=$$CLICKHOUSE_ADDR"
@echo "CLICKHOUSE_SECURE=$$CLICKHOUSE_SECURE"
@echo "POSTGRES_DSN=$$POSTGRES_DSN"
@echo "-- child env --"
@env | grep -E '^(CLICKHOUSE|POSTGRES|KAFKA|INGEST|BULKER)' | sort
run/rotor: run/rotor:
@cd rotor && [ -d node_modules ] || npm install
cd rotor && npm run dev cd rotor && npm run dev
run/console: run/console:
@cd console && [ -d node_modules ] || npm install
cd console && npm run dev cd console && npm run dev
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@@ -44,7 +44,7 @@ func run() error {
defer cancel() defer cancel()
// ---- ClickHouse ------------------------------------------------------- // ---- ClickHouse -------------------------------------------------------
ch, err := writer.New(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseUser, cfg.ClickHousePassword) ch, err := writer.New(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseUser, cfg.ClickHousePassword, cfg.ClickHouseSecure)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -10,3 +10,25 @@ require (
github.com/twmb/franz-go v1.17.1 github.com/twmb/franz-go v1.17.1
go.uber.org/zap v1.27.0 go.uber.org/zap v1.27.0
) )
require (
github.com/ClickHouse/ch-go v0.61.5 // indirect
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-faster/city v1.0.1 // indirect
github.com/go-faster/errors v0.7.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.8 // indirect
github.com/paulmach/orb v0.11.1 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/segmentio/asm v1.2.0 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/twmb/franz-go/pkg/kmsg v1.8.0 // indirect
go.opentelemetry.io/otel v1.26.0 // indirect
go.opentelemetry.io/otel/trace v1.26.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/sys v0.26.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

129
ingestion/bulker/go.sum Normal file
View File

@@ -0,0 +1,129 @@
github.com/ClickHouse/ch-go v0.61.5 h1:zwR8QbYI0tsMiEcze/uIMK+Tz1D3XZXLdNrlaOpeEI4=
github.com/ClickHouse/ch-go v0.61.5/go.mod h1:s1LJW/F/LcFs5HJnuogFMta50kKDO0lf9zzfrbl0RQg=
github.com/ClickHouse/clickhouse-go/v2 v2.30.0 h1:AG4D/hW39qa58+JHQIFOSnxyL46H6h2lrmGGk17dhFo=
github.com/ClickHouse/clickhouse-go/v2 v2.30.0/go.mod h1:i9ZQAojcayW3RsdCb3YR+n+wC2h65eJsZCscZ1Z1wyo=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/caarlos0/env/v11 v11.2.2 h1:95fApNrUyueipoZN/EhA8mMxiNxrBwDa+oAZrMWl3Kg=
github.com/caarlos0/env/v11 v11.2.2/go.mod h1:JBfcdeQiBoI3Zh1QRAWfe+tpiNTmDtcCj/hHHHMx0vc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw=
github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw=
github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw=
github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg=
github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU=
github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU=
github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/twmb/franz-go v1.17.1 h1:0LwPsbbJeJ9R91DPUHSEd4su82WJWcTY1Zzbgbg4CeQ=
github.com/twmb/franz-go v1.17.1/go.mod h1:NreRdJ2F7dziDY/m6VyspWd6sNxHKXdMZI42UfQ3GXM=
github.com/twmb/franz-go/pkg/kmsg v1.8.0 h1:lAQB9Z3aMrIP9qF9288XcFf/ccaSxEitNA1CDTEIeTA=
github.com/twmb/franz-go/pkg/kmsg v1.8.0/go.mod h1:HzYEb8G3uu5XevZbtU0dVbkphaKTHk0X68N5ka4q6mU=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g=
go.opentelemetry.io/otel v1.26.0 h1:LQwgL5s/1W7YiiRwxf03QGnWLb2HW4pLiAhaA5cZXBs=
go.opentelemetry.io/otel v1.26.0/go.mod h1:UmLkJHUAidDval2EICqBMbnAd0/m2vmpf/dAM+fvFs4=
go.opentelemetry.io/otel/trace v1.26.0 h1:1ieeAUb4y0TE26jUFrCIXKpTuVK7uJGN9/Z/2LP5sQA=
go.opentelemetry.io/otel/trace v1.26.0/go.mod h1:4iDxvGDQuUkHve82hJJ8UqrwswHYsZuWCBllGV2U2y0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw=
golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -24,6 +24,7 @@ type Config struct {
ClickHouseDB string `env:"CLICKHOUSE_DB" envDefault:"cdp"` ClickHouseDB string `env:"CLICKHOUSE_DB" envDefault:"cdp"`
ClickHouseUser string `env:"CLICKHOUSE_USER" envDefault:"default"` ClickHouseUser string `env:"CLICKHOUSE_USER" envDefault:"default"`
ClickHousePassword string `env:"CLICKHOUSE_PASSWORD" envDefault:""` ClickHousePassword string `env:"CLICKHOUSE_PASSWORD" envDefault:""`
ClickHouseSecure bool `env:"CLICKHOUSE_SECURE" envDefault:"false"`
} }
func Load() (*Config, error) { func Load() (*Config, error) {

View File

@@ -8,6 +8,7 @@ package writer
import ( import (
"context" "context"
"crypto/tls"
"encoding/json" "encoding/json"
"fmt" "fmt"
"strconv" "strconv"
@@ -23,9 +24,13 @@ type ClickHouse struct {
db string db string
} }
func New(ctx context.Context, addr, db, user, password string) (*ClickHouse, error) { // New opens a ClickHouse connection. `secure` enables TLS. The wire protocol
conn, err := clickhouse.Open(&clickhouse.Options{ // is auto-selected from the port: 8123/8443 (HTTP interface) use HTTP, the
// native default otherwise.
func New(ctx context.Context, addr, db, user, password string, secure bool) (*ClickHouse, error) {
opts := &clickhouse.Options{
Addr: []string{addr}, Addr: []string{addr},
Protocol: protocolFromAddr(addr),
Auth: clickhouse.Auth{ Auth: clickhouse.Auth{
Database: db, Database: db,
Username: user, Username: user,
@@ -35,7 +40,11 @@ func New(ctx context.Context, addr, db, user, password string) (*ClickHouse, err
"async_insert": 0, "async_insert": 0,
"wait_for_async_insert": 0, "wait_for_async_insert": 0,
}, },
}) }
if secure {
opts.TLS = &tls.Config{MinVersion: tls.VersionTLS12}
}
conn, err := clickhouse.Open(opts)
if err != nil { if err != nil {
return nil, fmt.Errorf("clickhouse open: %w", err) return nil, fmt.Errorf("clickhouse open: %w", err)
} }
@@ -45,6 +54,26 @@ func New(ctx context.Context, addr, db, user, password string) (*ClickHouse, err
return &ClickHouse{conn: conn, db: db}, nil return &ClickHouse{conn: conn, db: db}, nil
} }
// protocolFromAddr picks the ClickHouse wire protocol from the port in addr.
// Ports 8123 (HTTP interface) and 8443 (HTTPS interface) both speak the HTTP
// wire; any other port, or an address without a port, gets the native protocol.
func protocolFromAddr(addr string) clickhouse.Protocol {
	port := portOf(addr)
	if port == "8123" || port == "8443" {
		return clickhouse.HTTP
	}
	// 9000, 9440 and everything else fall through to native.
	return clickhouse.Native
}
// portOf returns whatever follows the last ':' in addr, or "" when addr
// contains no ':' at all. It does no validation of the returned text.
func portOf(addr string) string {
	idx := len(addr) - 1
	// Walk backwards until we land on the final colon (or run off the front).
	for idx >= 0 && addr[idx] != ':' {
		idx--
	}
	if idx < 0 {
		return ""
	}
	return addr[idx+1:]
}
func (c *ClickHouse) Close() error { return c.conn.Close() } func (c *ClickHouse) Close() error { return c.conn.Close() }
// WriteEvents fans out a mixed-type batch into the per-type tables. // WriteEvents fans out a mixed-type batch into the per-type tables.

5197
ingestion/console/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,57 +5,113 @@
# clickhouse_apply.sh up apply *.up.sql in infra/clickhouse/ # clickhouse_apply.sh up apply *.up.sql in infra/clickhouse/
# clickhouse_apply.sh down apply *.down.sql in REVERSE order # clickhouse_apply.sh down apply *.down.sql in REVERSE order
# #
# Talks to the HTTP(S) interface via curl -- no clickhouse-client binary needed.
# The SQL is POSTed as the raw body; ClickHouse parses the body when the URL
# has no ?query= parameter.
#
# Env: # Env:
# CLICKHOUSE_ADDR (default localhost:9000) # CLICKHOUSE_ADDR (default localhost:8123)
# CLICKHOUSE_DB (default cdp) # CLICKHOUSE_DB (default cdp)
# CLICKHOUSE_USER (default default) # CLICKHOUSE_USER (default default)
# CLICKHOUSE_PASSWORD (default empty) # CLICKHOUSE_PASSWORD (default empty)
# CLICKHOUSE_SECURE (default 0; auto-on for port 8443)
set -euo pipefail set -euo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/clickhouse" DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/clickhouse"
ADDR="${CLICKHOUSE_ADDR:-localhost:9000}" ADDR="${CLICKHOUSE_ADDR:-localhost:8123}"
DB="${CLICKHOUSE_DB:-cdp}" DB="${CLICKHOUSE_DB:-cdp}"
USER="${CLICKHOUSE_USER:-default}" USER="${CLICKHOUSE_USER:-default}"
PASS="${CLICKHOUSE_PASSWORD:-}" PASS="${CLICKHOUSE_PASSWORD:-}"
SECURE="${CLICKHOUSE_SECURE:-0}"
MODE="${1:-up}" MODE="${1:-up}"
host="${ADDR%%:*}" host="${ADDR%%:*}"
port="${ADDR##*:}" port="${ADDR##*:}"
run_sql() { case "$SECURE" in
1|true|TRUE|yes) scheme="https" ;;
*) [[ "$port" == "8443" ]] && scheme="https" || scheme="http" ;;
esac
URL_BASE="${scheme}://${host}:${port}/"
# post_sql posts a SQL string to ClickHouse. The query goes in the request
# body; URL params can carry session options like ?database=foo.
post_sql() {
  # $1: SQL text, sent verbatim as the request body.
  # $2: optional raw query string (e.g. "database=foo") appended after "?".
  local statement="$1"
  local query_string="${2:-}"
  local target="$URL_BASE"
  if [[ -n "$query_string" ]]; then
    target="${URL_BASE}?${query_string}"
  fi
  # printf '%s' keeps the body free of a trailing newline; curl reads it from stdin.
  printf '%s' "$statement" | curl -sS --fail-with-body \
    -u "${USER}:${PASS}" \
    --data-binary @- \
    "$target"
  # Terminate curl's output with a newline.
  echo
}
post_sql_file() {
# ClickHouse HTTP rejects multi-statements; split the file by `;` and post
# each statement individually. Our DDL files use `--` line comments and
# do not contain literal `;` inside strings, so a naive RS split is safe.
local file="$1" local file="$1"
echo ">>> applying $(basename "$file")" local extra_param="${2:-}"
if [[ -n "$PASS" ]]; then local url="$URL_BASE"
clickhouse-client --host "$host" --port "$port" --user "$USER" --password "$PASS" \ [[ -n "$extra_param" ]] && url="${URL_BASE}?${extra_param}"
--database "$DB" --multiquery --queries-file "$file"
else local tmpdir
clickhouse-client --host "$host" --port "$port" --user "$USER" \ tmpdir=$(mktemp -d)
--database "$DB" --multiquery --queries-file "$file"
awk -v dir="$tmpdir" '
BEGIN { RS=";" ; count=0 }
{
stmt=$0
gsub(/^[[:space:]\n\r]+|[[:space:]\n\r]+$/, "", stmt)
if (stmt == "") next
count++
out=sprintf("%s/%04d.sql", dir, count)
print stmt > out
}
' "$file"
local rc=0
local s
for s in "$tmpdir"/*.sql; do
[[ -f "$s" ]] || continue
if ! curl -sS --fail-with-body \
-u "${USER}:${PASS}" \
--data-binary "@${s}" \
"$url"; then
rc=1
break
fi fi
echo
done
rm -rf "$tmpdir"
return "$rc"
} }
ensure_db() { ensure_db() {
if [[ -n "$PASS" ]]; then post_sql "CREATE DATABASE IF NOT EXISTS \`${DB}\`"
clickhouse-client --host "$host" --port "$port" --user "$USER" --password "$PASS" \ }
--query "CREATE DATABASE IF NOT EXISTS $DB"
else run_sql_file() {
clickhouse-client --host "$host" --port "$port" --user "$USER" \ local file="$1"
--query "CREATE DATABASE IF NOT EXISTS $DB" echo ">>> applying $(basename "$file")"
fi post_sql_file "$file" "database=${DB}"
} }
case "$MODE" in case "$MODE" in
up) up)
ensure_db ensure_db
for f in $(ls "$DIR"/*.up.sql 2>/dev/null | sort); do for f in $(ls "$DIR"/*.up.sql 2>/dev/null | sort); do
run_sql "$f" run_sql_file "$f"
done done
;; ;;
down) down)
for f in $(ls "$DIR"/*.down.sql 2>/dev/null | sort -r); do for f in $(ls "$DIR"/*.down.sql 2>/dev/null | sort -r); do
run_sql "$f" run_sql_file "$f"
done done
;; ;;
*) *)

View File

@@ -1,6 +1,6 @@
module github.com/dbiz/cdp/ingestion/ingest module github.com/dbiz/cdp/ingestion/ingest
go 1.22 go 1.23
require ( require (
github.com/caarlos0/env/v11 v11.2.2 github.com/caarlos0/env/v11 v11.2.2
@@ -13,3 +13,26 @@ require (
github.com/twmb/franz-go v1.17.1 github.com/twmb/franz-go v1.17.1
go.uber.org/zap v1.27.0 go.uber.org/zap v1.27.0
) )
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
github.com/klauspost/compress v1.17.8 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/twmb/franz-go/pkg/kmsg v1.8.0 // indirect
go.uber.org/multierr v1.10.0 // indirect
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.16.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

80
ingestion/ingest/go.sum Normal file
View File

@@ -0,0 +1,80 @@
github.com/caarlos0/env/v11 v11.2.2 h1:95fApNrUyueipoZN/EhA8mMxiNxrBwDa+oAZrMWl3Kg=
github.com/caarlos0/env/v11 v11.2.2/go.mod h1:JBfcdeQiBoI3Zh1QRAWfe+tpiNTmDtcCj/hHHHMx0vc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw=
github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27HYW8P9FDk5PbgA=
github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY=
github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/redis/rueidis v1.0.45 h1:j7hfcqfLLIqgTK3IkxBhXdeJcP34t3XLXvorDLqXfgM=
github.com/redis/rueidis v1.0.45/go.mod h1:by+34b0cFXndxtYmPAHpoTHO5NkosDlBvhexoTURIxM=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/twmb/franz-go v1.17.1 h1:0LwPsbbJeJ9R91DPUHSEd4su82WJWcTY1Zzbgbg4CeQ=
github.com/twmb/franz-go v1.17.1/go.mod h1:NreRdJ2F7dziDY/m6VyspWd6sNxHKXdMZI42UfQ3GXM=
github.com/twmb/franz-go/pkg/kmsg v1.8.0 h1:lAQB9Z3aMrIP9qF9288XcFf/ccaSxEitNA1CDTEIeTA=
github.com/twmb/franz-go/pkg/kmsg v1.8.0/go.mod h1:HzYEb8G3uu5XevZbtU0dVbkphaKTHk0X68N5ka4q6mU=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=
go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30=
golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys=
golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -39,12 +39,14 @@ func (d *redisDedup) CheckAndSet(ctx context.Context, workspaceID, messageID str
Ex(d.ttl). Ex(d.ttl).
Build() Build()
resp := d.client.Do(ctx, cmd) resp := d.client.Do(ctx, cmd)
if err := resp.Error(); err != nil { err := resp.Error()
return false, fmt.Errorf("dedup setnx: %w", err) // SET NX returns nil reply when the key already exists; rueidis surfaces
} // that as a "redis nil" error, which is *not* a real failure.
// SET with NX returns "OK" when set, nil reply when key already exists. if rueidis.IsRedisNil(err) {
if resp.IsNil() {
return false, nil return false, nil
} }
if err != nil {
return false, fmt.Errorf("dedup setnx: %w", err)
}
return true, nil return true, nil
} }

View File

@@ -33,7 +33,8 @@ func NewProducer(brokers []string, topicIngest, topicDLQ, topicRetry string, log
kgo.ProducerLinger(5_000_000), // 5ms linger -> batch small bursts kgo.ProducerLinger(5_000_000), // 5ms linger -> batch small bursts
kgo.ProducerBatchCompression(kgo.ZstdCompression()), kgo.ProducerBatchCompression(kgo.ZstdCompression()),
kgo.MaxBufferedRecords(100_000), kgo.MaxBufferedRecords(100_000),
kgo.RequiredAcks(kgo.LeaderAck()), // franz-go enables idempotent writes by default, which requires acks=all.
kgo.RequiredAcks(kgo.AllISRAcks()),
kgo.ClientID("cdp-ingest"), kgo.ClientID("cdp-ingest"),
) )
if err != nil { if err != nil {
@@ -57,6 +58,12 @@ func (p *Producer) Close() {
} }
// Produce sends an event to the happy-path topic. Fire-and-forget. // Produce sends an event to the happy-path topic. Fire-and-forget.
//
// We detach the request's cancellation from the produce call: the HTTP
// handler returns 200 as soon as the record is buffered, after which the
// request context is cancelled. franz-go honours that cancellation and
// drops the buffered record. context.WithoutCancel preserves values for
// tracing but removes the deadline / Done signal.
func (p *Producer) Produce(ctx context.Context, ev *model.IngestedEvent) error { func (p *Producer) Produce(ctx context.Context, ev *model.IngestedEvent) error {
payload, err := json.Marshal(ev) payload, err := json.Marshal(ev)
if err != nil { if err != nil {
@@ -72,7 +79,7 @@ func (p *Producer) Produce(ctx context.Context, ev *model.IngestedEvent) error {
{Key: "type", Value: []byte(ev.Type)}, {Key: "type", Value: []byte(ev.Type)},
}, },
} }
p.client.Produce(ctx, rec, func(r *kgo.Record, err error) { p.client.Produce(context.WithoutCancel(ctx), rec, func(r *kgo.Record, err error) {
if err != nil { if err != nil {
p.log.Error("kafka produce failed", p.log.Error("kafka produce failed",
zap.String("topic", r.Topic), zap.String("topic", r.Topic),

112
ingestion/sdk/web/README.md Normal file
View File

@@ -0,0 +1,112 @@
# CDP Web SDK
Single-file TypeScript tracker for browsers. No build step, no dependencies.
## Install
Copy [`cdp.ts`](./cdp.ts) into your app. A common spot for Next.js:
```
your-app/
└── lib/
└── cdp.ts ← paste here
```
## Init (Next.js App Router)
`app/layout.tsx`:
```tsx
'use client';
import { useEffect } from 'react';
import { cdp } from '@/lib/cdp';
export default function RootLayout({ children }: { children: React.ReactNode }) {
useEffect(() => {
cdp.init({
writeKey: process.env.NEXT_PUBLIC_CDP_WRITE_KEY!,
endpoint: process.env.NEXT_PUBLIC_CDP_ENDPOINT ?? 'http://localhost:3049',
autoPage: true, // fire `page` on every SPA route change
});
}, []);
return <html><body>{children}</body></html>;
}
```
`.env.local`:
```
NEXT_PUBLIC_CDP_WRITE_KEY=cdp_dev_writekey_1234567890
NEXT_PUBLIC_CDP_ENDPOINT=http://localhost:3049
```
(The dev key above is the one seeded by `infra/migrations/000002_seed_dev.up.sql`.)
## Use
```tsx
import { cdp } from '@/lib/cdp';
// On login
cdp.identify(user.id, { email: user.email, plan: user.plan });
// On a meaningful action
cdp.track('Checkout Completed', { revenue: 199, currency: 'USD' });
// Manual page call (skip if autoPage is on)
cdp.page('Pricing');
// On logout
cdp.reset();
```
## Vite / Create React App
Identical — `cdp.init(...)` in your root component / `main.tsx`.
## What gets sent
Every call POSTs to `${endpoint}/v1/<type>` with this envelope:
```json
{
"type": "track",
"messageId": "uuid-v4",
"anonymousId": "uuid-v4 from localStorage",
"userId": "from identify()",
"sentAt": "2026-05-25T03:14:15.000Z",
"context": { "library_name": "cdp-web", "user_agent": "..." },
"event": "Checkout Completed",
"properties": { "revenue": 199, "currency": "USD" }
}
```
Header: `Authorization: Basic base64(<writeKey>:)`.
Payload is Segment-compatible: if you ever swap endpoints to Segment the same
code works.
## Things to know
- `anonymousId` is generated once and persisted in `localStorage` under
`cdp_anon`. It survives across sessions.
- `userId` is persisted in `cdp_uid` until you call `cdp.reset()`.
- `fetch` uses `keepalive: true` so events fire even when the page is
unloading. No `sendBeacon` because we need the `Authorization` header.
- For SSR (Next.js Server Components, Remix loaders) skip the SDK — fire
events from your API route or a server-side function instead.
## CORS
The ingest service serves `Access-Control-Allow-Origin: *` so any origin
works in dev. Lock this down for production (configure a reverse proxy or
patch `internal/middleware/middleware.go`).
## Production checklist
- [ ] Issue a per-workspace write key from the console (don't ship the dev key)
- [ ] Restrict CORS to known origins
- [ ] Front the ingest service with HTTPS (browser refuses mixed content)
- [ ] Set `NEXT_PUBLIC_CDP_ENDPOINT` to the public URL

225
ingestion/sdk/web/cdp.ts Normal file
View File

@@ -0,0 +1,225 @@
// Lightweight CDP tracker for browsers (React / Next.js / vanilla).
// Zero dependencies. Copy this file into your app and import the `cdp` object.
//
// import { cdp } from '@/lib/cdp';
//
// // Once, at app startup (e.g. layout.tsx / _app.tsx):
// cdp.init({
// writeKey: process.env.NEXT_PUBLIC_CDP_WRITE_KEY!,
// endpoint: process.env.NEXT_PUBLIC_CDP_ENDPOINT ?? 'http://localhost:3049',
// });
//
// cdp.identify('user_42', { plan: 'pro' });
// cdp.track('Button Clicked', { id: 'cta-hero' });
// cdp.page(); // pulls path/url from window.location
//
// Payload shape matches Segment's Track/Identify/Page API, so the same code
// works with Segment if you ever swap endpoints.
// Loose string-keyed object used for traits, properties, context and the
// event-specific part of a payload.
type Json = Record<string, unknown>;
// Event kinds. The kind is sent as the payload's `type` field and is also the
// last path segment of the request URL (`${endpoint}/v1/<type>`).
// NOTE(review): 'screen' is accepted by the type but no method in this file emits it.
type EventType = 'track' | 'identify' | 'page' | 'group' | 'alias' | 'screen';
/** Options accepted by `cdp.init()`. */
interface CDPConfig {
/** Write key; sent as the user part of the Basic `Authorization` header (`base64(writeKey + ":")`). */
writeKey: string;
/** Base URL of the ingest service. e.g. http://localhost:3049 */
endpoint: string;
/** localStorage key for the anonymous id. Default: "cdp_anon". */
anonymousIdKey?: string;
/** localStorage key for the resolved user id. Default: "cdp_uid". */
userIdKey?: string;
/** Auto-fire `page` on every history change. Default: false. */
autoPage?: boolean;
}
/**
 * Envelope fields that `send()` sets on every event before spreading the
 * event-specific body (event/properties/traits/...) on top.
 */
interface CommonPayload {
/** Event kind; matches the `/v1/<type>` path the payload is posted to. */
type: EventType;
/** Fresh UUID generated for each send() call. */
messageId: string;
/** Anonymous id read from (or created in) localStorage. */
anonymousId: string;
/** Stored user id, omitted when none has been saved by identify(). */
userId?: string;
/** Client clock at send time, from `new Date().toISOString()`. */
sentAt: string;
/** Library and browser details produced by buildContext(). */
context: Json;
}
// Fallback values for the optional CDPConfig fields. init() spreads these
// first and the caller's config second, so caller-supplied values win.
const DEFAULTS = {
anonymousIdKey: 'cdp_anon',
userIdKey: 'cdp_uid',
autoPage: false,
};
/**
 * Browser tracker that POSTs Segment-shaped events to the ingest service.
 *
 * Call `init()` once before anything else; until then every event is dropped
 * with a console warning. Identity lives in `localStorage` under the
 * configured keys. All storage access goes through guarded helpers, so
 * blocked or full storage degrades to "no persistence" instead of throwing
 * into application code.
 */
class CDPClient {
  private cfg: Required<CDPConfig> | null = null;
  private authHeader = '';
  /** True once the history hooks are installed; stops repeated init() calls from stacking them. */
  private autoPageInstalled = false;

  /**
   * Store the configuration, derive the auth header and make sure an
   * anonymous id exists. May be called more than once (for example when an
   * effect runs twice): the latest config wins and the auto-page hooks are
   * installed at most once.
   */
  init(config: CDPConfig) {
    this.cfg = {
      ...DEFAULTS,
      ...config,
      // Drop trailing slashes so `${endpoint}/v1/<type>` never contains "//".
      endpoint: config.endpoint.replace(/\/+$/, ''),
    };
    // Segment-style basic auth: base64(writeKey + ":").
    this.authHeader = 'Basic ' + b64(`${config.writeKey}:`);
    this.ensureAnonymousId();
    if (this.cfg.autoPage && typeof window !== 'undefined' && !this.autoPageInstalled) {
      this.autoPageInstalled = true;
      this.installAutoPage();
    }
  }

  /** Persist `userId` for later events and send an `identify` event with `traits`. */
  identify(userId: string, traits: Json = {}) {
    this.setUserId(userId);
    return this.send('identify', { userId, traits });
  }

  /** Send a `track` event named `event` with optional `properties`. */
  track(event: string, properties: Json = {}) {
    return this.send('track', { event, properties });
  }

  /**
   * Send a `page` event. In a browser, url/path/referrer are filled in from
   * the current location; caller-supplied `properties` override them.
   */
  page(name?: string, properties: Json = {}) {
    const loc = typeof window !== 'undefined' ? window.location : null;
    const merged: Json = {
      ...(loc ? { url: loc.href, path: loc.pathname, referrer: document.referrer } : {}),
      ...properties,
    };
    return this.send('page', { name, properties: merged });
  }

  /** Send a `group` event for `groupId` with optional `traits`. */
  group(groupId: string, traits: Json = {}) {
    return this.send('group', { groupId, traits });
  }

  /** Send an `alias` event linking `previousId` to `userId`. */
  alias(userId: string, previousId: string) {
    return this.send('alias', { userId, previousId });
  }

  /** Forget the user id (e.g. on logout). Anonymous id is preserved. */
  reset() {
    if (!this.cfg) return;
    this.storageRemove(this.cfg.userIdKey);
  }

  // ---------------------------------------------------------------------------
  // internals
  // ---------------------------------------------------------------------------

  /**
   * Build the common envelope, merge in the event-specific `body` and POST it
   * to `${endpoint}/v1/<type>`. Network failures are logged, never rethrown,
   * so the returned promise always resolves.
   */
  private send(type: EventType, body: Json) {
    if (!this.cfg) {
      console.warn('[cdp] send() before init(); call cdp.init() at startup');
      return Promise.resolve();
    }
    const payload: CommonPayload & Json = {
      type,
      messageId: uuidv4(),
      anonymousId: this.getAnonymousId(),
      userId: this.getUserId() ?? undefined,
      sentAt: new Date().toISOString(),
      context: this.buildContext(),
      // Spread last: identify/alias supply their own userId.
      ...body,
    };
    // navigator.sendBeacon cannot set an Authorization header, so every event
    // goes through fetch; keepalive lets the request outlive a page unload.
    return fetch(`${this.cfg.endpoint}/v1/${type}`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: this.authHeader,
      },
      body: JSON.stringify(payload),
      keepalive: true,
    }).catch((err) => {
      console.warn('[cdp] send failed', err);
    });
  }

  /** Library identification plus, in a browser, user agent, locale and screen size. */
  private buildContext(): Json {
    if (typeof window === 'undefined') return { library_name: 'cdp-web', library_version: '0.1.0' };
    return {
      library_name: 'cdp-web',
      library_version: '0.1.0',
      user_agent: navigator.userAgent,
      locale: navigator.language,
      screen_width: window.screen?.width,
      screen_height: window.screen?.height,
    };
  }

  /** Create and persist an anonymous id if storage holds none yet. */
  private ensureAnonymousId() {
    if (!this.cfg) return;
    if (!this.storageGet(this.cfg.anonymousIdKey)) {
      this.storageSet(this.cfg.anonymousIdKey, uuidv4());
    }
  }

  /**
   * Return the persisted anonymous id, creating one when missing. Without a
   * config or usable storage a fresh, unpersisted id is returned on each call.
   */
  private getAnonymousId(): string {
    if (!this.cfg) return uuidv4();
    const stored = this.storageGet(this.cfg.anonymousIdKey);
    if (stored) return stored;
    const fresh = uuidv4();
    this.storageSet(this.cfg.anonymousIdKey, fresh);
    return fresh;
  }

  private setUserId(id: string) {
    if (!this.cfg) return;
    this.storageSet(this.cfg.userIdKey, id);
  }

  private getUserId(): string | null {
    if (!this.cfg) return null;
    return this.storageGet(this.cfg.userIdKey);
  }

  /** Read from localStorage; null when storage is missing or access throws. */
  private storageGet(key: string): string | null {
    try {
      // The typeof probe sits inside the try because merely touching
      // localStorage can throw when the browser blocks storage.
      return typeof localStorage === 'undefined' ? null : localStorage.getItem(key);
    } catch {
      return null;
    }
  }

  /** Write to localStorage; silently does nothing when storage is missing, blocked or full. */
  private storageSet(key: string, value: string) {
    try {
      if (typeof localStorage !== 'undefined') localStorage.setItem(key, value);
    } catch {
      // Best effort: the event is still sent, the value just is not persisted.
    }
  }

  /** Remove a key from localStorage; silently does nothing when storage is unavailable. */
  private storageRemove(key: string) {
    try {
      if (typeof localStorage !== 'undefined') localStorage.removeItem(key);
    } catch {
      // Best effort, see storageSet.
    }
  }

  /**
   * Fire `page` once immediately and again whenever the pathname changes via
   * pushState, replaceState or popstate. Changes that keep the same pathname
   * (query string or hash only) do not fire.
   */
  private installAutoPage() {
    let lastPath = location.pathname;
    const fire = () => {
      if (location.pathname !== lastPath) {
        lastPath = location.pathname;
        this.page();
      }
    };
    // Patch pushState/replaceState so SPA route changes fire `page`.
    const origPush = history.pushState;
    const origReplace = history.replaceState;
    history.pushState = function (...args) {
      const r = origPush.apply(this, args);
      fire();
      return r;
    };
    history.replaceState = function (...args) {
      const r = origReplace.apply(this, args);
      fire();
      return r;
    };
    window.addEventListener('popstate', fire);
    // First page load.
    this.page();
  }
}
// ---------------------------------------------------------------------------
// tiny helpers (no deps)
// ---------------------------------------------------------------------------
/**
 * Generate a random (version 4) UUID string.
 *
 * Source of randomness, in order of preference:
 *  1. `crypto.randomUUID()`: browsers expose it only in secure contexts
 *     (HTTPS / localhost), so it can be absent on plain-HTTP pages.
 *  2. `crypto.getRandomValues()`: also available in insecure contexts.
 *  3. `Math.random()`: last resort when no `crypto` object exists.
 */
function uuidv4(): string {
  const c: Crypto | undefined = typeof crypto !== 'undefined' ? crypto : undefined;
  if (c && typeof c.randomUUID === 'function') {
    return c.randomUUID();
  }
  const bytes = new Uint8Array(16);
  if (c && typeof c.getRandomValues === 'function') {
    c.getRandomValues(bytes);
  } else {
    for (let i = 0; i < bytes.length; i++) {
      bytes[i] = (Math.random() * 256) | 0;
    }
  }
  // RFC 4122 section 4.4: stamp the version (4) and variant (10xx) bits.
  bytes[6] = (bytes[6] & 0x0f) | 0x40;
  bytes[8] = (bytes[8] & 0x3f) | 0x80;
  const hex = Array.from(bytes, (b) => (b < 16 ? '0' : '') + b.toString(16)).join('');
  return [
    hex.slice(0, 8),
    hex.slice(8, 12),
    hex.slice(12, 16),
    hex.slice(16, 20),
    hex.slice(20),
  ].join('-');
}
/**
 * Base64-encode `input`.
 *
 * Uses `btoa` where it exists (browsers, recent Node) and falls back to
 * Node's `Buffer` otherwise. `Buffer` is looked up on `globalThis` through a
 * structural cast rather than a `@ts-expect-error` directive: the directive
 * becomes a compile error ("unused directive") in any project that has
 * `@types/node` installed, because `Buffer` is then a known global.
 */
function b64(input: string): string {
  if (typeof btoa !== 'undefined') return btoa(input);
  // Node SSR without a global btoa.
  const nodeBuffer = (globalThis as unknown as {
    Buffer?: { from(data: string, encoding: string): { toString(encoding: string): string } };
  }).Buffer;
  if (!nodeBuffer) {
    throw new Error('[cdp] no base64 encoder available (neither btoa nor Buffer)');
  }
  return nodeBuffer.from(input, 'utf-8').toString('base64');
}
export const cdp = new CDPClient();
export type { CDPConfig };