This commit is contained in:
2026-05-25 10:16:31 +07:00
parent a428170fef
commit c5e980aa52
21 changed files with 6172 additions and 102 deletions

View File

@@ -1,12 +1,15 @@
# ---------------------------------------------------------------------------
# Shared infrastructure (matches cdp-ingestion)
# Shared infrastructure (remote)
# ---------------------------------------------------------------------------
POSTGRES_DSN=postgres://cdp:cdp@localhost:5432/cdp?sslmode=disable
POSTGRES_DSN=postgres://renolation:renolation@103.188.82.191:5432/analytics?sslmode=disable
REDIS_ADDR=localhost:6379
CLICKHOUSE_ADDR=localhost:9000
CLICKHOUSE_DB=cdp
CLICKHOUSE_USER=default
CLICKHOUSE_PASSWORD=
# ClickHouse (internal, native protocol, plain)
CLICKHOUSE_ADDR=192.168.1.60:9000
CLICKHOUSE_DB=analytics
CLICKHOUSE_USER=renolation
CLICKHOUSE_PASSWORD=renolation
CLICKHOUSE_SECURE=false
# ---------------------------------------------------------------------------
# Analytics API
@@ -19,8 +22,8 @@ ANALYTICS_CACHE_TTL_PROFILE_SECONDS=30s
ANALYTICS_CH_TEMPLATES_DIR=../infra/clickhouse
# Custom SQL ClickHouse user — must have SELECT-only grants
ANALYTICS_CH_SQL_USER=analytics_ro
ANALYTICS_CH_SQL_PASSWORD=
ANALYTICS_CH_SQL_USER=renolation
ANALYTICS_CH_SQL_PASSWORD=renolation
# ---------------------------------------------------------------------------
# Analytics Worker (river)

View File

@@ -10,12 +10,50 @@
# Configuration
# ---------------------------------------------------------------------------
POSTGRES_DSN ?= postgres://cdp:cdp@localhost:5432/cdp?sslmode=disable
CLICKHOUSE_DSN ?= clickhouse://default:@localhost:9000/cdp
MIGRATE_BIN ?= migrate
# Auto-load .env if present, then export every variable so child processes
# (go run, docker run, the apply script) inherit them.
ifneq (,$(wildcard ./.env))
include .env
export
endif
POSTGRES_DSN ?= postgres://renolation:renolation@103.188.82.191:5432/analytics?sslmode=disable
POSTGRES_DSN_DOCKER ?= $(POSTGRES_DSN)
CLICKHOUSE_ADDR ?= 192.168.1.60:9000
CLICKHOUSE_DB ?= analytics
CLICKHOUSE_USER ?= renolation
CLICKHOUSE_PASSWORD ?= renolation
CLICKHOUSE_SECURE ?= 0
# Re-export everything the Go services + scripts consume. Explicit list is
# required on GNU Make 3.81 (macOS default), where bare `export` is a no-op.
export POSTGRES_DSN REDIS_ADDR
export CLICKHOUSE_ADDR CLICKHOUSE_DB CLICKHOUSE_USER CLICKHOUSE_PASSWORD CLICKHOUSE_SECURE
export ANALYTICS_HTTP_ADDR ANALYTICS_LOG_LEVEL ANALYTICS_SHUTDOWN_TIMEOUT_SECONDS
export ANALYTICS_CACHE_TTL_QUERY_SECONDS ANALYTICS_CACHE_TTL_PROFILE_SECONDS ANALYTICS_CH_TEMPLATES_DIR
export ANALYTICS_CH_SQL_USER ANALYTICS_CH_SQL_PASSWORD
export WORKER_HTTP_ADDR WORKER_LOG_LEVEL WORKER_SHUTDOWN_TIMEOUT_SECONDS
export WORKER_MAX_WORKERS WORKER_COMPUTE_TRAITS_EVERY WORKER_REFRESH_SEGMENT_EVERY
MIGRATE_IMAGE ?= migrate/migrate:v4.17.1
MIGRATIONS_DIR := infra/migrations
CLICKHOUSE_DIR := infra/clickhouse
export CLICKHOUSE_ADDR CLICKHOUSE_DB CLICKHOUSE_USER CLICKHOUSE_PASSWORD CLICKHOUSE_SECURE
# `migrate` CLI: prefer a local binary, otherwise run the official Docker image.
#
# The PATH lookup runs once, at parse time, and only when MIGRATE_BIN has not
# already been supplied (environment, .env, or command line). This is the
# expanded form of `?=`, but with a simply-expanded (`:=`) result: a plain
# `?= $(shell ...)` is recursively expanded and would re-run the lookup on
# every reference to $(MIGRATE_BIN).
ifeq ($(origin MIGRATE_BIN),undefined)
MIGRATE_BIN := $(shell command -v migrate 2>/dev/null)
endif

# MIGRATE and MIGRATE_CREATE stay recursively expanded (`=`) so the DSN and
# directory variables are resolved when a recipe uses them.
ifeq ($(strip $(MIGRATE_BIN)),)
# No local binary: run the pinned image with the migrations directory mounted
# at /migrations. The mount argument is quoted so a checkout path containing
# spaces still reaches docker as a single argument.
MIGRATE = docker run --rm \
    -v "$(CURDIR)/$(MIGRATIONS_DIR):/migrations" \
    $(MIGRATE_IMAGE) -path=/migrations -database "$(POSTGRES_DSN_DOCKER)"
MIGRATE_CREATE = docker run --rm \
    -v "$(CURDIR)/$(MIGRATIONS_DIR):/migrations" \
    $(MIGRATE_IMAGE) create -ext sql -dir /migrations -seq
else
MIGRATE = $(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)"
MIGRATE_CREATE = $(MIGRATE_BIN) create -ext sql -dir $(MIGRATIONS_DIR) -seq
endif
# ---------------------------------------------------------------------------
# Help
# ---------------------------------------------------------------------------
@@ -47,16 +85,16 @@ help:
migrate/new:
@if [ -z "$(name)" ]; then echo "usage: make migrate/new name=add_xxx"; exit 1; fi
$(MIGRATE_BIN) create -ext sql -dir $(MIGRATIONS_DIR) -seq $(name)
$(MIGRATE_CREATE) $(name)
migrate/up:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" up
$(MIGRATE) up
migrate/down:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" down 1
$(MIGRATE) down 1
migrate/status:
$(MIGRATE_BIN) -path $(MIGRATIONS_DIR) -database "$(POSTGRES_DSN)" version
$(MIGRATE) version
# ---------------------------------------------------------------------------
# ClickHouse DDL
@@ -79,6 +117,7 @@ run/workers:
cd workers && go run ./cmd/worker
# Start the console dev server, installing npm dependencies first when
# console/node_modules is missing.
#
# The `[ -d ... ] || npm install` fallback is grouped in braces: without them
# the line parses as `(cd console && [ -d node_modules ]) || npm install`, so a
# failed `cd` would run `npm install` in the current directory instead of
# stopping the recipe.
run/console:
	@cd console && { [ -d node_modules ] || npm install; }
	cd console && npm run dev
# ---------------------------------------------------------------------------

View File

@@ -60,13 +60,13 @@ func run() error {
}
defer redisClient.Close()
chMain, err := repo.NewClickHouse(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseUser, cfg.ClickHousePassword)
chMain, err := repo.NewClickHouse(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseUser, cfg.ClickHousePassword, cfg.ClickHouseSecure)
if err != nil {
return err
}
defer func() { _ = chMain.Close() }()
chRO, err := repo.NewClickHouseReadOnly(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseSQLUser, cfg.ClickHouseSQLPassword)
chRO, err := repo.NewClickHouseReadOnly(ctx, cfg.ClickHouseAddr, cfg.ClickHouseDB, cfg.ClickHouseSQLUser, cfg.ClickHouseSQLPassword, cfg.ClickHouseSecure)
if err != nil {
// Read-only user might not be provisioned in dev. Log + fall back to
// the main connection so /query/sql still works locally; production

View File

@@ -36,6 +36,7 @@ type Config struct {
ClickHouseDB string `env:"CLICKHOUSE_DB" envDefault:"cdp"`
ClickHouseUser string `env:"CLICKHOUSE_USER" envDefault:"default"`
ClickHousePassword string `env:"CLICKHOUSE_PASSWORD"`
ClickHouseSecure bool `env:"CLICKHOUSE_SECURE" envDefault:"false"`
}
func Load() (*Config, error) {

View File

@@ -2,17 +2,20 @@ package repo
import (
"context"
"crypto/tls"
"fmt"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
)
// NewClickHouse opens a native-protocol ClickHouse connection. The returned
// driver.Conn is safe for concurrent use. Caller owns Close().
func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver.Conn, error) {
conn, err := clickhouse.Open(&clickhouse.Options{
Addr: []string{addr},
// NewClickHouse opens a ClickHouse connection. `secure` enables TLS. The
// wire protocol is auto-selected from the port: 8123 and 8443 (the HTTP
// interface ports) use HTTP; any other port uses the native protocol.
func NewClickHouse(ctx context.Context, addr, db, user, password string, secure bool) (driver.Conn, error) {
opts := &clickhouse.Options{
Addr: []string{addr},
Protocol: protocolFromAddr(addr),
Auth: clickhouse.Auth{
Database: db,
Username: user,
@@ -21,7 +24,11 @@ func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver
Settings: clickhouse.Settings{
"readonly": 0, // analytics queries; per-user read-only enforced for /query/sql separately
},
})
}
if secure {
opts.TLS = &tls.Config{MinVersion: tls.VersionTLS12}
}
conn, err := clickhouse.Open(opts)
if err != nil {
return nil, fmt.Errorf("open clickhouse: %w", err)
}
@@ -35,9 +42,10 @@ func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver
// NewClickHouseReadOnly opens a ClickHouse connection using a SELECT-only
// account. Used to back the /query/sql sandbox: DDL/DML are rejected at the DB
// level even if the app-level keyword guard is bypassed.
func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string) (driver.Conn, error) {
conn, err := clickhouse.Open(&clickhouse.Options{
Addr: []string{addr},
func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string, secure bool) (driver.Conn, error) {
opts := &clickhouse.Options{
Addr: []string{addr},
Protocol: protocolFromAddr(addr),
Auth: clickhouse.Auth{
Database: db,
Username: user,
@@ -46,7 +54,11 @@ func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string)
Settings: clickhouse.Settings{
"readonly": 2, // belt-and-braces: server-side enforce read-only
},
})
}
if secure {
opts.TLS = &tls.Config{MinVersion: tls.VersionTLS12}
}
conn, err := clickhouse.Open(opts)
if err != nil {
return nil, fmt.Errorf("open clickhouse (ro): %w", err)
}
@@ -56,3 +68,24 @@ func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string)
}
return conn, nil
}
// protocolFromAddr picks the wire protocol from the port in addr: the
// well-known ClickHouse HTTP-interface ports (8123/8443) map to HTTP and
// every other value maps to Native. This lets CLICKHOUSE_ADDR point at either
// kind of endpoint without an extra env var.
func protocolFromAddr(addr string) clickhouse.Protocol {
	if p := portOf(addr); p == "8123" || p == "8443" {
		return clickhouse.HTTP
	}
	return clickhouse.Native
}
// portOf returns the text after the last ':' in addr, i.e. the port of a
// "host:port" or "[ipv6]:port" address, or "" when addr carries no port.
//
// The scan runs from the right. Reaching a ']' before any ':' means addr ends
// in a bracketed IPv6 literal with no port (e.g. "[::1]"), so "" is returned
// rather than a fragment of the address such as "1]". An unbracketed IPv6
// address is ambiguous and still yields whatever follows its last ':'.
func portOf(addr string) string {
	for i := len(addr) - 1; i >= 0; i-- {
		switch addr[i] {
		case ':':
			return addr[i+1:]
		case ']':
			// End of a bracketed IPv6 literal: nothing after it, so no port.
			return ""
		}
	}
	return ""
}

View File

@@ -5,57 +5,113 @@
# clickhouse_apply.sh up apply *.up.sql in infra/clickhouse/
# clickhouse_apply.sh down apply *.down.sql in REVERSE order
#
# Talks to the HTTP(S) interface via curl -- no clickhouse-client binary needed.
# The SQL is POSTed as the raw body; ClickHouse parses the body when the URL
# has no ?query= parameter.
#
# Env:
# CLICKHOUSE_ADDR (default localhost:9000)
# CLICKHOUSE_DB (default cdp)
# CLICKHOUSE_USER (default default)
# CLICKHOUSE_ADDR (default localhost:8123)
# CLICKHOUSE_DB (default cdp)
# CLICKHOUSE_USER (default default)
# CLICKHOUSE_PASSWORD (default empty)
# CLICKHOUSE_SECURE (default 0; auto-on for port 8443)
set -euo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/clickhouse"
ADDR="${CLICKHOUSE_ADDR:-localhost:9000}"
ADDR="${CLICKHOUSE_ADDR:-localhost:8123}"
DB="${CLICKHOUSE_DB:-cdp}"
USER="${CLICKHOUSE_USER:-default}"
PASS="${CLICKHOUSE_PASSWORD:-}"
SECURE="${CLICKHOUSE_SECURE:-0}"
MODE="${1:-up}"
host="${ADDR%%:*}"
port="${ADDR##*:}"
run_sql() {
case "$SECURE" in
1|true|TRUE|yes) scheme="https" ;;
*) [[ "$port" == "8443" ]] && scheme="https" || scheme="http" ;;
esac
URL_BASE="${scheme}://${host}:${port}/"
# post_sql SQL [URL_PARAMS]
# Sends one SQL string to ClickHouse. The statement travels as the request
# body (piped to curl on stdin); the optional second argument is appended to
# the base URL as a query string and can carry session options such as
# database=foo.
post_sql() {
  local statement="$1"
  local query_string="${2:-}"
  local endpoint="$URL_BASE"

  if [[ -n "$query_string" ]]; then
    endpoint="${URL_BASE}?${query_string}"
  fi

  printf '%s' "$statement" \
    | curl -sS --fail-with-body -u "${USER}:${PASS}" --data-binary @- "$endpoint"
  # Emit a newline after whatever curl printed.
  echo
}
post_sql_file() {
# ClickHouse HTTP rejects multi-statements; split the file by `;` and post
# each statement individually. Our DDL files use `--` line comments and
# do not contain literal `;` inside strings, so a naive RS split is safe.
local file="$1"
echo ">>> applying $(basename "$file")"
if [[ -n "$PASS" ]]; then
clickhouse-client --host "$host" --port "$port" --user "$USER" --password "$PASS" \
--database "$DB" --multiquery --queries-file "$file"
else
clickhouse-client --host "$host" --port "$port" --user "$USER" \
--database "$DB" --multiquery --queries-file "$file"
fi
local extra_param="${2:-}"
local url="$URL_BASE"
[[ -n "$extra_param" ]] && url="${URL_BASE}?${extra_param}"
local tmpdir
tmpdir=$(mktemp -d)
awk -v dir="$tmpdir" '
BEGIN { RS=";" ; count=0 }
{
stmt=$0
gsub(/^[[:space:]\n\r]+|[[:space:]\n\r]+$/, "", stmt)
if (stmt == "") next
count++
out=sprintf("%s/%04d.sql", dir, count)
print stmt > out
}
' "$file"
local rc=0
local s
for s in "$tmpdir"/*.sql; do
[[ -f "$s" ]] || continue
if ! curl -sS --fail-with-body \
-u "${USER}:${PASS}" \
--data-binary "@${s}" \
"$url"; then
rc=1
break
fi
echo
done
rm -rf "$tmpdir"
return "$rc"
}
ensure_db() {
if [[ -n "$PASS" ]]; then
clickhouse-client --host "$host" --port "$port" --user "$USER" --password "$PASS" \
--query "CREATE DATABASE IF NOT EXISTS $DB"
else
clickhouse-client --host "$host" --port "$port" --user "$USER" \
--query "CREATE DATABASE IF NOT EXISTS $DB"
fi
post_sql "CREATE DATABASE IF NOT EXISTS \`${DB}\`"
}
# run_sql_file FILE
# Announces FILE by its base name, then hands it to post_sql_file with the
# target database passed as the `database` URL parameter.
run_sql_file() {
  local sql_path="$1"

  printf '>>> applying %s\n' "$(basename "$sql_path")"
  post_sql_file "$sql_path" "database=${DB}"
}
case "$MODE" in
up)
ensure_db
for f in $(ls "$DIR"/*.up.sql 2>/dev/null | sort); do
run_sql "$f"
run_sql_file "$f"
done
;;
down)
for f in $(ls "$DIR"/*.down.sql 2>/dev/null | sort -r); do
run_sql "$f"
run_sql_file "$f"
done
;;
*)