data layer

This commit is contained in:
2026-05-25 08:38:26 +07:00
parent 4e8c11d545
commit a428170fef
81 changed files with 3941 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
# ---- build stage: compile the worker binary ----
FROM golang:1.22-alpine AS build
WORKDIR /src
# Copy the module files before the sources so the dependency layer is cached
# independently of code changes. The go.sum* glob lets the COPY succeed when
# go.sum does not exist.
COPY go.mod go.sum* ./
# NOTE(review): "|| true" ignores a failed download, so dependency problems
# only surface at the go build step below — confirm this is intentional.
RUN go mod download || true
COPY . .
# CGO is disabled so the binary is statically linked and can run on the
# distroless "static" base image; -s -w strip symbol and debug information.
RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/worker ./cmd/worker
# ---- runtime stage: only the compiled binary is copied in ----
FROM gcr.io/distroless/static-debian12:nonroot
COPY --from=build /out/worker /worker
# 4001 matches the default of WORKER_HTTP_ADDR (":4001") in the worker config.
EXPOSE 4001
USER nonroot:nonroot
ENTRYPOINT ["/worker"]

View File

@@ -0,0 +1,148 @@
// Command worker runs background jobs for the analytics service:
// computed-trait refresh, segment refresh, reverse-ETL pushes, webhook fan-out.
//
// Jobs are scheduled and dispatched via riverqueue/river backed by PostgreSQL.
// New job kinds are registered in registerWorkers below; periodic schedules
// are wired in periodicJobs.
package main
import (
"context"
"encoding/json"
"errors"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"time"
"github.com/go-chi/chi/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/riverqueue/river"
"github.com/riverqueue/river/riverdriver/riverpgxv5"
"go.uber.org/zap"
"github.com/dbiz/cdp/data-layer/workers/internal/config"
)
// main is the process entry point. It delegates all work to run and, when run
// reports an error, logs it and terminates via log.Fatal.
func main() {
	err := run()
	if err == nil {
		return
	}
	log.Fatal(err)
}
// run wires up the worker process and blocks until it is asked to stop.
//
// It loads configuration, builds the logger, opens the Postgres pool, starts
// the river client and serves the /health and /ready endpoints. It then waits
// for SIGINT/SIGTERM or for the HTTP server to fail, and shuts the HTTP server
// and the river client down within cfg.ShutdownTimeout.
//
// It returns an error when start-up fails or when the HTTP server stopped
// unexpectedly, so that main exits with a non-zero status in both cases. A
// signal-triggered shutdown returns nil.
func run() error {
	cfg, err := config.Load()
	if err != nil {
		return err
	}
	logger, err := newLogger(cfg.LogLevel)
	if err != nil {
		return err
	}
	// Sync errors are ignored on purpose: there is nowhere left to report them.
	defer func() { _ = logger.Sync() }()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// ---- Postgres pool ----------------------------------------------------
	pool, err := pgxpool.New(ctx, cfg.PostgresDSN)
	if err != nil {
		return err
	}
	defer pool.Close()

	// ---- river client -----------------------------------------------------
	workers := river.NewWorkers()
	registerWorkers(workers, logger)
	client, err := river.NewClient(riverpgxv5.New(pool), &river.Config{
		Queues: map[string]river.QueueConfig{
			river.QueueDefault: {MaxWorkers: cfg.MaxWorkers},
		},
		Workers:      workers,
		PeriodicJobs: periodicJobs(cfg),
		Logger:       newSlogAdapter(logger),
	})
	if err != nil {
		return err
	}
	if err := client.Start(ctx); err != nil {
		return err
	}

	// ---- HTTP (health) ----------------------------------------------------
	r := chi.NewRouter()
	r.Get("/health", func(w http.ResponseWriter, _ *http.Request) {
		writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
	})
	r.Get("/ready", func(w http.ResponseWriter, _ *http.Request) {
		writeJSON(w, http.StatusOK, map[string]string{"status": "ready"})
	})
	srv := &http.Server{
		Addr:              cfg.HTTPAddr,
		Handler:           r,
		ReadHeaderTimeout: 5 * time.Second,
	}
	// Buffered so the serving goroutine can always deliver its error and exit,
	// even if nobody is receiving any more.
	httpErr := make(chan error, 1)
	go func() {
		logger.Info("worker http listening", zap.String("addr", cfg.HTTPAddr))
		if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			httpErr <- err
		}
	}()

	// ---- Signals ----------------------------------------------------------
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(sigCh)

	// runErr stays nil for a signal-triggered shutdown and carries the server
	// error otherwise, so the failure is reflected in the exit status.
	var runErr error
	select {
	case <-sigCh:
		logger.Info("shutdown signal received")
	case runErr = <-httpErr:
		logger.Error("http stopped unexpectedly", zap.Error(runErr))
	}

	// A fresh context is used because the shutdown budget must not depend on
	// ctx, which is cancelled when run returns.
	shutCtx, shutCancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
	defer shutCancel()
	if err := srv.Shutdown(shutCtx); err != nil {
		logger.Error("http server shutdown", zap.Error(err))
	}
	if err := client.Stop(shutCtx); err != nil {
		logger.Error("river client stop", zap.Error(err))
	}
	return runErr
}
// registerWorkers adds job workers to the registry. Each new job kind
// (ComputeTraits, RefreshSegment, ReverseETL, ...) calls river.AddWorker here.
//
// Both parameters are currently ignored and the body is empty, so no job kind
// is registered yet.
// NOTE(review): river may refuse to start a client whose worker bundle is
// empty — confirm against the river version in use before deploying this stub.
func registerWorkers(_ *river.Workers, _ *zap.Logger) {
// e.g. river.AddWorker(workers, &job.ComputeTraitsWorker{Repo: traitsRepo, Log: logger})
}
// periodicJobs returns the recurring schedules driven by river's built-in
// scheduler. Idempotent jobs only — river may retry on failure.
//
// The config argument is currently ignored and the function returns nil, so
// no periodic job is scheduled yet.
// NOTE(review): presumably ComputeTraitsEvery and RefreshSegmentEvery from the
// config are meant to drive the schedules defined here — confirm.
func periodicJobs(_ *config.Config) []*river.PeriodicJob {
// Real schedules land here once the corresponding workers are wired up.
// See CLAUDE_analytics.md → Job Queue (river).
return nil
}
// writeJSON serializes body as JSON and writes it to w with the given status
// code and an application/json content type.
//
// The payload is encoded before any header is sent. A value that cannot be
// marshalled therefore produces a 500 response carrying a small JSON error
// document, instead of the requested status followed by an empty body.
func writeJSON(w http.ResponseWriter, status int, body any) {
	payload, err := json.Marshal(body)
	if err != nil {
		status = http.StatusInternalServerError
		payload = []byte(`{"error":"response encoding failed"}`)
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// The trailing newline keeps successful responses byte-identical to what
	// json.Encoder produced. A failed write means the client has gone away;
	// nothing useful can be done about it here, so the error is dropped.
	_, _ = w.Write(append(payload, '\n'))
}
// newLogger builds a zap logger from the production preset, with the
// timestamp stored under "ts" and the message under "msg".
//
// level is parsed with zap.ParseAtomicLevel; a value that does not parse
// falls back to info rather than failing. The returned error comes from
// building the zap configuration.
func newLogger(level string) (*zap.Logger, error) {
	zc := zap.NewProductionConfig()
	zc.EncoderConfig.MessageKey = "msg"
	zc.EncoderConfig.TimeKey = "ts"

	if parsed, parseErr := zap.ParseAtomicLevel(level); parseErr == nil {
		zc.Level = parsed
	} else {
		zc.Level = zap.NewAtomicLevelAt(zap.InfoLevel)
	}
	return zc.Build()
}

View File

@@ -0,0 +1,14 @@
package main
import (
"log/slog"
"go.uber.org/zap"
"go.uber.org/zap/exp/zapslog"
)
// newSlogAdapter wraps the core of the given zap logger in a zapslog handler
// and returns a *slog.Logger on top of it, so that river's structured logs
// end up in the same sink as the rest of the service. No handler options are
// supplied.
func newSlogAdapter(logger *zap.Logger) *slog.Logger {
	core := logger.Core()
	handler := zapslog.NewHandler(core, nil)
	return slog.New(handler)
}

15
data-layer/workers/go.mod Normal file
View File

@@ -0,0 +1,15 @@
module github.com/dbiz/cdp/data-layer/workers

go 1.22

require (
	github.com/ClickHouse/clickhouse-go/v2 v2.30.0
	github.com/caarlos0/env/v11 v11.2.2
	github.com/go-chi/chi/v5 v5.1.0
	github.com/jackc/pgx/v5 v5.6.0
	github.com/redis/rueidis v1.0.45
	github.com/riverqueue/river v0.13.0
	github.com/riverqueue/river/riverdriver/riverpgxv5 v0.13.0
	github.com/stretchr/testify v1.9.0
	go.uber.org/zap v1.27.0
	// zapslog is published in the separate go.uber.org/zap/exp module, not in
	// go.uber.org/zap itself. v0.2.0 is pinned because the slog adapter calls
	// NewHandler(core, nil) with the *HandlerOptions signature; later releases
	// are believed to switch to variadic options — verify before upgrading.
	go.uber.org/zap/exp v0.2.0
)

View File

@@ -0,0 +1,33 @@
// Package apperr defines AppError, the single error type used across the
// worker. Job handlers wrap any underlying driver error before returning so
// river retry/log lines stay consistent with the api service.
package apperr
import (
"errors"
"fmt"
)
// AppError pairs a human-readable message with an optional underlying cause.
type AppError struct {
	// Message describes what failed.
	Message string
	// Err is the wrapped cause; it may be nil.
	Err error
}

// Error implements the error interface. It renders "message: cause" when a
// cause is present and just the message otherwise.
func (e *AppError) Error() string {
	if e.Err == nil {
		return e.Message
	}
	return fmt.Sprintf("%s: %v", e.Message, e.Err)
}

// Unwrap exposes the wrapped cause so errors.Is and errors.As can walk the chain.
func (e *AppError) Unwrap() error {
	return e.Err
}

// As looks for an *AppError anywhere in err's chain. It returns the first
// match and true, or nil and false when the chain contains none.
func As(err error) (*AppError, bool) {
	var target *AppError
	// errors.As leaves target nil when nothing matches, so the pair can be
	// returned as is.
	found := errors.As(err, &target)
	return target, found
}

// Wrap builds an *AppError carrying msg as its message and err as its cause.
func Wrap(msg string, err error) *AppError {
	return &AppError{
		Message: msg,
		Err:     err,
	}
}

View File

@@ -0,0 +1,37 @@
// Package config loads runtime configuration for the analytics worker.
package config
import (
"fmt"
"time"
"github.com/caarlos0/env/v11"
)
// Config holds the worker's runtime settings. Every field is bound to an
// environment variable through its `env` tag; `envDefault` supplies the value
// used when the variable is not set.
type Config struct {
// HTTPAddr is the listen address of the worker's HTTP server.
HTTPAddr string `env:"WORKER_HTTP_ADDR" envDefault:":4001"`
// LogLevel is the logger level name.
LogLevel string `env:"WORKER_LOG_LEVEL" envDefault:"info"`
// ShutdownTimeout bounds graceful shutdown.
// NOTE(review): the variable name ends in _SECONDS but the field is a
// time.Duration with default "60s"; a bare number such as "60" presumably
// fails duration parsing — confirm and consider renaming the variable.
ShutdownTimeout time.Duration `env:"WORKER_SHUTDOWN_TIMEOUT_SECONDS" envDefault:"60s"`
// river queue tuning
// MaxWorkers is the worker limit applied to river's default queue.
MaxWorkers int `env:"WORKER_MAX_WORKERS" envDefault:"50"`
// ComputeTraitsEvery and RefreshSegmentEvery are intervals, presumably for
// the computed-trait and segment refresh jobs — TODO confirm once wired up.
ComputeTraitsEvery time.Duration `env:"WORKER_COMPUTE_TRAITS_EVERY" envDefault:"1h"`
RefreshSegmentEvery time.Duration `env:"WORKER_REFRESH_SEGMENT_EVERY" envDefault:"1h"`
// Shared infra ----------------------------------------------------------
// PostgresDSN is the only required setting; it has no default.
PostgresDSN string `env:"POSTGRES_DSN,required"`
RedisAddr string `env:"REDIS_ADDR" envDefault:"localhost:6379"`
ClickHouseAddr string `env:"CLICKHOUSE_ADDR" envDefault:"localhost:9000"`
ClickHouseDB string `env:"CLICKHOUSE_DB" envDefault:"cdp"`
ClickHouseUser string `env:"CLICKHOUSE_USER" envDefault:"default"`
// ClickHousePassword has no default and is empty when the variable is unset.
ClickHousePassword string `env:"CLICKHOUSE_PASSWORD"`
}
// Load populates a Config from the process environment using env.Parse.
// A parse failure is returned wrapped with the "config load" prefix, and no
// Config is returned in that case.
func Load() (*Config, error) {
	var loaded Config
	err := env.Parse(&loaded)
	if err != nil {
		return nil, fmt.Errorf("config load: %w", err)
	}
	return &loaded, nil
}

View File