data layer
This commit is contained in:
75
data-layer/api/internal/apperr/apperr.go
Normal file
75
data-layer/api/internal/apperr/apperr.go
Normal file
@@ -0,0 +1,75 @@
|
||||
// Package apperr defines AppError, the single error type returned by every
|
||||
// service/repo function. Handlers translate AppError into HTTP responses.
|
||||
package apperr
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// AppError is the error value handed back by service and repo functions.
// Handlers read Code, Message, Field and RetryAfter to build the HTTP
// response; Err carries the underlying cause and is meant for logs only.
type AppError struct {
	// Code is the HTTP status to return.
	Code int
	// Message is the user-facing text; it must be safe to expose.
	Message string
	// Field optionally names the input field that caused the error.
	Field string
	// RetryAfter is a delay in seconds, used for 429 responses.
	RetryAfter int
	// Err is the original error, kept for logging and never exposed.
	Err error
}

// Error implements the error interface. It yields Message alone when there
// is no wrapped cause, and "<Message>: <cause>" otherwise.
func (e *AppError) Error() string {
	if e.Err == nil {
		return e.Message
	}
	return e.Message + ": " + fmt.Sprint(e.Err)
}

// Unwrap exposes the wrapped cause so errors.Is / errors.As can walk the chain.
func (e *AppError) Unwrap() error {
	return e.Err
}
|
||||
|
||||
// As reports whether err is or wraps an *AppError.
|
||||
func As(err error) (*AppError, bool) {
|
||||
var ae *AppError
|
||||
if errors.As(err, &ae) {
|
||||
return ae, true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constructors
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func BadRequest(msg, field string, err error) *AppError {
|
||||
return &AppError{Code: http.StatusBadRequest, Message: msg, Field: field, Err: err}
|
||||
}
|
||||
|
||||
func Unauthorized(msg string) *AppError {
|
||||
return &AppError{Code: http.StatusUnauthorized, Message: msg}
|
||||
}
|
||||
|
||||
func Forbidden(msg string) *AppError {
|
||||
return &AppError{Code: http.StatusForbidden, Message: msg}
|
||||
}
|
||||
|
||||
func NotFound(msg string) *AppError {
|
||||
return &AppError{Code: http.StatusNotFound, Message: msg}
|
||||
}
|
||||
|
||||
func Conflict(msg string, err error) *AppError {
|
||||
return &AppError{Code: http.StatusConflict, Message: msg, Err: err}
|
||||
}
|
||||
|
||||
func UnprocessableEntity(msg string) *AppError {
|
||||
return &AppError{Code: http.StatusUnprocessableEntity, Message: msg}
|
||||
}
|
||||
|
||||
func TooManyRequests(retryAfterSeconds int) *AppError {
|
||||
return &AppError{
|
||||
Code: http.StatusTooManyRequests,
|
||||
Message: "rate limit exceeded",
|
||||
RetryAfter: retryAfterSeconds,
|
||||
}
|
||||
}
|
||||
|
||||
func Internal(err error) *AppError {
|
||||
return &AppError{Code: http.StatusInternalServerError, Message: "internal server error", Err: err}
|
||||
}
|
||||
52
data-layer/api/internal/cache/cache.go
vendored
Normal file
52
data-layer/api/internal/cache/cache.go
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
// Package cache wraps rueidis with the semantic-key convention used by the
|
||||
// analytics service. Keys follow cache:<kind>:<workspace_id>:<hash(params)>
|
||||
// so a workspace can be invalidated without scanning unrelated entries.
|
||||
package cache
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/redis/rueidis"
|
||||
)
|
||||
|
||||
type Cache struct {
|
||||
client rueidis.Client
|
||||
}
|
||||
|
||||
func New(client rueidis.Client) *Cache { return &Cache{client: client} }
|
||||
|
||||
// Key derives the cache key "cache:<kind>:<workspaceID>:<hash>" where <hash>
// is the hex form of the first 16 bytes of the SHA-256 of params' JSON
// encoding. The key is only as stable as that encoding: structs encode in
// field order and encoding/json emits map keys in sorted order.
// An error is returned when params cannot be marshalled.
func Key(kind, workspaceID string, params any) (string, error) {
	encoded, err := json.Marshal(params)
	if err != nil {
		return "", fmt.Errorf("cache key marshal: %w", err)
	}
	digest := sha256.Sum256(encoded)
	suffix := hex.EncodeToString(digest[:16])
	return "cache:" + kind + ":" + workspaceID + ":" + suffix, nil
}
|
||||
|
||||
// Get returns (value, true) on hit and (nil, false) on miss. Any redis error
|
||||
// is treated as a miss -- the caller falls through to the underlying source.
|
||||
func (c *Cache) Get(ctx context.Context, key string) ([]byte, bool) {
|
||||
res := c.client.Do(ctx, c.client.B().Get().Key(key).Build())
|
||||
b, err := res.AsBytes()
|
||||
if err != nil {
|
||||
if errors.Is(err, rueidis.Nil) {
|
||||
return nil, false
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
return b, true
|
||||
}
|
||||
|
||||
// Set writes the value with a TTL.
|
||||
func (c *Cache) Set(ctx context.Context, key string, value []byte, ttl time.Duration) error {
|
||||
return c.client.Do(ctx, c.client.B().Set().Key(key).Value(rueidis.BinaryString(value)).Ex(ttl).Build()).Error()
|
||||
}
|
||||
47
data-layer/api/internal/config/config.go
Normal file
47
data-layer/api/internal/config/config.go
Normal file
@@ -0,0 +1,47 @@
|
||||
// Package config loads runtime configuration from environment variables.
|
||||
//
|
||||
// Vars prefixed with ANALYTICS_ are owned by this service; un-prefixed ones
|
||||
// (POSTGRES_DSN, REDIS_ADDR, CLICKHOUSE_*) are shared with cdp-ingestion.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/caarlos0/env/v11"
|
||||
)
|
||||
|
||||
// Config is the runtime configuration, populated from environment variables
// named in the `env` tags; `envDefault` supplies the value used when a
// variable is unset.
//
// NOTE(review): the *_SECONDS variables are typed time.Duration and default to
// strings such as "30s"; presumably they must be given as Go duration strings
// rather than bare integers -- confirm against the env parser.
type Config struct {
	// HTTP server and logging.
	HTTPAddr        string        `env:"ANALYTICS_HTTP_ADDR" envDefault:":4000"`
	LogLevel        string        `env:"ANALYTICS_LOG_LEVEL" envDefault:"info"`
	ShutdownTimeout time.Duration `env:"ANALYTICS_SHUTDOWN_TIMEOUT_SECONDS" envDefault:"30s"`

	// Cache lifetimes, one per query type.
	CacheTTLQuery   time.Duration `env:"ANALYTICS_CACHE_TTL_QUERY_SECONDS" envDefault:"60s"`
	CacheTTLProfile time.Duration `env:"ANALYTICS_CACHE_TTL_PROFILE_SECONDS" envDefault:"30s"`

	// Directory holding the ClickHouse SQL templates. It is resolved relative
	// to the process working directory; the default matches
	// `cd api && go run ./cmd/server`.
	ClickHouseTemplatesDir string `env:"ANALYTICS_CH_TEMPLATES_DIR" envDefault:"../infra/clickhouse"`

	// Credentials of the separate read-only ClickHouse user for custom SQL.
	ClickHouseSQLUser     string `env:"ANALYTICS_CH_SQL_USER" envDefault:"analytics_ro"`
	ClickHouseSQLPassword string `env:"ANALYTICS_CH_SQL_PASSWORD"`

	// Shared infrastructure (un-prefixed variables).
	PostgresDSN string `env:"POSTGRES_DSN,required"`
	RedisAddr   string `env:"REDIS_ADDR" envDefault:"localhost:6379"`

	ClickHouseAddr     string `env:"CLICKHOUSE_ADDR" envDefault:"localhost:9000"`
	ClickHouseDB       string `env:"CLICKHOUSE_DB" envDefault:"cdp"`
	ClickHouseUser     string `env:"CLICKHOUSE_USER" envDefault:"default"`
	ClickHousePassword string `env:"CLICKHOUSE_PASSWORD"`
}
|
||||
|
||||
func Load() (*Config, error) {
|
||||
cfg := &Config{}
|
||||
if err := env.Parse(cfg); err != nil {
|
||||
return nil, fmt.Errorf("config load: %w", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
0
data-layer/api/internal/handler/.gitkeep
Normal file
0
data-layer/api/internal/handler/.gitkeep
Normal file
132
data-layer/api/internal/handler/analytics_handler.go
Normal file
132
data-layer/api/internal/handler/analytics_handler.go
Normal file
@@ -0,0 +1,132 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/middleware"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/repo"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/service"
|
||||
)
|
||||
|
||||
type AnalyticsHandler struct {
|
||||
svc *service.QueryService
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewAnalyticsHandler(svc *service.QueryService, log *zap.Logger) *AnalyticsHandler {
|
||||
return &AnalyticsHandler{svc: svc, log: log}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Funnel
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type funnelRequest struct {
|
||||
Steps []string `json:"steps" validate:"required,min=2,max=10,dive,min=1"`
|
||||
From *time.Time `json:"from" validate:"required"`
|
||||
To *time.Time `json:"to" validate:"required,gtfield=From"`
|
||||
WindowSeconds uint32 `json:"window_seconds" validate:"required,min=1,max=2592000"` // up to 30d
|
||||
}
|
||||
|
||||
func (h *AnalyticsHandler) Funnel(w http.ResponseWriter, r *http.Request) {
|
||||
var req funnelRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
res, err := h.svc.Funnel(r.Context(), repo.FunnelQuery{
|
||||
WorkspaceID: ws,
|
||||
Steps: req.Steps,
|
||||
From: *req.From,
|
||||
To: *req.To,
|
||||
WindowSeconds: req.WindowSeconds,
|
||||
})
|
||||
if err != nil {
|
||||
h.log.Error("funnel", zap.String("workspace_id", ws), zap.Error(err))
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Retention
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type retentionRequest struct {
|
||||
InitialEvent string `json:"initial_event" validate:"required,min=1"`
|
||||
ReturnEvent string `json:"return_event" validate:"required,min=1"`
|
||||
From *time.Time `json:"from" validate:"required"`
|
||||
To *time.Time `json:"to" validate:"required,gtfield=From"`
|
||||
Periods int `json:"periods" validate:"omitempty,min=1,max=90"`
|
||||
}
|
||||
|
||||
func (h *AnalyticsHandler) Retention(w http.ResponseWriter, r *http.Request) {
|
||||
var req retentionRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
res, err := h.svc.Retention(r.Context(), repo.RetentionQuery{
|
||||
WorkspaceID: ws,
|
||||
InitialEvent: req.InitialEvent,
|
||||
ReturnEvent: req.ReturnEvent,
|
||||
From: *req.From,
|
||||
To: *req.To,
|
||||
Periods: req.Periods,
|
||||
})
|
||||
if err != nil {
|
||||
h.log.Error("retention", zap.String("workspace_id", ws), zap.Error(err))
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Session
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type sessionRequest struct {
|
||||
From *time.Time `json:"from" validate:"required"`
|
||||
To *time.Time `json:"to" validate:"required,gtfield=From"`
|
||||
TimeoutSeconds uint32 `json:"timeout_seconds" validate:"omitempty,min=60,max=86400"`
|
||||
UserID string `json:"user_id"`
|
||||
Limit int `json:"limit" validate:"omitempty,min=1,max=1000"`
|
||||
Offset int `json:"offset" validate:"omitempty,min=0"`
|
||||
}
|
||||
|
||||
func (h *AnalyticsHandler) Session(w http.ResponseWriter, r *http.Request) {
|
||||
var req sessionRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
if req.TimeoutSeconds == 0 {
|
||||
req.TimeoutSeconds = 30 * 60
|
||||
}
|
||||
if req.Limit == 0 {
|
||||
req.Limit = 100
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
res, err := h.svc.Sessions(r.Context(), repo.SessionQuery{
|
||||
WorkspaceID: ws,
|
||||
UserID: req.UserID,
|
||||
From: *req.From,
|
||||
To: *req.To,
|
||||
TimeoutSeconds: req.TimeoutSeconds,
|
||||
Limit: req.Limit,
|
||||
Offset: req.Offset,
|
||||
})
|
||||
if err != nil {
|
||||
h.log.Error("session", zap.String("workspace_id", ws), zap.Error(err))
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
36
data-layer/api/internal/handler/decode.go
Normal file
36
data-layer/api/internal/handler/decode.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/go-playground/validator/v10"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
)
|
||||
|
||||
var validate = validator.New(validator.WithRequiredStructEnabled())
|
||||
|
||||
// decodeAndValidate reads JSON into `dst`, then runs validator tags. Returns
|
||||
// a wrapped AppError so handlers can pass it straight to writeError.
|
||||
func decodeAndValidate(r *http.Request, dst any) error {
|
||||
dec := json.NewDecoder(r.Body)
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(dst); err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
return apperr.BadRequest("request body is empty", "", err)
|
||||
}
|
||||
return apperr.BadRequest("invalid JSON: "+err.Error(), "", err)
|
||||
}
|
||||
if err := validate.Struct(dst); err != nil {
|
||||
var verrs validator.ValidationErrors
|
||||
if errors.As(err, &verrs) && len(verrs) > 0 {
|
||||
ve := verrs[0]
|
||||
return apperr.BadRequest("validation failed on "+ve.Field()+": "+ve.Tag(), ve.Field(), err)
|
||||
}
|
||||
return apperr.BadRequest("validation failed: "+err.Error(), "", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
74
data-layer/api/internal/handler/event_handler.go
Normal file
74
data-layer/api/internal/handler/event_handler.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/middleware"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/service"
|
||||
)
|
||||
|
||||
type EventHandler struct {
|
||||
svc *service.QueryService
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewEventHandler(svc *service.QueryService, log *zap.Logger) *EventHandler {
|
||||
return &EventHandler{svc: svc, log: log}
|
||||
}
|
||||
|
||||
type queryEventsRequest struct {
|
||||
Table string `json:"table" validate:"required,oneof=events_track events_identify events_page events_group"`
|
||||
From *time.Time `json:"from" validate:"required"`
|
||||
To *time.Time `json:"to" validate:"required,gtfield=From"`
|
||||
UserID string `json:"user_id"`
|
||||
AnonymousID string `json:"anonymous_id"`
|
||||
EventName string `json:"event"`
|
||||
Limit int `json:"limit" validate:"omitempty,min=1,max=1000"`
|
||||
Offset int `json:"offset" validate:"omitempty,min=0"`
|
||||
}
|
||||
|
||||
// QueryEvents handles POST /query/events.
|
||||
func (h *EventHandler) QueryEvents(w http.ResponseWriter, r *http.Request) {
|
||||
var req queryEventsRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
if req.Limit == 0 {
|
||||
req.Limit = 100
|
||||
}
|
||||
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
res, err := h.svc.Events(r.Context(), model.EventQuery{
|
||||
WorkspaceID: ws,
|
||||
Table: model.EventTable(req.Table),
|
||||
From: *req.From,
|
||||
To: *req.To,
|
||||
UserID: req.UserID,
|
||||
AnonymousID: req.AnonymousID,
|
||||
EventName: req.EventName,
|
||||
Limit: req.Limit,
|
||||
Offset: req.Offset,
|
||||
})
|
||||
if err != nil {
|
||||
h.log.Error("query events",
|
||||
zap.String("workspace_id", ws),
|
||||
zap.String("table", req.Table),
|
||||
zap.Error(err))
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
|
||||
// Health / Ready -- shared between all handlers but parked here for now.
|
||||
func (h *EventHandler) Health(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
|
||||
}
|
||||
func (h *EventHandler) Ready(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]string{"status": "ready"})
|
||||
}
|
||||
85
data-layer/api/internal/handler/profile_handler.go
Normal file
85
data-layer/api/internal/handler/profile_handler.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/middleware"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/service"
|
||||
)
|
||||
|
||||
type ProfileHandler struct {
|
||||
svc *service.ProfileService
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewProfileHandler(svc *service.ProfileService, log *zap.Logger) *ProfileHandler {
|
||||
return &ProfileHandler{svc: svc, log: log}
|
||||
}
|
||||
|
||||
// Get handles GET /profiles/:id.
|
||||
func (h *ProfileHandler) Get(w http.ResponseWriter, r *http.Request) {
|
||||
id, err := parseProfileID(r)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
p, err := h.svc.Get(r.Context(), ws, id)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, p)
|
||||
}
|
||||
|
||||
// Timeline handles GET /profiles/:id/events.
|
||||
func (h *ProfileHandler) Timeline(w http.ResponseWriter, r *http.Request) {
|
||||
id, err := parseProfileID(r)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
limit, offset := parsePagination(r, 100, 1000)
|
||||
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
res, err := h.svc.Timeline(r.Context(), ws, id, limit, offset)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
|
||||
func parseProfileID(r *http.Request) (string, error) {
|
||||
raw := chi.URLParam(r, "id")
|
||||
if raw == "" {
|
||||
return "", apperr.BadRequest("missing profile id", "id", nil)
|
||||
}
|
||||
if _, err := uuid.Parse(raw); err != nil {
|
||||
return "", apperr.BadRequest("profile id must be uuid", "id", err)
|
||||
}
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
// parsePagination extracts ?limit and ?offset from the query string.
// limit starts at def and is replaced only by an integer in (0, max];
// offset starts at 0 and is replaced only by a non-negative integer.
// Anything else is ignored silently -- these are lenient GET endpoints, so
// bad values fall back to the defaults instead of producing an error.
func parsePagination(r *http.Request, def, max int) (limit, offset int) {
	params := r.URL.Query()
	limit, offset = def, 0

	if raw := params.Get("limit"); raw != "" {
		n, err := strconv.Atoi(raw)
		if err == nil && 0 < n && n <= max {
			limit = n
		}
	}
	if raw := params.Get("offset"); raw != "" {
		n, err := strconv.Atoi(raw)
		if err == nil && n >= 0 {
			offset = n
		}
	}
	return limit, offset
}
|
||||
47
data-layer/api/internal/handler/render.go
Normal file
47
data-layer/api/internal/handler/render.go
Normal file
@@ -0,0 +1,47 @@
|
||||
// Package handler holds HTTP handlers. Handlers parse the request, call into
|
||||
// service, and translate the result (or error) into an HTTP response.
|
||||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
)
|
||||
|
||||
// errorResponse is the JSON body sent for every error: the message, plus
// the offending field when one is known.
type errorResponse struct {
	Error string `json:"error"`
	Field string `json:"field,omitempty"`
}
|
||||
|
||||
// writeJSON sends body as a JSON response with the given status code.
//
// The body is marshalled before any header is written, so a value that
// cannot be encoded results in a 500 with a generic error body instead of
// the requested status followed by an empty payload.
func writeJSON(w http.ResponseWriter, status int, body any) {
	payload, err := json.Marshal(body)
	if err != nil {
		status = http.StatusInternalServerError
		payload = []byte(`{"error":"internal server error"}`)
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// The trailing newline keeps the output identical to json.Encoder's.
	// A write error means the client went away; nothing useful can be done.
	_, _ = w.Write(append(payload, '\n'))
}
|
||||
|
||||
func writeError(w http.ResponseWriter, err error) {
|
||||
if ae, ok := apperr.As(err); ok {
|
||||
if ae.RetryAfter > 0 {
|
||||
w.Header().Set("Retry-After", itoa(ae.RetryAfter))
|
||||
}
|
||||
writeJSON(w, ae.Code, errorResponse{Error: ae.Message, Field: ae.Field})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusInternalServerError, errorResponse{Error: "internal server error"})
|
||||
}
|
||||
|
||||
// itoa formats i in base 10. It is a dependency-free stand-in for
// strconv.Itoa (which this file does not import). Negative values are
// rendered with a leading '-'; previously they produced an empty string.
func itoa(i int) string {
	if i == 0 {
		return "0"
	}
	negative := i < 0
	// Work on the unsigned magnitude so that negating the smallest int
	// cannot overflow.
	mag := uint64(i)
	if negative {
		mag = -mag
	}

	var buf [20]byte // 19 digits for the largest 64-bit magnitude plus a sign
	pos := len(buf)
	for ; mag > 0; mag /= 10 {
		pos--
		buf[pos] = byte('0' + mag%10)
	}
	if negative {
		pos--
		buf[pos] = '-'
	}
	return string(buf[pos:])
}
|
||||
125
data-layer/api/internal/handler/saved_query_handler.go
Normal file
125
data-layer/api/internal/handler/saved_query_handler.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/middleware"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/repo"
|
||||
)
|
||||
|
||||
type SavedQueryHandler struct {
|
||||
repo *repo.SavedQueryRepo
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewSavedQueryHandler(r *repo.SavedQueryRepo, log *zap.Logger) *SavedQueryHandler {
|
||||
return &SavedQueryHandler{repo: r, log: log}
|
||||
}
|
||||
|
||||
// createSavedQueryRequest is the JSON body accepted by Create. Kind is
// limited by validation to events, sql, funnel, retention or session.
type createSavedQueryRequest struct {
	Name string         `json:"name" validate:"required,min=1,max=200"`
	Kind string         `json:"kind" validate:"required,oneof=events sql funnel retention session"`
	Spec map[string]any `json:"spec" validate:"required"`
}

// updateSavedQueryRequest is the JSON body accepted by Update; it carries no
// kind field.
type updateSavedQueryRequest struct {
	Name string         `json:"name" validate:"required,min=1,max=200"`
	Spec map[string]any `json:"spec" validate:"required"`
}
|
||||
|
||||
func (h *SavedQueryHandler) Create(w http.ResponseWriter, r *http.Request) {
|
||||
var req createSavedQueryRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
q, err := h.repo.Create(r.Context(), model.SavedQuery{
|
||||
WorkspaceID: ws,
|
||||
Name: req.Name,
|
||||
Kind: req.Kind,
|
||||
Spec: req.Spec,
|
||||
})
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusCreated, q)
|
||||
}
|
||||
|
||||
func (h *SavedQueryHandler) List(w http.ResponseWriter, r *http.Request) {
|
||||
limit, offset := parsePagination(r, 50, 500)
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
qs, err := h.repo.List(r.Context(), ws, limit, offset)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{"items": qs, "limit": limit, "offset": offset})
|
||||
}
|
||||
|
||||
func (h *SavedQueryHandler) Get(w http.ResponseWriter, r *http.Request) {
|
||||
id, err := parseSavedQueryID(r)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
q, err := h.repo.Get(r.Context(), ws, id)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, q)
|
||||
}
|
||||
|
||||
func (h *SavedQueryHandler) Update(w http.ResponseWriter, r *http.Request) {
|
||||
id, err := parseSavedQueryID(r)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
var req updateSavedQueryRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
q, err := h.repo.Update(r.Context(), ws, id, req.Name, req.Spec)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, q)
|
||||
}
|
||||
|
||||
func (h *SavedQueryHandler) Delete(w http.ResponseWriter, r *http.Request) {
|
||||
id, err := parseSavedQueryID(r)
|
||||
if err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
if err := h.repo.Delete(r.Context(), ws, id); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
func parseSavedQueryID(r *http.Request) (string, error) {
|
||||
raw := chi.URLParam(r, "id")
|
||||
if raw == "" {
|
||||
return "", apperr.BadRequest("missing query id", "id", nil)
|
||||
}
|
||||
if _, err := uuid.Parse(raw); err != nil {
|
||||
return "", apperr.BadRequest("query id must be uuid", "id", err)
|
||||
}
|
||||
return raw, nil
|
||||
}
|
||||
47
data-layer/api/internal/handler/sql_handler.go
Normal file
47
data-layer/api/internal/handler/sql_handler.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/middleware"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/service"
|
||||
)
|
||||
|
||||
type SQLHandler struct {
|
||||
svc *service.SQLService
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewSQLHandler(svc *service.SQLService, log *zap.Logger) *SQLHandler {
|
||||
return &SQLHandler{svc: svc, log: log}
|
||||
}
|
||||
|
||||
type customSQLRequest struct {
|
||||
SQL string `json:"sql" validate:"required,min=1,max=20000"`
|
||||
}
|
||||
|
||||
// CustomSQL handles POST /query/sql.
|
||||
func (h *SQLHandler) CustomSQL(w http.ResponseWriter, r *http.Request) {
|
||||
var req customSQLRequest
|
||||
if err := decodeAndValidate(r, &req); err != nil {
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
ws := middleware.WorkspaceFromCtx(r.Context())
|
||||
|
||||
res, err := h.svc.Run(r.Context(), req.SQL)
|
||||
if err != nil {
|
||||
h.log.Warn("custom sql rejected",
|
||||
zap.String("workspace_id", ws),
|
||||
zap.Error(err))
|
||||
writeError(w, err)
|
||||
return
|
||||
}
|
||||
h.log.Info("custom sql ok",
|
||||
zap.String("workspace_id", ws),
|
||||
zap.Int("rows", res.RowCount),
|
||||
zap.Int64("duration_ms", res.DurationMS))
|
||||
writeJSON(w, http.StatusOK, res)
|
||||
}
|
||||
111
data-layer/api/internal/middleware/middleware.go
Normal file
111
data-layer/api/internal/middleware/middleware.go
Normal file
@@ -0,0 +1,111 @@
|
||||
// Package middleware provides chi-compatible HTTP middleware:
|
||||
// request-id, panic recovery, structured logging, CORS.
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type ctxKey string
|
||||
|
||||
const ctxKeyRequestID ctxKey = "request_id"
|
||||
|
||||
// RequestID assigns a uuid v4 to each request and stores it in context.
|
||||
func RequestID(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
id := r.Header.Get("X-Request-Id")
|
||||
if id == "" {
|
||||
id = uuid.NewString()
|
||||
}
|
||||
ctx := context.WithValue(r.Context(), ctxKeyRequestID, id)
|
||||
w.Header().Set("X-Request-Id", id)
|
||||
next.ServeHTTP(w, r.WithContext(ctx))
|
||||
})
|
||||
}
|
||||
|
||||
func RequestIDFromCtx(ctx context.Context) string {
|
||||
v, _ := ctx.Value(ctxKeyRequestID).(string)
|
||||
return v
|
||||
}
|
||||
|
||||
// Recover handles panics so a buggy handler can't take down the server.
|
||||
func Recover(log *zap.Logger) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
defer func() {
|
||||
if rec := recover(); rec != nil {
|
||||
log.Error("panic in handler",
|
||||
zap.Any("panic", rec),
|
||||
zap.String("path", r.URL.Path),
|
||||
zap.ByteString("stack", debug.Stack()))
|
||||
http.Error(w, `{"error":"internal server error"}`, http.StatusInternalServerError)
|
||||
}
|
||||
}()
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Logger logs one structured line per request.
|
||||
func Logger(log *zap.Logger) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
start := time.Now()
|
||||
rw := &statusRecorder{ResponseWriter: w, status: 200}
|
||||
next.ServeHTTP(rw, r)
|
||||
log.Info("http",
|
||||
zap.String("method", r.Method),
|
||||
zap.String("path", r.URL.Path),
|
||||
zap.Int("status", rw.status),
|
||||
zap.Int64("duration_ms", time.Since(start).Milliseconds()),
|
||||
zap.String("request_id", RequestIDFromCtx(r.Context())),
|
||||
zap.String("ip", clientIP(r)))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// CORS returns a permissive CORS handler. The Analytics console calls the API
// directly from the browser during development.
//
// Every response allows any origin. OPTIONS requests are answered with 204
// and never reach next. The allowed headers include X-Workspace-Id, which the
// Workspace middleware requires on every request; without it a browser
// preflight would block those calls. The allowed methods also cover PUT,
// PATCH and DELETE so the saved-query update and delete endpoints are
// reachable from the browser.
func CORS(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		hdr := w.Header()
		hdr.Set("Access-Control-Allow-Origin", "*")
		hdr.Set("Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS")
		hdr.Set("Access-Control-Allow-Headers", "Authorization, Content-Type, X-Request-Id, X-Workspace-Id")
		hdr.Set("Access-Control-Max-Age", "86400")
		if r.Method == http.MethodOptions {
			w.WriteHeader(http.StatusNoContent)
			return
		}
		next.ServeHTTP(w, r)
	})
}
|
||||
|
||||
// clientIP picks the address to log for a request: the first entry of
// X-Forwarded-For (trimmed) when that header is set, otherwise X-Real-Ip as
// sent, otherwise the connection's RemoteAddr. The headers are taken at face
// value, so the result is only suitable for logging.
func clientIP(r *http.Request) string {
	forwarded := r.Header.Get("X-Forwarded-For")
	if forwarded != "" {
		first, _, _ := strings.Cut(forwarded, ",")
		return strings.TrimSpace(first)
	}
	if realIP := r.Header.Get("X-Real-Ip"); realIP != "" {
		return realIP
	}
	return r.RemoteAddr
}
|
||||
|
||||
// statusRecorder wraps an http.ResponseWriter and remembers the status code
// of the response so it can be logged after the handler returns.
//
// Only the status that actually reaches the client is recorded: the first
// final (>= 200) WriteHeader call, or the pre-set status when the handler
// writes a body without calling WriteHeader. Later WriteHeader calls, which
// net/http ignores, no longer overwrite the recorded value.
type statusRecorder struct {
	http.ResponseWriter
	status int
	// committed is set once the response status has been decided.
	committed bool
}

// WriteHeader records code if it is the first final status, then forwards
// the call. 1xx informational codes are forwarded without being recorded
// because a final status still follows them.
func (s *statusRecorder) WriteHeader(code int) {
	if !s.committed && code >= 200 {
		s.status = code
		s.committed = true
	}
	s.ResponseWriter.WriteHeader(code)
}

// Write forwards to the wrapped writer. A first write without a prior
// WriteHeader commits the response, so the status is frozen at its current
// value (200 if nothing was pre-set).
func (s *statusRecorder) Write(b []byte) (int, error) {
	if !s.committed {
		s.committed = true
		if s.status == 0 {
			s.status = http.StatusOK
		}
	}
	return s.ResponseWriter.Write(b)
}

// Unwrap exposes the wrapped writer so http.ResponseController can reach
// optional interfaces (flush, hijack, deadlines) hidden by this wrapper.
func (s *statusRecorder) Unwrap() http.ResponseWriter {
	return s.ResponseWriter
}
|
||||
51
data-layer/api/internal/middleware/workspace.go
Normal file
51
data-layer/api/internal/middleware/workspace.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package middleware
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"net/http"

	"github.com/google/uuid"

	"github.com/dbiz/cdp/data-layer/api/internal/apperr"
)
|
||||
|
||||
const ctxKeyWorkspace ctxKey = "workspace_id"
|
||||
|
||||
// Workspace pulls the active workspace UUID from the X-Workspace-Id header
|
||||
// and stores it in context. Returns 400 for missing / malformed values.
|
||||
//
|
||||
// TODO(auth): wire this to the console session / JWT once the auth scheme
|
||||
// for the data-layer is finalized. For now the header drives everything.
|
||||
func Workspace(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
raw := r.Header.Get("X-Workspace-Id")
|
||||
if raw == "" {
|
||||
writeAppErr(w, apperr.BadRequest("missing X-Workspace-Id header", "workspace_id", nil))
|
||||
return
|
||||
}
|
||||
id, err := uuid.Parse(raw)
|
||||
if err != nil {
|
||||
writeAppErr(w, apperr.BadRequest("invalid X-Workspace-Id", "workspace_id", err))
|
||||
return
|
||||
}
|
||||
ctx := context.WithValue(r.Context(), ctxKeyWorkspace, id.String())
|
||||
next.ServeHTTP(w, r.WithContext(ctx))
|
||||
})
|
||||
}
|
||||
|
||||
// WorkspaceFromCtx returns the workspace id set by Workspace middleware.
|
||||
func WorkspaceFromCtx(ctx context.Context) string {
|
||||
v, _ := ctx.Value(ctxKeyWorkspace).(string)
|
||||
return v
|
||||
}
|
||||
|
||||
func writeAppErr(w http.ResponseWriter, err *apperr.AppError) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(err.Code)
|
||||
body := `{"error":"` + err.Message + `"`
|
||||
if err.Field != "" {
|
||||
body += `,"field":"` + err.Field + `"`
|
||||
}
|
||||
body += `}`
|
||||
_, _ = w.Write([]byte(body))
|
||||
}
|
||||
27
data-layer/api/internal/model/profile.go
Normal file
27
data-layer/api/internal/model/profile.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package model
|
||||
|
||||
import "time"
|
||||
|
||||
// Profile is the unified-profile shape returned by /profiles/:id. The
// underlying table is owned by cdp-ingestion (identity-resolution).
type Profile struct {
	// Identity.
	ID          string `json:"id"`
	WorkspaceID string `json:"workspace_id"`
	UserID      string `json:"user_id,omitempty"` // left out of JSON when empty

	// Known anonymous identifiers and free-form traits; both are left out of
	// the JSON output when empty.
	AnonymousIDs []string       `json:"anonymous_ids,omitempty"`
	Traits       map[string]any `json:"traits,omitempty"`

	// Activity window.
	FirstSeenAt time.Time `json:"first_seen_at"`
	LastSeenAt  time.Time `json:"last_seen_at"`
}
|
||||
|
||||
// SavedQuery mirrors a row of the saved_queries table.
type SavedQuery struct {
	// Identity and ownership. OwnerID is left out of JSON when empty.
	ID          string `json:"id"`
	WorkspaceID string `json:"workspace_id"`
	OwnerID     string `json:"owner_id,omitempty"`

	// Definition of the saved query.
	Name string         `json:"name"`
	Kind string         `json:"kind"`
	Spec map[string]any `json:"spec"`

	// Bookkeeping timestamps.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}
|
||||
47
data-layer/api/internal/model/query.go
Normal file
47
data-layer/api/internal/model/query.go
Normal file
@@ -0,0 +1,47 @@
|
||||
// Package model defines domain types passed between layers.
|
||||
package model
|
||||
|
||||
import "time"
|
||||
|
||||
// EventTable names one of the four ClickHouse event tables written by
// cdp-ingestion. It exists to whitelist `events` queries so that an untrusted
// table name is never interpolated into a template.
type EventTable string

// The complete set of queryable event tables.
const (
	EventTableTrack    EventTable = "events_track"
	EventTableIdentify EventTable = "events_identify"
	EventTablePage     EventTable = "events_page"
	EventTableGroup    EventTable = "events_group"
)

// Valid reports whether t is one of the whitelisted event tables.
func (t EventTable) Valid() bool {
	return t == EventTableTrack ||
		t == EventTableIdentify ||
		t == EventTablePage ||
		t == EventTableGroup
}
|
||||
|
||||
// EventQuery is the parsed filter passed to repo.QueryEvents.
|
||||
type EventQuery struct {
|
||||
WorkspaceID string
|
||||
Table EventTable
|
||||
From time.Time
|
||||
To time.Time
|
||||
UserID string // optional
|
||||
AnonymousID string // optional
|
||||
EventName string // optional, only meaningful when Table == events_track
|
||||
Limit int
|
||||
Offset int
|
||||
}
|
||||
|
||||
// QueryResult is the generic columns+rows envelope returned by Query API
// endpoints.
type QueryResult struct {
	// Tabular payload: column names and one []any per row.
	Columns  []string `json:"columns"`
	Rows     [][]any  `json:"rows"`
	RowCount int      `json:"row_count"`

	// Execution metadata.
	DurationMS int64 `json:"duration_ms"`
	CacheHit   bool  `json:"cache_hit"`

	// Meta carries optional extra information; left out of JSON when empty.
	Meta map[string]any `json:"meta,omitempty"`
}
|
||||
0
data-layer/api/internal/repo/.gitkeep
Normal file
0
data-layer/api/internal/repo/.gitkeep
Normal file
167
data-layer/api/internal/repo/analytics_repo.go
Normal file
167
data-layer/api/internal/repo/analytics_repo.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ClickHouse/clickhouse-go/v2"
|
||||
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/templates"
|
||||
)
|
||||
|
||||
// AnalyticsRepo runs the higher-level P1 query templates (funnel, retention,
|
||||
// session) against ClickHouse. It shares the read connection with EventRepo
|
||||
// but lives in its own file because the templates need their own data shapes.
|
||||
type AnalyticsRepo struct {
|
||||
ch driver.Conn
|
||||
tpl *templates.Store
|
||||
}
|
||||
|
||||
func NewAnalyticsRepo(ch driver.Conn, tpl *templates.Store) *AnalyticsRepo {
|
||||
return &AnalyticsRepo{ch: ch, tpl: tpl}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Funnel
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// FunnelQuery is the input to AnalyticsRepo.Funnel.
type FunnelQuery struct {
	WorkspaceID string

	// Steps holds the step names; Funnel requires at least two. Step i is
	// bound to the query parameter "step<i>".
	Steps []string

	// From and To bound the queried time range.
	From time.Time
	To   time.Time

	// WindowSeconds is bound to the "window_seconds" query parameter.
	WindowSeconds uint32
}
|
||||
|
||||
func (r *AnalyticsRepo) Funnel(ctx context.Context, q FunnelQuery) (*model.QueryResult, error) {
|
||||
if len(q.Steps) < 2 {
|
||||
return nil, fmt.Errorf("funnel requires at least 2 steps")
|
||||
}
|
||||
|
||||
type stepTpl struct {
|
||||
Index int
|
||||
Last bool
|
||||
}
|
||||
stepsTpl := make([]stepTpl, len(q.Steps))
|
||||
for i := range q.Steps {
|
||||
stepsTpl[i] = stepTpl{Index: i, Last: i == len(q.Steps)-1}
|
||||
}
|
||||
|
||||
sql, err := r.tpl.Render("funnel_analysis.sql.tmpl", map[string]any{
|
||||
"Steps": stepsTpl,
|
||||
"StepCount": len(q.Steps),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
args := []any{
|
||||
clickhouse.Named("workspace_id", q.WorkspaceID),
|
||||
clickhouse.DateNamed("from", q.From, clickhouse.MilliSeconds),
|
||||
clickhouse.DateNamed("to", q.To, clickhouse.MilliSeconds),
|
||||
clickhouse.Named("window_seconds", q.WindowSeconds),
|
||||
}
|
||||
for i, name := range q.Steps {
|
||||
args = append(args, clickhouse.Named(fmt.Sprintf("step%d", i), name))
|
||||
}
|
||||
|
||||
rows, err := r.ch.Query(ctx, sql, args...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("clickhouse funnel: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
return ScanRows(rows)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Retention
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// RetentionQuery is the input to AnalyticsRepo.Retention.
type RetentionQuery struct {
	WorkspaceID string

	// InitialEvent and ReturnEvent are bound to the "initial_event" and
	// "return_event" query parameters.
	InitialEvent string
	ReturnEvent  string

	// From and To bound the queried time range.
	From time.Time
	To   time.Time

	// Periods is the number of retention periods, e.g. 14 => D0..D13.
	// Retention substitutes 14 for values below 1.
	Periods int
}
|
||||
|
||||
func (r *AnalyticsRepo) Retention(ctx context.Context, q RetentionQuery) (*model.QueryResult, error) {
|
||||
if q.Periods < 1 {
|
||||
q.Periods = 14
|
||||
}
|
||||
type periodTpl struct {
|
||||
RIndex int
|
||||
OffsetDay int
|
||||
Last bool
|
||||
}
|
||||
outer := make([]periodTpl, q.Periods)
|
||||
for i := 0; i < q.Periods; i++ {
|
||||
outer[i] = periodTpl{RIndex: i + 2, OffsetDay: i + 1, Last: i == q.Periods-1}
|
||||
}
|
||||
|
||||
sql, err := r.tpl.Render("retention_cohort.sql.tmpl", map[string]any{
|
||||
"Outer": outer,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rows, err := r.ch.Query(ctx, sql,
|
||||
clickhouse.Named("workspace_id", q.WorkspaceID),
|
||||
clickhouse.DateNamed("from", q.From, clickhouse.MilliSeconds),
|
||||
clickhouse.DateNamed("to", q.To, clickhouse.MilliSeconds),
|
||||
clickhouse.Named("initial_event", q.InitialEvent),
|
||||
clickhouse.Named("return_event", q.ReturnEvent),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("clickhouse retention: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
return ScanRows(rows)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Session
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SessionQuery is the input to AnalyticsRepo.Sessions.
type SessionQuery struct {
	WorkspaceID string
	UserID      string // optional; empty means no user filter

	// From and To bound the queried time range.
	From time.Time
	To   time.Time

	// TimeoutSeconds is bound to the "timeout_seconds" query parameter.
	TimeoutSeconds uint32

	// Paging.
	Limit  int
	Offset int
}
|
||||
|
||||
func (r *AnalyticsRepo) Sessions(ctx context.Context, q SessionQuery) (*model.QueryResult, error) {
|
||||
sql, err := r.tpl.Render("session_analysis.sql.tmpl", map[string]any{
|
||||
"HasUserID": q.UserID != "",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
args := []any{
|
||||
clickhouse.Named("workspace_id", q.WorkspaceID),
|
||||
clickhouse.DateNamed("from", q.From, clickhouse.MilliSeconds),
|
||||
clickhouse.DateNamed("to", q.To, clickhouse.MilliSeconds),
|
||||
clickhouse.Named("timeout_seconds", q.TimeoutSeconds),
|
||||
clickhouse.Named("limit", uint32(q.Limit)),
|
||||
clickhouse.Named("offset", uint32(q.Offset)),
|
||||
}
|
||||
if q.UserID != "" {
|
||||
args = append(args, clickhouse.Named("user_id", q.UserID))
|
||||
}
|
||||
|
||||
rows, err := r.ch.Query(ctx, sql, args...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("clickhouse session: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
return ScanRows(rows)
|
||||
}
|
||||
58
data-layer/api/internal/repo/chconn.go
Normal file
58
data-layer/api/internal/repo/chconn.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/ClickHouse/clickhouse-go/v2"
|
||||
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
|
||||
)
|
||||
|
||||
// NewClickHouse opens a native-protocol ClickHouse connection. The returned
|
||||
// driver.Conn is safe for concurrent use. Caller owns Close().
|
||||
func NewClickHouse(ctx context.Context, addr, db, user, password string) (driver.Conn, error) {
|
||||
conn, err := clickhouse.Open(&clickhouse.Options{
|
||||
Addr: []string{addr},
|
||||
Auth: clickhouse.Auth{
|
||||
Database: db,
|
||||
Username: user,
|
||||
Password: password,
|
||||
},
|
||||
Settings: clickhouse.Settings{
|
||||
"readonly": 0, // analytics queries; per-user read-only enforced for /query/sql separately
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open clickhouse: %w", err)
|
||||
}
|
||||
if err := conn.Ping(ctx); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("ping clickhouse: %w", err)
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// NewClickHouseReadOnly opens a ClickHouse connection using a SELECT-only
|
||||
// account. Used to back the /query/sql sandbox: DDL/DML are rejected at the DB
|
||||
// level even if the app-level keyword guard is bypassed.
|
||||
func NewClickHouseReadOnly(ctx context.Context, addr, db, user, password string) (driver.Conn, error) {
|
||||
conn, err := clickhouse.Open(&clickhouse.Options{
|
||||
Addr: []string{addr},
|
||||
Auth: clickhouse.Auth{
|
||||
Database: db,
|
||||
Username: user,
|
||||
Password: password,
|
||||
},
|
||||
Settings: clickhouse.Settings{
|
||||
"readonly": 2, // belt-and-braces: server-side enforce read-only
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open clickhouse (ro): %w", err)
|
||||
}
|
||||
if err := conn.Ping(ctx); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("ping clickhouse (ro): %w", err)
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
194
data-layer/api/internal/repo/event_repo.go
Normal file
194
data-layer/api/internal/repo/event_repo.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/ClickHouse/clickhouse-go/v2"
|
||||
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/templates"
|
||||
)
|
||||
|
||||
type EventRepo struct {
|
||||
ch driver.Conn
|
||||
tpl *templates.Store
|
||||
}
|
||||
|
||||
func NewEventRepo(ch driver.Conn, tpl *templates.Store) *EventRepo {
|
||||
return &EventRepo{ch: ch, tpl: tpl}
|
||||
}
|
||||
|
||||
// QueryEvents renders the event_explorer template against q.Table and returns
|
||||
// columns+rows. The query is parameterized -- user input never lands in the
|
||||
// SQL string, only in clickhouse.Named bindings.
|
||||
func (r *EventRepo) QueryEvents(ctx context.Context, q model.EventQuery) (*model.QueryResult, error) {
|
||||
if !q.Table.Valid() {
|
||||
return nil, fmt.Errorf("invalid event table: %q", q.Table)
|
||||
}
|
||||
sql, err := r.tpl.Render("event_explorer.sql.tmpl", map[string]any{
|
||||
"Table": string(q.Table),
|
||||
"HasUserID": q.UserID != "",
|
||||
"HasAnonymousID": q.AnonymousID != "",
|
||||
"HasEventName": q.EventName != "" && q.Table == model.EventTableTrack,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
args := []any{
|
||||
clickhouse.Named("workspace_id", q.WorkspaceID),
|
||||
clickhouse.DateNamed("from", q.From, clickhouse.MilliSeconds),
|
||||
clickhouse.DateNamed("to", q.To, clickhouse.MilliSeconds),
|
||||
clickhouse.Named("limit", uint32(q.Limit)),
|
||||
clickhouse.Named("offset", uint32(q.Offset)),
|
||||
}
|
||||
if q.UserID != "" {
|
||||
args = append(args, clickhouse.Named("user_id", q.UserID))
|
||||
}
|
||||
if q.AnonymousID != "" {
|
||||
args = append(args, clickhouse.Named("anonymous_id", q.AnonymousID))
|
||||
}
|
||||
if q.EventName != "" && q.Table == model.EventTableTrack {
|
||||
args = append(args, clickhouse.Named("event", q.EventName))
|
||||
}
|
||||
|
||||
rows, err := r.ch.Query(ctx, sql, args...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("clickhouse query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
return ScanRows(rows)
|
||||
}
|
||||
|
||||
// QueryProfileTimeline returns recent events for a profile (resolved to
|
||||
// user_id) across all four event tables, ordered by received_at desc.
|
||||
func (r *EventRepo) QueryProfileTimeline(ctx context.Context, workspaceID, userID string, limit, offset int) (*model.QueryResult, error) {
|
||||
sql, err := r.tpl.Render("profile_timeline.sql.tmpl", nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rows, err := r.ch.Query(ctx, sql,
|
||||
clickhouse.Named("workspace_id", workspaceID),
|
||||
clickhouse.Named("user_id", userID),
|
||||
clickhouse.Named("limit", uint32(limit)),
|
||||
clickhouse.Named("offset", uint32(offset)),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("clickhouse query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
return ScanRows(rows)
|
||||
}
|
||||
|
||||
// ScanRows turns a driver.Rows iterator into a generic QueryResult. Column
|
||||
// types come from rows.ColumnTypes() so we allocate the right pointer kinds.
|
||||
func ScanRows(rows driver.Rows) (*model.QueryResult, error) {
|
||||
cols := rows.Columns()
|
||||
colTypes := rows.ColumnTypes()
|
||||
out := &model.QueryResult{Columns: cols, Rows: [][]any{}}
|
||||
|
||||
for rows.Next() {
|
||||
dest := make([]any, len(colTypes))
|
||||
for i, ct := range colTypes {
|
||||
dest[i] = newScanTarget(ct.ScanType().String())
|
||||
}
|
||||
if err := rows.Scan(dest...); err != nil {
|
||||
return nil, fmt.Errorf("scan row: %w", err)
|
||||
}
|
||||
row := make([]any, len(dest))
|
||||
for i, p := range dest {
|
||||
row[i] = derefScanTarget(p)
|
||||
}
|
||||
out.Rows = append(out.Rows, row)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out.RowCount = len(out.Rows)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// newScanTarget returns a freshly allocated pointer matching the Go scan type
// name reported by the ClickHouse driver. The list is deliberately small: the
// analytics tables share a handful of types. "time.Time" and any type name
// not listed get a *any so the driver decides what to store.
func newScanTarget(typeName string) any {
	switch typeName {
	case "string":
		return new(string)
	case "bool":
		return new(bool)

	// Unsigned integers.
	case "uint8":
		return new(uint8)
	case "uint16":
		return new(uint16)
	case "uint32":
		return new(uint32)
	case "uint64":
		return new(uint64)

	// Signed integers.
	case "int32":
		return new(int32)
	case "int64":
		return new(int64)

	// Floating point.
	case "float32":
		return new(float32)
	case "float64":
		return new(float64)

	// Composite types.
	case "map[string]string":
		return new(map[string]string)
	case "[]string":
		return new([]string)

	case "time.Time":
		// let driver fill, derefScanTarget unwraps the *any
		return new(any)
	}
	// Fallback: untyped pointer; driver decides.
	return new(any)
}
|
||||
|
||||
// derefScanTarget unwraps a pointer produced by newScanTarget and returns the
// value it points to. Anything that is not one of the known pointer types is
// returned unchanged.
func derefScanTarget(p any) any {
	switch target := p.(type) {
	// Driver-decided values.
	case *any:
		return *target

	// Scalars.
	case *string:
		return *target
	case *bool:
		return *target
	case *float64:
		return *target
	case *float32:
		return *target

	// Signed integers.
	case *int64:
		return *target
	case *int32:
		return *target

	// Unsigned integers.
	case *uint64:
		return *target
	case *uint32:
		return *target
	case *uint16:
		return *target
	case *uint8:
		return *target

	// Composite types.
	case *[]string:
		return *target
	case *map[string]string:
		return *target
	}
	return p
}
|
||||
28
data-layer/api/internal/repo/pool.go
Normal file
28
data-layer/api/internal/repo/pool.go
Normal file
@@ -0,0 +1,28 @@
|
||||
// Package repo holds data-access code. PostgreSQL handles owned tables
|
||||
// (trait_definitions, profile_traits, segment_*, saved_queries) and read-only
|
||||
// joins onto ingestion-owned tables (workspaces, profiles, sources, ...).
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// NewPool returns a pgxpool ready for use. Caller owns Close().
|
||||
func NewPool(ctx context.Context, dsn string) (*pgxpool.Pool, error) {
|
||||
cfg, err := pgxpool.ParseConfig(dsn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse pg dsn: %w", err)
|
||||
}
|
||||
pool, err := pgxpool.NewWithConfig(ctx, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open pg pool: %w", err)
|
||||
}
|
||||
if err := pool.Ping(ctx); err != nil {
|
||||
pool.Close()
|
||||
return nil, fmt.Errorf("ping pg: %w", err)
|
||||
}
|
||||
return pool, nil
|
||||
}
|
||||
70
data-layer/api/internal/repo/profile_repo.go
Normal file
70
data-layer/api/internal/repo/profile_repo.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
)
|
||||
|
||||
// ProfileRepo reads the unified-profile table owned by cdp-ingestion.
|
||||
//
|
||||
// Assumed schema (TODO: align with cdp-ingestion once that migration lands):
|
||||
//
|
||||
// profiles (
|
||||
// id UUID,
|
||||
// workspace_id UUID,
|
||||
// user_id TEXT,
|
||||
// anonymous_ids TEXT[],
|
||||
// traits JSONB,
|
||||
// first_seen_at TIMESTAMPTZ,
|
||||
// last_seen_at TIMESTAMPTZ
|
||||
// )
|
||||
type ProfileRepo struct {
|
||||
pg *pgxpool.Pool
|
||||
}
|
||||
|
||||
func NewProfileRepo(pg *pgxpool.Pool) *ProfileRepo { return &ProfileRepo{pg: pg} }
|
||||
|
||||
// selectProfileByID loads one profile, scoped by workspace ($1) and profile
// id ($2). The column order must match the Scan call in GetByID.
//
// user_id is wrapped in COALESCE because it is scanned into a plain Go
// string: a NULL value would otherwise fail the scan and turn a profile
// without a user_id into an internal error.
const selectProfileByID = `
SELECT id, workspace_id, COALESCE(user_id, '') AS user_id, anonymous_ids, traits, first_seen_at, last_seen_at
FROM profiles
WHERE workspace_id = $1 AND id = $2
`
|
||||
|
||||
func (r *ProfileRepo) GetByID(ctx context.Context, workspaceID, profileID string) (*model.Profile, error) {
|
||||
row := r.pg.QueryRow(ctx, selectProfileByID, workspaceID, profileID)
|
||||
var p model.Profile
|
||||
var traitsRaw []byte
|
||||
if err := row.Scan(&p.ID, &p.WorkspaceID, &p.UserID, &p.AnonymousIDs, &traitsRaw, &p.FirstSeenAt, &p.LastSeenAt); err != nil {
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
return nil, apperr.NotFound("profile not found")
|
||||
}
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
if len(traitsRaw) > 0 {
|
||||
if err := json.Unmarshal(traitsRaw, &p.Traits); err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
}
|
||||
return &p, nil
|
||||
}
|
||||
|
||||
// GetUserIDForProfile resolves a profile UUID back to its primary user_id so
|
||||
// the timeline query can target ClickHouse events on that key.
|
||||
func (r *ProfileRepo) GetUserIDForProfile(ctx context.Context, workspaceID, profileID string) (string, error) {
|
||||
const q = `SELECT user_id FROM profiles WHERE workspace_id = $1 AND id = $2`
|
||||
var uid string
|
||||
if err := r.pg.QueryRow(ctx, q, workspaceID, profileID).Scan(&uid); err != nil {
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
return "", apperr.NotFound("profile not found")
|
||||
}
|
||||
return "", apperr.Internal(err)
|
||||
}
|
||||
return uid, nil
|
||||
}
|
||||
120
data-layer/api/internal/repo/saved_query_repo.go
Normal file
120
data-layer/api/internal/repo/saved_query_repo.go
Normal file
@@ -0,0 +1,120 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
)
|
||||
|
||||
type SavedQueryRepo struct {
|
||||
pg *pgxpool.Pool
|
||||
}
|
||||
|
||||
func NewSavedQueryRepo(pg *pgxpool.Pool) *SavedQueryRepo { return &SavedQueryRepo{pg: pg} }
|
||||
|
||||
// SQL statements used by SavedQueryRepo. Every statement is scoped by
// workspace_id ($1). Statements that return a row all yield the same column
// list, in the order scanSavedQuery expects; a NULL owner_id comes back as an
// empty string.
const (
	// insertSavedQuery creates a row. An empty owner id ($2) is stored as NULL.
	insertSavedQuery = `
INSERT INTO saved_queries (workspace_id, owner_id, name, kind, spec)
VALUES ($1, NULLIF($2, '')::uuid, $3, $4, $5)
RETURNING id, workspace_id, COALESCE(owner_id::text, '') AS owner_id, name, kind, spec, created_at, updated_at
`

	// selectSavedQueries lists a workspace's rows, most recently updated
	// first, with LIMIT $2 and OFFSET $3.
	selectSavedQueries = `
SELECT id, workspace_id, COALESCE(owner_id::text, '') AS owner_id, name, kind, spec, created_at, updated_at
FROM saved_queries
WHERE workspace_id = $1
ORDER BY updated_at DESC
LIMIT $2 OFFSET $3
`

	// selectSavedQuery fetches a single row by id ($2).
	selectSavedQuery = `
SELECT id, workspace_id, COALESCE(owner_id::text, '') AS owner_id, name, kind, spec, created_at, updated_at
FROM saved_queries
WHERE workspace_id = $1 AND id = $2
`

	// updateSavedQuery replaces name ($3) and spec ($4) of the row with id
	// $2 and bumps updated_at.
	updateSavedQuery = `
UPDATE saved_queries
SET name = $3, spec = $4, updated_at = now()
WHERE workspace_id = $1 AND id = $2
RETURNING id, workspace_id, COALESCE(owner_id::text, '') AS owner_id, name, kind, spec, created_at, updated_at
`

	// deleteSavedQuery removes the row with id $2.
	deleteSavedQuery = `DELETE FROM saved_queries WHERE workspace_id = $1 AND id = $2`
)
|
||||
|
||||
func (r *SavedQueryRepo) Create(ctx context.Context, q model.SavedQuery) (*model.SavedQuery, error) {
|
||||
spec, err := json.Marshal(q.Spec)
|
||||
if err != nil {
|
||||
return nil, apperr.BadRequest("spec must be valid json", "spec", err)
|
||||
}
|
||||
row := r.pg.QueryRow(ctx, insertSavedQuery, q.WorkspaceID, q.OwnerID, q.Name, q.Kind, spec)
|
||||
return scanSavedQuery(row)
|
||||
}
|
||||
|
||||
func (r *SavedQueryRepo) List(ctx context.Context, workspaceID string, limit, offset int) ([]model.SavedQuery, error) {
|
||||
rows, err := r.pg.Query(ctx, selectSavedQueries, workspaceID, limit, offset)
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := []model.SavedQuery{}
|
||||
for rows.Next() {
|
||||
q, err := scanSavedQuery(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, *q)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (r *SavedQueryRepo) Get(ctx context.Context, workspaceID, id string) (*model.SavedQuery, error) {
|
||||
row := r.pg.QueryRow(ctx, selectSavedQuery, workspaceID, id)
|
||||
return scanSavedQuery(row)
|
||||
}
|
||||
|
||||
func (r *SavedQueryRepo) Update(ctx context.Context, workspaceID, id, name string, spec map[string]any) (*model.SavedQuery, error) {
|
||||
specJSON, err := json.Marshal(spec)
|
||||
if err != nil {
|
||||
return nil, apperr.BadRequest("spec must be valid json", "spec", err)
|
||||
}
|
||||
row := r.pg.QueryRow(ctx, updateSavedQuery, workspaceID, id, name, specJSON)
|
||||
return scanSavedQuery(row)
|
||||
}
|
||||
|
||||
func (r *SavedQueryRepo) Delete(ctx context.Context, workspaceID, id string) error {
|
||||
ct, err := r.pg.Exec(ctx, deleteSavedQuery, workspaceID, id)
|
||||
if err != nil {
|
||||
return apperr.Internal(err)
|
||||
}
|
||||
if ct.RowsAffected() == 0 {
|
||||
return apperr.NotFound("saved query not found")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// scanner is the single method that pgx.Row and pgx.Rows have in common. It
// lets scanSavedQuery decode either a QueryRow result or the current row of a
// Query result.
type scanner interface {
	Scan(dest ...any) error
}
|
||||
|
||||
func scanSavedQuery(s scanner) (*model.SavedQuery, error) {
|
||||
var q model.SavedQuery
|
||||
var specRaw []byte
|
||||
if err := s.Scan(&q.ID, &q.WorkspaceID, &q.OwnerID, &q.Name, &q.Kind, &specRaw, &q.CreatedAt, &q.UpdatedAt); err != nil {
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
return nil, apperr.NotFound("saved query not found")
|
||||
}
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
if len(specRaw) > 0 {
|
||||
if err := json.Unmarshal(specRaw, &q.Spec); err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
}
|
||||
return &q, nil
|
||||
}
|
||||
0
data-layer/api/internal/service/.gitkeep
Normal file
0
data-layer/api/internal/service/.gitkeep
Normal file
66
data-layer/api/internal/service/profile_service.go
Normal file
66
data-layer/api/internal/service/profile_service.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/cache"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/repo"
|
||||
)
|
||||
|
||||
type ProfileService struct {
|
||||
profiles *repo.ProfileRepo
|
||||
events *repo.EventRepo
|
||||
cache *cache.Cache
|
||||
profileTTL time.Duration
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewProfileService(p *repo.ProfileRepo, e *repo.EventRepo, c *cache.Cache, profileTTL time.Duration, log *zap.Logger) *ProfileService {
|
||||
return &ProfileService{profiles: p, events: e, cache: c, profileTTL: profileTTL, log: log}
|
||||
}
|
||||
|
||||
func (s *ProfileService) Get(ctx context.Context, workspaceID, profileID string) (*model.Profile, error) {
|
||||
key, err := cache.Key("profile", workspaceID, profileID)
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
if b, ok := s.cache.Get(ctx, key); ok {
|
||||
var p model.Profile
|
||||
if jerr := json.Unmarshal(b, &p); jerr == nil {
|
||||
return &p, nil
|
||||
}
|
||||
}
|
||||
p, err := s.profiles.GetByID(ctx, workspaceID, profileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b, jerr := json.Marshal(p); jerr == nil {
|
||||
if cerr := s.cache.Set(ctx, key, b, s.profileTTL); cerr != nil {
|
||||
s.log.Warn("cache set", zap.String("key", key), zap.Error(cerr))
|
||||
}
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func (s *ProfileService) Timeline(ctx context.Context, workspaceID, profileID string, limit, offset int) (*model.QueryResult, error) {
|
||||
uid, err := s.profiles.GetUserIDForProfile(ctx, workspaceID, profileID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if uid == "" {
|
||||
return nil, apperr.NotFound("profile has no user_id and cannot be timelined")
|
||||
}
|
||||
start := time.Now()
|
||||
res, err := s.events.QueryProfileTimeline(ctx, workspaceID, uid, limit, offset)
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
res.DurationMS = time.Since(start).Milliseconds()
|
||||
return res, nil
|
||||
}
|
||||
87
data-layer/api/internal/service/query_service.go
Normal file
87
data-layer/api/internal/service/query_service.go
Normal file
@@ -0,0 +1,87 @@
|
||||
// Package service holds business logic. It owns cache orchestration around
|
||||
// the read repos and never touches HTTP/chi or the SQL drivers directly.
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/cache"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/repo"
|
||||
)
|
||||
|
||||
type QueryService struct {
|
||||
events *repo.EventRepo
|
||||
analytics *repo.AnalyticsRepo
|
||||
cache *cache.Cache
|
||||
queryTTL time.Duration
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewQueryService(events *repo.EventRepo, analytics *repo.AnalyticsRepo, c *cache.Cache, queryTTL time.Duration, log *zap.Logger) *QueryService {
|
||||
return &QueryService{events: events, analytics: analytics, cache: c, queryTTL: queryTTL, log: log}
|
||||
}
|
||||
|
||||
// cached wraps `fetch` with the per-workspace Redis cache. Result is JSON-
|
||||
// encoded on miss; CacheHit is set true on hit.
|
||||
func (s *QueryService) cached(
|
||||
ctx context.Context,
|
||||
kind, workspaceID string,
|
||||
params any,
|
||||
fetch func(context.Context) (*model.QueryResult, error),
|
||||
) (*model.QueryResult, error) {
|
||||
key, err := cache.Key(kind, workspaceID, params)
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
if cached, ok := s.cache.Get(ctx, key); ok {
|
||||
var out model.QueryResult
|
||||
if jerr := json.Unmarshal(cached, &out); jerr == nil {
|
||||
out.CacheHit = true
|
||||
return &out, nil
|
||||
}
|
||||
}
|
||||
start := time.Now()
|
||||
res, err := fetch(ctx)
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
res.DurationMS = time.Since(start).Milliseconds()
|
||||
res.CacheHit = false
|
||||
|
||||
if b, jerr := json.Marshal(res); jerr == nil {
|
||||
if cerr := s.cache.Set(ctx, key, b, s.queryTTL); cerr != nil {
|
||||
s.log.Warn("cache set", zap.String("key", key), zap.Error(cerr))
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (s *QueryService) Events(ctx context.Context, q model.EventQuery) (*model.QueryResult, error) {
|
||||
return s.cached(ctx, "query:events", q.WorkspaceID, q, func(c context.Context) (*model.QueryResult, error) {
|
||||
return s.events.QueryEvents(c, q)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *QueryService) Funnel(ctx context.Context, q repo.FunnelQuery) (*model.QueryResult, error) {
|
||||
return s.cached(ctx, "query:funnel", q.WorkspaceID, q, func(c context.Context) (*model.QueryResult, error) {
|
||||
return s.analytics.Funnel(c, q)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *QueryService) Retention(ctx context.Context, q repo.RetentionQuery) (*model.QueryResult, error) {
|
||||
return s.cached(ctx, "query:retention", q.WorkspaceID, q, func(c context.Context) (*model.QueryResult, error) {
|
||||
return s.analytics.Retention(c, q)
|
||||
})
|
||||
}
|
||||
|
||||
func (s *QueryService) Sessions(ctx context.Context, q repo.SessionQuery) (*model.QueryResult, error) {
|
||||
return s.cached(ctx, "query:session", q.WorkspaceID, q, func(c context.Context) (*model.QueryResult, error) {
|
||||
return s.analytics.Sessions(c, q)
|
||||
})
|
||||
}
|
||||
98
data-layer/api/internal/service/sql_service.go
Normal file
98
data-layer/api/internal/service/sql_service.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
||||
"github.com/dbiz/cdp/data-layer/api/internal/repo"
|
||||
)
|
||||
|
||||
// SQLService backs the Custom SQL sandbox. It applies two layers of guard:
|
||||
// 1. App-level: parse the statement, reject anything that is not a single
|
||||
// SELECT and anything containing DDL/DML keywords.
|
||||
// 2. DB-level: queries run against a SELECT-only ClickHouse account so the
|
||||
// server rejects writes even if app-level checks are bypassed.
|
||||
type SQLService struct {
|
||||
ch driver.Conn // read-only conn
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewSQLService(roConn driver.Conn, log *zap.Logger) *SQLService {
|
||||
return &SQLService{ch: roConn, log: log}
|
||||
}
|
||||
|
||||
// forbiddenKeywords lists the upper-case SQL keywords that validateReadOnly
// refuses anywhere in a sandbox statement. They are checked in slice order,
// so the first entry that matches is the one named in the error message.
var forbiddenKeywords = []string{
	// Row-level writes.
	"INSERT",
	"UPDATE",
	"DELETE",
	// Schema changes.
	"DROP",
	"CREATE",
	"ALTER",
	"TRUNCATE",
	// Privilege changes.
	"GRANT",
	"REVOKE",
	// Table attachment and maintenance.
	"ATTACH",
	"DETACH",
	"OPTIMIZE",
	"RENAME",
	"EXCHANGE",
}
|
||||
|
||||
// validateReadOnly rejects multi-statement input and obvious DDL/DML.
|
||||
func validateReadOnly(sql string) error {
|
||||
trimmed := strings.TrimSpace(sql)
|
||||
if trimmed == "" {
|
||||
return apperr.BadRequest("sql is empty", "sql", nil)
|
||||
}
|
||||
// Reject multiple statements -- the ClickHouse driver also rejects this,
|
||||
// but we want a friendly error before hitting the wire.
|
||||
if strings.Contains(strings.TrimRight(trimmed, ";"), ";") {
|
||||
return apperr.BadRequest("only a single statement is allowed", "sql", nil)
|
||||
}
|
||||
upper := strings.ToUpper(trimmed)
|
||||
if !strings.HasPrefix(upper, "SELECT") && !strings.HasPrefix(upper, "WITH") {
|
||||
return apperr.BadRequest("only SELECT statements are allowed", "sql", nil)
|
||||
}
|
||||
// Token-level keyword scan: \bKW\b to avoid false positives like "created_at".
|
||||
for _, kw := range forbiddenKeywords {
|
||||
if hasWord(upper, kw) {
|
||||
return apperr.BadRequest("statement contains forbidden keyword: "+kw, "sql", nil)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasWord reports whether word occurs in s as a whole token, i.e. with no
// identifier byte (see isIdent) immediately before or after the occurrence.
// Matching is byte-wise and case-sensitive; callers normalise case first.
// An empty word never matches.
//
// Boundaries are always checked against the original string. The previous
// implementation resliced s after each rejected candidate, so a candidate
// sitting right behind a rejected one (e.g. the second DROP in "DROPDROP")
// looked like it started the string and was wrongly accepted.
func hasWord(s, word string) bool {
	if word == "" {
		// Nothing to look for; also avoids a search that can never advance.
		return false
	}
	for from := 0; from < len(s); {
		rel := strings.Index(s[from:], word)
		if rel < 0 {
			return false
		}
		start := from + rel
		end := start + len(word)
		leftOK := start == 0 || !isIdent(s[start-1])
		rightOK := end == len(s) || !isIdent(s[end])
		if leftOK && rightOK {
			return true
		}
		// Resume one byte past this candidate so no later occurrence is
		// skipped, while keeping absolute offsets into s.
		from = start + 1
	}
	return false
}

// isIdent reports whether c is an ASCII byte that may appear inside an
// identifier: a letter, a digit or an underscore.
func isIdent(c byte) bool {
	switch {
	case c == '_':
		return true
	case c >= '0' && c <= '9':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= 'a' && c <= 'z':
		return true
	}
	return false
}
|
||||
|
||||
// Run executes the (validated) SQL against the read-only ClickHouse user.
|
||||
// Results are never cached -- queries are arbitrary.
|
||||
func (s *SQLService) Run(ctx context.Context, sql string) (*model.QueryResult, error) {
|
||||
if err := validateReadOnly(sql); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start := time.Now()
|
||||
rows, err := s.ch.Query(ctx, sql)
|
||||
if err != nil {
|
||||
// ClickHouse syntax / permission errors are user-visible, not 500.
|
||||
return nil, apperr.BadRequest("clickhouse rejected query: "+err.Error(), "sql", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
res, err := repo.ScanRows(rows)
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(err)
|
||||
}
|
||||
res.DurationMS = time.Since(start).Milliseconds()
|
||||
return res, nil
|
||||
}
|
||||
65
data-layer/api/internal/templates/templates.go
Normal file
65
data-layer/api/internal/templates/templates.go
Normal file
@@ -0,0 +1,65 @@
|
||||
// Package templates loads ClickHouse SQL templates from disk. Templates are
|
||||
// rendered via text/template so we can interpolate validated structural bits
|
||||
// (e.g. which event table to read from); value parameters are bound via
|
||||
// clickhouse.Named at call site rather than rendered.
|
||||
package templates
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
// Store reads SQL templates from a directory and keeps the parsed form of
// each one in memory. Create it with New; the zero value has a nil cache map
// and cannot store parsed templates.
type Store struct {
	// dir is the directory template files are read from.
	dir string

	// mu guards cache.
	mu sync.RWMutex
	// cache holds parsed templates keyed by their resolved file name.
	cache map[string]*template.Template
}
|
||||
|
||||
func New(dir string) *Store {
|
||||
return &Store{dir: dir, cache: map[string]*template.Template{}}
|
||||
}
|
||||
|
||||
// Render loads `name` (with a `.sql.tmpl` suffix appended if not given) and
|
||||
// renders it against `data`. Templates are parsed once and cached.
|
||||
func (s *Store) Render(name string, data any) (string, error) {
|
||||
tpl, err := s.load(name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
if err := tpl.Execute(&buf, data); err != nil {
|
||||
return "", fmt.Errorf("render %s: %w", name, err)
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
func (s *Store) load(name string) (*template.Template, error) {
|
||||
if !strings.HasSuffix(name, ".sql") && !strings.HasSuffix(name, ".sql.tmpl") {
|
||||
name += ".sql.tmpl"
|
||||
}
|
||||
s.mu.RLock()
|
||||
if t, ok := s.cache[name]; ok {
|
||||
s.mu.RUnlock()
|
||||
return t, nil
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
|
||||
path := filepath.Join(s.dir, name)
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read template %s: %w", path, err)
|
||||
}
|
||||
t, err := template.New(name).Parse(string(raw))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse template %s: %w", path, err)
|
||||
}
|
||||
s.mu.Lock()
|
||||
s.cache[name] = t
|
||||
s.mu.Unlock()
|
||||
return t, nil
|
||||
}
|
||||
Reference in New Issue
Block a user