init ingestion
This commit is contained in:
79
ingestion/ingest/internal/apperr/apperr.go
Normal file
79
ingestion/ingest/internal/apperr/apperr.go
Normal file
@@ -0,0 +1,79 @@
|
||||
// Package apperr defines AppError, the single error type returned by every
|
||||
// service/repo function. Handlers translate AppError into HTTP responses.
|
||||
package apperr
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// AppError is the one error shape that crosses layer boundaries. It pairs the
// HTTP-facing details (status, safe message, offending field, retry hint)
// with the underlying cause, which is kept for logging only.
type AppError struct {
	Code       int    // HTTP status to return
	Message    string // user-facing message (safe to expose)
	Field      string // optional: which field caused the error
	RetryAfter int    // seconds, for 429
	Err        error  // original error for logging (never exposed)
}

// Error implements the error interface. The user-facing message always comes
// first; when a cause is attached it follows after ": ".
func (e *AppError) Error() string {
	if e.Err == nil {
		return e.Message
	}
	return fmt.Sprintf("%s: %v", e.Message, e.Err)
}

// Unwrap returns the wrapped cause so errors.Is and errors.As can see
// through an AppError.
func (e *AppError) Unwrap() error {
	return e.Err
}

// As searches err's chain for an *AppError. It returns the first one found
// and true, or nil and false when the chain contains none.
func As(err error) (*AppError, bool) {
	var target *AppError
	if !errors.As(err, &target) {
		return nil, false
	}
	return target, true
}

// ---------------------------------------------------------------------------
// Constructors
// ---------------------------------------------------------------------------

// BadRequest builds a 400 error for a malformed or invalid request. field may
// be empty; err is the underlying cause and may be nil.
func BadRequest(msg, field string, err error) *AppError {
	return &AppError{
		Code:    http.StatusBadRequest,
		Message: msg,
		Field:   field,
		Err:     err,
	}
}

// Unauthorized builds a 401 error with the given message.
func Unauthorized(msg string) *AppError {
	return &AppError{
		Code:    http.StatusUnauthorized,
		Message: msg,
	}
}

// Forbidden builds a 403 error with the given message.
func Forbidden(msg string) *AppError {
	return &AppError{
		Code:    http.StatusForbidden,
		Message: msg,
	}
}

// NotFound builds a 404 error with the given message.
func NotFound(msg string) *AppError {
	return &AppError{
		Code:    http.StatusNotFound,
		Message: msg,
	}
}

// Conflict builds a 409 error; err is the underlying cause and may be nil.
func Conflict(msg string, err error) *AppError {
	return &AppError{
		Code:    http.StatusConflict,
		Message: msg,
		Err:     err,
	}
}

// PayloadTooLarge builds a 413 error with the given message.
func PayloadTooLarge(msg string) *AppError {
	return &AppError{
		Code:    http.StatusRequestEntityTooLarge,
		Message: msg,
	}
}

// UnprocessableEntity builds a 422 error with the given message.
func UnprocessableEntity(msg string) *AppError {
	return &AppError{
		Code:    http.StatusUnprocessableEntity,
		Message: msg,
	}
}

// TooManyRequests builds a 429 error with a fixed message and the number of
// seconds the caller should wait before retrying.
func TooManyRequests(retryAfterSeconds int) *AppError {
	return &AppError{
		Code:       http.StatusTooManyRequests,
		Message:    "rate limit exceeded",
		RetryAfter: retryAfterSeconds,
	}
}

// Internal builds a 500 error. The message is generic on purpose; err is kept
// for logging only.
func Internal(err error) *AppError {
	return &AppError{
		Code:    http.StatusInternalServerError,
		Message: "internal server error",
		Err:     err,
	}
}
|
||||
41
ingestion/ingest/internal/config/config.go
Normal file
41
ingestion/ingest/internal/config/config.go
Normal file
@@ -0,0 +1,41 @@
|
||||
// Package config loads runtime configuration from environment variables.
|
||||
//
|
||||
// All env vars are prefixed with INGEST_ except shared infra ones
|
||||
// (POSTGRES_DSN, REDIS_ADDR, KAFKA_BROKERS).
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/caarlos0/env/v11"
|
||||
)
|
||||
|
||||
// Config is the full runtime configuration of the ingest service. Every field
// is populated from the environment variable named in its `env` tag; fields
// with an `envDefault` fall back to that value when the variable is unset.
type Config struct {
	// HTTP server and logging.
	HTTPAddr string `env:"INGEST_HTTP_ADDR" envDefault:":3049"`
	LogLevel string `env:"INGEST_LOG_LEVEL" envDefault:"info"`

	// Request size limits, in kilobytes.
	PayloadLimitKB int `env:"INGEST_PAYLOAD_LIMIT_KB" envDefault:"100"`
	BatchLimitKB   int `env:"INGEST_BATCH_LIMIT_KB" envDefault:"4000"`

	// Event acceptance windows, in hours.
	LateEventHours int `env:"INGEST_LATE_EVENT_HOURS" envDefault:"24"`
	DedupTTLHours  int `env:"INGEST_DEDUP_TTL_HOURS" envDefault:"24"`

	// NOTE(review): both variables end in _SECONDS but the fields are
	// time.Duration and the defaults are duration strings ("45s", "30s").
	// Presumably a bare number such as "45" will not parse -- confirm and
	// document the expected format for operators.
	WriteKeyCacheTTL time.Duration `env:"INGEST_WRITE_KEY_CACHE_TTL_SECONDS" envDefault:"45s"`
	ShutdownTimeout  time.Duration `env:"INGEST_SHUTDOWN_TIMEOUT_SECONDS" envDefault:"30s"`

	// Payload logging switches.
	LogPayloadOnSuccess bool `env:"INGEST_LOG_PAYLOAD_ON_SUCCESS" envDefault:"false"`
	LogPayloadOnError   bool `env:"INGEST_LOG_PAYLOAD_ON_ERROR" envDefault:"true"`

	// Shared infrastructure (no INGEST_ prefix). POSTGRES_DSN is the only
	// variable without a default; it is marked required.
	PostgresDSN string `env:"POSTGRES_DSN,required"`
	RedisAddr   string `env:"REDIS_ADDR" envDefault:"localhost:6379"`

	// Kafka brokers (comma-separated) and topic names.
	KafkaBrokers     []string `env:"KAFKA_BROKERS" envSeparator:"," envDefault:"localhost:9092"`
	KafkaTopicIngest string   `env:"KAFKA_TOPIC_INGEST" envDefault:"events.ingest"`
	KafkaTopicDLQ    string   `env:"KAFKA_TOPIC_DLQ" envDefault:"events.dlq"`
	KafkaTopicRetry  string   `env:"KAFKA_TOPIC_RETRY" envDefault:"events.retry"`
}
|
||||
|
||||
func Load() (*Config, error) {
|
||||
cfg := &Config{}
|
||||
if err := env.Parse(cfg); err != nil {
|
||||
return nil, fmt.Errorf("config load: %w", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
50
ingestion/ingest/internal/dedup/dedup.go
Normal file
50
ingestion/ingest/internal/dedup/dedup.go
Normal file
@@ -0,0 +1,50 @@
|
||||
// Package dedup provides idempotent event acceptance via Redis SETNX.
|
||||
//
|
||||
// Key shape: dedup:{workspace_id}:{message_id}
|
||||
// TTL: 24h by default (configurable)
|
||||
//
|
||||
// CheckAndSet returns true when the message_id is new (first time seen).
|
||||
// If it returns false the caller MUST drop the event silently and return 200.
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/redis/rueidis"
|
||||
)
|
||||
|
||||
// Dedup decides whether an event has been seen before.
type Dedup interface {
	// CheckAndSet records (workspaceID, messageID) and reports true when the
	// pair was not previously recorded, false when it was.
	CheckAndSet(ctx context.Context, workspaceID, messageID string) (bool, error)
}
|
||||
|
||||
type redisDedup struct {
|
||||
client rueidis.Client
|
||||
ttl time.Duration
|
||||
}
|
||||
|
||||
func New(client rueidis.Client, ttl time.Duration) Dedup {
|
||||
return &redisDedup{client: client, ttl: ttl}
|
||||
}
|
||||
|
||||
// key builds the Redis key for one event: dedup:{workspace_id}:{message_id}.
func key(workspaceID, messageID string) string {
	return "dedup:" + workspaceID + ":" + messageID
}
|
||||
|
||||
func (d *redisDedup) CheckAndSet(ctx context.Context, workspaceID, messageID string) (bool, error) {
|
||||
k := key(workspaceID, messageID)
|
||||
cmd := d.client.B().Set().Key(k).Value("1").
|
||||
Nx().
|
||||
Ex(d.ttl).
|
||||
Build()
|
||||
resp := d.client.Do(ctx, cmd)
|
||||
if err := resp.Error(); err != nil {
|
||||
return false, fmt.Errorf("dedup setnx: %w", err)
|
||||
}
|
||||
// SET with NX returns "OK" when set, nil reply when key already exists.
|
||||
if resp.IsNil() {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
209
ingestion/ingest/internal/handler/handler.go
Normal file
209
ingestion/ingest/internal/handler/handler.go
Normal file
@@ -0,0 +1,209 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/go-playground/validator/v10"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/middleware"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/service"
|
||||
)
|
||||
|
||||
type EventHandler struct {
|
||||
svc *service.IngestService
|
||||
val *validator.Validate
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func NewEventHandler(svc *service.IngestService, log *zap.Logger) *EventHandler {
|
||||
return &EventHandler{
|
||||
svc: svc,
|
||||
val: validator.New(),
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Routes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Single-event endpoints. They differ only in the `type` they force on the
|
||||
// body, so they all funnel into one handler.
|
||||
|
||||
func (h *EventHandler) Track(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleSingle(w, r, model.EventTypeTrack)
|
||||
}
|
||||
|
||||
func (h *EventHandler) Identify(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleSingle(w, r, model.EventTypeIdentify)
|
||||
}
|
||||
|
||||
func (h *EventHandler) Page(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleSingle(w, r, model.EventTypePage)
|
||||
}
|
||||
|
||||
func (h *EventHandler) Group(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleSingle(w, r, model.EventTypeGroup)
|
||||
}
|
||||
|
||||
func (h *EventHandler) Alias(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleSingle(w, r, model.EventTypeAlias)
|
||||
}
|
||||
|
||||
func (h *EventHandler) Screen(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleSingle(w, r, model.EventTypeScreen)
|
||||
}
|
||||
|
||||
func (h *EventHandler) Batch(w http.ResponseWriter, r *http.Request) {
|
||||
body, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
h.writeErr(w, apperr.PayloadTooLarge("payload too large"))
|
||||
return
|
||||
}
|
||||
|
||||
var env model.BatchEnvelope
|
||||
if err := json.Unmarshal(body, &env); err != nil {
|
||||
h.writeErr(w, apperr.BadRequest("invalid json body", "", err))
|
||||
return
|
||||
}
|
||||
if err := h.val.Struct(&env); err != nil {
|
||||
h.writeErr(w, apperr.BadRequest("validation failed", firstField(err), err))
|
||||
return
|
||||
}
|
||||
|
||||
ictx := h.ingestCtx(r, body)
|
||||
results := h.svc.IngestBatch(r.Context(), ictx, env.Batch)
|
||||
|
||||
// Per-event status -- 200 OK, with an array of {message_id, ok, error}.
|
||||
type item struct {
|
||||
MessageID string `json:"messageId"`
|
||||
OK bool `json:"ok"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Field string `json:"field,omitempty"`
|
||||
}
|
||||
out := make([]item, len(env.Batch))
|
||||
for i, e := range env.Batch {
|
||||
it := item{MessageID: e.MessageID, OK: true}
|
||||
if results[i] != nil {
|
||||
it.OK = false
|
||||
if ae, ok := apperr.As(results[i]); ok {
|
||||
it.Error = ae.Message
|
||||
it.Field = ae.Field
|
||||
} else {
|
||||
it.Error = "internal error"
|
||||
}
|
||||
}
|
||||
out[i] = it
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{"results": out})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Health / Ready
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func (h *EventHandler) Health(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
|
||||
}
|
||||
|
||||
func (h *EventHandler) Ready(w http.ResponseWriter, r *http.Request) {
|
||||
// Liveness is enough for k8s readiness in this scaffold. Wire in real
|
||||
// dependency checks (PG ping, Kafka ping) when needed.
|
||||
writeJSON(w, http.StatusOK, map[string]string{"status": "ready"})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// shared helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func (h *EventHandler) handleSingle(w http.ResponseWriter, r *http.Request, t model.EventType) {
|
||||
body, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
h.writeErr(w, apperr.PayloadTooLarge("payload too large"))
|
||||
return
|
||||
}
|
||||
|
||||
var raw model.RawEvent
|
||||
if err := json.NewDecoder(bytes.NewReader(body)).Decode(&raw); err != nil {
|
||||
h.writeErr(w, apperr.BadRequest("invalid json body", "", err))
|
||||
return
|
||||
}
|
||||
if raw.Type == "" {
|
||||
raw.Type = t
|
||||
}
|
||||
if err := h.val.Struct(&raw); err != nil {
|
||||
h.writeErr(w, apperr.BadRequest("validation failed", firstField(err), err))
|
||||
return
|
||||
}
|
||||
|
||||
ictx := h.ingestCtx(r, body)
|
||||
if err := h.svc.Ingest(r.Context(), ictx, &raw); err != nil {
|
||||
h.writeErr(w, err)
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]bool{"ok": true})
|
||||
}
|
||||
|
||||
func (h *EventHandler) ingestCtx(r *http.Request, body []byte) service.IngestContext {
|
||||
wk := middleware.WriteKeyFromCtx(r.Context())
|
||||
return service.IngestContext{
|
||||
WorkspaceID: wk.WorkspaceID,
|
||||
SourceID: wk.SourceID,
|
||||
IP: clientIP(r),
|
||||
UserAgent: r.UserAgent(),
|
||||
RawBody: body,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *EventHandler) writeErr(w http.ResponseWriter, err error) {
|
||||
if ae, ok := apperr.As(err); ok {
|
||||
if ae.RetryAfter > 0 {
|
||||
w.Header().Set("Retry-After", strconv.Itoa(ae.RetryAfter))
|
||||
}
|
||||
writeJSON(w, ae.Code, errorResponse{Error: ae.Message, Field: ae.Field})
|
||||
if ae.Err != nil {
|
||||
h.log.Warn("request error",
|
||||
zap.Int("code", ae.Code),
|
||||
zap.String("msg", ae.Message),
|
||||
zap.Error(ae.Err))
|
||||
}
|
||||
return
|
||||
}
|
||||
h.log.Error("unhandled error", zap.Error(err))
|
||||
writeJSON(w, http.StatusInternalServerError, errorResponse{Error: "internal server error"})
|
||||
}
|
||||
|
||||
// errorResponse is the JSON body of every error reply: the message, plus the
// offending field when one is known.
type errorResponse struct {
	Error string `json:"error"`
	Field string `json:"field,omitempty"`
}
|
||||
|
||||
// writeJSON sends body as a JSON response with the given status code.
func writeJSON(w http.ResponseWriter, status int, body any) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(status)

	// The status line is already committed, so an encoding failure cannot be
	// reported to the client; it is deliberately dropped.
	enc := json.NewEncoder(w)
	_ = enc.Encode(body)
}
|
||||
|
||||
func firstField(err error) string {
|
||||
var verrs validator.ValidationErrors
|
||||
if errors.As(err, &verrs) && len(verrs) > 0 {
|
||||
return verrs[0].Field()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// clientIP duplicates middleware.clientIP -- intentionally small, no shared
// types. It resolves the client address with the same precedence:
//
//  1. the first entry of X-Forwarded-For, trimmed of surrounding spaces
//  2. X-Real-Ip
//  3. the connection's RemoteAddr
//
// Previously the whole X-Forwarded-For value was returned, so a proxied
// request yielded a comma-separated list here and a single address in the
// middleware. The bytes package does the split because this file already
// imports it.
func clientIP(r *http.Request) string {
	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
		first, _, _ := bytes.Cut([]byte(xff), []byte(","))
		return string(bytes.TrimSpace(first))
	}
	if rip := r.Header.Get("X-Real-Ip"); rip != "" {
		return rip
	}
	return r.RemoteAddr
}
|
||||
110
ingestion/ingest/internal/kafka/producer.go
Normal file
110
ingestion/ingest/internal/kafka/producer.go
Normal file
@@ -0,0 +1,110 @@
|
||||
// Package kafka wraps franz-go for the ingest producer.
|
||||
//
|
||||
// Design notes:
|
||||
// - We use ProduceSync only for DLQ writes (rare; correctness > latency).
|
||||
// - Happy-path Produce is fire-and-forget: we return 200 OK before the
|
||||
// ack lands. franz-go buffers internally and retries.
|
||||
// - Partition key = anonymous_id for the happy topic so that all events
|
||||
// for a single visitor land on the same partition (ordering for stitching).
|
||||
package kafka
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/twmb/franz-go/pkg/kgo"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
)
|
||||
|
||||
type Producer struct {
|
||||
client *kgo.Client
|
||||
log *zap.Logger
|
||||
topicIngest string
|
||||
topicDLQ string
|
||||
topicRetry string
|
||||
}
|
||||
|
||||
func NewProducer(brokers []string, topicIngest, topicDLQ, topicRetry string, log *zap.Logger) (*Producer, error) {
|
||||
cl, err := kgo.NewClient(
|
||||
kgo.SeedBrokers(brokers...),
|
||||
kgo.ProducerLinger(5_000_000), // 5ms linger -> batch small bursts
|
||||
kgo.ProducerBatchCompression(kgo.ZstdCompression()),
|
||||
kgo.MaxBufferedRecords(100_000),
|
||||
kgo.RequiredAcks(kgo.LeaderAck()),
|
||||
kgo.ClientID("cdp-ingest"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kafka client: %w", err)
|
||||
}
|
||||
if err := cl.Ping(context.Background()); err != nil {
|
||||
cl.Close()
|
||||
return nil, fmt.Errorf("kafka ping: %w", err)
|
||||
}
|
||||
return &Producer{
|
||||
client: cl,
|
||||
log: log,
|
||||
topicIngest: topicIngest,
|
||||
topicDLQ: topicDLQ,
|
||||
topicRetry: topicRetry,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *Producer) Close() {
|
||||
p.client.Close()
|
||||
}
|
||||
|
||||
// Produce sends an event to the happy-path topic. Fire-and-forget.
|
||||
func (p *Producer) Produce(ctx context.Context, ev *model.IngestedEvent) error {
|
||||
payload, err := json.Marshal(ev)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal event: %w", err)
|
||||
}
|
||||
rec := &kgo.Record{
|
||||
Topic: p.topicIngest,
|
||||
Key: []byte(ev.PartitionKey()),
|
||||
Value: payload,
|
||||
Headers: []kgo.RecordHeader{
|
||||
{Key: "workspace_id", Value: []byte(ev.WorkspaceID)},
|
||||
{Key: "source_id", Value: []byte(ev.SourceID)},
|
||||
{Key: "type", Value: []byte(ev.Type)},
|
||||
},
|
||||
}
|
||||
p.client.Produce(ctx, rec, func(r *kgo.Record, err error) {
|
||||
if err != nil {
|
||||
p.log.Error("kafka produce failed",
|
||||
zap.String("topic", r.Topic),
|
||||
zap.String("message_id", ev.MessageID),
|
||||
zap.Error(err))
|
||||
}
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
// ProduceDLQ writes a failed event to the DLQ topic synchronously so we know
|
||||
// it landed before responding to the user with the error.
|
||||
func (p *Producer) ProduceDLQ(ctx context.Context, workspaceID, sourceID, messageID, reason, field string, raw []byte) error {
|
||||
envelope := map[string]any{
|
||||
"workspace_id": workspaceID,
|
||||
"source_id": sourceID,
|
||||
"message_id": messageID,
|
||||
"reason": reason,
|
||||
"field": field,
|
||||
"raw_payload": string(raw),
|
||||
}
|
||||
payload, _ := json.Marshal(envelope)
|
||||
rec := &kgo.Record{
|
||||
Topic: p.topicDLQ,
|
||||
Key: []byte(workspaceID),
|
||||
Value: payload,
|
||||
Headers: []kgo.RecordHeader{
|
||||
{Key: "reason", Value: []byte(reason)},
|
||||
},
|
||||
}
|
||||
if err := p.client.ProduceSync(ctx, rec).FirstErr(); err != nil {
|
||||
return fmt.Errorf("dlq produce: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
193
ingestion/ingest/internal/middleware/middleware.go
Normal file
193
ingestion/ingest/internal/middleware/middleware.go
Normal file
@@ -0,0 +1,193 @@
|
||||
// Package middleware provides chi-compatible HTTP middleware: auth, logging,
|
||||
// payload-limit, request-id, panic recovery, CORS.
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"net/http"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/service"
|
||||
)
|
||||
|
||||
// ctxKey is a private type for context keys, so values stored by this
// package cannot collide with keys defined by other packages.
type ctxKey string

// Context keys written by the RequestID and Auth middleware.
const (
	ctxKeyRequestID ctxKey = "request_id"
	ctxKeyWriteKey  ctxKey = "write_key"
)
|
||||
|
||||
// RequestID assigns a uuid v4 to each request and stores it in context.
|
||||
func RequestID(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
id := r.Header.Get("X-Request-Id")
|
||||
if id == "" {
|
||||
id = uuid.NewString()
|
||||
}
|
||||
ctx := context.WithValue(r.Context(), ctxKeyRequestID, id)
|
||||
w.Header().Set("X-Request-Id", id)
|
||||
next.ServeHTTP(w, r.WithContext(ctx))
|
||||
})
|
||||
}
|
||||
|
||||
func RequestIDFromCtx(ctx context.Context) string {
|
||||
v, _ := ctx.Value(ctxKeyRequestID).(string)
|
||||
return v
|
||||
}
|
||||
|
||||
// Recover handles panics so a buggy handler can't take down the server.
|
||||
func Recover(log *zap.Logger) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
defer func() {
|
||||
if rec := recover(); rec != nil {
|
||||
log.Error("panic in handler",
|
||||
zap.Any("panic", rec),
|
||||
zap.String("path", r.URL.Path),
|
||||
zap.ByteString("stack", debug.Stack()))
|
||||
http.Error(w, `{"error":"internal server error"}`, http.StatusInternalServerError)
|
||||
}
|
||||
}()
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// PayloadLimit caps the request body at limitKB kilobytes by wrapping it in
// http.MaxBytesReader. The limit is enforced lazily: the downstream handler
// sees a read error once it reads past the cap.
func PayloadLimit(limitKB int) func(http.Handler) http.Handler {
	limitBytes := int64(limitKB) * 1024
	return func(next http.Handler) http.Handler {
		capped := func(w http.ResponseWriter, r *http.Request) {
			r.Body = http.MaxBytesReader(w, r.Body, limitBytes)
			next.ServeHTTP(w, r)
		}
		return http.HandlerFunc(capped)
	}
}
|
||||
|
||||
// Logger logs one structured line per request.
|
||||
func Logger(log *zap.Logger) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
start := time.Now()
|
||||
rw := &statusRecorder{ResponseWriter: w, status: 200}
|
||||
next.ServeHTTP(rw, r)
|
||||
log.Info("http",
|
||||
zap.String("method", r.Method),
|
||||
zap.String("path", r.URL.Path),
|
||||
zap.Int("status", rw.status),
|
||||
zap.Int64("duration_ms", time.Since(start).Milliseconds()),
|
||||
zap.String("request_id", RequestIDFromCtx(r.Context())),
|
||||
zap.String("ip", clientIP(r)))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// CORS adds permissive cross-origin headers to every response, which browser
// SDKs (the web tracker) require. OPTIONS requests are answered directly with
// 204 and never reach next.
func CORS(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		hdr := w.Header()
		hdr.Set("Access-Control-Allow-Origin", "*")
		hdr.Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
		hdr.Set("Access-Control-Allow-Headers", "Authorization, Content-Type, X-Request-Id")
		hdr.Set("Access-Control-Max-Age", "86400")

		if r.Method != http.MethodOptions {
			next.ServeHTTP(w, r)
			return
		}
		w.WriteHeader(http.StatusNoContent)
	})
}
|
||||
|
||||
// Auth resolves the write key from the request and stores it in context.
|
||||
// Accepts both `Authorization: Basic <base64(key:)>` (Segment-style) and
|
||||
// `Authorization: Bearer <key>`.
|
||||
func Auth(s *service.AuthService) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
key, err := extractWriteKey(r)
|
||||
if err != nil {
|
||||
writeAuthError(w, err)
|
||||
return
|
||||
}
|
||||
wk, err := s.Resolve(r.Context(), key)
|
||||
if err != nil {
|
||||
writeAuthError(w, err)
|
||||
return
|
||||
}
|
||||
ctx := context.WithValue(r.Context(), ctxKeyWriteKey, wk)
|
||||
next.ServeHTTP(w, r.WithContext(ctx))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// WriteKeyFromCtx returns the resolved key set by Auth middleware.
|
||||
func WriteKeyFromCtx(ctx context.Context) *model.WriteKey {
|
||||
v, _ := ctx.Value(ctxKeyWriteKey).(*model.WriteKey)
|
||||
return v
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func extractWriteKey(r *http.Request) (string, error) {
|
||||
h := r.Header.Get("Authorization")
|
||||
if h == "" {
|
||||
return "", apperr.Unauthorized("missing Authorization header")
|
||||
}
|
||||
if strings.HasPrefix(h, "Bearer ") {
|
||||
return strings.TrimPrefix(h, "Bearer "), nil
|
||||
}
|
||||
if strings.HasPrefix(h, "Basic ") {
|
||||
raw, err := base64.StdEncoding.DecodeString(strings.TrimPrefix(h, "Basic "))
|
||||
if err != nil {
|
||||
return "", apperr.Unauthorized("invalid basic auth")
|
||||
}
|
||||
// Segment uses `key:` (no password). Take everything before the first colon.
|
||||
s := string(raw)
|
||||
if i := strings.Index(s, ":"); i >= 0 {
|
||||
return s[:i], nil
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
return "", apperr.Unauthorized("unsupported auth scheme")
|
||||
}
|
||||
|
||||
func writeAuthError(w http.ResponseWriter, err error) {
|
||||
if ae, ok := apperr.As(err); ok {
|
||||
http.Error(w, `{"error":"`+ae.Message+`"}`, ae.Code)
|
||||
return
|
||||
}
|
||||
http.Error(w, `{"error":"unauthorized"}`, http.StatusUnauthorized)
|
||||
}
|
||||
|
||||
// clientIP resolves the client address for logging. Precedence: the first
// entry of X-Forwarded-For (trimmed), then X-Real-Ip, then the connection's
// RemoteAddr.
func clientIP(r *http.Request) string {
	forwarded := r.Header.Get("X-Forwarded-For")
	realIP := r.Header.Get("X-Real-Ip")

	switch {
	case forwarded != "":
		// The header may hold a comma-separated list; only the first entry
		// is used.
		first, _, _ := strings.Cut(forwarded, ",")
		return strings.TrimSpace(first)
	case realIP != "":
		return realIP
	default:
		return r.RemoteAddr
	}
}
|
||||
|
||||
// statusRecorder wraps an http.ResponseWriter and remembers the status code
// passed to WriteHeader so the Logger middleware can report it.
type statusRecorder struct {
	http.ResponseWriter
	status int
}

// WriteHeader records code and then forwards it to the wrapped writer.
func (s *statusRecorder) WriteHeader(code int) {
	s.status = code
	inner := s.ResponseWriter
	inner.WriteHeader(code)
}
|
||||
|
||||
81
ingestion/ingest/internal/model/event.go
Normal file
81
ingestion/ingest/internal/model/event.go
Normal file
@@ -0,0 +1,81 @@
|
||||
// Package model holds the wire and domain types passed between layers.
|
||||
package model
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
)
|
||||
|
||||
// EventType names the kind of call an event represents, using the
// Segment-compatible vocabulary.
type EventType string

// The six event kinds accepted by the ingest API.
const (
	EventTypeTrack    EventType = "track"
	EventTypeIdentify EventType = "identify"
	EventTypePage     EventType = "page"
	EventTypeGroup    EventType = "group"
	EventTypeAlias    EventType = "alias"
	EventTypeScreen   EventType = "screen"
)
|
||||
|
||||
// RawEvent is the parsed-but-not-yet-validated payload from a client.
|
||||
// We keep Properties / Traits / Context as json.RawMessage so the handler can
|
||||
// pass them through to the service untouched; flattening happens in service.
|
||||
type RawEvent struct {
|
||||
Type EventType `json:"type" validate:"required,oneof=track identify page group alias screen"`
|
||||
MessageID string `json:"messageId" validate:"required,max=128"`
|
||||
AnonymousID string `json:"anonymousId" validate:"max=128"`
|
||||
UserID string `json:"userId" validate:"max=128"`
|
||||
GroupID string `json:"groupId" validate:"max=128"`
|
||||
Event string `json:"event" validate:"max=255"`
|
||||
Name string `json:"name" validate:"max=255"`
|
||||
Category string `json:"category" validate:"max=255"`
|
||||
Properties json.RawMessage `json:"properties"`
|
||||
Traits json.RawMessage `json:"traits"`
|
||||
Context json.RawMessage `json:"context"`
|
||||
Timestamp *time.Time `json:"timestamp"`
|
||||
SentAt *time.Time `json:"sentAt"`
|
||||
}
|
||||
|
||||
// BatchEnvelope is the body of /batch — Segment-compatible.
|
||||
type BatchEnvelope struct {
|
||||
Batch []RawEvent `json:"batch" validate:"required,min=1,max=1000,dive"`
|
||||
SentAt *time.Time `json:"sentAt"`
|
||||
Context json.RawMessage `json:"context"`
|
||||
}
|
||||
|
||||
// IngestedEvent is the canonical record we push onto Kafka. Flat fields,
|
||||
// timestamps already normalized, payload sanitized.
|
||||
type IngestedEvent struct {
|
||||
WorkspaceID string `json:"workspace_id"`
|
||||
SourceID string `json:"source_id"`
|
||||
MessageID string `json:"message_id"`
|
||||
Type EventType `json:"type"`
|
||||
AnonymousID string `json:"anonymous_id,omitempty"`
|
||||
UserID string `json:"user_id,omitempty"`
|
||||
GroupID string `json:"group_id,omitempty"`
|
||||
Event string `json:"event,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Properties map[string]any `json:"properties,omitempty"`
|
||||
Traits map[string]any `json:"traits,omitempty"`
|
||||
Context map[string]any `json:"context,omitempty"`
|
||||
IP string `json:"ip,omitempty"`
|
||||
UserAgent string `json:"user_agent,omitempty"`
|
||||
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
SentAt time.Time `json:"sent_at"`
|
||||
ReceivedAt time.Time `json:"received_at"`
|
||||
}
|
||||
|
||||
// PartitionKey returns the key used for Kafka partitioning. We use
|
||||
// anonymous_id to keep identity-stitching ordering per visitor.
|
||||
func (e *IngestedEvent) PartitionKey() string {
|
||||
if e.AnonymousID != "" {
|
||||
return e.AnonymousID
|
||||
}
|
||||
if e.UserID != "" {
|
||||
return e.UserID
|
||||
}
|
||||
return e.MessageID
|
||||
}
|
||||
19
ingestion/ingest/internal/model/writekey.go
Normal file
19
ingestion/ingest/internal/model/writekey.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package model
|
||||
|
||||
import "time"
|
||||
|
||||
// WriteKey describes the credential a client presents in the Authorization
// header. The raw key value is never stored -- only its sha256 hash and a
// short prefix for display in the console.
type WriteKey struct {
	ID          string
	WorkspaceID string
	SourceID    string
	KeyPrefix   string
	Label       string
	RevokedAt   *time.Time // nil while the key is active
	LastUsedAt  *time.Time
	CreatedAt   time.Time
}

// Revoked reports whether the key has a revocation time set.
func (k *WriteKey) Revoked() bool {
	return k.RevokedAt != nil
}
|
||||
102
ingestion/ingest/internal/ratelimit/ratelimit.go
Normal file
102
ingestion/ingest/internal/ratelimit/ratelimit.go
Normal file
@@ -0,0 +1,102 @@
|
||||
// Package ratelimit implements a Redis-backed sliding-window limiter.
|
||||
//
|
||||
// We use a sorted-set per workspace where the score is the unix-nano
|
||||
// timestamp. On each request we:
|
||||
// 1. ZREMRANGEBYSCORE -- evict entries older than window
|
||||
// 2. ZCARD -- count current
|
||||
// 3. if count < limit : ZADD + EXPIRE, allow
|
||||
// 4. else : compute retry-after from oldest entry, deny
|
||||
//
|
||||
// Steps 1-3/4 are wrapped in a Lua script for atomicity.
|
||||
package ratelimit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/redis/rueidis"
|
||||
)
|
||||
|
||||
// Decision is the outcome of one rate-limit check.
type Decision struct {
	Allowed      bool // true when the request may proceed
	Remaining    int  // requests left in the current window (0 when denied)
	RetryAfterMS int  // milliseconds to wait before retrying (0 when allowed)
}
|
||||
|
||||
type Limiter interface {
|
||||
Allow(ctx context.Context, workspaceID string, limit int, window time.Duration) (Decision, error)
|
||||
}
|
||||
|
||||
type redisLimiter struct {
|
||||
client rueidis.Client
|
||||
}
|
||||
|
||||
func New(client rueidis.Client) Limiter {
|
||||
return &redisLimiter{client: client}
|
||||
}
|
||||
|
||||
// slidingWindowLua is the atomic sliding-window check run by Allow.
//
// Inputs:
//
//	KEYS[1]  sorted-set key for the workspace
//	ARGV[1]  current time (the Go caller passes unix milliseconds)
//	ARGV[2]  window length, in the same unit as ARGV[1]
//	ARGV[3]  request limit per window
//	ARGV[4]  member to insert (must be unique per request)
//
// The script evicts entries scored at or before now-window, counts what is
// left, and either records the request (refreshing the key's expiry to one
// window) or computes how long until the oldest entry leaves the window.
//
// Returns: {allowed (1/0), remaining, retry_after_ms}
const slidingWindowLua = `
local key = KEYS[1]
local now = tonumber(ARGV[1])
local window = tonumber(ARGV[2])
local limit = tonumber(ARGV[3])
local member = ARGV[4]
local cutoff = now - window

redis.call('ZREMRANGEBYSCORE', key, 0, cutoff)
local count = tonumber(redis.call('ZCARD', key))

if count < limit then
redis.call('ZADD', key, now, member)
redis.call('PEXPIRE', key, window)
return {1, limit - count - 1, 0}
end

local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
local retry = window
if oldest and oldest[2] then
retry = (tonumber(oldest[2]) + window) - now
if retry < 0 then retry = 0 end
end
return {0, 0, retry}
`
|
||||
|
||||
func (l *redisLimiter) Allow(ctx context.Context, workspaceID string, limit int, window time.Duration) (Decision, error) {
|
||||
key := "rate:" + workspaceID
|
||||
now := time.Now().UnixMilli()
|
||||
member := strconv.FormatInt(now, 10) + ":" + workspaceID
|
||||
|
||||
cmd := l.client.B().Eval().Script(slidingWindowLua).
|
||||
Numkeys(1).
|
||||
Key(key).
|
||||
Arg(strconv.FormatInt(now, 10),
|
||||
strconv.FormatInt(window.Milliseconds(), 10),
|
||||
strconv.Itoa(limit),
|
||||
member).
|
||||
Build()
|
||||
|
||||
res := l.client.Do(ctx, cmd)
|
||||
if err := res.Error(); err != nil {
|
||||
return Decision{}, fmt.Errorf("ratelimit eval: %w", err)
|
||||
}
|
||||
|
||||
arr, err := res.ToArray()
|
||||
if err != nil || len(arr) != 3 {
|
||||
return Decision{}, fmt.Errorf("ratelimit bad reply: %w", err)
|
||||
}
|
||||
allowed, _ := arr[0].AsInt64()
|
||||
remaining, _ := arr[1].AsInt64()
|
||||
retry, _ := arr[2].AsInt64()
|
||||
|
||||
return Decision{
|
||||
Allowed: allowed == 1,
|
||||
Remaining: int(remaining),
|
||||
RetryAfterMS: int(retry),
|
||||
}, nil
|
||||
}
|
||||
33
ingestion/ingest/internal/repo/pool.go
Normal file
33
ingestion/ingest/internal/repo/pool.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// NewPool creates a pgxpool with sensible defaults for ingest workloads.
|
||||
// Pool size is small because ingest is mostly cache hits — Postgres is only
|
||||
// touched on cache miss (write key lookup, schema upsert).
|
||||
func NewPool(ctx context.Context, dsn string) (*pgxpool.Pool, error) {
|
||||
cfg, err := pgxpool.ParseConfig(dsn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse pg dsn: %w", err)
|
||||
}
|
||||
cfg.MaxConns = 16
|
||||
cfg.MinConns = 2
|
||||
cfg.MaxConnIdleTime = 5 * time.Minute
|
||||
cfg.HealthCheckPeriod = 30 * time.Second
|
||||
|
||||
pool, err := pgxpool.NewWithConfig(ctx, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pg connect: %w", err)
|
||||
}
|
||||
if err := pool.Ping(ctx); err != nil {
|
||||
pool.Close()
|
||||
return nil, fmt.Errorf("pg ping: %w", err)
|
||||
}
|
||||
return pool, nil
|
||||
}
|
||||
61
ingestion/ingest/internal/repo/schema_repo.go
Normal file
61
ingestion/ingest/internal/repo/schema_repo.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package repo
|
||||
|
||||
import (
	"context"
	"errors"
	"fmt"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"

	"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
)
|
||||
|
||||
// SchemaRepo stores the data type observed for each
// (workspace, event_type, field) triple so that type conflicts can be
// detected.
//
// The ingest path reads the recorded type with GetType and compares it with
// the incoming value's type. Fields with no recorded type are written with
// UpsertField from a separate goroutine, so the request does not wait for
// that write.
type SchemaRepo interface {
	// GetType returns the recorded type for the field, or "" with a nil error
	// when the field has never been seen.
	GetType(ctx context.Context, workspaceID, eventType, field string) (string, error)

	// UpsertField records the type of a newly seen field, or refreshes the
	// bookkeeping of a field that is already recorded.
	UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error
}
|
||||
|
||||
type schemaRepo struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
func NewSchemaRepo(db *pgxpool.Pool) SchemaRepo {
|
||||
return &schemaRepo{db: db}
|
||||
}
|
||||
|
||||
func (r *schemaRepo) GetType(ctx context.Context, workspaceID, eventType, field string) (string, error) {
|
||||
const q = `
|
||||
SELECT data_type FROM schema_fields
|
||||
WHERE workspace_id = $1::uuid AND event_type = $2 AND field = $3`
|
||||
var t string
|
||||
err := r.db.QueryRow(ctx, q, workspaceID, eventType, field).Scan(&t)
|
||||
if err != nil {
|
||||
// pgx.ErrNoRows → return "" with nil error so caller treats as new field
|
||||
if err.Error() == "no rows in result set" {
|
||||
return "", nil
|
||||
}
|
||||
return "", apperr.Internal(fmt.Errorf("schema get: %w", err))
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
func (r *schemaRepo) UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error {
|
||||
const q = `
|
||||
INSERT INTO schema_fields (workspace_id, event_type, field, data_type)
|
||||
VALUES ($1::uuid, $2, $3, $4)
|
||||
ON CONFLICT (workspace_id, event_type, field) DO UPDATE
|
||||
SET last_seen_at = now(),
|
||||
sample_count = schema_fields.sample_count + 1`
|
||||
_, err := r.db.Exec(ctx, q, workspaceID, eventType, field, dataType)
|
||||
if err != nil {
|
||||
return apperr.Internal(fmt.Errorf("schema upsert: %w", err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
66
ingestion/ingest/internal/repo/writekey_repo.go
Normal file
66
ingestion/ingest/internal/repo/writekey_repo.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
)
|
||||
|
||||
// WriteKeyRepo loads WriteKey records by their plaintext value.
|
||||
// The plaintext is hashed before the lookup; the DB only stores hashes.
|
||||
type WriteKeyRepo interface {
|
||||
FindByPlaintext(ctx context.Context, plaintext string) (*model.WriteKey, error)
|
||||
MarkUsed(ctx context.Context, id string) error
|
||||
}
|
||||
|
||||
type writeKeyRepo struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
func NewWriteKeyRepo(db *pgxpool.Pool) WriteKeyRepo {
|
||||
return &writeKeyRepo{db: db}
|
||||
}
|
||||
|
||||
// hashKey returns the lowercase hex encoding of the SHA-256 digest of
// plaintext.
func hashKey(plaintext string) string {
	h := sha256.New()
	h.Write([]byte(plaintext)) // hash.Hash writes never return an error
	return hex.EncodeToString(h.Sum(nil))
}
|
||||
|
||||
func (r *writeKeyRepo) FindByPlaintext(ctx context.Context, plaintext string) (*model.WriteKey, error) {
|
||||
const q = `
|
||||
SELECT id::text, workspace_id::text, source_id::text,
|
||||
key_prefix, COALESCE(label, ''),
|
||||
revoked_at, last_used_at, created_at
|
||||
FROM write_keys
|
||||
WHERE key_hash = $1`
|
||||
row := r.db.QueryRow(ctx, q, hashKey(plaintext))
|
||||
|
||||
var k model.WriteKey
|
||||
err := row.Scan(&k.ID, &k.WorkspaceID, &k.SourceID,
|
||||
&k.KeyPrefix, &k.Label,
|
||||
&k.RevokedAt, &k.LastUsedAt, &k.CreatedAt)
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
return nil, apperr.Unauthorized("invalid write key")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(fmt.Errorf("writekey lookup: %w", err))
|
||||
}
|
||||
return &k, nil
|
||||
}
|
||||
|
||||
func (r *writeKeyRepo) MarkUsed(ctx context.Context, id string) error {
|
||||
const q = `UPDATE write_keys SET last_used_at = now() WHERE id = $1`
|
||||
_, err := r.db.Exec(ctx, q, id)
|
||||
if err != nil {
|
||||
return apperr.Internal(fmt.Errorf("writekey mark used: %w", err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
92
ingestion/ingest/internal/schema/flatten.go
Normal file
92
ingestion/ingest/internal/schema/flatten.go
Normal file
@@ -0,0 +1,92 @@
|
||||
// Package schema turns nested JSON objects into flat key/value maps and
|
||||
// classifies field types for conflict detection.
|
||||
//
|
||||
// Rules:
|
||||
// - keys are joined with "_" : {"a": {"b": 1}} -> {"a_b": 1}
|
||||
// - arrays are preserved as-is and not descended into
|
||||
// - keys are sanitized: lowercase, non-[a-z0-9_] replaced with "_"
|
||||
package schema
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// DataType is the name of a field's value kind, as used for schema conflict
// detection.
type DataType string

// The DataType values known to this package.
const (
	TypeString    DataType = "string"
	TypeNumber    DataType = "number"
	TypeBoolean   DataType = "boolean"
	TypeObject    DataType = "object"
	TypeArray     DataType = "array"
	TypeTimestamp DataType = "timestamp"
	TypeNull      DataType = "null"
)
|
||||
|
||||
// Flatten flattens nested objects under a snake_case prefix.
|
||||
// Returns a new map, never mutates input.
|
||||
func Flatten(in map[string]any) map[string]any {
|
||||
out := make(map[string]any, len(in))
|
||||
for k, v := range in {
|
||||
flattenInto(out, sanitize(k), v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func flattenInto(out map[string]any, prefix string, v any) {
|
||||
switch x := v.(type) {
|
||||
case map[string]any:
|
||||
if len(x) == 0 {
|
||||
out[prefix] = x
|
||||
return
|
||||
}
|
||||
for k, child := range x {
|
||||
flattenInto(out, prefix+"_"+sanitize(k), child)
|
||||
}
|
||||
default:
|
||||
out[prefix] = v
|
||||
}
|
||||
}
|
||||
|
||||
// sanitize lowercases k and replaces every character outside [a-z0-9_] with
// "_". Leading underscores are kept; trailing underscores are trimmed.
//
// Only ASCII letters and digits are kept. Any other rune, including
// non-ASCII letters and digits, becomes a single "_", so the result always
// matches the documented [a-z0-9_] alphabet. A key made up only of replaced
// characters therefore sanitizes to "".
func sanitize(k string) string {
	if k == "" {
		return k
	}
	var b strings.Builder
	b.Grow(len(k))
	for _, r := range k {
		switch {
		case r > unicode.MaxASCII:
			// unicode.IsLetter/IsDigit would accept these; the key alphabet
			// is ASCII-only, so replace them.
			b.WriteByte('_')
		case unicode.IsLetter(r):
			b.WriteRune(unicode.ToLower(r))
		case unicode.IsDigit(r) || r == '_':
			b.WriteRune(r)
		default:
			b.WriteByte('_')
		}
	}
	return strings.TrimRight(b.String(), "_")
}
|
||||
|
||||
// Classify maps a Go value (from json.Unmarshal) to a DataType.
|
||||
func Classify(v any) DataType {
|
||||
switch x := v.(type) {
|
||||
case nil:
|
||||
return TypeNull
|
||||
case bool:
|
||||
return TypeBoolean
|
||||
case float64, float32, int, int32, int64, uint, uint32, uint64:
|
||||
return TypeNumber
|
||||
case string:
|
||||
_ = x
|
||||
return TypeString
|
||||
case []any:
|
||||
return TypeArray
|
||||
case map[string]any:
|
||||
return TypeObject
|
||||
default:
|
||||
return TypeString
|
||||
}
|
||||
}
|
||||
53
ingestion/ingest/internal/schema/flatten_test.go
Normal file
53
ingestion/ingest/internal/schema/flatten_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFlatten_NestedObject(t *testing.T) {
|
||||
in := map[string]any{
|
||||
"user": map[string]any{
|
||||
"id": "u_1",
|
||||
"profile": map[string]any{"age": 30, "name": "Phuoc"},
|
||||
},
|
||||
"plan": "pro",
|
||||
}
|
||||
got := Flatten(in)
|
||||
assert.Equal(t, "u_1", got["user_id"])
|
||||
assert.Equal(t, 30, got["user_profile_age"])
|
||||
assert.Equal(t, "Phuoc", got["user_profile_name"])
|
||||
assert.Equal(t, "pro", got["plan"])
|
||||
}
|
||||
|
||||
func TestFlatten_SanitizesKeys(t *testing.T) {
|
||||
in := map[string]any{
|
||||
"User Email": "x@y",
|
||||
"price.usd": 9.99,
|
||||
"meta!": map[string]any{"X-Y": 1},
|
||||
}
|
||||
got := Flatten(in)
|
||||
assert.Equal(t, "x@y", got["user_email"])
|
||||
assert.Equal(t, 9.99, got["price_usd"])
|
||||
assert.Equal(t, 1, got["meta_x_y"])
|
||||
}
|
||||
|
||||
func TestFlatten_PreservesArrays(t *testing.T) {
|
||||
in := map[string]any{
|
||||
"tags": []any{"a", "b"},
|
||||
}
|
||||
got := Flatten(in)
|
||||
arr, ok := got["tags"].([]any)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, 2, len(arr))
|
||||
}
|
||||
|
||||
func TestClassify(t *testing.T) {
|
||||
assert.Equal(t, TypeString, Classify("hi"))
|
||||
assert.Equal(t, TypeNumber, Classify(float64(1.5)))
|
||||
assert.Equal(t, TypeBoolean, Classify(true))
|
||||
assert.Equal(t, TypeNull, Classify(nil))
|
||||
assert.Equal(t, TypeArray, Classify([]any{1, 2}))
|
||||
assert.Equal(t, TypeObject, Classify(map[string]any{}))
|
||||
}
|
||||
115
ingestion/ingest/internal/service/auth.go
Normal file
115
ingestion/ingest/internal/service/auth.go
Normal file
@@ -0,0 +1,115 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/redis/rueidis"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/repo"
|
||||
)
|
||||
|
||||
// AuthService resolves a plaintext Write Key into the workspace + source it
|
||||
// authorizes for. Lookups are cached in process AND in Redis. Pub/sub
|
||||
// invalidation lets the console revoke a key and have it propagate within
|
||||
// the cache TTL.
|
||||
type AuthService struct {
|
||||
repo repo.WriteKeyRepo
|
||||
redis rueidis.Client
|
||||
log *zap.Logger
|
||||
ttl time.Duration
|
||||
|
||||
mu sync.RWMutex
|
||||
cache map[string]cachedKey
|
||||
}
|
||||
|
||||
type cachedKey struct {
|
||||
key *model.WriteKey
|
||||
expires time.Time
|
||||
}
|
||||
|
||||
const (
	// redisKeyWritePrefix is the key prefix for cached write keys in Redis
	// (wk:{plaintext} -> json). It is not referenced by the code visible in
	// this file.
	redisKeyWritePrefix = "wk:"

	// pubsubChannel is the Redis channel on which cache invalidations are
	// received.
	pubsubChannel = "wk:invalidate"
)
|
||||
|
||||
func NewAuthService(r repo.WriteKeyRepo, redis rueidis.Client, ttl time.Duration, log *zap.Logger) *AuthService {
|
||||
s := &AuthService{
|
||||
repo: r,
|
||||
redis: redis,
|
||||
log: log,
|
||||
ttl: ttl,
|
||||
cache: make(map[string]cachedKey),
|
||||
}
|
||||
go s.watchInvalidations()
|
||||
return s
|
||||
}
|
||||
|
||||
// Resolve returns the WriteKey for a plaintext token. Cached.
|
||||
func (s *AuthService) Resolve(ctx context.Context, plaintext string) (*model.WriteKey, error) {
|
||||
if plaintext == "" {
|
||||
return nil, apperr.Unauthorized("missing write key")
|
||||
}
|
||||
|
||||
// in-process cache
|
||||
s.mu.RLock()
|
||||
if entry, ok := s.cache[plaintext]; ok && time.Now().Before(entry.expires) {
|
||||
s.mu.RUnlock()
|
||||
if entry.key.Revoked() {
|
||||
return nil, apperr.Unauthorized("write key revoked")
|
||||
}
|
||||
return entry.key, nil
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
|
||||
// fall through to DB (Redis cache is optional and intentionally skipped
|
||||
// here -- the in-process map is plenty fast; Redis is only used for the
|
||||
// pub/sub invalidation channel below)
|
||||
k, err := s.repo.FindByPlaintext(ctx, plaintext)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if k.Revoked() {
|
||||
return nil, apperr.Unauthorized("write key revoked")
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
s.cache[plaintext] = cachedKey{key: k, expires: time.Now().Add(s.ttl)}
|
||||
s.mu.Unlock()
|
||||
return k, nil
|
||||
}
|
||||
|
||||
// Invalidate clears the cache entry for one key. Called by the console via
|
||||
// pub/sub when a key is revoked.
|
||||
func (s *AuthService) Invalidate(plaintext string) {
|
||||
s.mu.Lock()
|
||||
delete(s.cache, plaintext)
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
func (s *AuthService) watchInvalidations() {
|
||||
if s.redis == nil {
|
||||
return
|
||||
}
|
||||
ctx := context.Background()
|
||||
err := s.redis.Receive(ctx, s.redis.B().Subscribe().Channel(pubsubChannel).Build(),
|
||||
func(msg rueidis.PubSubMessage) {
|
||||
s.Invalidate(msg.Message)
|
||||
s.log.Info("write key invalidated via pubsub", zap.String("prefix", maskKey(msg.Message)))
|
||||
})
|
||||
if err != nil {
|
||||
s.log.Warn("pubsub subscribe ended", zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
// maskKey returns the first 8 bytes of k followed by "***" for safe logging.
// Keys of 8 bytes or fewer are masked entirely as "***".
func maskKey(k string) string {
	const visible = 8
	if len(k) > visible {
		return k[:visible] + "***"
	}
	return "***"
}
|
||||
223
ingestion/ingest/internal/service/ingest.go
Normal file
223
ingestion/ingest/internal/service/ingest.go
Normal file
@@ -0,0 +1,223 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/dedup"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/ratelimit"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/repo"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/schema"
|
||||
)
|
||||
|
||||
// Producer is the small surface IngestService needs from the Kafka client.
|
||||
// Defined here so it can be stubbed in tests without pulling in franz-go.
|
||||
type Producer interface {
|
||||
Produce(ctx context.Context, ev *model.IngestedEvent) error
|
||||
ProduceDLQ(ctx context.Context, workspaceID, sourceID, messageID, reason, field string, raw []byte) error
|
||||
}
|
||||
|
||||
// IngestService is the core pipeline: validate → ratelimit → timestamp normalize
|
||||
// → late-check → dedup → flatten → schema-conflict → push Kafka.
|
||||
type IngestService struct {
|
||||
producer Producer
|
||||
limiter ratelimit.Limiter
|
||||
dedup dedup.Dedup
|
||||
schema repo.SchemaRepo
|
||||
log *zap.Logger
|
||||
lateAfter time.Duration
|
||||
}
|
||||
|
||||
// IngestDeps groups dependencies for cleaner construction.
|
||||
type IngestDeps struct {
|
||||
Producer Producer
|
||||
Limiter ratelimit.Limiter
|
||||
Dedup dedup.Dedup
|
||||
Schema repo.SchemaRepo
|
||||
Log *zap.Logger
|
||||
LateAfter time.Duration
|
||||
}
|
||||
|
||||
func NewIngestService(d IngestDeps) *IngestService {
|
||||
return &IngestService{
|
||||
producer: d.Producer,
|
||||
limiter: d.Limiter,
|
||||
dedup: d.Dedup,
|
||||
schema: d.Schema,
|
||||
log: d.Log,
|
||||
lateAfter: d.LateAfter,
|
||||
}
|
||||
}
|
||||
|
||||
// IngestContext holds the per-request values that accompany an event through
// the pipeline. It is populated by middleware before Ingest is called.
type IngestContext struct {
	WorkspaceID string
	SourceID    string
	IP          string
	UserAgent   string
	RawBody     []byte // original request body, forwarded as the DLQ payload
}
|
||||
|
||||
// Ingest runs the full pipeline for a single event.
|
||||
func (s *IngestService) Ingest(ctx context.Context, ictx IngestContext, raw *model.RawEvent) error {
|
||||
now := time.Now().UTC()
|
||||
|
||||
// 3. rate limit per workspace
|
||||
dec, err := s.limiter.Allow(ctx, ictx.WorkspaceID, defaultTierLimit, time.Second)
|
||||
if err != nil {
|
||||
return apperr.Internal(err)
|
||||
}
|
||||
if !dec.Allowed {
|
||||
retry := (dec.RetryAfterMS / 1000) + 1
|
||||
return apperr.TooManyRequests(retry)
|
||||
}
|
||||
|
||||
// 4-5. timestamps + late-event check
|
||||
sentAt := derefTime(raw.SentAt, now)
|
||||
if now.Sub(sentAt) > s.lateAfter {
|
||||
return apperr.UnprocessableEntity("event too old (>24h)")
|
||||
}
|
||||
timestamp := derefTime(raw.Timestamp, sentAt)
|
||||
|
||||
// 6. dedup
|
||||
if raw.MessageID == "" {
|
||||
return apperr.BadRequest("messageId required", "messageId", nil)
|
||||
}
|
||||
fresh, err := s.dedup.CheckAndSet(ctx, ictx.WorkspaceID, raw.MessageID)
|
||||
if err != nil {
|
||||
return apperr.Internal(err)
|
||||
}
|
||||
if !fresh {
|
||||
// silently drop -- duplicate message
|
||||
return nil
|
||||
}
|
||||
|
||||
// 7. flatten properties / traits / context
|
||||
props, err := decodeAndFlatten(raw.Properties)
|
||||
if err != nil {
|
||||
_ = s.toDLQ(ctx, ictx, raw, "properties_invalid_json", "properties")
|
||||
return apperr.BadRequest("properties is not valid JSON object", "properties", err)
|
||||
}
|
||||
traits, err := decodeAndFlatten(raw.Traits)
|
||||
if err != nil {
|
||||
_ = s.toDLQ(ctx, ictx, raw, "traits_invalid_json", "traits")
|
||||
return apperr.BadRequest("traits is not valid JSON object", "traits", err)
|
||||
}
|
||||
contextMap, err := decodeAndFlatten(raw.Context)
|
||||
if err != nil {
|
||||
// context is best-effort: keep going without it
|
||||
contextMap = nil
|
||||
}
|
||||
|
||||
// 8. schema validation -- type conflict detection (best-effort, async upsert)
|
||||
if err := s.checkSchema(ctx, ictx.WorkspaceID, string(raw.Type), props); err != nil {
|
||||
_ = s.toDLQ(ctx, ictx, raw, "schema_conflict", "")
|
||||
return err
|
||||
}
|
||||
|
||||
ev := &model.IngestedEvent{
|
||||
WorkspaceID: ictx.WorkspaceID,
|
||||
SourceID: ictx.SourceID,
|
||||
MessageID: raw.MessageID,
|
||||
Type: raw.Type,
|
||||
AnonymousID: raw.AnonymousID,
|
||||
UserID: raw.UserID,
|
||||
GroupID: raw.GroupID,
|
||||
Event: raw.Event,
|
||||
Name: raw.Name,
|
||||
Category: raw.Category,
|
||||
Properties: props,
|
||||
Traits: traits,
|
||||
Context: contextMap,
|
||||
IP: ictx.IP,
|
||||
UserAgent: ictx.UserAgent,
|
||||
Timestamp: timestamp,
|
||||
SentAt: sentAt,
|
||||
ReceivedAt: now,
|
||||
}
|
||||
|
||||
// 9. push Kafka -- fire-and-forget
|
||||
if err := s.producer.Produce(ctx, ev); err != nil {
|
||||
return apperr.Internal(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IngestBatch processes a batch envelope; each failure is recorded but the
|
||||
// good events still ship. Returns the first error so the handler can pick a
|
||||
// status; in practice batch endpoints return 200 with per-event status.
|
||||
func (s *IngestService) IngestBatch(ctx context.Context, ictx IngestContext, batch []model.RawEvent) []error {
|
||||
errs := make([]error, len(batch))
|
||||
for i := range batch {
|
||||
errs[i] = s.Ingest(ctx, ictx, &batch[i])
|
||||
}
|
||||
return errs
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// defaultTierLimit is the request limit passed to the rate limiter for every
// workspace. Ingest pairs it with a one-second window, i.e. requests per
// second. A per-tier override from workspace.tier is planned.
const defaultTierLimit = 100
|
||||
|
||||
// derefTime returns *p converted to UTC, or fallback (unchanged) when p is
// nil or points at the zero time.
func derefTime(p *time.Time, fallback time.Time) time.Time {
	if p != nil && !p.IsZero() {
		return p.UTC()
	}
	return fallback
}
|
||||
|
||||
func decodeAndFlatten(raw json.RawMessage) (map[string]any, error) {
|
||||
if len(raw) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
var m map[string]any
|
||||
if err := json.Unmarshal(raw, &m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if m == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return schema.Flatten(m), nil
|
||||
}
|
||||
|
||||
// checkSchema looks up the recorded type per (workspace, event_type, field)
|
||||
// and rejects with 400 on conflict. New fields are recorded asynchronously --
|
||||
// we do not block the request waiting on the DB write.
|
||||
func (s *IngestService) checkSchema(ctx context.Context, workspaceID, eventType string, props map[string]any) error {
|
||||
for field, v := range props {
|
||||
dt := string(schema.Classify(v))
|
||||
if dt == string(schema.TypeNull) {
|
||||
continue
|
||||
}
|
||||
existing, err := s.schema.GetType(ctx, workspaceID, eventType, field)
|
||||
if err != nil {
|
||||
// soft-fail: don't block ingest on schema DB errors
|
||||
s.log.Warn("schema lookup failed", zap.String("field", field), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
if existing == "" {
|
||||
// fire-and-forget upsert
|
||||
go func(f, t string) {
|
||||
if err := s.schema.UpsertField(context.Background(), workspaceID, eventType, f, t); err != nil {
|
||||
s.log.Warn("schema upsert failed", zap.String("field", f), zap.Error(err))
|
||||
}
|
||||
}(field, dt)
|
||||
continue
|
||||
}
|
||||
if existing != dt {
|
||||
return apperr.BadRequest("schema type conflict", field, nil)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *IngestService) toDLQ(ctx context.Context, ictx IngestContext, raw *model.RawEvent, reason, field string) error {
|
||||
return s.producer.ProduceDLQ(ctx,
|
||||
ictx.WorkspaceID, ictx.SourceID, raw.MessageID, reason, field, ictx.RawBody)
|
||||
}
|
||||
150
ingestion/ingest/internal/service/ingest_test.go
Normal file
150
ingestion/ingest/internal/service/ingest_test.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/ratelimit"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stubs -- enough surface to drive the IngestService without spinning Kafka
|
||||
// or Redis. We exercise the pipeline branches: late event, dedup hit, schema
|
||||
// conflict, happy path.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type fakeLimiter struct{ allow bool }
|
||||
|
||||
func (f *fakeLimiter) Allow(_ context.Context, _ string, _ int, _ time.Duration) (ratelimit.Decision, error) {
|
||||
if f.allow {
|
||||
return ratelimit.Decision{Allowed: true, Remaining: 99}, nil
|
||||
}
|
||||
return ratelimit.Decision{Allowed: false, RetryAfterMS: 500}, nil
|
||||
}
|
||||
|
||||
// fakeDedup is a dedup stub that reports every message as fresh or as a
// duplicate, depending on fresh.
type fakeDedup struct {
	fresh bool
}

// CheckAndSet ignores its arguments and returns the configured answer.
func (f *fakeDedup) CheckAndSet(_ context.Context, _, _ string) (bool, error) {
	return f.fresh, nil
}
|
||||
|
||||
// fakeSchema is an in-memory schema stub keyed by field name only; workspace
// and event type are ignored.
type fakeSchema struct {
	stored map[string]string
}

// GetType returns the stored type for field, or "" when none is stored.
func (f *fakeSchema) GetType(_ context.Context, _, _, field string) (string, error) {
	// Reading a missing key (or a nil map) yields "".
	return f.stored[field], nil
}

// UpsertField stores dt for field, allocating the map on first use.
func (f *fakeSchema) UpsertField(_ context.Context, _, _, field, dt string) error {
	if f.stored == nil {
		f.stored = make(map[string]string)
	}
	f.stored[field] = dt
	return nil
}
|
||||
|
||||
// fakeProducer captures pushes so tests can assert side effects.
|
||||
type fakeProducer struct {
|
||||
produced []*model.IngestedEvent
|
||||
dlq []string // reason values
|
||||
}
|
||||
|
||||
func (f *fakeProducer) Produce(_ context.Context, ev *model.IngestedEvent) error {
|
||||
f.produced = append(f.produced, ev)
|
||||
return nil
|
||||
}
|
||||
func (f *fakeProducer) ProduceDLQ(_ context.Context, _, _, _, reason, _ string, _ []byte) error {
|
||||
f.dlq = append(f.dlq, reason)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func newSvc(t *testing.T, limiter *fakeLimiter, dedupSvc *fakeDedup, sch *fakeSchema) (*IngestService, *fakeProducer) {
|
||||
t.Helper()
|
||||
prod := &fakeProducer{}
|
||||
return &IngestService{
|
||||
producer: prod,
|
||||
limiter: limiter,
|
||||
dedup: dedupSvc,
|
||||
schema: sch,
|
||||
log: zap.NewNop(),
|
||||
lateAfter: 24 * time.Hour,
|
||||
}, prod
|
||||
}
|
||||
|
||||
func TestIngest_RateLimited(t *testing.T) {
|
||||
svc, _ := newSvc(t, &fakeLimiter{allow: false}, &fakeDedup{fresh: true}, &fakeSchema{})
|
||||
err := svc.Ingest(context.Background(), IngestContext{WorkspaceID: "ws"},
|
||||
&model.RawEvent{Type: model.EventTypeTrack, MessageID: "m1"})
|
||||
ae, ok := apperr.As(err)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, 429, ae.Code)
|
||||
assert.Greater(t, ae.RetryAfter, 0)
|
||||
}
|
||||
|
||||
func TestIngest_LateEvent(t *testing.T) {
|
||||
svc, _ := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: true}, &fakeSchema{})
|
||||
old := time.Now().Add(-48 * time.Hour)
|
||||
err := svc.Ingest(context.Background(), IngestContext{WorkspaceID: "ws"},
|
||||
&model.RawEvent{Type: model.EventTypeTrack, MessageID: "m1", SentAt: &old})
|
||||
ae, ok := apperr.As(err)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, 422, ae.Code)
|
||||
}
|
||||
|
||||
func TestIngest_DuplicateMessageSilentlyDropped(t *testing.T) {
|
||||
svc, prod := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: false}, &fakeSchema{})
|
||||
err := svc.Ingest(context.Background(), IngestContext{WorkspaceID: "ws"},
|
||||
&model.RawEvent{Type: model.EventTypeTrack, MessageID: "m1"})
|
||||
assert.NoError(t, err)
|
||||
assert.Empty(t, prod.produced, "duplicate must not be produced")
|
||||
}
|
||||
|
||||
func TestIngest_SchemaConflict(t *testing.T) {
|
||||
svc, prod := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: true},
|
||||
&fakeSchema{stored: map[string]string{"price": "string"}})
|
||||
props, _ := json.Marshal(map[string]any{"price": 9.99})
|
||||
err := svc.Ingest(context.Background(),
|
||||
IngestContext{WorkspaceID: "ws"},
|
||||
&model.RawEvent{
|
||||
Type: model.EventTypeTrack,
|
||||
MessageID: "m1",
|
||||
Properties: props,
|
||||
})
|
||||
ae, ok := apperr.As(err)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, 400, ae.Code)
|
||||
assert.Equal(t, "price", ae.Field)
|
||||
assert.Equal(t, []string{"schema_conflict"}, prod.dlq)
|
||||
assert.Empty(t, prod.produced)
|
||||
}
|
||||
|
||||
func TestIngest_HappyPath(t *testing.T) {
|
||||
svc, prod := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: true}, &fakeSchema{})
|
||||
props, _ := json.Marshal(map[string]any{"plan": "pro"})
|
||||
err := svc.Ingest(context.Background(),
|
||||
IngestContext{WorkspaceID: "ws", SourceID: "src", IP: "1.1.1.1"},
|
||||
&model.RawEvent{
|
||||
Type: model.EventTypeTrack,
|
||||
MessageID: "m1",
|
||||
AnonymousID: "anon-1",
|
||||
Event: "Signed Up",
|
||||
Properties: props,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, prod.produced, 1)
|
||||
ev := prod.produced[0]
|
||||
assert.Equal(t, "ws", ev.WorkspaceID)
|
||||
assert.Equal(t, "anon-1", ev.PartitionKey())
|
||||
assert.Equal(t, "pro", ev.Properties["plan"])
|
||||
}
|
||||
Reference in New Issue
Block a user