init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,150 @@
package service
import (
"context"
"encoding/json"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
"github.com/dbiz/cdp/ingestion/ingest/internal/ratelimit"
)
// ---------------------------------------------------------------------------
// Stubs -- enough surface to drive the IngestService without spinning up
// Kafka or Redis. We exercise the pipeline branches: rate limited, late
// event, dedup hit, schema conflict, happy path.
// ---------------------------------------------------------------------------
// fakeLimiter is a rate-limiter stub whose verdict is fixed up front by the
// allow flag.
type fakeLimiter struct {
	allow bool
}

// Allow ignores every argument and never returns an error. When allow is
// false it reports a denied Decision with RetryAfterMS set to 500; otherwise
// it reports an allowed Decision with Remaining set to 99.
func (l *fakeLimiter) Allow(_ context.Context, _ string, _ int, _ time.Duration) (ratelimit.Decision, error) {
	if !l.allow {
		return ratelimit.Decision{Allowed: false, RetryAfterMS: 500}, nil
	}
	return ratelimit.Decision{Allowed: true, Remaining: 99}, nil
}
// fakeDedup is a dedup stub that answers every check with the preconfigured
// fresh flag.
type fakeDedup struct {
	fresh bool
}

// CheckAndSet ignores its arguments and returns the configured fresh value
// together with a nil error.
func (d *fakeDedup) CheckAndSet(_ context.Context, _, _ string) (bool, error) {
	return d.fresh, nil
}
// fakeSchema is an in-memory schema stub. Types are keyed by field name
// only; the other string arguments of its methods are ignored.
type fakeSchema struct {
	stored map[string]string
}

// GetType returns the type recorded for field, or the empty string when
// nothing is recorded. The error is always nil.
func (s *fakeSchema) GetType(_ context.Context, _, _, field string) (string, error) {
	// Indexing a nil or non-matching map yields "", which is exactly the
	// "unknown field" answer.
	return s.stored[field], nil
}

// UpsertField records dt as the type of field, allocating the backing map on
// first use and overwriting any previous entry. The error is always nil.
func (s *fakeSchema) UpsertField(_ context.Context, _, _, field, dt string) error {
	if s.stored == nil {
		s.stored = make(map[string]string)
	}
	s.stored[field] = dt
	return nil
}
// fakeProducer is a recording producer stub: it keeps everything handed to it
// so tests can assert on the service's side effects.
type fakeProducer struct {
	produced []*model.IngestedEvent // events passed to Produce, in call order
	dlq      []string               // reason values passed to ProduceDLQ, in call order
}

// Produce appends ev to the produced list and always returns nil.
func (p *fakeProducer) Produce(_ context.Context, ev *model.IngestedEvent) error {
	p.produced = append(p.produced, ev)
	return nil
}

// ProduceDLQ records only the reason argument and discards the rest. It
// always returns nil.
func (p *fakeProducer) ProduceDLQ(_ context.Context, _, _, _, reason, _ string, _ []byte) error {
	p.dlq = append(p.dlq, reason)
	return nil
}
// ---------------------------------------------------------------------------
// newSvc wires an IngestService from the supplied stubs, a fresh
// fakeProducer, a no-op logger and a 24h lateAfter. It returns the service
// together with the producer so callers can inspect what was emitted.
func newSvc(t *testing.T, limiter *fakeLimiter, dedupSvc *fakeDedup, sch *fakeSchema) (*IngestService, *fakeProducer) {
	t.Helper()

	sink := &fakeProducer{}
	svc := &IngestService{
		producer:  sink,
		limiter:   limiter,
		dedup:     dedupSvc,
		schema:    sch,
		log:       zap.NewNop(),
		lateAfter: 24 * time.Hour,
	}
	return svc, sink
}
func TestIngest_RateLimited(t *testing.T) {
svc, _ := newSvc(t, &fakeLimiter{allow: false}, &fakeDedup{fresh: true}, &fakeSchema{})
err := svc.Ingest(context.Background(), IngestContext{WorkspaceID: "ws"},
&model.RawEvent{Type: model.EventTypeTrack, MessageID: "m1"})
ae, ok := apperr.As(err)
require.True(t, ok)
assert.Equal(t, 429, ae.Code)
assert.Greater(t, ae.RetryAfter, 0)
}
// TestIngest_LateEvent checks that an event whose SentAt lies 48h in the past
// is rejected with an application error carrying code 422. newSvc configures
// lateAfter as 24h.
func TestIngest_LateEvent(t *testing.T) {
	svc, _ := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: true}, &fakeSchema{})

	sentAt := time.Now().Add(-48 * time.Hour)
	raw := &model.RawEvent{
		Type:      model.EventTypeTrack,
		MessageID: "m1",
		SentAt:    &sentAt,
	}
	err := svc.Ingest(context.Background(), IngestContext{WorkspaceID: "ws"}, raw)

	appErr, isAppErr := apperr.As(err)
	require.True(t, isAppErr)
	assert.Equal(t, 422, appErr.Code)
}
func TestIngest_DuplicateMessageSilentlyDropped(t *testing.T) {
svc, prod := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: false}, &fakeSchema{})
err := svc.Ingest(context.Background(), IngestContext{WorkspaceID: "ws"},
&model.RawEvent{Type: model.EventTypeTrack, MessageID: "m1"})
assert.NoError(t, err)
assert.Empty(t, prod.produced, "duplicate must not be produced")
}
// TestIngest_SchemaConflict seeds the schema stub with "price" typed as
// "string" and then ingests an event whose "price" property is the number
// 9.99. It expects an application error with code 400 naming the "price"
// field, exactly one DLQ record with reason "schema_conflict", and nothing
// on the regular produce path.
func TestIngest_SchemaConflict(t *testing.T) {
	svc, prod := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: true},
		&fakeSchema{stored: map[string]string{"price": "string"}})

	// Fail at setup if the fixture cannot be encoded, rather than ingesting
	// nil properties and failing on an unrelated assertion further down.
	props, err := json.Marshal(map[string]any{"price": 9.99})
	require.NoError(t, err)

	err = svc.Ingest(context.Background(),
		IngestContext{WorkspaceID: "ws"},
		&model.RawEvent{
			Type:       model.EventTypeTrack,
			MessageID:  "m1",
			Properties: props,
		})

	ae, ok := apperr.As(err)
	require.True(t, ok)
	assert.Equal(t, 400, ae.Code)
	assert.Equal(t, "price", ae.Field)
	assert.Equal(t, []string{"schema_conflict"}, prod.dlq)
	assert.Empty(t, prod.produced)
}
// TestIngest_HappyPath ingests a track event with every stub permissive
// (limiter allows, dedup reports fresh, schema empty). It expects exactly one
// produced event whose WorkspaceID is "ws", whose PartitionKey() is the
// anonymous ID "anon-1", and whose "plan" property is "pro".
func TestIngest_HappyPath(t *testing.T) {
	svc, prod := newSvc(t, &fakeLimiter{allow: true}, &fakeDedup{fresh: true}, &fakeSchema{})

	// Fail at setup if the fixture cannot be encoded, rather than ingesting
	// nil properties and failing on the Properties assertion further down.
	props, err := json.Marshal(map[string]any{"plan": "pro"})
	require.NoError(t, err)

	err = svc.Ingest(context.Background(),
		IngestContext{WorkspaceID: "ws", SourceID: "src", IP: "1.1.1.1"},
		&model.RawEvent{
			Type:        model.EventTypeTrack,
			MessageID:   "m1",
			AnonymousID: "anon-1",
			Event:       "Signed Up",
			Properties:  props,
		})
	require.NoError(t, err)

	// require (not assert) so the index below cannot panic on an empty slice.
	require.Len(t, prod.produced, 1)
	ev := prod.produced[0]
	assert.Equal(t, "ws", ev.WorkspaceID)
	assert.Equal(t, "anon-1", ev.PartitionKey())
	assert.Equal(t, "pro", ev.Properties["plan"])
}