package repo

import (
	"context"
	"errors"
	"fmt"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"

	"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
)

// SchemaRepo records the data type observed for each (workspace, event_type,
// field) triple. The bulker / analytics layer uses this to detect type
// conflicts.
//
// In the ingest hot path we only *check* for conflict via UpsertField; the
// rebuild of the cached map is left to a background loader. Callers are
// expected to treat the upsert as fire-and-forget rather than block the
// request on it. Note that UpsertField itself is synchronous, so any
// asynchrony has to be arranged by the caller.
type SchemaRepo interface {
	// GetType returns the recorded type, or "" (with a nil error) if the
	// field has never been seen.
	GetType(ctx context.Context, workspaceID, eventType, field string) (string, error)

	// UpsertField records a new (or re-confirmed) field type.
	UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error
}

// schemaRepo is the pgx-backed SchemaRepo; it reads and writes the
// schema_fields table through the shared connection pool.
type schemaRepo struct {
	db *pgxpool.Pool
}

// NewSchemaRepo returns a SchemaRepo that runs its queries on db.
func NewSchemaRepo(db *pgxpool.Pool) SchemaRepo {
	return &schemaRepo{db: db}
}

// GetType looks up the data_type stored for (workspaceID, eventType, field).
//
// workspaceID is cast to uuid inside the query. When no row matches, GetType
// returns "" and a nil error so the caller can treat the field as new. Any
// other failure is wrapped with apperr.Internal.
func (r *schemaRepo) GetType(ctx context.Context, workspaceID, eventType, field string) (string, error) {
	const q = `
		SELECT data_type
		FROM schema_fields
		WHERE workspace_id = $1::uuid
		  AND event_type = $2
		  AND field = $3`

	var t string
	err := r.db.QueryRow(ctx, q, workspaceID, eventType, field).Scan(&t)
	switch {
	case err == nil:
		return t, nil
	case errors.Is(err, pgx.ErrNoRows):
		// Match the sentinel rather than the message text so that a wrapped
		// error or a reworded message is still recognized as "not found".
		return "", nil
	default:
		return "", apperr.Internal(fmt.Errorf("schema get: %w", err))
	}
}

// UpsertField inserts a row for (workspaceID, eventType, field) with the
// given dataType.
//
// If a row for that triple already exists, only last_seen_at and sample_count
// are updated; the stored data_type is left unchanged, so a differing
// dataType does not overwrite the recorded one. Failures are wrapped with
// apperr.Internal.
func (r *schemaRepo) UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error {
	const q = `
		INSERT INTO schema_fields (workspace_id, event_type, field, data_type)
		VALUES ($1::uuid, $2, $3, $4)
		ON CONFLICT (workspace_id, event_type, field)
		DO UPDATE SET last_seen_at = now(),
		              sample_count = schema_fields.sample_count + 1`

	if _, err := r.db.Exec(ctx, q, workspaceID, eventType, field, dataType); err != nil {
		return apperr.Internal(fmt.Errorf("schema upsert: %w", err))
	}
	return nil
}