init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
package repo
import (
"context"
"fmt"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
)
// SchemaRepo records the data type observed for each (workspace, event_type, field)
// triple. The bulker / analytics layer uses this to detect type conflicts.
//
// In the ingest hot path we only *check* for conflict via UpsertField; the
// rebuild of the cached map is left to a background loader. We do not block
// the request waiting for upsert -- it is fire-and-forget.
type SchemaRepo interface {
// GetType returns the recorded type, or "" if the field has never been seen.
GetType(ctx context.Context, workspaceID, eventType, field string) (string, error)
// UpsertField records a new (or re-confirmed) field type.
UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error
}
type schemaRepo struct {
db *pgxpool.Pool
}
func NewSchemaRepo(db *pgxpool.Pool) SchemaRepo {
return &schemaRepo{db: db}
}
func (r *schemaRepo) GetType(ctx context.Context, workspaceID, eventType, field string) (string, error) {
const q = `
SELECT data_type FROM schema_fields
WHERE workspace_id = $1::uuid AND event_type = $2 AND field = $3`
var t string
err := r.db.QueryRow(ctx, q, workspaceID, eventType, field).Scan(&t)
if err != nil {
// pgx.ErrNoRows → return "" with nil error so caller treats as new field
if err.Error() == "no rows in result set" {
return "", nil
}
return "", apperr.Internal(fmt.Errorf("schema get: %w", err))
}
return t, nil
}
func (r *schemaRepo) UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error {
const q = `
INSERT INTO schema_fields (workspace_id, event_type, field, data_type)
VALUES ($1::uuid, $2, $3, $4)
ON CONFLICT (workspace_id, event_type, field) DO UPDATE
SET last_seen_at = now(),
sample_count = schema_fields.sample_count + 1`
_, err := r.db.Exec(ctx, q, workspaceID, eventType, field, dataType)
if err != nil {
return apperr.Internal(fmt.Errorf("schema upsert: %w", err))
}
return nil
}