init ingestion
This commit is contained in:
33
ingestion/ingest/internal/repo/pool.go
Normal file
33
ingestion/ingest/internal/repo/pool.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// NewPool creates a pgxpool with sensible defaults for ingest workloads.
|
||||
// Pool size is small because ingest is mostly cache hits — Postgres is only
|
||||
// touched on cache miss (write key lookup, schema upsert).
|
||||
func NewPool(ctx context.Context, dsn string) (*pgxpool.Pool, error) {
|
||||
cfg, err := pgxpool.ParseConfig(dsn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse pg dsn: %w", err)
|
||||
}
|
||||
cfg.MaxConns = 16
|
||||
cfg.MinConns = 2
|
||||
cfg.MaxConnIdleTime = 5 * time.Minute
|
||||
cfg.HealthCheckPeriod = 30 * time.Second
|
||||
|
||||
pool, err := pgxpool.NewWithConfig(ctx, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pg connect: %w", err)
|
||||
}
|
||||
if err := pool.Ping(ctx); err != nil {
|
||||
pool.Close()
|
||||
return nil, fmt.Errorf("pg ping: %w", err)
|
||||
}
|
||||
return pool, nil
|
||||
}
|
||||
61
ingestion/ingest/internal/repo/schema_repo.go
Normal file
61
ingestion/ingest/internal/repo/schema_repo.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package repo
|
||||
|
||||
import (
	"context"
	"errors"
	"fmt"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"

	"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
)
|
||||
|
||||
// SchemaRepo persists the data type observed for every
// (workspace, event_type, field) triple. The bulker / analytics layer relies
// on these records to detect type conflicts.
//
// On the ingest hot path the only conflict check goes through UpsertField;
// rebuilding the cached map is the job of a background loader. Requests do
// not wait for the upsert to finish -- it is fire-and-forget.
type SchemaRepo interface {
	// GetType reports the type recorded for the field, or "" when the field
	// has never been seen.
	GetType(ctx context.Context, workspaceID string, eventType string, field string) (string, error)

	// UpsertField stores a newly observed field type, or re-confirms one
	// that is already recorded.
	UpsertField(ctx context.Context, workspaceID string, eventType string, field string, dataType string) error
}
|
||||
|
||||
type schemaRepo struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
func NewSchemaRepo(db *pgxpool.Pool) SchemaRepo {
|
||||
return &schemaRepo{db: db}
|
||||
}
|
||||
|
||||
func (r *schemaRepo) GetType(ctx context.Context, workspaceID, eventType, field string) (string, error) {
|
||||
const q = `
|
||||
SELECT data_type FROM schema_fields
|
||||
WHERE workspace_id = $1::uuid AND event_type = $2 AND field = $3`
|
||||
var t string
|
||||
err := r.db.QueryRow(ctx, q, workspaceID, eventType, field).Scan(&t)
|
||||
if err != nil {
|
||||
// pgx.ErrNoRows → return "" with nil error so caller treats as new field
|
||||
if err.Error() == "no rows in result set" {
|
||||
return "", nil
|
||||
}
|
||||
return "", apperr.Internal(fmt.Errorf("schema get: %w", err))
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
func (r *schemaRepo) UpsertField(ctx context.Context, workspaceID, eventType, field, dataType string) error {
|
||||
const q = `
|
||||
INSERT INTO schema_fields (workspace_id, event_type, field, data_type)
|
||||
VALUES ($1::uuid, $2, $3, $4)
|
||||
ON CONFLICT (workspace_id, event_type, field) DO UPDATE
|
||||
SET last_seen_at = now(),
|
||||
sample_count = schema_fields.sample_count + 1`
|
||||
_, err := r.db.Exec(ctx, q, workspaceID, eventType, field, dataType)
|
||||
if err != nil {
|
||||
return apperr.Internal(fmt.Errorf("schema upsert: %w", err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
66
ingestion/ingest/internal/repo/writekey_repo.go
Normal file
66
ingestion/ingest/internal/repo/writekey_repo.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/apperr"
|
||||
"github.com/dbiz/cdp/ingestion/ingest/internal/model"
|
||||
)
|
||||
|
||||
// WriteKeyRepo loads WriteKey records by their plaintext value.
|
||||
// The plaintext is hashed before the lookup; the DB only stores hashes.
|
||||
type WriteKeyRepo interface {
|
||||
FindByPlaintext(ctx context.Context, plaintext string) (*model.WriteKey, error)
|
||||
MarkUsed(ctx context.Context, id string) error
|
||||
}
|
||||
|
||||
type writeKeyRepo struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
func NewWriteKeyRepo(db *pgxpool.Pool) WriteKeyRepo {
|
||||
return &writeKeyRepo{db: db}
|
||||
}
|
||||
|
||||
// hashKey returns the lowercase hex encoding of the SHA-256 digest of
// plaintext. This is the value compared against write_keys.key_hash.
func hashKey(plaintext string) string {
	hasher := sha256.New()
	hasher.Write([]byte(plaintext)) // hash.Hash's Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
func (r *writeKeyRepo) FindByPlaintext(ctx context.Context, plaintext string) (*model.WriteKey, error) {
|
||||
const q = `
|
||||
SELECT id::text, workspace_id::text, source_id::text,
|
||||
key_prefix, COALESCE(label, ''),
|
||||
revoked_at, last_used_at, created_at
|
||||
FROM write_keys
|
||||
WHERE key_hash = $1`
|
||||
row := r.db.QueryRow(ctx, q, hashKey(plaintext))
|
||||
|
||||
var k model.WriteKey
|
||||
err := row.Scan(&k.ID, &k.WorkspaceID, &k.SourceID,
|
||||
&k.KeyPrefix, &k.Label,
|
||||
&k.RevokedAt, &k.LastUsedAt, &k.CreatedAt)
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
return nil, apperr.Unauthorized("invalid write key")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, apperr.Internal(fmt.Errorf("writekey lookup: %w", err))
|
||||
}
|
||||
return &k, nil
|
||||
}
|
||||
|
||||
func (r *writeKeyRepo) MarkUsed(ctx context.Context, id string) error {
|
||||
const q = `UPDATE write_keys SET last_used_at = now() WHERE id = $1`
|
||||
_, err := r.db.Exec(ctx, q, id)
|
||||
if err != nil {
|
||||
return apperr.Internal(fmt.Errorf("writekey mark used: %w", err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user