// Package writer wraps the ClickHouse client for batch inserts. // // We use the native clickhouse-go v2 client. One PrepareBatch / Append / Send // cycle per (table, batch). All maps are stringified before insertion -- the // ClickHouse schema uses Map(String, String) which keeps the table flat and // avoids column explosion. Analytics queries cast on read. package writer import ( "context" "encoding/json" "fmt" "strconv" "github.com/ClickHouse/clickhouse-go/v2" "github.com/ClickHouse/clickhouse-go/v2/lib/driver" "github.com/dbiz/cdp/ingestion/bulker/internal/model" ) type ClickHouse struct { conn driver.Conn db string } func New(ctx context.Context, addr, db, user, password string) (*ClickHouse, error) { conn, err := clickhouse.Open(&clickhouse.Options{ Addr: []string{addr}, Auth: clickhouse.Auth{ Database: db, Username: user, Password: password, }, Settings: clickhouse.Settings{ "async_insert": 0, "wait_for_async_insert": 0, }, }) if err != nil { return nil, fmt.Errorf("clickhouse open: %w", err) } if err := conn.Ping(ctx); err != nil { return nil, fmt.Errorf("clickhouse ping: %w", err) } return &ClickHouse{conn: conn, db: db}, nil } func (c *ClickHouse) Close() error { return c.conn.Close() } // WriteEvents fans out a mixed-type batch into the per-type tables. // Returns the number of rows successfully inserted across all tables. func (c *ClickHouse) WriteEvents(ctx context.Context, events []*model.IngestedEvent) (int, error) { if len(events) == 0 { return 0, nil } // Bucket by event type so each insert hits one table. buckets := map[string][]*model.IngestedEvent{} for _, e := range events { buckets[e.Type] = append(buckets[e.Type], e) } total := 0 for t, evs := range buckets { var err error switch t { case "track": err = c.writeTrack(ctx, evs) case "identify": err = c.writeIdentify(ctx, evs) case "page", "screen": err = c.writePage(ctx, evs) case "group": err = c.writeGroup(ctx, evs) default: // alias / unknown types -- write to track for now err = c.writeTrack(ctx, evs) } if err != nil { return total, fmt.Errorf("write %s: %w", t, err) } total += len(evs) } return total, nil } // --------------------------------------------------------------------------- // per-table batch inserts // --------------------------------------------------------------------------- func (c *ClickHouse) writeTrack(ctx context.Context, evs []*model.IngestedEvent) error { batch, err := c.conn.PrepareBatch(ctx, "INSERT INTO events_track") if err != nil { return err } for _, e := range evs { err := batch.Append( e.WorkspaceID, e.SourceID, e.MessageID, e.AnonymousID, e.UserID, e.Event, e.Timestamp, e.SentAt, e.ReceivedAt, mapToStr(e.Properties), mapToStr(e.Context), e.IP, e.UserAgent, libraryName(e.Context), libraryVersion(e.Context), ) if err != nil { return err } } return batch.Send() } func (c *ClickHouse) writeIdentify(ctx context.Context, evs []*model.IngestedEvent) error { batch, err := c.conn.PrepareBatch(ctx, "INSERT INTO events_identify") if err != nil { return err } for _, e := range evs { err := batch.Append( e.WorkspaceID, e.SourceID, e.MessageID, e.AnonymousID, e.UserID, e.Timestamp, e.SentAt, e.ReceivedAt, mapToStr(e.Traits), mapToStr(e.Context), e.IP, e.UserAgent, ) if err != nil { return err } } return batch.Send() } func (c *ClickHouse) writePage(ctx context.Context, evs []*model.IngestedEvent) error { batch, err := c.conn.PrepareBatch(ctx, "INSERT INTO events_page") if err != nil { return err } for _, e := range evs { path, _ := e.Properties["path"].(string) url, _ := e.Properties["url"].(string) referrer, _ := e.Properties["referrer"].(string) err := batch.Append( e.WorkspaceID, e.SourceID, e.MessageID, e.AnonymousID, e.UserID, e.Name, e.Category, e.Timestamp, e.SentAt, e.ReceivedAt, mapToStr(e.Properties), mapToStr(e.Context), e.IP, e.UserAgent, referrer, path, url, ) if err != nil { return err } } return batch.Send() } func (c *ClickHouse) writeGroup(ctx context.Context, evs []*model.IngestedEvent) error { batch, err := c.conn.PrepareBatch(ctx, "INSERT INTO events_group") if err != nil { return err } for _, e := range evs { err := batch.Append( e.WorkspaceID, e.SourceID, e.MessageID, e.AnonymousID, e.UserID, e.GroupID, e.Timestamp, e.SentAt, e.ReceivedAt, mapToStr(e.Traits), mapToStr(e.Context), e.IP, e.UserAgent, ) if err != nil { return err } } return batch.Send() } // WriteDLQ inserts records from the DLQ topic. func (c *ClickHouse) WriteDLQ(ctx context.Context, recs []*model.DLQRecord) error { if len(recs) == 0 { return nil } batch, err := c.conn.PrepareBatch(ctx, "INSERT INTO events_dlq") if err != nil { return err } for _, r := range recs { if err := batch.Append( r.WorkspaceID, r.SourceID, r.MessageID, r.ReceivedAt, r.Reason, r.Field, r.RawPayload, ); err != nil { return err } } return batch.Send() } // --------------------------------------------------------------------------- // helpers // --------------------------------------------------------------------------- // mapToStr converts a map[string]any into the Map(String, String) shape // ClickHouse expects. Non-string values are JSON-encoded. func mapToStr(in map[string]any) map[string]string { if in == nil { return map[string]string{} } out := make(map[string]string, len(in)) for k, v := range in { out[k] = anyToStr(v) } return out } func anyToStr(v any) string { switch x := v.(type) { case nil: return "" case string: return x case float64: return strconv.FormatFloat(x, 'f', -1, 64) case int: return strconv.Itoa(x) case int64: return strconv.FormatInt(x, 10) case bool: return strconv.FormatBool(x) default: b, _ := json.Marshal(v) return string(b) } } func libraryName(ctx map[string]any) string { if ctx == nil { return "" } if v, ok := ctx["library_name"].(string); ok { return v } return "" } func libraryVersion(ctx map[string]any) string { if ctx == nil { return "" } if v, ok := ctx["library_version"].(string); ok { return v } return "" }