Files
cdp/ingestion/ingest/internal/live/stream.go
2026-05-25 11:00:13 +07:00

177 lines
4.4 KiB
Go

// Package live streams events from the Kafka ingest topic over Server-Sent
// Events so the console can show what is flowing through the pipeline in
// real time. Each SSE connection spins up its own consumer group so the
// bulker's offsets are untouched.
package live
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/twmb/franz-go/pkg/kgo"
	"go.uber.org/zap"
)
// Streamer holds the Kafka connection settings and logger shared by every
// live SSE stream. Construct it with New.
type Streamer struct {
	// brokers are the seed broker addresses handed to the Kafka client.
	brokers []string
	// topic is the Kafka topic that Stream consumes.
	topic string
	// log receives warnings about fetch errors seen while streaming.
	log *zap.Logger
}
// New builds a Streamer holding the given seed brokers, topic name and
// logger. The brokers slice is stored as passed in, not copied.
func New(brokers []string, topic string, log *zap.Logger) *Streamer {
	st := new(Streamer)
	st.brokers = brokers
	st.topic = topic
	st.log = log
	return st
}
// Filter selects which records a stream forwards to the client. A field left
// as the empty string places no restriction on that attribute, so the zero
// Filter lets every record through.
type Filter struct {
	// WorkspaceID restricts the stream to a single workspace.
	WorkspaceID string
	// SourceID restricts the stream to a single source.
	SourceID string
	// EventType restricts the stream to one kind of event:
	// track | identify | page | group.
	EventType string
}
// Stream tails the ingest topic and writes every record that passes flt to w
// as a Server-Sent Events frame, until ctx is cancelled or a write to the
// client fails.
//
// Each call joins a fresh consumer group (a random "live-<uuid>" ID) seeded
// at the latest offset, so the client only sees events that arrive *after*
// it subscribed (no replay of history). Auto-commit is disabled, so the
// stream never commits offsets.
//
// An error is returned when w does not implement http.Flusher or when the
// Kafka client cannot be constructed. Both are detected before any header or
// body byte is written, so the caller can still send its own error status.
// Once streaming has started, context cancellation and failed client writes
// end the stream with a nil error.
func (s *Streamer) Stream(ctx context.Context, w http.ResponseWriter, flt Filter) error {
	flusher, ok := w.(http.Flusher)
	if !ok {
		return errors.New("response writer does not support flushing")
	}

	// Build the Kafka client before committing to a 200 response: if
	// construction fails the error is returned while the response is still
	// untouched.
	groupID := "live-" + uuid.NewString()
	cl, err := kgo.NewClient(
		kgo.SeedBrokers(s.brokers...),
		kgo.ConsumerGroup(groupID),
		kgo.ConsumeTopics(s.topic),
		kgo.ConsumeResetOffset(kgo.NewOffset().AtEnd()),
		kgo.DisableAutoCommit(), // tail mode -- never commit
		kgo.ClientID("cdp-live"),
	)
	if err != nil {
		return fmt.Errorf("kafka client: %w", err)
	}
	defer cl.Close()

	hdr := w.Header()
	hdr.Set("Content-Type", "text/event-stream")
	hdr.Set("Cache-Control", "no-cache")
	hdr.Set("Connection", "keep-alive")
	hdr.Set("X-Accel-Buffering", "no") // disable nginx proxy buffering
	w.WriteHeader(http.StatusOK)

	// Tell the client we're alive. A failed write means the client is
	// already gone, which ends the stream quietly like any later write
	// failure.
	if _, err := fmt.Fprint(w, ": connected\n\n"); err != nil {
		return nil
	}
	flusher.Flush()

	// keep-alive comments every 25s so proxies don't time out the connection.
	keepAlive := time.NewTicker(25 * time.Second)
	defer keepAlive.Stop()

	for {
		// Non-blocking check: the poll below is where the loop actually waits.
		select {
		case <-ctx.Done():
			return nil
		case <-keepAlive.C:
			if _, err := fmt.Fprint(w, ": keepalive\n\n"); err != nil {
				return nil
			}
			flusher.Flush()
		default:
		}

		// Short poll so we react quickly to ctx cancel and keep-alive ticks.
		pollCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
		fetches := cl.PollFetches(pollCtx)
		cancel()
		if ctx.Err() != nil {
			return nil
		}

		for _, fe := range fetches.Errors() {
			// The poll timeout surfaces as a context error on every idle
			// poll; errors.Is also catches it when wrapped.
			if errors.Is(fe.Err, context.DeadlineExceeded) || errors.Is(fe.Err, context.Canceled) {
				continue
			}
			s.log.Warn("live fetch error", zap.Error(fe.Err))
		}

		for it := fetches.RecordIter(); !it.Done(); {
			rec := it.Next()
			if !matches(rec, flt) {
				continue
			}
			frame, err := sseFrame(rec)
			if err != nil {
				// Skip records that cannot be framed, but leave a trace
				// instead of dropping them silently.
				s.log.Warn("live frame encode error",
					zap.Int32("partition", rec.Partition),
					zap.Int64("offset", rec.Offset),
					zap.Error(err))
				continue
			}
			if _, err := w.Write(frame); err != nil {
				// Client went away; stop without reporting an error.
				return nil
			}
			flusher.Flush()
		}
	}
}
// matches reports whether record r satisfies every non-empty field of flt.
// The workspace and source IDs are compared exactly against the
// "workspace_id" and "source_id" record headers; the event type is compared
// against the "type" header ignoring case. A header that is absent reads as
// the empty string.
func matches(r *kgo.Record, flt Filter) bool {
	unfiltered := flt.WorkspaceID == "" && flt.SourceID == "" && flt.EventType == ""
	if unfiltered {
		return true
	}

	// header returns the value of the first header called name, or "".
	header := func(name string) string {
		for i := range r.Headers {
			if r.Headers[i].Key == name {
				return string(r.Headers[i].Value)
			}
		}
		return ""
	}

	switch {
	case flt.WorkspaceID != "" && header("workspace_id") != flt.WorkspaceID:
		return false
	case flt.SourceID != "" && header("source_id") != flt.SourceID:
		return false
	case flt.EventType != "" && !strings.EqualFold(header("type"), flt.EventType):
		return false
	}
	return true
}
// sseFrame renders a Kafka record as one SSE frame of the form
// "event: ingest\ndata: <json>\n\n".
//
// The JSON payload is a flat envelope: the record's topic, partition, offset
// and timestamp sit next to the raw record value, which is passed through
// under "event" for the console to decode. The value is embedded as
// json.RawMessage, so json.Marshal fails when it is not valid JSON; that
// error is returned together with a nil frame.
func sseFrame(r *kgo.Record) ([]byte, error) {
	const (
		prefix = "event: ingest\ndata: "
		suffix = "\n\n"
	)

	envelope := struct {
		Topic     string          `json:"topic"`
		Partition int32           `json:"partition"`
		Offset    int64           `json:"offset"`
		Timestamp time.Time       `json:"timestamp"`
		Event     json.RawMessage `json:"event"`
	}{
		Topic:     r.Topic,
		Partition: r.Partition,
		Offset:    r.Offset,
		Timestamp: r.Timestamp,
		Event:     r.Value,
	}
	body, err := json.Marshal(envelope)
	if err != nil {
		return nil, err
	}

	// Size the buffer for the exact frame length so the appends below never
	// have to grow it.
	out := make([]byte, 0, len(prefix)+len(body)+len(suffix))
	out = append(out, prefix...)
	out = append(out, body...)
	out = append(out, suffix...)
	return out, nil
}