testable
This commit is contained in:
176
ingestion/ingest/internal/live/stream.go
Normal file
176
ingestion/ingest/internal/live/stream.go
Normal file
@@ -0,0 +1,176 @@
|
||||
// Package live streams events from the Kafka ingest topic over Server-Sent
|
||||
// Events so the console can show what is flowing through the pipeline in
|
||||
// real time. Each SSE connection spins up its own consumer group so the
|
||||
// bulker's offsets are untouched.
|
||||
package live
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/twmb/franz-go/pkg/kgo"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type Streamer struct {
|
||||
brokers []string
|
||||
topic string
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func New(brokers []string, topic string, log *zap.Logger) *Streamer {
|
||||
return &Streamer{brokers: brokers, topic: topic, log: log}
|
||||
}
|
||||
|
||||
// Filter selects which records a stream forwards to the client. A field left
// as the empty string places no restriction on that attribute.
type Filter struct {
	// WorkspaceID, when set, must equal the record's workspace.
	WorkspaceID string
	// SourceID, when set, must equal the record's source.
	SourceID string
	// EventType, when set, is one of: track | identify | page | group.
	EventType string
}
|
||||
|
||||
// Stream writes SSE frames to w until the request context is cancelled. It
|
||||
// joins a fresh consumer group seeded at the latest offset so the client
|
||||
// sees events that arrive *after* subscription (no replay of history).
|
||||
func (s *Streamer) Stream(ctx context.Context, w http.ResponseWriter, flt Filter) error {
|
||||
flusher, ok := w.(http.Flusher)
|
||||
if !ok {
|
||||
return fmt.Errorf("response writer does not support flushing")
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.Header().Set("Cache-Control", "no-cache")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
w.Header().Set("X-Accel-Buffering", "no") // disable nginx proxy buffering
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
// Tell the client we're alive.
|
||||
fmt.Fprintf(w, ": connected\n\n")
|
||||
flusher.Flush()
|
||||
|
||||
groupID := "live-" + uuid.NewString()
|
||||
cl, err := kgo.NewClient(
|
||||
kgo.SeedBrokers(s.brokers...),
|
||||
kgo.ConsumerGroup(groupID),
|
||||
kgo.ConsumeTopics(s.topic),
|
||||
kgo.ConsumeResetOffset(kgo.NewOffset().AtEnd()),
|
||||
kgo.DisableAutoCommit(), // tail mode -- never commit
|
||||
kgo.ClientID("cdp-live"),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("kafka client: %w", err)
|
||||
}
|
||||
defer cl.Close()
|
||||
|
||||
// keep-alive comments every 25s so proxies don't time out the connection.
|
||||
keepAlive := time.NewTicker(25 * time.Second)
|
||||
defer keepAlive.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
case <-keepAlive.C:
|
||||
fmt.Fprintf(w, ": keepalive\n\n")
|
||||
flusher.Flush()
|
||||
default:
|
||||
}
|
||||
|
||||
// Short poll so we react quickly to ctx cancel.
|
||||
pollCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
fetches := cl.PollFetches(pollCtx)
|
||||
cancel()
|
||||
|
||||
if ctx.Err() != nil {
|
||||
return nil
|
||||
}
|
||||
if errs := fetches.Errors(); len(errs) > 0 {
|
||||
for _, e := range errs {
|
||||
if e.Err == context.DeadlineExceeded || e.Err == context.Canceled {
|
||||
continue
|
||||
}
|
||||
s.log.Warn("live fetch error", zap.Error(e.Err))
|
||||
}
|
||||
}
|
||||
|
||||
var stopped bool
|
||||
fetches.EachRecord(func(r *kgo.Record) {
|
||||
if stopped {
|
||||
return
|
||||
}
|
||||
if !matches(r, flt) {
|
||||
return
|
||||
}
|
||||
frame, err := sseFrame(r)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if _, werr := w.Write(frame); werr != nil {
|
||||
stopped = true
|
||||
return
|
||||
}
|
||||
flusher.Flush()
|
||||
})
|
||||
if stopped {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// matches returns true when the record passes the filter.
|
||||
func matches(r *kgo.Record, flt Filter) bool {
|
||||
if flt.WorkspaceID == "" && flt.SourceID == "" && flt.EventType == "" {
|
||||
return true
|
||||
}
|
||||
get := func(key string) string {
|
||||
for _, h := range r.Headers {
|
||||
if h.Key == key {
|
||||
return string(h.Value)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
if flt.WorkspaceID != "" && get("workspace_id") != flt.WorkspaceID {
|
||||
return false
|
||||
}
|
||||
if flt.SourceID != "" && get("source_id") != flt.SourceID {
|
||||
return false
|
||||
}
|
||||
if flt.EventType != "" && !strings.EqualFold(get("type"), flt.EventType) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// sseFrame builds an `event: ...\ndata: ...\n\n` block from a Kafka record.
|
||||
func sseFrame(r *kgo.Record) ([]byte, error) {
|
||||
// We pass the raw event value through; the console decodes it.
|
||||
// Each frame also carries Kafka metadata under `meta`.
|
||||
envelope := struct {
|
||||
Topic string `json:"topic"`
|
||||
Partition int32 `json:"partition"`
|
||||
Offset int64 `json:"offset"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Event json.RawMessage `json:"event"`
|
||||
}{
|
||||
Topic: r.Topic,
|
||||
Partition: r.Partition,
|
||||
Offset: r.Offset,
|
||||
Timestamp: r.Timestamp,
|
||||
Event: r.Value,
|
||||
}
|
||||
body, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := make([]byte, 0, len(body)+16)
|
||||
out = append(out, "event: ingest\ndata: "...)
|
||||
out = append(out, body...)
|
||||
out = append(out, '\n', '\n')
|
||||
return out, nil
|
||||
}
|
||||
Reference in New Issue
Block a user