// Package batcher accumulates events from the consumer until either the // size cap or the time cap is hit, then flushes them to the writer. // // Flush semantics: // - on size cap: flush immediately // - on time cap: flush whatever is buffered (even 0 events: no-op) // - on shutdown: flush whatever is buffered, then return package batcher import ( "context" "sync" "time" "go.uber.org/zap" "github.com/dbiz/cdp/ingestion/bulker/internal/model" ) type FlushFunc func(ctx context.Context, events []*model.IngestedEvent) error type Batcher struct { size int interval time.Duration flush FlushFunc log *zap.Logger mu sync.Mutex buffer []*model.IngestedEvent } func New(size int, interval time.Duration, flush FlushFunc, log *zap.Logger) *Batcher { return &Batcher{ size: size, interval: interval, flush: flush, log: log, buffer: make([]*model.IngestedEvent, 0, size), } } // Add appends an event. If the size cap is reached we flush synchronously // before returning so the consumer commit can rely on durability. func (b *Batcher) Add(ctx context.Context, e *model.IngestedEvent) error { b.mu.Lock() b.buffer = append(b.buffer, e) if len(b.buffer) < b.size { b.mu.Unlock() return nil } batch := b.swap() b.mu.Unlock() return b.doFlush(ctx, batch) } // Run blocks until ctx is cancelled, flushing the buffer every interval. func (b *Batcher) Run(ctx context.Context) { t := time.NewTicker(b.interval) defer t.Stop() for { select { case <-ctx.Done(): b.FlushNow(context.Background()) return case <-t.C: if err := b.FlushNow(ctx); err != nil { b.log.Warn("batch flush failed", zap.Error(err)) } } } } // FlushNow swaps the buffer and flushes synchronously. func (b *Batcher) FlushNow(ctx context.Context) error { b.mu.Lock() batch := b.swap() b.mu.Unlock() return b.doFlush(ctx, batch) } // swap returns the current buffer and replaces it with a fresh slice. // Caller must hold b.mu. func (b *Batcher) swap() []*model.IngestedEvent { if len(b.buffer) == 0 { return nil } out := b.buffer b.buffer = make([]*model.IngestedEvent, 0, b.size) return out } func (b *Batcher) doFlush(ctx context.Context, batch []*model.IngestedEvent) error { if len(batch) == 0 { return nil } start := time.Now() if err := b.flush(ctx, batch); err != nil { b.log.Error("flush failed", zap.Int("count", len(batch)), zap.Error(err)) return err } b.log.Info("flushed", zap.Int("count", len(batch)), zap.Int64("duration_ms", time.Since(start).Milliseconds())) return nil }