108 lines
2.5 KiB
Go
108 lines
2.5 KiB
Go
// Package batcher accumulates events from the consumer until either the
// size cap or the time cap is hit, then flushes them to the writer.
//
// Flush semantics:
//   - on size cap: flush immediately
//   - on time cap: flush whatever is buffered (a no-op when the buffer is empty)
//   - on shutdown: flush whatever is buffered, then return
|
|
package batcher
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/dbiz/cdp/ingestion/bulker/internal/model"
|
|
)
|
|
|
|
// FlushFunc is the callback a Batcher invokes to hand a batch of buffered
// events to the writer. A non-nil error marks the flush as failed; the Batcher
// logs it and returns it to whoever triggered the flush.
type FlushFunc func(ctx context.Context, batch []*model.IngestedEvent) error
|
|
|
|
type Batcher struct {
|
|
size int
|
|
interval time.Duration
|
|
flush FlushFunc
|
|
log *zap.Logger
|
|
|
|
mu sync.Mutex
|
|
buffer []*model.IngestedEvent
|
|
}
|
|
|
|
func New(size int, interval time.Duration, flush FlushFunc, log *zap.Logger) *Batcher {
|
|
return &Batcher{
|
|
size: size,
|
|
interval: interval,
|
|
flush: flush,
|
|
log: log,
|
|
buffer: make([]*model.IngestedEvent, 0, size),
|
|
}
|
|
}
|
|
|
|
// Add appends an event. If the size cap is reached we flush synchronously
|
|
// before returning so the consumer commit can rely on durability.
|
|
func (b *Batcher) Add(ctx context.Context, e *model.IngestedEvent) error {
|
|
b.mu.Lock()
|
|
b.buffer = append(b.buffer, e)
|
|
if len(b.buffer) < b.size {
|
|
b.mu.Unlock()
|
|
return nil
|
|
}
|
|
batch := b.swap()
|
|
b.mu.Unlock()
|
|
return b.doFlush(ctx, batch)
|
|
}
|
|
|
|
// Run blocks until ctx is cancelled, flushing the buffer every interval.
|
|
func (b *Batcher) Run(ctx context.Context) {
|
|
t := time.NewTicker(b.interval)
|
|
defer t.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
b.FlushNow(context.Background())
|
|
return
|
|
case <-t.C:
|
|
if err := b.FlushNow(ctx); err != nil {
|
|
b.log.Warn("batch flush failed", zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// FlushNow swaps the buffer and flushes synchronously.
|
|
func (b *Batcher) FlushNow(ctx context.Context) error {
|
|
b.mu.Lock()
|
|
batch := b.swap()
|
|
b.mu.Unlock()
|
|
return b.doFlush(ctx, batch)
|
|
}
|
|
|
|
// swap returns the current buffer and replaces it with a fresh slice.
|
|
// Caller must hold b.mu.
|
|
func (b *Batcher) swap() []*model.IngestedEvent {
|
|
if len(b.buffer) == 0 {
|
|
return nil
|
|
}
|
|
out := b.buffer
|
|
b.buffer = make([]*model.IngestedEvent, 0, b.size)
|
|
return out
|
|
}
|
|
|
|
func (b *Batcher) doFlush(ctx context.Context, batch []*model.IngestedEvent) error {
|
|
if len(batch) == 0 {
|
|
return nil
|
|
}
|
|
start := time.Now()
|
|
if err := b.flush(ctx, batch); err != nil {
|
|
b.log.Error("flush failed",
|
|
zap.Int("count", len(batch)),
|
|
zap.Error(err))
|
|
return err
|
|
}
|
|
b.log.Info("flushed",
|
|
zap.Int("count", len(batch)),
|
|
zap.Int64("duration_ms", time.Since(start).Milliseconds()))
|
|
return nil
|
|
}
|