init ingestion
This commit is contained in:
107
ingestion/bulker/internal/batcher/batcher.go
Normal file
107
ingestion/bulker/internal/batcher/batcher.go
Normal file
@@ -0,0 +1,107 @@
|
||||
// Package batcher accumulates events from the consumer until either the
|
||||
// size cap or the time cap is hit, then flushes them to the writer.
|
||||
//
|
||||
// Flush semantics:
|
||||
// - on size cap: flush immediately
|
||||
// - on time cap: flush whatever is buffered (even 0 events: no-op)
|
||||
// - on shutdown: flush whatever is buffered, then return
|
||||
package batcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/bulker/internal/model"
|
||||
)
|
||||
|
||||
// FlushFunc is the callback the Batcher invokes with a batch of buffered
// events. A non-nil error is logged by the Batcher and returned to whoever
// triggered the flush.
type FlushFunc func(ctx context.Context, events []*model.IngestedEvent) error
|
||||
|
||||
type Batcher struct {
|
||||
size int
|
||||
interval time.Duration
|
||||
flush FlushFunc
|
||||
log *zap.Logger
|
||||
|
||||
mu sync.Mutex
|
||||
buffer []*model.IngestedEvent
|
||||
}
|
||||
|
||||
func New(size int, interval time.Duration, flush FlushFunc, log *zap.Logger) *Batcher {
|
||||
return &Batcher{
|
||||
size: size,
|
||||
interval: interval,
|
||||
flush: flush,
|
||||
log: log,
|
||||
buffer: make([]*model.IngestedEvent, 0, size),
|
||||
}
|
||||
}
|
||||
|
||||
// Add appends an event. If the size cap is reached we flush synchronously
|
||||
// before returning so the consumer commit can rely on durability.
|
||||
func (b *Batcher) Add(ctx context.Context, e *model.IngestedEvent) error {
|
||||
b.mu.Lock()
|
||||
b.buffer = append(b.buffer, e)
|
||||
if len(b.buffer) < b.size {
|
||||
b.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
batch := b.swap()
|
||||
b.mu.Unlock()
|
||||
return b.doFlush(ctx, batch)
|
||||
}
|
||||
|
||||
// Run blocks until ctx is cancelled, flushing the buffer every interval.
|
||||
func (b *Batcher) Run(ctx context.Context) {
|
||||
t := time.NewTicker(b.interval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
b.FlushNow(context.Background())
|
||||
return
|
||||
case <-t.C:
|
||||
if err := b.FlushNow(ctx); err != nil {
|
||||
b.log.Warn("batch flush failed", zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FlushNow swaps the buffer and flushes synchronously.
|
||||
func (b *Batcher) FlushNow(ctx context.Context) error {
|
||||
b.mu.Lock()
|
||||
batch := b.swap()
|
||||
b.mu.Unlock()
|
||||
return b.doFlush(ctx, batch)
|
||||
}
|
||||
|
||||
// swap returns the current buffer and replaces it with a fresh slice.
|
||||
// Caller must hold b.mu.
|
||||
func (b *Batcher) swap() []*model.IngestedEvent {
|
||||
if len(b.buffer) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := b.buffer
|
||||
b.buffer = make([]*model.IngestedEvent, 0, b.size)
|
||||
return out
|
||||
}
|
||||
|
||||
func (b *Batcher) doFlush(ctx context.Context, batch []*model.IngestedEvent) error {
|
||||
if len(batch) == 0 {
|
||||
return nil
|
||||
}
|
||||
start := time.Now()
|
||||
if err := b.flush(ctx, batch); err != nil {
|
||||
b.log.Error("flush failed",
|
||||
zap.Int("count", len(batch)),
|
||||
zap.Error(err))
|
||||
return err
|
||||
}
|
||||
b.log.Info("flushed",
|
||||
zap.Int("count", len(batch)),
|
||||
zap.Int64("duration_ms", time.Since(start).Milliseconds()))
|
||||
return nil
|
||||
}
|
||||
56
ingestion/bulker/internal/batcher/batcher_test.go
Normal file
56
ingestion/bulker/internal/batcher/batcher_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package batcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/dbiz/cdp/ingestion/bulker/internal/model"
|
||||
)
|
||||
|
||||
func TestBatcher_FlushesOnSizeCap(t *testing.T) {
|
||||
var flushed int32
|
||||
flush := func(_ context.Context, evs []*model.IngestedEvent) error {
|
||||
atomic.AddInt32(&flushed, int32(len(evs)))
|
||||
return nil
|
||||
}
|
||||
b := New(3, time.Hour, flush, zap.NewNop())
|
||||
for i := 0; i < 3; i++ {
|
||||
_ = b.Add(context.Background(), &model.IngestedEvent{MessageID: "x"})
|
||||
}
|
||||
assert.Equal(t, int32(3), atomic.LoadInt32(&flushed))
|
||||
}
|
||||
|
||||
func TestBatcher_FlushNow_NoOpOnEmpty(t *testing.T) {
|
||||
var called int32
|
||||
flush := func(_ context.Context, _ []*model.IngestedEvent) error {
|
||||
atomic.AddInt32(&called, 1)
|
||||
return nil
|
||||
}
|
||||
b := New(10, time.Hour, flush, zap.NewNop())
|
||||
_ = b.FlushNow(context.Background())
|
||||
assert.Equal(t, int32(0), atomic.LoadInt32(&called))
|
||||
}
|
||||
|
||||
func TestBatcher_FlushesOnTimer(t *testing.T) {
|
||||
var flushed int32
|
||||
flush := func(_ context.Context, evs []*model.IngestedEvent) error {
|
||||
atomic.AddInt32(&flushed, int32(len(evs)))
|
||||
return nil
|
||||
}
|
||||
b := New(1000, 50*time.Millisecond, flush, zap.NewNop())
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go b.Run(ctx)
|
||||
|
||||
_ = b.Add(context.Background(), &model.IngestedEvent{MessageID: "a"})
|
||||
_ = b.Add(context.Background(), &model.IngestedEvent{MessageID: "b"})
|
||||
|
||||
time.Sleep(120 * time.Millisecond)
|
||||
cancel()
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
assert.Equal(t, int32(2), atomic.LoadInt32(&flushed))
|
||||
}
|
||||
Reference in New Issue
Block a user