init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,107 @@
// Package batcher accumulates events from the consumer until either the
// size cap or the time cap is hit, then flushes them to the writer.
//
// Flush semantics:
// - on size cap: flush immediately
// - on time cap: flush whatever is buffered (even 0 events: no-op)
// - on shutdown: flush whatever is buffered, then return
package batcher
import (
"context"
"sync"
"time"
"go.uber.org/zap"
"github.com/dbiz/cdp/ingestion/bulker/internal/model"
)
// FlushFunc writes one batch of events to the downstream writer.
//
// The Batcher only invokes it with a non-empty batch. A non-nil error is
// logged by the Batcher and returned to whoever triggered the flush.
type FlushFunc func(ctx context.Context, events []*model.IngestedEvent) error
// Batcher buffers events and hands them to a FlushFunc in batches, either
// when the buffer reaches size (see Add) or on a timer (see Run).
//
// It contains a sync.Mutex, so it must be used through a pointer and never
// copied after first use.
type Batcher struct {
	// size is the buffer length at which Add triggers a flush; it is also
	// the capacity used for each freshly allocated buffer.
	size int

	// interval is the period of the ticker driving time-based flushes in Run.
	interval time.Duration

	// flush receives every non-empty batch.
	flush FlushFunc

	// log records flush successes and failures.
	log *zap.Logger

	// mu guards buffer.
	mu sync.Mutex

	// buffer holds the events accumulated since the last swap.
	buffer []*model.IngestedEvent
}
// New returns a Batcher that buffers up to size events before handing them
// to flush. interval is the period Run uses for time-based flushes, and log
// receives the outcome of every flush.
//
// A size below 1 is treated as 1, meaning every Add flushes immediately.
// That is what a size of 0 already did, because Add flushes as soon as the
// buffer length is no longer below size. Normalising here keeps a negative
// size from panicking in make, both in this constructor and on every later
// buffer swap.
func New(size int, interval time.Duration, flush FlushFunc, log *zap.Logger) *Batcher {
	if size < 1 {
		size = 1
	}
	return &Batcher{
		size:     size,
		interval: interval,
		flush:    flush,
		log:      log,
		buffer:   make([]*model.IngestedEvent, 0, size),
	}
}
// Add buffers e. Once the buffer has grown to the size cap, the accumulated
// batch is taken out and flushed synchronously before Add returns, so the
// consumer commit can rely on durability. Below the cap Add returns nil
// without flushing.
func (b *Batcher) Add(ctx context.Context, e *model.IngestedEvent) error {
	// full stays nil unless this call filled the buffer.
	var full []*model.IngestedEvent

	b.mu.Lock()
	b.buffer = append(b.buffer, e)
	if len(b.buffer) >= b.size {
		full = b.swap()
	}
	b.mu.Unlock()

	// The flush runs outside the lock; doFlush is a no-op for a nil batch.
	return b.doFlush(ctx, full)
}
// Run blocks until ctx is cancelled, flushing the buffer every interval.
// On cancellation it performs one final flush of whatever is buffered and
// then returns.
//
// If the configured interval is not positive, time-based flushing is
// disabled (time.NewTicker would panic) and Run only performs the final
// flush on shutdown; size-based flushing through Add is unaffected.
//
// NOTE(review): FlushNow removes the batch from the buffer before flushing,
// so a batch whose flush fails here is only logged, not retried.
func (b *Batcher) Run(ctx context.Context) {
	// A nil channel never becomes ready in select, which is how the time cap
	// is switched off when no ticker can be created.
	var tick <-chan time.Time
	if b.interval > 0 {
		t := time.NewTicker(b.interval)
		defer t.Stop()
		tick = t.C
	} else {
		b.log.Warn("non-positive flush interval; time-based flushing disabled",
			zap.Duration("interval", b.interval))
	}

	for {
		select {
		case <-ctx.Done():
			// ctx is already cancelled, so the final flush gets a fresh
			// context to give the writer a chance to complete.
			if err := b.FlushNow(context.Background()); err != nil {
				b.log.Error("final flush on shutdown failed", zap.Error(err))
			}
			return
		case <-tick:
			if err := b.FlushNow(ctx); err != nil {
				b.log.Warn("batch flush failed", zap.Error(err))
			}
		}
	}
}
// FlushNow takes everything currently buffered and flushes it synchronously,
// returning the flush error if there is one. With nothing buffered it is a
// no-op that returns nil.
func (b *Batcher) FlushNow(ctx context.Context) error {
	// Hold the lock only while detaching the buffer; the flush itself runs
	// unlocked.
	pending := func() []*model.IngestedEvent {
		b.mu.Lock()
		defer b.mu.Unlock()
		return b.swap()
	}()
	return b.doFlush(ctx, pending)
}
// swap detaches the buffered events and installs a fresh, empty buffer with
// capacity b.size. When nothing is buffered it returns nil and leaves the
// existing buffer in place.
//
// The caller must hold b.mu.
func (b *Batcher) swap() []*model.IngestedEvent {
	drained := b.buffer
	if len(drained) == 0 {
		return nil
	}
	b.buffer = make([]*model.IngestedEvent, 0, b.size)
	return drained
}
// doFlush passes batch to the configured FlushFunc and logs the outcome:
// the event count and error on failure, the event count and elapsed
// milliseconds on success. An empty batch is skipped without calling the
// FlushFunc or logging. The FlushFunc's error is returned unchanged.
func (b *Batcher) doFlush(ctx context.Context, batch []*model.IngestedEvent) error {
	n := len(batch)
	if n == 0 {
		return nil
	}

	began := time.Now()
	err := b.flush(ctx, batch)
	if err != nil {
		b.log.Error("flush failed", zap.Int("count", n), zap.Error(err))
		return err
	}

	b.log.Info("flushed",
		zap.Int("count", n),
		zap.Int64("duration_ms", time.Since(began).Milliseconds()))
	return nil
}