init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,126 @@
// HTTP API for rotor.
//
// Endpoints
// POST /v1/run run an ad-hoc function against a single event
// POST /v1/transform run a registered function (by workspace/slug)
// POST /v1/functions upsert function code (admin)
// DELETE /v1/functions/:workspace/:slug invalidate
// GET /health liveness
// GET /ready readiness
//
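// All responses are JSON. Error bodies always carry `error`; function failures
// add `kind`, validation failures add `issues`, and a missing function adds
// `workspace_id` and `slug`.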
import Fastify from 'fastify';
import { z } from 'zod';
import { FunctionError, IsolateRunner } from '../runtime/isolate.js';
import { Registry } from '../registry/registry.js';
// Field definitions shared by the request schemas below.
const EVENT_TYPES = ['track', 'identify', 'page', 'group', 'alias', 'screen'];
const MAX_CODE_LENGTH = 64 * 1024;
const optionalString = z.string().optional();
const optionalBag = z.record(z.unknown()).optional();
const functionSource = z.string().min(1).max(MAX_CODE_LENGTH);

// A single event. Unknown top-level keys are kept (passthrough) so the
// function under test sees the event exactly as it was sent.
const eventSchema = z
  .object({
    workspace_id: z.string(),
    source_id: optionalString,
    message_id: z.string(),
    type: z.enum(EVENT_TYPES),
    anonymous_id: optionalString,
    user_id: optionalString,
    event: optionalString,
    properties: optionalBag,
    traits: optionalBag,
    context: optionalBag,
    timestamp: optionalString,
    sent_at: optionalString,
    received_at: optionalString,
  })
  .passthrough();

// Body of POST /v1/run: ad-hoc code plus the event to feed it.
const runSchema = z.object({
  code: functionSource,
  event: eventSchema,
});

// Body of POST /v1/transform: a registered function addressed by workspace/slug.
const transformSchema = z.object({
  workspace_id: z.string(),
  function: z.string(),
  event: eventSchema,
});

// Body of POST /v1/functions: function code to store, with an optional version.
const upsertSchema = z.object({
  workspace_id: z.string(),
  slug: z.string(),
  code: functionSource,
  version: z.number().int().optional(),
});
/**
 * Assemble the rotor HTTP API.
 *
 * Creates one IsolateRunner (from `config.isolate`), one Registry and a
 * Fastify instance, registers every route on it, and hands all three back so
 * the caller can start the server or poke at the registry directly.
 *
 * @param {{ config: { isolate: object }, logger: unknown }} deps
 * @returns {{ app: object, registry: object, runner: object }}
 */
export function buildServer({ config, logger }) {
  const runner = new IsolateRunner(config.isolate);
  const registry = new Registry();
  const app = Fastify({
    logger,
    // 1MB cap on incoming code/payloads
    bodyLimit: 1024 * 1024,
    disableRequestLogging: false,
  });

  // Shared 400 reply for a body that failed schema validation.
  const rejectInvalid = (reply, validationError) => {
    reply.status(400);
    return { error: 'invalid request', issues: validationError.flatten() };
  };

  // Run `code` against `event`; `extra` is merged into the success body.
  // Failures are mapped to an HTTP response by handleFnError.
  const execute = async (reply, code, event, extra = {}) => {
    try {
      const result = await runner.run(code, event);
      return { result, ...extra };
    } catch (err) {
      return handleFnError(reply, err);
    }
  };

  app.get('/health', async () => ({ status: 'ok' }));
  app.get('/ready', async () => ({ status: 'ready' }));

  app.post('/v1/run', async (req, reply) => {
    const parsed = runSchema.safeParse(req.body);
    if (!parsed.success) return rejectInvalid(reply, parsed.error);

    const { code, event } = parsed.data;
    return execute(reply, code, event);
  });

  app.post('/v1/transform', async (req, reply) => {
    const parsed = transformSchema.safeParse(req.body);
    if (!parsed.success) return rejectInvalid(reply, parsed.error);

    const { workspace_id, function: slug, event } = parsed.data;
    const entry = await registry.get(workspace_id, slug);
    if (!entry) {
      reply.status(404);
      return { error: 'function not found', workspace_id, slug };
    }
    return execute(reply, entry.code, event, { version: entry.version });
  });

  app.post('/v1/functions', async (req, reply) => {
    const parsed = upsertSchema.safeParse(req.body);
    if (!parsed.success) return rejectInvalid(reply, parsed.error);

    const { workspace_id, slug, code, version } = parsed.data;
    // A missing version is stored as 1.
    registry.set(workspace_id, slug, code, version ?? 1);
    return { ok: true };
  });

  app.delete('/v1/functions/:workspace/:slug', async (req) => {
    const { workspace, slug } = req.params;
    registry.invalidate(workspace, slug);
    return { ok: true };
  });

  return { app, registry, runner };
}
/**
 * Turn an error raised while running a user function into an HTTP response.
 *
 * A FunctionError is the caller's fault: a `timeout` maps to 422, every other
 * kind to 400, and the body echoes the message and kind. Anything else is an
 * unexpected server-side failure and maps to a generic 500.
 *
 * @param {{ status: (code: number) => unknown, log?: { error: Function } }} reply
 * @param {unknown} err
 * @returns {{ error: string, kind?: string }} the JSON body to send
 */
function handleFnError(reply, err) {
  if (err instanceof FunctionError) {
    reply.status(err.kind === 'timeout' ? 422 : 400);
    return { error: err.message, kind: err.kind };
  }
  // The client only ever sees "internal error", so this is the one place the
  // real failure can be recorded. Without the log line it is lost entirely.
  reply.log?.error({ err }, 'unexpected error while running function');
  reply.status(500);
  return { error: 'internal error' };
}

View File

@@ -0,0 +1,9 @@
// Runtime config loaded from env. Defaults mirror .env.example.
/**
 * Read an integer setting from the environment.
 *
 * An unset or blank variable yields `fallback`. A value that does not start
 * with an integer is rejected at load time: a bare parseInt would turn it into
 * NaN, which would then be used as the port, memory limit or timeout.
 *
 * @param {string} name environment variable name
 * @param {number} fallback value used when the variable is unset or blank
 * @returns {number}
 * @throws {Error} when the variable is set but cannot be parsed as an integer
 */
function readIntEnv(name, fallback) {
  const raw = process.env[name];
  if (raw == null || raw.trim() === '') return fallback;
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value)) {
    throw new Error(`${name} must be an integer, got ${JSON.stringify(raw)}`);
  }
  return value;
}

export const config = {
  // TCP port the HTTP API listens on.
  port: readIntEnv('ROTOR_PORT', 3401),
  // Log level handed to the logger.
  logLevel: process.env.ROTOR_LOG_LEVEL ?? 'info',
  // Limits applied to every user-function isolate.
  isolate: {
    memoryLimitMB: readIntEnv('ROTOR_ISOLATE_MEMORY_MB', 128),
    timeoutMs: readIntEnv('ROTOR_FUNCTION_TIMEOUT_MS', 2000),
  },
};

View File

@@ -0,0 +1,34 @@
// rotor entry point -- starts the Fastify HTTP API.
import pino from 'pino';
import { config } from './config.js';
import { buildServer } from './api/server.js';
const logger = pino({ level: config.logLevel });
const { app } = buildServer({ config, logger });

/** Bind the HTTP listener on all interfaces; failing to bind is fatal. */
async function start() {
  const { port } = config;
  try {
    await app.listen({ host: '0.0.0.0', port });
    logger.info({ port }, 'rotor listening');
  } catch (err) {
    logger.error({ err }, 'rotor failed to start');
    process.exit(1);
  }
}

/**
 * Close the server in response to `signal`, then exit: code 0 when the close
 * succeeds, code 1 (after logging) when it fails.
 */
function shutdown(signal) {
  logger.info({ signal }, 'shutdown signal received');

  const exitClean = () => process.exit(0);
  const exitFailed = (err) => {
    logger.error({ err }, 'shutdown error');
    process.exit(1);
  };

  app.close().then(exitClean).catch(exitFailed);
}

for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, () => shutdown(signal));
}

start();

View File

@@ -0,0 +1,41 @@
// Function registry -- an in-memory cache of (workspace_id, slug) -> code.
//
// The console writes function code into Postgres; rotor loads it lazily on
// first miss and refreshes on pub/sub invalidation.
//
// For this scaffold we keep it dumb: a Map you can preload via
// POST /v1/functions or set directly in tests. Pass a real PG-backed `loader`
// to the constructor when the console exists.
/**
 * In-memory cache of function source, addressed by (workspaceId, slug).
 */
export class Registry {
  /**
   * @param {{ loader?: (workspaceId: string, slug: string) => Promise<string|null> }} opts
   *   `loader` is consulted on a cache miss; the default always resolves to
   *   null, i.e. "no such function".
   */
  constructor(opts = {}) {
    this.loader = opts.loader ?? (async () => null);
    /** @type {Map<string, { code: string, version: number }>} */
    this.cache = new Map();
  }

  /**
   * Build the cache key for a (workspaceId, slug) pair.
   *
   * The pair is encoded as a JSON array rather than joined with ":". A plain
   * join is ambiguous: ("a:b", "c") and ("a", "b:c") would share one key, so
   * one workspace could read or overwrite another workspace's function.
   *
   * @param {string} workspaceId
   * @param {string} slug
   * @returns {string}
   */
  key(workspaceId, slug) {
    return JSON.stringify([`${workspaceId}`, `${slug}`]);
  }

  /**
   * Look up a function, falling back to `loader` on a cache miss.
   *
   * Code obtained from the loader is cached with version 1.
   *
   * @param {string} workspaceId
   * @param {string} slug
   * @returns {Promise<{ code: string, version: number } | null>}
   *   null when the loader has no code for the pair
   */
  async get(workspaceId, slug) {
    const k = this.key(workspaceId, slug);
    const cached = this.cache.get(k);
    if (cached !== undefined) return cached;

    const code = await this.loader(workspaceId, slug);
    if (code == null) return null;

    const entry = { code, version: 1 };
    this.cache.set(k, entry);
    return entry;
  }

  /**
   * Store (or replace) the code for a function.
   *
   * @param {string} workspaceId
   * @param {string} slug
   * @param {string} code
   * @param {number} [version=1]
   */
  set(workspaceId, slug, code, version = 1) {
    this.cache.set(this.key(workspaceId, slug), { code, version });
  }

  /**
   * Drop the cached entry so the next `get` goes back to the loader.
   *
   * @param {string} workspaceId
   * @param {string} slug
   */
  invalidate(workspaceId, slug) {
    this.cache.delete(this.key(workspaceId, slug));
  }
}

View File

@@ -0,0 +1,97 @@
// V8 isolate wrapper for running user-supplied JS functions safely.
//
// Each invocation:
// 1. Build a fresh isolate + context (cheap to recycle for cold safety).
// 2. Compile the user code, wrapped in a small harness. The registry caches
// only the source text, so compilation happens on every invocation.
// 3. Call `transform(event)` with a deep-copied event payload.
// 4. Receive a return value (deep-copied back) within `timeoutMs`.
//
// Failures (compile error / runtime error / timeout) are surfaced as
// FunctionError so the caller can route the event to DLQ.
import ivm from 'isolated-vm';
/**
 * Failure attributable to a user-supplied function.
 *
 * `kind` says which stage failed and `cause` keeps the underlying error, if
 * there was one.
 */
export class FunctionError extends Error {
  name = 'FunctionError';

  /**
   * @param {'compile' | 'runtime' | 'timeout' | 'oom'} kind
   * @param {string} message
   * @param {unknown} [cause]
   */
  constructor(kind, message, cause) {
    super(message);
    Object.assign(this, { kind, cause });
  }
}
/**
 * Runs user-supplied transform functions, each invocation in a fresh isolate.
 */
export class IsolateRunner {
  /**
   * @param {{ memoryLimitMB: number, timeoutMs: number }} limits
   *   `memoryLimitMB` is passed to the isolate as its memory limit;
   *   `timeoutMs` bounds both the script evaluation and the transform call.
   */
  constructor({ memoryLimitMB, timeoutMs }) {
    this.memoryLimitMB = memoryLimitMB;
    this.timeoutMs = timeoutMs;
  }

  /**
   * Run `code` against `event` and return the transformed value.
   * `code` must define a function named `transform` -- e.g.:
   *
   *   function transform(event) {
   *     event.properties.hashed_email = sha256(event.user_id);
   *     return event;
   *   }
   *
   * The function may return:
   *   - the event (possibly mutated)
   *   - null  -- drop the event
   *   - array -- fan-out into multiple events
   *
   * @param {string} code user source defining `transform`
   * @param {object} event payload copied into the isolate
   * @returns {Promise<unknown>} a copy of whatever `transform` returned
   * @throws {FunctionError} kind 'compile' when the source does not compile;
   *   'timeout', 'oom' or 'runtime' when evaluating the script or calling
   *   `transform` fails
   */
  async run(code, event) {
    const isolate = new ivm.Isolate({ memoryLimit: this.memoryLimitMB });
    try {
      const context = await isolate.createContext();
      const jail = context.global;
      // Expose the global object to user code under the name `global`.
      await jail.set('global', jail.derefInto());

      let script;
      try {
        script = await isolate.compileScript(buildHarness(code));
      } catch (err) {
        throw new FunctionError('compile', err.message, err);
      }

      // Evaluating the script runs the user's top-level statements and
      // publishes `transform` as `__cdp_transform__`.
      try {
        await script.run(context, { timeout: this.timeoutMs });
      } catch (err) {
        throw this.#classify(err);
      }

      const fn = await jail.get('__cdp_transform__', { reference: true });
      const payload = new ivm.ExternalCopy(event).copyInto();

      // The call itself is where most user failures happen (an infinite loop
      // or a throw inside transform). Classify those too, so callers always
      // get a FunctionError rather than a raw isolate error.
      try {
        return await fn.apply(undefined, [payload], {
          timeout: this.timeoutMs,
          result: { copy: true },
        });
      } catch (err) {
        throw this.#classify(err);
      }
    } finally {
      // After an out-of-memory failure the isolate is already disposed.
      // Skip dispose() then, so cleanup cannot replace the FunctionError
      // with a disposal error.
      if (!isolate.isDisposed) isolate.dispose();
    }
  }

  /**
   * Map an error raised by the isolate onto a FunctionError.
   *
   * Classification is by message text: a timed-out script becomes 'timeout',
   * a disposed isolate becomes 'oom', everything else is 'runtime'.
   *
   * @param {unknown} err
   * @returns {FunctionError}
   */
  #classify(err) {
    const message = String(err?.message ?? err);
    if (message.includes('Script execution timed out')) {
      return new FunctionError('timeout', `function timed out after ${this.timeoutMs}ms`, err);
    }
    if (message.includes('Isolate was disposed')) {
      return new FunctionError('oom', 'isolate ran out of memory', err);
    }
    return new FunctionError('runtime', message, err);
  }
}
/**
 * Wrap user code in the script that is actually compiled. The script:
 *   - declares a `console` whose log/warn/error/info do nothing, so output is
 *     dropped
 *   - runs the user code as-is
 *   - throws unless the code defined a function named `transform`
 *   - publishes that function on the global as `__cdp_transform__`
 *
 * @param {string} code user source
 * @returns {string} the full script text
 */
function buildHarness(code) {
  const prelude = [
    '',
    'const console = {',
    'log: () => {}, warn: () => {}, error: () => {}, info: () => {},',
    '};',
    '',
  ].join('\n');

  const epilogue = [
    '',
    "if (typeof transform !== 'function') {",
    'throw new Error(\'user code must define a global function named "transform"\');',
    '}',
    'global.__cdp_transform__ = transform;',
    '',
  ].join('\n');

  return prelude + `${code}` + epilogue;
}