Add equity candle aggregation pipeline

This commit is contained in:
dirtydishes 2026-01-07 09:51:54 -05:00
parent f889a2597b
commit a87df21baa
13 changed files with 1188 additions and 10 deletions

View file

@ -0,0 +1,253 @@
import type { EquityCandle, EquityPrint } from "@islandflow/types";
/**
 * Settings accepted by the CandleAggregator constructor.
 */
export type CandleAggregationConfig = {
// Candle widths in milliseconds. The aggregator passes this list through
// normalizeIntervals, so duplicates and non-positive entries are discarded.
intervalsMs: number[];
// Lateness allowance in milliseconds. It is subtracted from the newest print
// timestamp seen to form the watermark that closes windows.
maxLateMs: number;
};
/**
 * Outcome of a single CandleAggregator.ingest call.
 */
export type CandleAggregationResult = {
// Candles whose windows were closed by the watermark during this call.
emitted: EquityCandle[];
// Count of configured intervals for which the print was rejected because its
// window had already closed (incremented once per interval, not once per print).
droppedLate: number;
};
/**
 * Mutable accumulator for one candle window of one underlying at one interval.
 * Created by createBuilder, mutated by updateBuilder, converted by toEquityCandle.
 */
type CandleBuilder = {
// Start of the window (a multiple of intervalMs, as computed by getWindowStart).
windowStart: number;
// Width of the window in milliseconds.
intervalMs: number;
underlyingId: string;
// OHLC prices accumulated from the prints seen so far.
open: number;
high: number;
low: number;
close: number;
// Sum of print.size over every print folded into this window.
volume: number;
// Number of prints folded into this window.
tradeCount: number;
// (ts, seq) of the earliest print seen; that print supplies `open`.
openTs: number;
openSeq: number;
// source_ts of the print that currently supplies `open`.
openSourceTs: number;
// (ts, seq) of the latest print seen; that print supplies `close`.
closeTs: number;
closeSeq: number;
// ingest_ts of the print that currently supplies `close`.
closeIngestTs: number;
};
/**
 * Aggregation state kept per (underlying, interval) pair by CandleAggregator.
 */
type IntervalState = {
intervalMs: number;
underlyingId: string;
// Largest print.ts observed so far for this pair; starts at 0.
lastTsSeen: number;
// Windows still open, keyed by their window start.
windows: Map<number, CandleBuilder>;
};
/**
 * Floors a number and accepts it only when the result is strictly positive.
 *
 * @param value Candidate number; may be fractional, NaN or infinite.
 * @returns The floored value, or null for NaN, +/-Infinity and anything that
 *          floors to zero or below.
 */
const toPositiveInt = (value: number): number | null => {
  // Non-finite inputs are mapped to 0 so they fall through to the null branch.
  const floored = Number.isFinite(value) ? Math.floor(value) : 0;
  return floored > 0 ? floored : null;
};
/**
 * Cleans a list of interval widths: each entry is floored to a positive
 * integer (invalid entries are discarded), duplicates are removed and the
 * result is sorted ascending.
 *
 * @param intervals Raw interval widths in milliseconds.
 * @returns A new sorted array of unique positive integers; the input is not mutated.
 */
export const normalizeIntervals = (intervals: number[]): number[] => {
  const accepted = intervals
    .map((candidate) => toPositiveInt(candidate))
    .filter((candidate): candidate is number => candidate !== null);
  return [...new Set(accepted)].sort((lhs, rhs) => lhs - rhs);
};
/**
 * Parses a comma-separated list of interval widths.
 *
 * @param value    Raw string (for example an environment variable); may be undefined or empty.
 * @param fallback Intervals used when `value` is missing or yields no valid entry.
 * @returns Normalized intervals from `value`, otherwise the normalized fallback.
 */
export const parseIntervals = (value: string | undefined, fallback: number[]): number[] => {
  if (value) {
    const candidates: number[] = [];
    for (const token of value.split(",")) {
      const numeric = Number(token.trim());
      if (Number.isFinite(numeric)) {
        candidates.push(numeric);
      }
    }
    const fromValue = normalizeIntervals(candidates);
    if (fromValue.length > 0) {
      return fromValue;
    }
  }
  return normalizeIntervals(fallback);
};
/**
 * Builds the map key identifying the aggregation state of one underlying at
 * one interval, in the form `<underlyingId>:<intervalMs>`.
 */
const buildStateKey = (underlyingId: string, intervalMs: number): string =>
  underlyingId + ":" + intervalMs;
/**
 * Aligns a timestamp down to the start of the window that contains it.
 *
 * @param ts         Timestamp to align.
 * @param intervalMs Window width; the result is a multiple of this value.
 * @returns The largest multiple of `intervalMs` that is <= `ts`.
 */
const getWindowStart = (ts: number, intervalMs: number): number => {
  const windowIndex = Math.floor(ts / intervalMs);
  return windowIndex * intervalMs;
};
/**
 * Orders two (ts, seq) pairs: true when the first is strictly before the
 * second, comparing timestamps first and using seq only to break ties.
 */
const isEarlier = (ts: number, seq: number, otherTs: number, otherSeq: number): boolean =>
  ts === otherTs ? seq < otherSeq : ts < otherTs;
/**
 * Orders two (ts, seq) pairs: true when the first is strictly after the
 * second, comparing timestamps first and using seq only to break ties.
 */
const isLater = (ts: number, seq: number, otherTs: number, otherSeq: number): boolean =>
  ts === otherTs ? seq > otherSeq : ts > otherTs;
/**
 * Seeds a window accumulator from the first print observed in that window.
 * The print supplies all four OHLC prices and both the open and close markers.
 *
 * @param print       First print seen for the window.
 * @param intervalMs  Width of the window in milliseconds.
 * @param windowStart Aligned start of the window.
 * @returns A fresh builder with a trade count of 1.
 */
const createBuilder = (
  print: EquityPrint,
  intervalMs: number,
  windowStart: number
): CandleBuilder => {
  const { price, size, ts, seq } = print;
  const seeded: CandleBuilder = {
    windowStart,
    intervalMs,
    underlyingId: print.underlying_id,
    open: price,
    high: price,
    low: price,
    close: price,
    volume: size,
    tradeCount: 1,
    openTs: ts,
    openSeq: seq,
    openSourceTs: print.source_ts,
    closeTs: ts,
    closeSeq: seq,
    closeIngestTs: print.ingest_ts
  };
  return seeded;
};
/**
 * Folds one more print into an existing window accumulator, in place.
 *
 * Volume, trade count, high and low are always updated. The open is replaced
 * only when the print sorts before the current opening print, and the close
 * only when it sorts after the current closing print, so out-of-order
 * arrivals still yield the right open/close.
 *
 * @returns The same builder instance that was passed in.
 */
const updateBuilder = (builder: CandleBuilder, print: EquityPrint): CandleBuilder => {
  const { price, size, ts, seq } = print;

  builder.volume += size;
  builder.tradeCount += 1;
  builder.high = Math.max(builder.high, price);
  builder.low = Math.min(builder.low, price);

  const replacesOpen = isEarlier(ts, seq, builder.openTs, builder.openSeq);
  if (replacesOpen) {
    Object.assign(builder, {
      open: price,
      openTs: ts,
      openSeq: seq,
      openSourceTs: print.source_ts
    });
  }

  const replacesClose = isLater(ts, seq, builder.closeTs, builder.closeSeq);
  if (replacesClose) {
    Object.assign(builder, {
      close: price,
      closeTs: ts,
      closeSeq: seq,
      closeIngestTs: print.ingest_ts
    });
  }

  return builder;
};
/**
 * Converts a window accumulator into the EquityCandle record that is emitted.
 *
 * The candle's `ts` is the window start, `source_ts` comes from the opening
 * print, and `ingest_ts`/`seq` come from the closing print. `trace_id` is
 * derived from underlying, interval and window start.
 */
const toEquityCandle = (builder: CandleBuilder): EquityCandle => {
  const { underlyingId, intervalMs, windowStart } = builder;
  const { open, high, low, close, volume } = builder;
  return {
    source_ts: builder.openSourceTs,
    ingest_ts: builder.closeIngestTs,
    seq: builder.closeSeq,
    trace_id: `candle:${underlyingId}:${intervalMs}:${windowStart}`,
    ts: windowStart,
    interval_ms: intervalMs,
    underlying_id: underlyingId,
    open,
    high,
    low,
    close,
    volume,
    trade_count: builder.tradeCount
  };
};
/**
 * Closes every window whose end (`start + intervalMs`) is at or before the
 * watermark: the window is removed from `state.windows` and converted to a candle.
 *
 * @param state     Per-(underlying, interval) state; mutated in place.
 * @param watermark Timestamp up to which windows are considered complete.
 * @returns Closed candles ordered by ascending window start (empty if none closed).
 */
const flushState = (state: IntervalState, watermark: number): EquityCandle[] => {
  // Snapshot the keys first so the map is not mutated while it is being iterated.
  const closedStarts = [...state.windows.keys()]
    .filter((start) => start + state.intervalMs <= watermark)
    .sort((lhs, rhs) => lhs - rhs);

  const candles: EquityCandle[] = [];
  for (const start of closedStarts) {
    const builder = state.windows.get(start);
    if (builder) {
      state.windows.delete(start);
      candles.push(toEquityCandle(builder));
    }
  }
  return candles;
};
/**
 * Aggregates equity prints into fixed-width OHLCV candles for several
 * intervals at once, tolerating a bounded amount of out-of-order arrival.
 *
 * State is kept per (underlying, interval). For each pair the watermark is
 * `max(0, lastTsSeen - maxLateMs)`. A window is emitted once its end is at or
 * before the watermark, and a print whose window has already been emitted is
 * counted as late and discarded.
 */
export class CandleAggregator {
  private readonly intervalsMs: number[];
  private readonly maxLateMs: number;
  private readonly stateByKey = new Map<string, IntervalState>();

  /**
   * @param config Interval widths (normalized via normalizeIntervals) and the
   *               lateness allowance. The allowance is floored and clamped to
   *               >= 0; NaN is treated as 0.
   */
  constructor(config: CandleAggregationConfig) {
    this.intervalsMs = normalizeIntervals(config.intervalsMs);
    const lateness = Math.floor(config.maxLateMs);
    // Math.max(0, NaN) is NaN, and a NaN allowance makes every watermark NaN:
    // all `<= watermark` checks then fail and no candle is emitted until
    // drain(). Fall back to "no lateness allowed" instead.
    this.maxLateMs = Number.isNaN(lateness) ? 0 : Math.max(0, lateness);
  }

  /**
   * Feeds one print into every configured interval.
   *
   * @param print Print to aggregate.
   * @returns Candles closed by this print (grouped by interval in ascending
   *          interval order, ascending window start within each interval) and
   *          the number of intervals that rejected the print as late.
   */
  ingest(print: EquityPrint): CandleAggregationResult {
    const emitted: EquityCandle[] = [];
    let droppedLate = 0;

    for (const intervalMs of this.intervalsMs) {
      const key = buildStateKey(print.underlying_id, intervalMs);
      const state =
        this.stateByKey.get(key) ??
        ({
          intervalMs,
          underlyingId: print.underlying_id,
          lastTsSeen: 0,
          windows: new Map()
        } satisfies IntervalState);

      // Advance the high-water mark before computing the watermark so the
      // current print can itself close older windows.
      state.lastTsSeen = Math.max(state.lastTsSeen, print.ts);
      this.stateByKey.set(key, state);

      const windowStart = getWindowStart(print.ts, intervalMs);
      const windowEnd = windowStart + intervalMs;
      const watermark = Math.max(0, state.lastTsSeen - this.maxLateMs);

      // A window that is past the watermark and no longer tracked has already
      // been emitted, so the print can no longer be folded in.
      if (windowEnd <= watermark && !state.windows.has(windowStart)) {
        droppedLate += 1;
      } else {
        const existing = state.windows.get(windowStart);
        if (existing) {
          updateBuilder(existing, print);
        } else {
          state.windows.set(windowStart, createBuilder(print, intervalMs, windowStart));
        }
      }

      emitted.push(...flushState(state, watermark));
    }

    return { emitted, droppedLate };
  }

  /**
   * Emits every window that is still open, regardless of the watermark, and
   * removes it from the aggregator. Used to flush partial candles on shutdown.
   *
   * @returns Remaining candles, ordered by ascending window start within each
   *          (underlying, interval) state.
   */
  drain(): EquityCandle[] {
    const emitted: EquityCandle[] = [];

    for (const state of this.stateByKey.values()) {
      const starts = Array.from(state.windows.keys()).sort((a, b) => a - b);
      for (const start of starts) {
        const builder = state.windows.get(start);
        if (!builder) {
          continue;
        }
        state.windows.delete(start);
        emitted.push(toEquityCandle(builder));
      }
    }

    return emitted;
  }
}

View file

@ -1,17 +1,387 @@
import { createLogger } from "@islandflow/observability";
import { readEnv } from "@islandflow/config";
import { createLogger, createMetrics } from "@islandflow/observability";
import {
SUBJECT_EQUITY_CANDLES,
SUBJECT_EQUITY_PRINTS,
STREAM_EQUITY_CANDLES,
STREAM_EQUITY_PRINTS,
buildDurableConsumer,
connectJetStreamWithRetry,
ensureStream,
publishJson,
subscribeJson
} from "@islandflow/bus";
import {
createClickHouseClient,
ensureEquityCandlesTable,
insertEquityCandle
} from "@islandflow/storage";
import { EquityCandleSchema, EquityPrintSchema, type EquityCandle } from "@islandflow/types";
import { createClient } from "redis";
import { z } from "zod";
import { CandleAggregator, parseIntervals } from "./aggregator";
// Service name passed to both the logger and the metrics factory below, and
// used as the NATS connection name in run().
const service = "candles";
// Module-wide logger created for this service.
const logger = createLogger({ service });
// Module-wide metrics handle; the candles.* counters in this file go through it.
const metrics = createMetrics({ service });
logger.info("service starting");
// Spellings accepted for boolean environment flags, compared after trimming
// and lower-casing the raw string.
const truthyFlagValues = ["1", "true", "yes", "on"];
const falsyFlagValues = ["0", "false", "no", "off"];

/**
 * Maps recognised string spellings of a flag to a boolean. Anything else
 * (non-strings, unknown spellings) is returned untouched so the downstream
 * z.boolean() check decides whether it is valid.
 */
const coerceFlag = (value: unknown): unknown => {
  if (typeof value !== "string") {
    return value;
  }
  const normalized = value.trim().toLowerCase();
  if (truthyFlagValues.includes(normalized)) {
    return true;
  }
  if (falsyFlagValues.includes(normalized)) {
    return false;
  }
  return value;
};

/**
 * Environment contract of the candle service: connection URLs, aggregation
 * settings, cache size and JetStream consumer behaviour, each with a default.
 */
const envSchema = z.object({
  NATS_URL: z.string().default("nats://localhost:4222"),
  CLICKHOUSE_URL: z.string().default("http://localhost:8123"),
  CLICKHOUSE_DATABASE: z.string().default("default"),
  REDIS_URL: z.string().default("redis://localhost:6379"),
  CANDLE_INTERVALS_MS: z.string().default("1000,5000,60000"),
  CANDLE_MAX_LATE_MS: z.coerce.number().int().nonnegative().default(0),
  CANDLE_CACHE_LIMIT: z.coerce.number().int().nonnegative().default(2000),
  CANDLE_DELIVER_POLICY: z.enum(["new", "all", "last", "last_per_subject"]).default("new"),
  CANDLE_CONSUMER_RESET: z.preprocess(coerceFlag, z.boolean()).default(false)
});
const shutdown = (signal: string) => {
logger.info("service stopping", { signal });
process.exit(0);
// Configuration object typed by envSchema. NOTE(review): presumably readEnv
// parses the process environment against the schema — confirm in @islandflow/config.
const env = readEnv(envSchema);
/**
 * Runs an async task up to `attempts` times with a fixed pause between tries.
 *
 * @param label    Name used in the warning logged for each failed attempt and
 *                 in the fallback error message.
 * @param attempts Maximum number of tries.
 * @param delayMs  Pause between consecutive tries (no pause after the last one).
 * @param task     Operation to run.
 * @returns The task's result from the first successful attempt.
 * @throws The last error raised by the task, or a generic Error when no
 *         attempt produced one (for example `attempts <= 0`).
 */
const retry = async <T>(
  label: string,
  attempts: number,
  delayMs: number,
  task: () => Promise<T>
): Promise<T> => {
  const pause = (): Promise<unknown> => new Promise((resolve) => setTimeout(resolve, delayMs));

  let failure: unknown;
  let attempt = 0;
  while (attempt < attempts) {
    attempt += 1;
    try {
      return await task();
    } catch (error) {
      failure = error;
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn(`${label} attempt failed`, { attempt, error: reason });
    }
    if (attempt < attempts) {
      await pause();
    }
  }

  throw failure ?? new Error(`${label} failed after retries`);
};
process.on("SIGINT", () => shutdown("SIGINT"));
process.on("SIGTERM", () => shutdown("SIGTERM"));
/**
 * Applies the configured deliver policy to a durable consumer options object
 * by calling the matching `deliver*` method on it.
 *
 * @param opts   Consumer options produced by buildDurableConsumer; mutated via its own methods.
 * @param policy One of the CANDLE_DELIVER_POLICY values; "new" and any
 *               unrecognised value select `deliverNew`.
 */
const applyDeliverPolicy = (
  opts: ReturnType<typeof buildDurableConsumer>,
  policy: typeof env.CANDLE_DELIVER_POLICY
) => {
  if (policy === "all") {
    opts.deliverAll();
    return;
  }
  if (policy === "last") {
    opts.deliverLast();
    return;
  }
  if (policy === "last_per_subject") {
    opts.deliverLastPerSubject();
    return;
  }
  // "new" is also the fallback for anything not matched above.
  opts.deliverNew();
};
// Keep the process alive until real listeners are wired.
setInterval(() => {}, 60_000);
/**
 * Creates a Redis client for the given URL. The client is only constructed
 * here; connecting is left to the caller.
 */
const createRedisClient = (url: string) => createClient({ url });
/**
 * Builds the Redis key of the sorted set holding cached candles for one
 * underlying at one interval: `candles:equity:<intervalMs>:<underlyingId>`.
 */
const buildCacheKey = (underlyingId: string, intervalMs: number): string => {
  const suffix = `${intervalMs}:${underlyingId}`;
  return `candles:equity:${suffix}`;
};
/**
 * Adds a candle to its Redis sorted set (scored by candle timestamp) and trims
 * entries older than `cacheLimit` intervals, both queued on one MULTI.
 *
 * @param client     Redis client to use.
 * @param candle     Candle to cache; stored as its JSON serialization.
 * @param cacheLimit Number of intervals of history to retain; values <= 0 disable caching.
 */
const cacheCandle = async (
  client: ReturnType<typeof createClient>,
  candle: EquityCandle,
  cacheLimit: number
): Promise<void> => {
  if (cacheLimit <= 0) {
    return;
  }

  const { underlying_id: underlyingId, interval_ms: intervalMs, ts } = candle;
  const key = buildCacheKey(underlyingId, intervalMs);
  // Scores at or below this cutoff fall outside the retained history.
  const cutoff = Math.max(0, ts - intervalMs * cacheLimit);

  const pipeline = client.multi();
  pipeline.zAdd(key, { score: ts, value: JSON.stringify(candle) });
  if (cutoff > 0) {
    pipeline.zRemRangeByScore(key, 0, cutoff);
  }
  await pipeline.exec();
};
/**
 * Delivers one finished candle to its three sinks, in order: ClickHouse
 * (persist), JetStream (publish) and Redis (hot cache).
 *
 * A persistence failure is logged and aborts the remaining steps. Publish and
 * cache failures are logged and counted but do not stop the function. The
 * function itself never rejects because of a sink error.
 *
 * @param clickhouse ClickHouse client used for the insert.
 * @param js         JetStream handle used for the publish.
 * @param redis      Redis client, or null when the cache is unavailable; skipped unless open.
 * @param candle     Candle to emit.
 * @param cacheLimit Forwarded to cacheCandle.
 */
const emitCandle = async (
  clickhouse: ReturnType<typeof createClickHouseClient>,
  js: Awaited<ReturnType<typeof connectJetStreamWithRetry>>["js"],
  redis: ReturnType<typeof createClient> | null,
  candle: EquityCandle,
  cacheLimit: number
): Promise<void> => {
  // Shared log context for every failure branch below.
  const failureContext = (error: unknown) => ({
    error: error instanceof Error ? error.message : String(error),
    trace_id: candle.trace_id,
    underlying_id: candle.underlying_id,
    interval_ms: candle.interval_ms
  });

  try {
    await insertEquityCandle(clickhouse, candle);
  } catch (error) {
    metrics.count("candles.persist_failed", 1);
    logger.error("failed to persist candle", failureContext(error));
    return;
  }

  metrics.count("candles.emitted", 1, {
    interval_ms: String(candle.interval_ms)
  });

  try {
    await publishJson(js, SUBJECT_EQUITY_CANDLES, candle);
  } catch (error) {
    metrics.count("candles.publish_failed", 1);
    logger.error("failed to publish candle", failureContext(error));
  }

  if (!redis || !redis.isOpen) {
    return;
  }

  try {
    await cacheCandle(redis, candle, cacheLimit);
  } catch (error) {
    metrics.count("candles.cache_failed", 1);
    logger.warn("failed to cache candle", failureContext(error));
  }
};
/**
 * Service entry point. Wires the aggregator to its inputs and outputs:
 *
 *  1. parses the configured intervals and builds the CandleAggregator;
 *  2. connects to NATS JetStream and ensures the print and candle streams exist;
 *  3. creates the ClickHouse client and ensures the candle table exists;
 *  4. connects to Redis on a best-effort basis (the cache is skipped when unavailable);
 *  5. resets the durable consumer when requested or when its deliver policy changed;
 *  6. subscribes to equity prints and starts the consume loop;
 *  7. installs SIGINT/SIGTERM handlers that flush open candles and close connections.
 *
 * @throws When no valid interval is configured or a required dependency
 *         (NATS, ClickHouse table init, subscription) cannot be set up.
 */
const run = async () => {
  logger.info("service starting");

  // Renders any thrown value as a log-friendly string.
  const messageOf = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  const intervalsMs = parseIntervals(env.CANDLE_INTERVALS_MS, [1000, 5000, 60000]);
  if (intervalsMs.length === 0) {
    throw new Error("CANDLE_INTERVALS_MS produced no valid intervals");
  }

  const aggregator = new CandleAggregator({
    intervalsMs,
    maxLateMs: env.CANDLE_MAX_LATE_MS
  });

  const { nc, js, jsm } = await connectJetStreamWithRetry(
    {
      servers: env.NATS_URL,
      name: service
    },
    { attempts: 20, delayMs: 500 }
  );

  await ensureStream(jsm, {
    name: STREAM_EQUITY_PRINTS,
    subjects: [SUBJECT_EQUITY_PRINTS],
    retention: "limits",
    storage: "file",
    discard: "old",
    max_msgs_per_subject: -1,
    max_msgs: -1,
    max_bytes: -1,
    max_age: 0,
    num_replicas: 1
  });

  await ensureStream(jsm, {
    name: STREAM_EQUITY_CANDLES,
    subjects: [SUBJECT_EQUITY_CANDLES],
    retention: "limits",
    storage: "file",
    discard: "old",
    max_msgs_per_subject: -1,
    max_msgs: -1,
    max_bytes: -1,
    max_age: 0,
    num_replicas: 1
  });

  const clickhouse = createClickHouseClient({
    url: env.CLICKHOUSE_URL,
    database: env.CLICKHOUSE_DATABASE
  });

  await retry("clickhouse table init", 20, 500, async () => {
    await ensureEquityCandlesTable(clickhouse);
  });

  // Redis is optional: any failure here downgrades to "no hot cache".
  let redis: ReturnType<typeof createClient> | null = null;
  try {
    redis = createRedisClient(env.REDIS_URL);
    redis.on("error", (error) => {
      logger.warn("redis client error", {
        error: messageOf(error)
      });
    });
    await retry("redis connect", 20, 500, async () => {
      if (!redis) {
        return;
      }
      await redis.connect();
    });
  } catch (error) {
    logger.warn("redis unavailable, skipping hot cache", {
      error: messageOf(error)
    });
    redis = null;
  }

  const durableName = "candles-equity-prints";

  if (env.CANDLE_CONSUMER_RESET) {
    // Explicit reset requested: drop the durable so it is recreated on subscribe.
    try {
      await jsm.consumers.delete(STREAM_EQUITY_PRINTS, durableName);
      logger.warn("reset jetstream consumer", { durable: durableName });
    } catch (error) {
      const message = messageOf(error);
      if (!message.includes("not found")) {
        logger.warn("failed to reset jetstream consumer", { durable: durableName, error: message });
      }
    }
  } else {
    // Drop the existing durable when its recorded deliver policy differs from
    // the configured one, so it is recreated with the new policy on subscribe.
    try {
      const info = await jsm.consumers.info(STREAM_EQUITY_PRINTS, durableName);
      if (info?.config?.deliver_policy && info.config.deliver_policy !== env.CANDLE_DELIVER_POLICY) {
        logger.warn("resetting consumer due to deliver policy change", {
          durable: durableName,
          current: info.config.deliver_policy,
          desired: env.CANDLE_DELIVER_POLICY
        });
        await jsm.consumers.delete(STREAM_EQUITY_PRINTS, durableName);
      }
    } catch (error) {
      const message = messageOf(error);
      if (!message.includes("not found")) {
        logger.warn("failed to inspect jetstream consumer", { durable: durableName, error: message });
      }
    }
  }

  // Subscribes with the durable; on a known consumer-mismatch error the
  // durable is deleted and the subscription is attempted once more.
  const subscribeWithReset = async () => {
    const opts = buildDurableConsumer(durableName);
    applyDeliverPolicy(opts, env.CANDLE_DELIVER_POLICY);

    try {
      return await subscribeJson(js, SUBJECT_EQUITY_PRINTS, opts);
    } catch (error) {
      const message = messageOf(error);
      const shouldReset =
        message.includes("duplicate subscription") ||
        message.includes("durable requires") ||
        message.includes("subject does not match consumer");

      if (!shouldReset) {
        throw error;
      }

      logger.warn("resetting jetstream consumer", { durable: durableName, error: message });

      try {
        await jsm.consumers.delete(STREAM_EQUITY_PRINTS, durableName);
      } catch (deleteError) {
        const deleteMessage = messageOf(deleteError);
        if (!deleteMessage.includes("not found")) {
          logger.warn("failed to delete jetstream consumer", {
            durable: durableName,
            error: deleteMessage
          });
        }
      }

      const resetOpts = buildDurableConsumer(durableName);
      applyDeliverPolicy(resetOpts, env.CANDLE_DELIVER_POLICY);
      return await subscribeJson(js, SUBJECT_EQUITY_PRINTS, resetOpts);
    }
  };

  const subscription = await subscribeWithReset();

  // Late drops are accumulated and reported at most once every 5 seconds.
  let droppedLate = 0;
  let lastLateLog = Date.now();

  // Consume loop: validate each print, aggregate it, emit the candles it
  // closed, then ack. Any failure terminates the message instead of acking it.
  const loop = async () => {
    for await (const msg of subscription.messages) {
      try {
        const print = EquityPrintSchema.parse(subscription.decode(msg));
        metrics.count("candles.prints", 1);

        const result = aggregator.ingest(print);
        if (result.droppedLate > 0) {
          droppedLate += result.droppedLate;
          metrics.count("candles.prints_late", result.droppedLate);
          const now = Date.now();
          if (now - lastLateLog > 5000) {
            logger.warn("late equity prints dropped", { dropped: droppedLate });
            droppedLate = 0;
            lastLateLog = now;
          }
        }

        for (const candle of result.emitted) {
          const validated = EquityCandleSchema.parse(candle);
          await emitCandle(clickhouse, js, redis, validated, env.CANDLE_CACHE_LIMIT);
        }

        msg.ack();
      } catch (error) {
        metrics.count("candles.prints_failed", 1);
        logger.error("failed to process equity print", {
          error: messageOf(error)
        });
        msg.term();
      }
    }
  };

  // Set on the first shutdown request so a repeated signal does not drain the
  // aggregator and the connections a second time.
  let shuttingDown = false;

  // Flushes still-open candles, closes every connection and exits. The exit
  // always happens, with code 1 when any cleanup step failed.
  const shutdown = async (signal: string): Promise<void> => {
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;
    logger.info("service stopping", { signal });

    let exitCode = 0;
    try {
      const remaining = aggregator.drain();
      for (const candle of remaining) {
        const validated = EquityCandleSchema.parse(candle);
        await emitCandle(clickhouse, js, redis, validated, env.CANDLE_CACHE_LIMIT);
      }

      if (redis && redis.isOpen) {
        await redis.quit();
      }

      await nc.drain();
      await clickhouse.close();
    } catch (error) {
      exitCode = 1;
      logger.error("shutdown failed", { signal, error: messageOf(error) });
    } finally {
      process.exit(exitCode);
    }
  };

  process.on("SIGINT", () => void shutdown("SIGINT"));
  process.on("SIGTERM", () => void shutdown("SIGTERM"));

  // The loop runs in the background. A rejection (for example the message
  // iterator failing) is logged here instead of surfacing as an unhandled
  // rejection. Outside of a shutdown it is fatal for the service.
  void loop().catch((error: unknown) => {
    logger.error("equity print loop terminated", { error: messageOf(error) });
    if (!shuttingDown) {
      process.exit(1);
    }
  });
};
// Start the service via top-level await. A rejection from run() (for example
// when no valid interval is configured) is not caught here.
await run();