Add flow packet clustering

This commit is contained in:
dirtydishes 2025-12-27 20:25:32 -05:00
parent a21d513f32
commit 6c376b26dc
8 changed files with 347 additions and 11 deletions

View file

@ -13,7 +13,9 @@ import {
import {
createClickHouseClient,
ensureEquityPrintsTable,
ensureFlowPacketsTable,
ensureOptionPrintsTable,
fetchRecentFlowPackets,
fetchEquityPrintsAfter,
fetchRecentEquityPrints,
fetchOptionPrintsAfter,
@ -141,6 +143,7 @@ const run = async () => {
await ensureOptionPrintsTable(clickhouse);
await ensureEquityPrintsTable(clickhouse);
await ensureFlowPacketsTable(clickhouse);
const optionSubscription = await subscribeJson(
js,
@ -208,6 +211,12 @@ const run = async () => {
return jsonResponse({ data });
}
if (req.method === "GET" && url.pathname === "/flow/packets") {
const limit = parseLimit(url.searchParams.get("limit"));
const data = await fetchRecentFlowPackets(clickhouse, limit);
return jsonResponse({ data });
}
if (req.method === "GET" && url.pathname === "/replay/options") {
const { afterTs, afterSeq, limit } = parseReplayParams(url);
const data = await fetchOptionPrintsAfter(clickhouse, afterTs, afterSeq, limit);

View file

@ -9,6 +9,7 @@
"@islandflow/bus": "workspace:*",
"@islandflow/config": "workspace:*",
"@islandflow/observability": "workspace:*",
"@islandflow/storage": "workspace:*",
"@islandflow/types": "workspace:*",
"zod": "^3.23.8"
}

View file

@ -1,25 +1,137 @@
import { readEnv } from "@islandflow/config";
import { createLogger } from "@islandflow/observability";
import {
SUBJECT_FLOW_PACKETS,
SUBJECT_OPTION_PRINTS,
STREAM_FLOW_PACKETS,
STREAM_OPTION_PRINTS,
buildDurableConsumer,
connectJetStreamWithRetry,
ensureStream,
publishJson,
subscribeJson
} from "@islandflow/bus";
import { OptionPrintSchema } from "@islandflow/types";
import {
createClickHouseClient,
ensureFlowPacketsTable,
insertFlowPacket
} from "@islandflow/storage";
import { FlowPacketSchema, OptionPrintSchema, type FlowPacket, type OptionPrint } from "@islandflow/types";
import { z } from "zod";
const service = "compute";
const logger = createLogger({ service });
const envSchema = z.object({
NATS_URL: z.string().default("nats://localhost:4222")
NATS_URL: z.string().default("nats://localhost:4222"),
CLICKHOUSE_URL: z.string().default("http://localhost:8123"),
CLICKHOUSE_DATABASE: z.string().default("default"),
CLUSTER_WINDOW_MS: z.coerce.number().int().positive().default(500)
});
const env = readEnv(envSchema);
type ClusterState = {
contractId: string;
startTs: number;
endTs: number;
startSourceTs: number;
endIngestTs: number;
endSeq: number;
members: string[];
totalSize: number;
totalPremium: number;
firstPrice: number;
lastPrice: number;
};
const clusters = new Map<string, ClusterState>();
const buildCluster = (print: OptionPrint): ClusterState => {
return {
contractId: print.option_contract_id,
startTs: print.ts,
endTs: print.ts,
startSourceTs: print.source_ts,
endIngestTs: print.ingest_ts,
endSeq: print.seq,
members: [print.trace_id],
totalSize: print.size,
totalPremium: print.price * print.size,
firstPrice: print.price,
lastPrice: print.price
};
};
const updateCluster = (cluster: ClusterState, print: OptionPrint): ClusterState => {
cluster.endTs = Math.max(cluster.endTs, print.ts);
cluster.endIngestTs = Math.max(cluster.endIngestTs, print.ingest_ts);
cluster.endSeq = Math.max(cluster.endSeq, print.seq);
cluster.members.push(print.trace_id);
cluster.totalSize += print.size;
cluster.totalPremium += print.price * print.size;
cluster.lastPrice = print.price;
return cluster;
};
const flushCluster = async (
clickhouse: ReturnType<typeof createClickHouseClient>,
js: Awaited<ReturnType<typeof connectJetStreamWithRetry>>["js"],
cluster: ClusterState
): Promise<void> => {
const features = {
option_contract_id: cluster.contractId,
count: cluster.members.length,
total_size: cluster.totalSize,
total_premium: Number(cluster.totalPremium.toFixed(4)),
first_price: cluster.firstPrice,
last_price: cluster.lastPrice,
start_ts: cluster.startTs,
end_ts: cluster.endTs,
window_ms: env.CLUSTER_WINDOW_MS
};
const packet: FlowPacket = {
source_ts: cluster.startSourceTs,
ingest_ts: cluster.endIngestTs,
seq: cluster.endSeq,
trace_id: `flowpacket:${cluster.contractId}:${cluster.startTs}:${cluster.endTs}`,
id: `flowpacket:${cluster.contractId}:${cluster.startTs}:${cluster.endTs}`,
members: cluster.members,
features,
join_quality: {}
};
const validated = FlowPacketSchema.parse(packet);
await insertFlowPacket(clickhouse, validated);
await publishJson(js, SUBJECT_FLOW_PACKETS, validated);
logger.info("emitted flow packet", {
id: validated.id,
contract: cluster.contractId,
count: cluster.members.length
});
};
const flushEligibleClusters = async (
clickhouse: ReturnType<typeof createClickHouseClient>,
js: Awaited<ReturnType<typeof connectJetStreamWithRetry>>["js"],
currentTs: number,
skipContractId: string
): Promise<void> => {
for (const [contractId, cluster] of clusters) {
if (contractId === skipContractId) {
continue;
}
if (currentTs - cluster.endTs > env.CLUSTER_WINDOW_MS) {
clusters.delete(contractId);
await flushCluster(clickhouse, js, cluster);
}
}
};
const run = async () => {
logger.info("service starting");
@ -44,13 +156,42 @@ const run = async () => {
num_replicas: 1
});
const opts = buildDurableConsumer("compute-option-prints");
await ensureStream(jsm, {
name: STREAM_FLOW_PACKETS,
subjects: [SUBJECT_FLOW_PACKETS],
retention: "limits",
storage: "file",
discard: "old",
max_msgs_per_subject: -1,
max_msgs: -1,
max_bytes: -1,
max_age: 0,
num_replicas: 1
});
const subscription = await subscribeJson(js, SUBJECT_OPTION_PRINTS, opts);
const clickhouse = createClickHouseClient({
url: env.CLICKHOUSE_URL,
database: env.CLICKHOUSE_DATABASE
});
await ensureFlowPacketsTable(clickhouse);
const subscription = await subscribeJson(
js,
SUBJECT_OPTION_PRINTS,
buildDurableConsumer("compute-option-prints")
);
const shutdown = async (signal: string) => {
logger.info("service stopping", { signal });
for (const cluster of clusters.values()) {
await flushCluster(clickhouse, js, cluster);
}
clusters.clear();
await nc.drain();
await clickhouse.close();
process.exit(0);
};
@ -60,11 +201,19 @@ const run = async () => {
for await (const msg of subscription.messages) {
try {
const print = OptionPrintSchema.parse(subscription.decode(msg));
logger.info("received option print", {
trace_id: print.trace_id,
seq: print.seq,
option_contract_id: print.option_contract_id
});
await flushEligibleClusters(clickhouse, js, print.ts, print.option_contract_id);
const existing = clusters.get(print.option_contract_id);
if (!existing) {
clusters.set(print.option_contract_id, buildCluster(print));
} else if (print.ts - existing.startTs <= env.CLUSTER_WINDOW_MS) {
updateCluster(existing, print);
} else {
clusters.delete(print.option_contract_id);
await flushCluster(clickhouse, js, existing);
clusters.set(print.option_contract_id, buildCluster(print));
}
msg.ack();
} catch (error) {
logger.error("failed to process option print", {