Add classifier hits and alerts pipeline

Add NATS subjects + ClickHouse tables for classifier hits/alerts, evaluate sweep/spike rules in compute, expose API/WS endpoints, and cover storage helpers with tests.
This commit is contained in:
dirtydishes 2025-12-29 15:59:37 -05:00
parent ad58c62c37
commit 58485b4d97
11 changed files with 861 additions and 8 deletions

View file

@ -0,0 +1,221 @@
import type { ClassifierHit, FlowPacket } from "@islandflow/types";
/**
 * Components extracted from an option contract identifier by the
 * contract-ID parsers in this module (dashed form and OCC-style form).
 */
type ParsedContract = {
/** Leading portion of the identifier, i.e. everything before the expiry. */
root: string;
/**
 * Expiry text. The OCC-style parser emits `YYYY-MM-DD`; the dashed parser
 * keeps the three dash-joined expiry segments exactly as written.
 */
expiry: string;
/** Numeric strike (the OCC-style parser divides its 8-digit field by 1000). */
strike: number;
/** "C" routes to the bullish call-sweep rule, "P" to the bearish put-sweep rule. */
right: "C" | "P";
};
/**
 * Thresholds consumed by the classifier rules in this module.
 * A rule only produces a hit when both of its thresholds are met.
 */
export type ClassifierConfig = {
/** Minimum `total_premium` feature value for a sweep hit. */
sweepMinPremium: number;
/** Minimum `count` feature value (number of prints) for a sweep hit. */
sweepMinCount: number;
/** Minimum `total_premium` feature value for a contract-spike hit. */
spikeMinPremium: number;
/** Minimum `total_size` feature value (contracts) for a contract-spike hit. */
spikeMinSize: number;
};
/**
 * Restrict `value` to the closed range [`min`, `max`] (defaults: 0 and 1).
 *
 * Any non-finite input (NaN or +/-Infinity) collapses to `min`.
 */
const clamp = (value: number, min = 0, max = 1): number =>
  Number.isFinite(value) ? Math.max(min, Math.min(max, value)) : min;
/**
 * Render a number as a dollar string with exactly two decimals,
 * e.g. `12.5` becomes `"$12.50"`.
 *
 * Non-finite inputs are rendered as the literal `"$0"`.
 */
const formatUsd = (value: number): string => {
  const isUsable = Number.isFinite(value);
  return isUsable ? "$" + value.toFixed(2) : "$0";
};
/**
 * Parse a dash-delimited contract identifier of the shape
 * `<root>-<e1>-<e2>-<e3>-<strike>-<right>`.
 *
 * Fields are read from the right-hand side, so the root itself may contain
 * dashes (for example `BRK-B-2025-01-17-450-C`).
 *
 * @param value Candidate contract identifier.
 * @returns The parsed parts, or `null` when the identifier has fewer than six
 *   segments, the right is not "C"/"P", the strike is blank or not a finite
 *   number, any expiry segment is blank, or the root is empty.
 */
const parseDashedContract = (value: string): ParsedContract | null => {
  const parts = value.split("-");
  if (parts.length < 6) {
    return null;
  }

  const rightRaw = parts[parts.length - 1];
  if (rightRaw !== "C" && rightRaw !== "P") {
    return null;
  }

  const strikeRaw = parts[parts.length - 2] ?? "";
  // Number("") and Number("  ") are both 0, so a blank strike segment must be
  // rejected explicitly instead of being accepted as a strike of zero.
  if (strikeRaw.trim() === "") {
    return null;
  }
  const strike = Number(strikeRaw);
  if (!Number.isFinite(strike)) {
    return null;
  }

  const expiryParts = parts.slice(-5, -2);
  // Joining three segments always yields at least "--", which is truthy, so
  // emptiness has to be checked per segment rather than on the joined text.
  if (expiryParts.some((segment) => segment === "")) {
    return null;
  }

  const root = parts.slice(0, -5).join("-");
  if (!root) {
    return null;
  }

  return {
    root,
    expiry: expiryParts.join("-"),
    strike,
    right: rightRaw
  };
};
/**
 * Parse an OCC-style contract identifier: a root followed by a fixed
 * 15-character tail of `YYMMDD`, a right of "C" or "P", and an 8-digit
 * strike expressed in thousandths.
 *
 * @param value Candidate contract identifier; whitespace around the root is trimmed.
 * @returns The parsed parts with `expiry` as `YYYY-MM-DD`, or `null` when the
 *   tail is malformed, the expiry is not a real calendar date, or the root is empty.
 */
const parseOccContract = (value: string): ParsedContract | null => {
  const TAIL_LENGTH = 15;
  if (value.length < TAIL_LENGTH) {
    return null;
  }

  const tail = value.slice(-TAIL_LENGTH);
  const root = value.slice(0, -TAIL_LENGTH).trim();
  const expiryRaw = tail.slice(0, 6);
  const right = tail.slice(6, 7);
  const strikeRaw = tail.slice(7);

  if (!/^\d{6}$/.test(expiryRaw) || !/^\d{8}$/.test(strikeRaw)) {
    return null;
  }
  if (right !== "C" && right !== "P") {
    return null;
  }

  const year = 2000 + Number(expiryRaw.slice(0, 2));
  const monthIndex = Number(expiryRaw.slice(2, 4)) - 1;
  const day = Number(expiryRaw.slice(4, 6));
  const expiryDate = new Date(Date.UTC(year, monthIndex, day));

  // Date.UTC rolls out-of-range fields over into neighbouring months/years
  // (month 13, day 00, Feb 30, ...). Require the date to round-trip so a
  // malformed expiry is rejected instead of becoming a different valid date.
  const isRealDate =
    expiryDate.getUTCFullYear() === year &&
    expiryDate.getUTCMonth() === monthIndex &&
    expiryDate.getUTCDate() === day;
  if (!isRealDate) {
    return null;
  }

  const strike = Number(strikeRaw) / 1000;
  if (!root || !Number.isFinite(strike)) {
    return null;
  }

  return {
    root,
    expiry: expiryDate.toISOString().slice(0, 10),
    strike,
    right
  };
};
/**
 * Parse a contract identifier, trying the dashed form first and the
 * OCC-style form second.
 *
 * @param value Identifier to parse; a missing or empty value yields `null`.
 * @returns The parsed contract, or `null` when neither parser accepts the value.
 */
const parseContractId = (value: string | undefined): ParsedContract | null => {
  if (value) {
    const dashed = parseDashedContract(value);
    return dashed ?? parseOccContract(value);
  }
  return null;
};
/**
 * Read a numeric entry from `packet.features`.
 *
 * @param packet Flow packet whose feature map is consulted.
 * @param key Feature name to look up.
 * @returns The stored value when it is a finite number; otherwise `0`
 *   (missing key, non-number value, NaN, or +/-Infinity).
 */
const getNumberFeature = (packet: FlowPacket, key: string): number => {
  const raw = packet.features[key];
  if (typeof raw !== "number" || !Number.isFinite(raw)) {
    return 0;
  }
  return raw;
};
/**
 * Evaluate the sweep rule for a flow packet.
 *
 * A hit requires the packet's `count` and `total_premium` features to reach
 * `config.sweepMinCount` and `config.sweepMinPremium`. Confidence starts at
 * 0.55 and gains 0.1 when the last price is not below the first, 0.1 when the
 * count exceeds the minimum by two or more, and 0.15 when the premium is at
 * least double the minimum; the result is capped at 0.95.
 *
 * @param packet Flow packet supplying the features.
 * @param contract Parsed contract for the packet (not read by this function).
 * @param direction "bullish" labels the hit a call sweep, "bearish" a put sweep.
 * @param config Thresholds for the rule.
 * @returns The classifier hit, or `null` when a threshold is not met.
 */
const buildSweepHit = (
  packet: FlowPacket,
  contract: ParsedContract,
  direction: "bullish" | "bearish",
  config: ClassifierConfig
): ClassifierHit | null => {
  const feature = (key: string): number => getNumberFeature(packet, key);

  const prints = feature("count");
  const premium = feature("total_premium");
  if (prints < config.sweepMinCount || premium < config.sweepMinPremium) {
    return null;
  }

  const contracts = feature("total_size");
  const elapsedMs = feature("window_ms");
  const priceDelta = feature("last_price") - feature("first_price");
  const priceHeld = priceDelta >= 0;

  // Bonuses are added one at a time, in a fixed order, on top of the base.
  const bonuses = [
    priceHeld ? 0.1 : 0,
    prints >= config.sweepMinCount + 2 ? 0.1 : 0,
    premium >= config.sweepMinPremium * 2 ? 0.15 : 0
  ];
  const confidence = clamp(
    bonuses.reduce((running, bonus) => running + bonus, 0.55),
    0,
    0.95
  );

  const isBullish = direction === "bullish";
  const optionKind = isBullish ? "call" : "put";
  const contractLabel = packet.features.option_contract_id ?? packet.id;
  const priceTrend = priceHeld ? "up" : "down";

  return {
    classifier_id: isBullish ? "large_bullish_call_sweep" : "large_bearish_put_sweep",
    confidence,
    direction,
    explanations: [
      `Likely ${optionKind} sweep: ${prints} prints in ${Math.round(elapsedMs)}ms for ${contractLabel}.`,
      `Premium ${formatUsd(premium)} across ${Math.round(contracts)} contracts; price ${priceTrend}.`,
      `Thresholds: >=${config.sweepMinCount} prints and >=${formatUsd(config.sweepMinPremium)} premium.`
    ]
  };
};
/**
 * Evaluate the unusual-contract-spike rule for a flow packet.
 *
 * A hit requires the packet's `total_size` and `total_premium` features to
 * reach `config.spikeMinSize` and `config.spikeMinPremium`. Confidence starts
 * at 0.5 and gains 0.15 when size is at least double the minimum, 0.15 when
 * premium is at least double the minimum, and 0.1 when there are three or
 * more prints; the result is capped at 0.9. The hit direction is "neutral".
 *
 * @param packet Flow packet supplying the features.
 * @param config Thresholds for the rule.
 * @returns The classifier hit, or `null` when a threshold is not met.
 */
const buildSpikeHit = (packet: FlowPacket, config: ClassifierConfig): ClassifierHit | null => {
  const feature = (key: string): number => getNumberFeature(packet, key);

  const contracts = feature("total_size");
  const premium = feature("total_premium");
  if (contracts < config.spikeMinSize || premium < config.spikeMinPremium) {
    return null;
  }

  const prints = feature("count");
  const elapsedMs = feature("window_ms");

  // Bonuses are added one at a time, in a fixed order, on top of the base.
  const bonuses = [
    contracts >= config.spikeMinSize * 2 ? 0.15 : 0,
    premium >= config.spikeMinPremium * 2 ? 0.15 : 0,
    prints >= 3 ? 0.1 : 0
  ];
  const confidence = clamp(
    bonuses.reduce((running, bonus) => running + bonus, 0.5),
    0,
    0.9
  );

  const contractLabel = packet.features.option_contract_id ?? packet.id;

  return {
    classifier_id: "unusual_contract_spike",
    confidence,
    direction: "neutral",
    explanations: [
      `Unusual contract spike: ${prints} prints in ${Math.round(elapsedMs)}ms for ${contractLabel}.`,
      `Premium ${formatUsd(premium)} across ${Math.round(contracts)} contracts.`,
      `Thresholds: >=${config.spikeMinSize} contracts and >=${formatUsd(config.spikeMinPremium)} premium.`
    ]
  };
};
/**
 * Run every classifier rule against a flow packet.
 *
 * The packet's `option_contract_id` feature (when it is a string) is parsed to
 * decide which sweep rule applies: right "C" is checked as a bullish sweep,
 * right "P" as a bearish sweep. The contract-spike rule is always evaluated,
 * whether or not the contract identifier could be parsed.
 *
 * @param packet Flow packet to classify.
 * @param config Thresholds for the rules.
 * @returns Hits in evaluation order: the sweep hit (if any), then the spike hit (if any).
 */
export const evaluateClassifiers = (
  packet: FlowPacket,
  config: ClassifierConfig
): ClassifierHit[] => {
  const sweepDirectionByRight = { C: "bullish", P: "bearish" } as const;

  const rawId = packet.features.option_contract_id;
  const contract = parseContractId(typeof rawId === "string" ? rawId : "");
  const sweepDirection = contract ? sweepDirectionByRight[contract.right] : undefined;

  const candidates: Array<ClassifierHit | null> = [
    contract && sweepDirection
      ? buildSweepHit(packet, contract, sweepDirection, config)
      : null,
    buildSpikeHit(packet, config)
  ];

  return candidates.filter((hit): hit is ClassifierHit => hit !== null);
};

View file

@ -1,8 +1,12 @@
import { readEnv } from "@islandflow/config";
import { createLogger } from "@islandflow/observability";
import {
SUBJECT_ALERTS,
SUBJECT_CLASSIFIER_HITS,
SUBJECT_FLOW_PACKETS,
SUBJECT_OPTION_PRINTS,
STREAM_ALERTS,
STREAM_CLASSIFIER_HITS,
STREAM_FLOW_PACKETS,
STREAM_OPTION_PRINTS,
buildDurableConsumer,
@ -13,11 +17,25 @@ import {
} from "@islandflow/bus";
import {
createClickHouseClient,
ensureAlertsTable,
ensureClassifierHitsTable,
ensureFlowPacketsTable,
insertAlert,
insertClassifierHit,
insertFlowPacket
} from "@islandflow/storage";
import { FlowPacketSchema, OptionPrintSchema, type FlowPacket, type OptionPrint } from "@islandflow/types";
import {
AlertEventSchema,
ClassifierHitEventSchema,
FlowPacketSchema,
OptionPrintSchema,
type AlertEvent,
type ClassifierHitEvent,
type FlowPacket,
type OptionPrint
} from "@islandflow/types";
import { z } from "zod";
import { evaluateClassifiers, type ClassifierConfig } from "./classifiers";
const service = "compute";
const logger = createLogger({ service });
@ -41,11 +59,22 @@ const envSchema = z.object({
}
return value;
}, z.boolean())
.default(false)
.default(false),
CLASSIFIER_SWEEP_MIN_PREMIUM: z.coerce.number().positive().default(50_000),
CLASSIFIER_SWEEP_MIN_COUNT: z.coerce.number().int().positive().default(3),
CLASSIFIER_SPIKE_MIN_PREMIUM: z.coerce.number().positive().default(25_000),
CLASSIFIER_SPIKE_MIN_SIZE: z.coerce.number().int().positive().default(500)
});
const env = readEnv(envSchema);
// Classifier thresholds taken from the parsed environment (`CLASSIFIER_*`
// variables); this object is what `emitClassifiers` hands to
// `evaluateClassifiers` for every flow packet.
const classifierConfig: ClassifierConfig = {
sweepMinPremium: env.CLASSIFIER_SWEEP_MIN_PREMIUM,
sweepMinCount: env.CLASSIFIER_SWEEP_MIN_COUNT,
spikeMinPremium: env.CLASSIFIER_SPIKE_MIN_PREMIUM,
spikeMinSize: env.CLASSIFIER_SPIKE_MIN_SIZE
};
const retry = async <T>(
label: string,
attempts: number,
@ -170,6 +199,8 @@ const flushCluster = async (
await insertFlowPacket(clickhouse, validated);
await publishJson(js, SUBJECT_FLOW_PACKETS, validated);
await emitClassifiers(clickhouse, js, validated);
logger.info("emitted flow packet", {
id: validated.id,
contract: cluster.contractId,
@ -177,6 +208,79 @@ const flushCluster = async (
});
};
/**
 * Derive an alert score (0-100) and severity label from a flow packet and
 * the classifier hits raised for it.
 *
 * The score is the sum of three parts, clamped to [0, 100]:
 *  - premium: one point per 1000 of `total_premium`, capped at 70;
 *  - confidence: the highest hit confidence times 20, rounded;
 *  - hit count: five points per hit, capped at 20.
 *
 * Non-finite inputs (a NaN or infinite premium, a NaN confidence) are treated
 * as absent so they cannot turn the whole score into NaN.
 *
 * @param packet Flow packet whose `total_premium` feature is read.
 * @param hits Classifier hits raised for the packet.
 * @returns `score` plus `severity`: "high" at 80 or above, "medium" at 45 or
 *   above, otherwise "low".
 */
const scoreAlert = (packet: FlowPacket, hits: ClassifierHitEvent[]): { score: number; severity: string } => {
  const rawPremium = packet.features.total_premium;
  // typeof NaN is "number", so a finiteness check is needed as well.
  const premium =
    typeof rawPremium === "number" && Number.isFinite(rawPremium) ? rawPremium : 0;
  const premiumScore = Math.min(70, Math.round(premium / 1000));

  // Math.max returns NaN if any operand is NaN; skip unusable confidences.
  const maxConfidence = hits.reduce(
    (max, hit) => (Number.isFinite(hit.confidence) ? Math.max(max, hit.confidence) : max),
    0
  );
  const confidenceScore = Math.round(maxConfidence * 20);

  const hitScore = Math.min(20, hits.length * 5);

  const score = Math.max(0, Math.min(100, premiumScore + confidenceScore + hitScore));
  const severity = score >= 80 ? "high" : score >= 45 ? "medium" : "low";
  return { score, severity };
};
/**
 * Evaluate the classifiers for a flow packet, then store and publish each
 * resulting hit followed by a single alert that summarises them.
 *
 * Nothing is emitted when no classifier fires. Each hit and the alert are
 * written with `insert*` and then published; a failure in either step is
 * logged and does not stop the remaining hits or the alert. The schema
 * `parse` calls sit outside those try/catch blocks, so any error they throw
 * propagates to the caller.
 *
 * @param clickhouse ClickHouse client used for the inserts.
 * @param js JetStream handle used for publishing.
 * @param packet Flow packet that was just emitted.
 */
const emitClassifiers = async (
  clickhouse: ReturnType<typeof createClickHouseClient>,
  js: Awaited<ReturnType<typeof connectJetStreamWithRetry>>["js"],
  packet: FlowPacket
): Promise<void> => {
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  const rawHits = evaluateClassifiers(packet, classifierConfig);
  if (rawHits.length === 0) {
    return;
  }

  // Validate every hit up front, before any of them is stored or published.
  const hitEvents: ClassifierHitEvent[] = [];
  for (const rawHit of rawHits) {
    hitEvents.push(
      ClassifierHitEventSchema.parse({
        source_ts: packet.source_ts,
        ingest_ts: packet.ingest_ts,
        seq: packet.seq,
        trace_id: `classifier:${rawHit.classifier_id}:${packet.id}`,
        ...rawHit
      })
    );
  }

  // Hits are handled one at a time; a failed hit is logged and skipped.
  for (const hitEvent of hitEvents) {
    try {
      await insertClassifierHit(clickhouse, hitEvent);
      await publishJson(js, SUBJECT_CLASSIFIER_HITS, hitEvent);
    } catch (error) {
      logger.error("failed to emit classifier hit", {
        error: describeError(error),
        classifier_id: hitEvent.classifier_id,
        packet_id: packet.id
      });
    }
  }

  const { score, severity } = scoreAlert(packet, hitEvents);
  const hitSummaries = hitEvents.map(
    ({ classifier_id, confidence, direction, explanations }) => ({
      classifier_id,
      confidence,
      direction,
      explanations
    })
  );
  const alert: AlertEvent = AlertEventSchema.parse({
    source_ts: packet.source_ts,
    ingest_ts: packet.ingest_ts,
    seq: packet.seq,
    trace_id: `alert:${packet.id}`,
    score,
    severity,
    hits: hitSummaries,
    evidence_refs: [packet.id, ...packet.members]
  });

  try {
    await insertAlert(clickhouse, alert);
    await publishJson(js, SUBJECT_ALERTS, alert);
  } catch (error) {
    logger.error("failed to emit alert", {
      error: describeError(error),
      packet_id: packet.id
    });
  }
};
const flushEligibleClusters = async (
clickhouse: ReturnType<typeof createClickHouseClient>,
js: Awaited<ReturnType<typeof connectJetStreamWithRetry>>["js"],
@ -232,6 +336,32 @@ const run = async () => {
num_replicas: 1
});
await ensureStream(jsm, {
name: STREAM_CLASSIFIER_HITS,
subjects: [SUBJECT_CLASSIFIER_HITS],
retention: "limits",
storage: "file",
discard: "old",
max_msgs_per_subject: -1,
max_msgs: -1,
max_bytes: -1,
max_age: 0,
num_replicas: 1
});
await ensureStream(jsm, {
name: STREAM_ALERTS,
subjects: [SUBJECT_ALERTS],
retention: "limits",
storage: "file",
discard: "old",
max_msgs_per_subject: -1,
max_msgs: -1,
max_bytes: -1,
max_age: 0,
num_replicas: 1
});
const clickhouse = createClickHouseClient({
url: env.CLICKHOUSE_URL,
database: env.CLICKHOUSE_DATABASE
@ -239,6 +369,8 @@ const run = async () => {
await retry("clickhouse table init", 20, 500, async () => {
await ensureFlowPacketsTable(clickhouse);
await ensureClassifierHitsTable(clickhouse);
await ensureAlertsTable(clickhouse);
});
const durableName = "compute-option-prints";