Add dark inference pipeline

This commit is contained in:
dirtydishes 2026-01-04 17:29:21 -05:00
parent 3164167bee
commit ea61c3b013
9 changed files with 699 additions and 1 deletions

View file

@ -0,0 +1,243 @@
import type { EquityPrintJoin, InferredDarkEvent } from "@islandflow/types";
/**
 * Tunable thresholds for the dark-pool inference rules applied by
 * `evaluateDarkInferences`.
 */
export type DarkInferenceConfig = {
/** Rolling evidence window: directional prints whose timestamp lags the current print by more than this are discarded. */
windowMs: number;
/** Minimum gap between two emissions of the same event type for the same underlying. */
cooldownMs: number;
/** Minimum print size for an `absorbed_block` event; its size score saturates at twice this value. */
minBlockSize: number;
/** Minimum summed size of same-direction evidence for `stealth_accumulation` / `distribution`; the size score saturates at twice this value. */
minAccumulationSize: number;
/** Minimum number of same-direction evidence prints for `stealth_accumulation` / `distribution`; the count score saturates at twice this value. */
minAccumulationCount: number;
/** Minimum size for a directional print to be recorded as evidence. */
minPrintSize: number;
/** Cap on how many of the most recent evidence ids are attached to an accumulation/distribution event. */
maxEvidence: number;
/** Largest quote spread, expressed as a fraction of the quote mid, that still counts as good quality. */
maxSpreadPct: number;
/** Largest `quote_age_ms` that still counts as good quality; a missing age is treated as exceeding this limit. */
maxQuoteAgeMs: number;
};
/** One directional print retained in the per-underlying rolling window. */
type Evidence = {
/** Id of the equity print join this evidence was taken from. */
id: string;
/** Timestamp used for windowing: the join's `source_ts`, or 0 when that value is not finite. */
ts: number;
/** Print size as read from the join features. */
size: number;
/** Quote placement code of the print (only buy/sell placements are recorded). */
placement: string;
/** Off-exchange flag of the print; evidence is only recorded when this is true. */
offExchange: boolean;
};
/** Mutable state carried between calls to `evaluateDarkInferences`. */
export type DarkInferenceState = {
/** Underlying id -> directional evidence currently inside the rolling window. */
evidenceByUnderlying: Map<string, Evidence[]>;
/** Underlying id -> (event type -> timestamp of the last emission), used for cooldowns. */
lastEmittedByUnderlying: Map<string, Record<string, number>>;
};
/**
 * Clamps a score into the closed interval [0, 1].
 *
 * Non-finite input (NaN, +/-Infinity) collapses to 0.
 */
const clamp01 = (value: number): number => {
  if (!Number.isFinite(value) || value <= 0) {
    return 0;
  }
  return value >= 1 ? 1 : value;
};
/**
 * Reads a finite number out of a loosely-typed feature value.
 *
 * Accepts finite numbers and numeric strings. Everything else yields `null`,
 * including NaN/Infinity and non-numeric strings.
 *
 * @param value - Raw value taken from a features / join-quality record.
 * @returns The finite number, or `null` when no number can be read.
 */
const getNumber = (value: unknown): number | null => {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }
  // `Number("")` and `Number("   ")` are 0, which would turn a blank field into
  // a legitimate-looking zero (e.g. a zero quote age). Treat blanks as missing.
  if (typeof value === "string" && value.trim() !== "") {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
};
/** Returns the value unchanged when it is a string, otherwise `null`. */
const getString = (value: unknown): string | null =>
  typeof value === "string" ? value : null;
/**
 * Reads a boolean flag out of a loosely-typed feature value.
 *
 * - booleans are returned as-is;
 * - numbers are true when non-zero, except NaN which is treated as unreadable;
 * - strings are matched case-insensitively (after trimming) against
 *   true/1/yes/on and false/0/no/off.
 *
 * @param value - Raw value taken from a features record.
 * @returns The flag, or `null` when the value cannot be read as a boolean.
 */
const getBoolean = (value: unknown): boolean | null => {
  switch (typeof value) {
    case "boolean":
      return value;
    case "number":
      // `NaN !== 0` is true, so without this guard a NaN flag would read as `true`.
      return Number.isNaN(value) ? null : value !== 0;
    case "string":
      switch (value.trim().toLowerCase()) {
        case "true":
        case "1":
        case "yes":
        case "on":
          return true;
        case "false":
        case "0":
        case "no":
        case "off":
          return false;
        default:
          return null;
      }
    default:
      return null;
  }
};
/** True when the placement code is one of the buy-side codes ("A" or "AA"). */
const isBuyPlacement = (placement: string): boolean =>
  ["A", "AA"].includes(placement);
/** True when the placement code is one of the sell-side codes ("B" or "BB"). */
const isSellPlacement = (placement: string): boolean =>
  ["B", "BB"].includes(placement);
/**
 * Computes the quote spread as a fraction of the quote mid.
 *
 * @param features - Join features; reads `quote_spread` and `quote_mid`.
 * @returns `quote_spread / quote_mid`, or `null` when either value is
 *   unreadable or the mid is not strictly positive.
 */
const getSpreadPct = (features: Record<string, unknown>): number | null => {
  const quoteSpread = getNumber(features.quote_spread);
  const quoteMid = getNumber(features.quote_mid);
  if (quoteSpread !== null && quoteMid !== null && quoteMid > 0) {
    return quoteSpread / quoteMid;
  }
  return null;
};
/**
 * Creates a fresh, empty inference state: no windowed evidence and no
 * cooldown bookkeeping for any underlying.
 */
export const createDarkInferenceState = (): DarkInferenceState => ({
  evidenceByUnderlying: new Map(),
  lastEmittedByUnderlying: new Map()
});
/**
 * Cooldown gate for one (underlying, event type) pair.
 *
 * Returns `false` while fewer than `cooldownMs` have elapsed since the last
 * emission of `type` for `underlyingId`. Otherwise records `ts` as the new
 * last-emission time and returns `true`. State is only written on success.
 */
const shouldEmit = (
  state: DarkInferenceState,
  underlyingId: string,
  type: string,
  ts: number,
  cooldownMs: number
): boolean => {
  const emittedAt = state.lastEmittedByUnderlying.get(underlyingId) ?? {};
  // A type that has never fired behaves as if it last fired infinitely long ago.
  const previous = emittedAt[type] ?? -Infinity;
  const coolingDown = ts - previous < cooldownMs;
  if (coolingDown) {
    return false;
  }
  emittedAt[type] = ts;
  state.lastEmittedByUnderlying.set(underlyingId, emittedAt);
  return true;
};
/**
 * Builds a `stealth_accumulation` or `distribution` event from the
 * same-direction evidence currently inside the rolling window.
 *
 * Confidence starts at 0.3 and grows with total size (weight 0.4) and print
 * count (weight 0.3); each ratio saturates at twice its configured minimum.
 */
const buildDirectionalEvent = (
  join: EquityPrintJoin,
  config: DarkInferenceConfig,
  type: InferredDarkEvent["type"],
  underlyingId: string,
  ts: number,
  entries: ReadonlyArray<{ id: string }>,
  totalSize: number
): InferredDarkEvent => {
  const sizeRatio = Math.min(1, totalSize / (config.minAccumulationSize * 2));
  const countRatio = Math.min(1, entries.length / (config.minAccumulationCount * 2));
  return {
    source_ts: join.source_ts,
    ingest_ts: join.ingest_ts,
    seq: join.seq,
    trace_id: `dark:${type}:${underlyingId}:${ts}`,
    type,
    confidence: clamp01(0.3 + sizeRatio * 0.4 + countRatio * 0.3),
    // Only the most recent `maxEvidence` prints are referenced.
    evidence_refs: entries.slice(-config.maxEvidence).map((entry) => entry.id)
  };
};

/**
 * Applies the dark-pool inference rules to a single equity print join.
 *
 * Rules (all require an off-exchange print with a good-quality quote):
 * - `absorbed_block`: a print at the quote mid whose size is at least
 *   `minBlockSize`.
 * - `stealth_accumulation` / `distribution`: buy-side (A/AA) or sell-side
 *   (B/BB) prints of at least `minPrintSize` are collected per underlying in a
 *   rolling `windowMs` window. An event fires once the same-direction prints
 *   reach both `minAccumulationCount` and `minAccumulationSize`.
 *
 * Every event type is rate-limited per underlying by `cooldownMs`.
 *
 * @param join - The equity print join to evaluate.
 * @param config - Rule thresholds.
 * @param state - Mutable state; its evidence window and cooldown bookkeeping
 *   are updated in place.
 * @returns The events inferred from this join (possibly empty).
 */
export const evaluateDarkInferences = (
  join: EquityPrintJoin,
  config: DarkInferenceConfig,
  state: DarkInferenceState
): InferredDarkEvent[] => {
  const features = join.features ?? {};
  const joinQuality = join.join_quality ?? {};

  const underlyingId = getString(features.underlying_id);
  if (!underlyingId) {
    return [];
  }
  const size = getNumber(features.size);
  if (size === null) {
    return [];
  }

  const placement = getString(features.quote_placement) ?? "MISSING";
  const offExchange = getBoolean(features.off_exchange_flag) ?? false;
  // `ts` drives windowing and cooldowns only; events carry the join's own source_ts.
  const ts = Number.isFinite(join.source_ts) ? join.source_ts : 0;

  // A missing quote age is treated as just past the limit, i.e. not good quality.
  const quoteAgeMs = getNumber(joinQuality.quote_age_ms) ?? config.maxQuoteAgeMs + 1;
  const quoteMissing = getNumber(joinQuality.quote_missing) === 1;
  const quoteStale = getNumber(joinQuality.quote_stale) === 1;
  const spreadPct = getSpreadPct(features);
  // An unknown spread does not disqualify the quote; a known one must be within the limit.
  const goodQuality =
    !quoteMissing &&
    !quoteStale &&
    quoteAgeMs <= config.maxQuoteAgeMs &&
    (spreadPct === null || spreadPct <= config.maxSpreadPct);

  const events: InferredDarkEvent[] = [];

  // Both rule families need an off-exchange print with a trustworthy quote.
  if (!offExchange || !goodQuality) {
    return events;
  }

  // The cooldown check is last so cooldown state is only consumed when the rule matches.
  if (
    placement === "MID" &&
    size >= config.minBlockSize &&
    shouldEmit(state, underlyingId, "absorbed_block", ts, config.cooldownMs)
  ) {
    const sizeRatio = Math.min(1, size / (config.minBlockSize * 2));
    const spreadScore =
      spreadPct === null || spreadPct <= 0 ? 0.5 : Math.max(0, 1 - spreadPct / config.maxSpreadPct);
    events.push({
      source_ts: join.source_ts,
      ingest_ts: join.ingest_ts,
      seq: join.seq,
      trace_id: `dark:absorbed_block:${join.id}`,
      type: "absorbed_block",
      confidence: clamp01(0.35 + sizeRatio * 0.45 + spreadScore * 0.2),
      evidence_refs: [join.id]
    });
  }

  if (size >= config.minPrintSize && (isBuyPlacement(placement) || isSellPlacement(placement))) {
    // Append the current print, then drop whatever has fallen out of the window.
    const windowed = [
      ...(state.evidenceByUnderlying.get(underlyingId) ?? []),
      { id: join.id, ts, size, placement, offExchange }
    ].filter((entry) => ts - entry.ts <= config.windowMs);
    state.evidenceByUnderlying.set(underlyingId, windowed);

    // Buy side is evaluated before sell side, matching the emission order of the events.
    const directions = [
      { type: "stealth_accumulation", matches: isBuyPlacement },
      { type: "distribution", matches: isSellPlacement }
    ] as const;

    for (const { type, matches } of directions) {
      const entries = windowed.filter((entry) => matches(entry.placement));
      const totalSize = entries.reduce((sum, entry) => sum + entry.size, 0);
      if (
        entries.length >= config.minAccumulationCount &&
        totalSize >= config.minAccumulationSize &&
        shouldEmit(state, underlyingId, type, ts, config.cooldownMs)
      ) {
        events.push(
          buildDirectionalEvent(join, config, type, underlyingId, ts, entries, totalSize)
        );
      }
    }
  }

  return events;
};

View file

@ -6,6 +6,7 @@ import {
SUBJECT_EQUITY_JOINS,
SUBJECT_EQUITY_PRINTS,
SUBJECT_EQUITY_QUOTES,
SUBJECT_INFERRED_DARK,
SUBJECT_FLOW_PACKETS,
SUBJECT_OPTION_NBBO,
SUBJECT_OPTION_PRINTS,
@ -14,6 +15,7 @@ import {
STREAM_EQUITY_JOINS,
STREAM_EQUITY_PRINTS,
STREAM_EQUITY_QUOTES,
STREAM_INFERRED_DARK,
STREAM_FLOW_PACKETS,
STREAM_OPTION_NBBO,
STREAM_OPTION_PRINTS,
@ -28,10 +30,12 @@ import {
ensureAlertsTable,
ensureClassifierHitsTable,
ensureEquityPrintJoinsTable,
ensureInferredDarkTable,
ensureFlowPacketsTable,
insertAlert,
insertClassifierHit,
insertEquityPrintJoin,
insertInferredDark,
insertFlowPacket
} from "@islandflow/storage";
import {
@ -40,6 +44,7 @@ import {
EquityPrintJoinSchema,
EquityPrintSchema,
EquityQuoteSchema,
InferredDarkEventSchema,
FlowPacketSchema,
OptionNBBOSchema,
OptionPrintSchema,
@ -48,6 +53,7 @@ import {
type EquityPrint,
type EquityQuote,
type EquityPrintJoin,
type InferredDarkEvent,
type FlowPacket,
type OptionNBBO,
type OptionPrint
@ -55,6 +61,11 @@ import {
import { z } from "zod";
import { evaluateClassifiers, type ClassifierConfig } from "./classifiers";
import { parseContractId } from "./contracts";
import {
createDarkInferenceState,
evaluateDarkInferences,
type DarkInferenceConfig
} from "./dark-inference";
import { buildEquityPrintJoin, type EquityQuoteJoin } from "./equity-joins";
import { createRedisClient, updateRollingStats, type RollingStatsConfig } from "./rolling-stats";
import { summarizeStructure, type ContractLeg } from "./structures";
@ -87,6 +98,14 @@ const envSchema = z.object({
.default(false),
NBBO_MAX_AGE_MS: z.coerce.number().int().positive().default(1000),
EQUITY_QUOTE_MAX_AGE_MS: z.coerce.number().int().positive().default(1000),
DARK_INFER_WINDOW_MS: z.coerce.number().int().positive().default(60000),
DARK_INFER_COOLDOWN_MS: z.coerce.number().int().nonnegative().default(30000),
DARK_INFER_MIN_BLOCK_SIZE: z.coerce.number().int().positive().default(2000),
DARK_INFER_MIN_ACCUM_SIZE: z.coerce.number().int().positive().default(3000),
DARK_INFER_MIN_ACCUM_COUNT: z.coerce.number().int().positive().default(4),
DARK_INFER_MIN_PRINT_SIZE: z.coerce.number().int().positive().default(200),
DARK_INFER_MAX_EVIDENCE: z.coerce.number().int().positive().default(20),
DARK_INFER_MAX_SPREAD_PCT: z.coerce.number().positive().default(0.005),
CLASSIFIER_SWEEP_MIN_PREMIUM: z.coerce.number().positive().default(40_000),
CLASSIFIER_SWEEP_MIN_COUNT: z.coerce.number().int().positive().default(3),
CLASSIFIER_SWEEP_MIN_PREMIUM_Z: z.coerce.number().nonnegative().default(2),
@ -114,6 +133,18 @@ const classifierConfig: ClassifierConfig = {
minAggressorRatio: env.CLASSIFIER_MIN_AGGRESSOR_RATIO
};
// Dark-inference thresholds, taken from the validated DARK_INFER_* environment values.
const darkInferenceConfig: DarkInferenceConfig = {
windowMs: env.DARK_INFER_WINDOW_MS,
cooldownMs: env.DARK_INFER_COOLDOWN_MS,
minBlockSize: env.DARK_INFER_MIN_BLOCK_SIZE,
minAccumulationSize: env.DARK_INFER_MIN_ACCUM_SIZE,
minAccumulationCount: env.DARK_INFER_MIN_ACCUM_COUNT,
minPrintSize: env.DARK_INFER_MIN_PRINT_SIZE,
maxEvidence: env.DARK_INFER_MAX_EVIDENCE,
maxSpreadPct: env.DARK_INFER_MAX_SPREAD_PCT,
// No dedicated DARK_INFER_* setting: the quote-age limit reuses EQUITY_QUOTE_MAX_AGE_MS.
maxQuoteAgeMs: env.EQUITY_QUOTE_MAX_AGE_MS
};
const retry = async <T>(
label: string,
attempts: number,
@ -178,6 +209,7 @@ type ClusterState = {
const clusters = new Map<string, ClusterState>();
const nbboCache = new Map<string, OptionNBBO>();
const equityQuoteCache = new Map<string, EquityQuote>();
// Module-level dark-inference state (evidence window + cooldowns), handed to evaluateDarkInferences for each equity join.
const darkInferenceState = createDarkInferenceState();
const recentLegsByKey = new Map<string, ContractLeg[]>();
const MAX_RECENT_LEGS = 20;
@ -658,12 +690,43 @@ const emitEquityJoin = async (
try {
await insertEquityPrintJoin(clickhouse, payload);
await publishJson(js, SUBJECT_EQUITY_JOINS, payload);
} catch (error) {
logger.error("failed to emit equity print join", {
error: error instanceof Error ? error.message : String(error),
trace_id: payload.trace_id
});
return;
}
try {
await publishJson(js, SUBJECT_EQUITY_JOINS, payload);
} catch (error) {
logger.error("failed to publish equity print join", {
error: error instanceof Error ? error.message : String(error),
trace_id: payload.trace_id
});
}
await emitDarkInferences(clickhouse, js, payload);
};
/**
 * Evaluates the dark-inference rules for an equity print join, then stores and
 * publishes every resulting event.
 *
 * Emission is best-effort per event. A failure on one event (schema
 * validation, ClickHouse insert or JetStream publish) is logged and does not
 * stop the remaining events or propagate to the caller. An event is published
 * only after it has been inserted successfully.
 *
 * @param clickhouse - ClickHouse client used to persist events.
 * @param js - JetStream client used to publish events.
 * @param join - The equity print join that was just emitted.
 */
const emitDarkInferences = async (
  clickhouse: ReturnType<typeof createClickHouseClient>,
  js: Awaited<ReturnType<typeof connectJetStreamWithRetry>>["js"],
  join: EquityPrintJoin
): Promise<void> => {
  const events = evaluateDarkInferences(join, darkInferenceConfig, darkInferenceState);
  for (const event of events) {
    try {
      // Validation sits inside the try so an invalid event is logged and skipped
      // like any other emit failure, instead of throwing out of the loop.
      const validated: InferredDarkEvent = InferredDarkEventSchema.parse(event);
      await insertInferredDark(clickhouse, validated);
      await publishJson(js, SUBJECT_INFERRED_DARK, validated);
    } catch (error) {
      logger.error("failed to emit inferred dark event", {
        error: error instanceof Error ? error.message : String(error),
        trace_id: event.trace_id
      });
    }
  }
};
@ -776,6 +839,19 @@ const run = async () => {
num_replicas: 1
});
await ensureStream(jsm, {
name: STREAM_INFERRED_DARK,
subjects: [SUBJECT_INFERRED_DARK],
retention: "limits",
storage: "file",
discard: "old",
max_msgs_per_subject: -1,
max_msgs: -1,
max_bytes: -1,
max_age: 0,
num_replicas: 1
});
await ensureStream(jsm, {
name: STREAM_CLASSIFIER_HITS,
subjects: [SUBJECT_CLASSIFIER_HITS],
@ -824,6 +900,7 @@ const run = async () => {
await retry("clickhouse table init", 20, 500, async () => {
await ensureFlowPacketsTable(clickhouse);
await ensureEquityPrintJoinsTable(clickhouse);
await ensureInferredDarkTable(clickhouse);
await ensureClassifierHitsTable(clickhouse);
await ensureAlertsTable(clickhouse);
});

View file

@ -0,0 +1,119 @@
import { describe, expect, it } from "bun:test";
import {
createDarkInferenceState,
evaluateDarkInferences,
type DarkInferenceConfig
} from "../src/dark-inference";
// Rule thresholds shared by every test in this file.
const config: DarkInferenceConfig = {
windowMs: 60_000,
cooldownMs: 30_000,
minBlockSize: 1000,
minAccumulationSize: 2000,
minAccumulationCount: 3,
minPrintSize: 200,
maxEvidence: 5,
maxSpreadPct: 0.01,
maxQuoteAgeMs: 1000
};
// Baseline join that satisfies the absorbed-block rule under `config`:
// off-exchange, placed at MID, size 1200 (>= minBlockSize), quote age 5 ms,
// and spread 0.1 / mid 100 = 0.001 (<= maxSpreadPct). Tests override fields as needed.
const baseJoin = {
source_ts: 1_000,
ingest_ts: 1_010,
seq: 1,
trace_id: "equityjoin:print-1",
id: "equityjoin:print-1",
print_trace_id: "print-1",
quote_trace_id: "quote-1",
features: {
underlying_id: "SPY",
price: 100,
size: 1200,
off_exchange_flag: true,
print_ts: 1_000,
quote_placement: "MID",
quote_mid: 100,
quote_spread: 0.1
},
join_quality: {
quote_age_ms: 5
}
};
describe("dark inference rules", () => {
  // Builds three same-direction prints, 500 ms apart, derived from baseJoin.
  const buildDirectionalJoins = (
    label: string,
    firstSeq: number,
    firstTs: number,
    size: number,
    placement: string
  ) =>
    Array.from({ length: 3 }, (_, offset) => {
      const id = `equityjoin:${label}-${offset}`;
      return {
        ...baseJoin,
        id,
        trace_id: id,
        seq: firstSeq + offset,
        source_ts: firstTs + offset * 500,
        features: {
          ...baseJoin.features,
          size,
          quote_placement: placement
        }
      };
    });

  // Feeds the joins, in order, through one fresh state and collects every event.
  const replay = (joins: Array<typeof baseJoin>) => {
    const state = createDarkInferenceState();
    return joins.flatMap((join) => evaluateDarkInferences(join, config, state));
  };

  it("emits absorbed block on large off-exchange mid prints", () => {
    const emitted = evaluateDarkInferences(baseJoin, config, createDarkInferenceState());

    expect(emitted).toHaveLength(1);
    expect(emitted[0].type).toBe("absorbed_block");
    expect(emitted[0].evidence_refs).toEqual([baseJoin.id]);
  });

  it("skips absorbed block when quote is stale", () => {
    const staleJoin = {
      ...baseJoin,
      join_quality: {
        quote_age_ms: 5000,
        quote_stale: 1
      }
    };

    const emitted = evaluateDarkInferences(staleJoin, config, createDarkInferenceState());

    expect(emitted).toHaveLength(0);
  });

  it("emits stealth accumulation on repeated buy placements", () => {
    const emitted = replay(buildDirectionalJoins("buy", 10, 2_000, 800, "A"));

    const accumulation = emitted.find((event) => event.type === "stealth_accumulation");
    expect(accumulation).toBeDefined();
    expect(accumulation?.evidence_refs.length).toBeGreaterThan(0);
  });

  it("emits distribution on repeated sell placements", () => {
    const emitted = replay(buildDirectionalJoins("sell", 20, 3_000, 900, "B"));

    const distribution = emitted.find((event) => event.type === "distribution");
    expect(distribution).toBeDefined();
    expect(distribution?.evidence_refs.length).toBeGreaterThan(0);
  });

  it("respects cooldown windows", () => {
    const state = createDarkInferenceState();
    const followUp = {
      ...baseJoin,
      source_ts: baseJoin.source_ts + 1_000,
      seq: baseJoin.seq + 1
    };

    const first = evaluateDarkInferences(baseJoin, config, state);
    const second = evaluateDarkInferences(followUp, config, state);

    expect(first.length).toBeGreaterThan(0);
    expect(second.find((event) => event.type === "absorbed_block")).toBeUndefined();
  });
});