Add smart money replay evaluation harness
This commit is contained in:
parent
de6d25f046
commit
19a499d33c
4 changed files with 398 additions and 3 deletions
242
services/compute/src/smart-money-evaluation.ts
Normal file
242
services/compute/src/smart-money-evaluation.ts
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
import type { FlowPacket, SmartMoneyDirection, SmartMoneyEvent, SmartMoneyProfileId } from "@islandflow/types";
|
||||
import { buildSmartMoneyEventFromPacket, type SmartMoneyParentEventOptions } from "./parent-events";
|
||||
|
||||
/**
 * Reference label for a single smart-money event, used as ground truth by
 * `evaluateSmartMoneyEvents`.
 *
 * - `event_id` is the join key against `SmartMoneyEvent.event_id`.
 * - `profile_id` is compared with the event's `primary_profile_id`; `null` is an allowed value.
 * - `direction` is optional and is not read anywhere in this file.
 * - `realized_return_bps` is optional; only finite values are used by the economic-sanity summary.
 */
export type SmartMoneyLabel = {
|
||||
event_id: string;
|
||||
profile_id: SmartMoneyProfileId | null;
|
||||
direction?: Exclude<SmartMoneyDirection, "unknown">;
|
||||
realized_return_bps?: number;
|
||||
};
|
||||
|
||||
/**
 * One discrepancy found by `compareSmartMoneyReplayOutputs`.
 *
 * `field` says what went wrong for `event_id`:
 * - "missing_live": no live signature for this id (only `batch` is set),
 * - "missing_batch": no batch signature for this id (only `live` is set),
 * - "signature": both exist but their JSON-serialized signatures differ (both are set).
 */
export type ReplayConsistencyMismatch = {
|
||||
event_id: string;
|
||||
field: "missing_live" | "missing_batch" | "signature";
|
||||
live?: SmartMoneyEventSignature;
|
||||
batch?: SmartMoneyEventSignature;
|
||||
};
|
||||
|
||||
/**
 * Result of `compareSmartMoneyReplayOutputs`.
 *
 * `live_count` / `batch_count` are the lengths of the input arrays. `matched_count` is the
 * number of distinct event ids minus the number of mismatches. `consistent` is true exactly
 * when `mismatches` is empty.
 */
export type ReplayConsistencyReport = {
|
||||
live_count: number;
|
||||
batch_count: number;
|
||||
matched_count: number;
|
||||
mismatches: ReplayConsistencyMismatch[];
|
||||
consistent: boolean;
|
||||
};
|
||||
|
||||
/**
 * Reduced view of a `SmartMoneyEvent` used to compare two scoring runs.
 *
 * Produced by `smartMoneyEventSignature`, which copies these fields from the event, sorts
 * `suppressed_reasons`, and rounds each score's `probability` to 6 decimal places.
 */
export type SmartMoneyEventSignature = {
|
||||
event_id: string;
|
||||
primary_profile_id: SmartMoneyProfileId | null;
|
||||
primary_direction: SmartMoneyDirection;
|
||||
abstained: boolean;
|
||||
suppressed_reasons: string[];
|
||||
profile_scores: Array<{
|
||||
profile_id: SmartMoneyProfileId;
|
||||
probability: number;
|
||||
confidence_band: SmartMoneyEvent["profile_scores"][number]["confidence_band"];
|
||||
direction: SmartMoneyDirection;
|
||||
}>;
|
||||
};
|
||||
|
||||
/**
 * One probability bucket of the calibration table built by `buildCalibration`.
 *
 * `min_probability` / `max_probability` are the bucket bounds (index / bucketCount and
 * (index + 1) / bucketCount, rounded). `count` is the number of labeled events that fell into
 * the bucket, `average_probability` is their mean probability (0 for an empty bucket), and
 * `accuracy` is the share counted as correct, or `null` for an empty bucket.
 */
export type CalibrationBucket = {
|
||||
min_probability: number;
|
||||
max_probability: number;
|
||||
count: number;
|
||||
average_probability: number;
|
||||
accuracy: number | null;
|
||||
};
|
||||
|
||||
/**
 * Summary returned by `evaluateSmartMoneyEvents`.
 *
 * - `sample_count`: number of events passed in.
 * - `labeled_count`: events that have a label with the same `event_id`.
 * - `emitted_count`: events that are not abstained and have a primary profile.
 * - `abstained_count` / `abstention_rate`: abstained events, absolute and as a share of all events.
 * - `profile_precision` / `profile_recall`: per-profile values over labeled events; `null` when
 *   the denominator is zero.
 * - `calibration`: probability buckets, see `CalibrationBucket`.
 * - `economic_sanity`: direction-vs-realized-return summary over labeled events that have a
 *   bullish/bearish primary direction and a finite `realized_return_bps`.
 */
export type SmartMoneyEvaluationReport = {
|
||||
sample_count: number;
|
||||
labeled_count: number;
|
||||
emitted_count: number;
|
||||
abstained_count: number;
|
||||
abstention_rate: number;
|
||||
profile_precision: Partial<Record<SmartMoneyProfileId, number | null>>;
|
||||
profile_recall: Partial<Record<SmartMoneyProfileId, number | null>>;
|
||||
calibration: CalibrationBucket[];
|
||||
economic_sanity: {
|
||||
directional_count: number;
|
||||
direction_hit_rate: number | null;
|
||||
average_signed_return_bps: number | null;
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Profile ids for which `evaluateSmartMoneyEvents` reports precision and recall.
 * NOTE(review): presumably this mirrors every member of `SmartMoneyProfileId`; that type is
 * declared in `@islandflow/types`, so confirm the list stays in sync with it.
 */
const PROFILES: SmartMoneyProfileId[] = [
|
||||
"institutional_directional",
|
||||
"retail_whale",
|
||||
"event_driven",
|
||||
"vol_seller",
|
||||
"arbitrage",
|
||||
"hedge_reactive"
|
||||
];
|
||||
|
||||
/**
 * Converts a direction into a numeric sign.
 *
 * @param direction Direction reported for an event.
 * @returns `1` for "bullish", `-1` for "bearish", and `0` for every other value.
 */
const directionalSign = (direction: SmartMoneyDirection): number => {
  switch (direction) {
    case "bullish":
      return 1;
    case "bearish":
      return -1;
    default:
      return 0;
  }
};
|
||||
|
||||
/**
 * Rounds `value` to a fixed number of decimal places.
 *
 * Non-finite input (NaN, +/-Infinity) collapses to 0 so report fields are always plain numbers.
 * Negative zero is normalized to +0: a tiny negative input such as -0.00001 formats as
 * "-0.0000", which parses back to -0 and would otherwise fail `Object.is(x, 0)`-style checks.
 *
 * @param value  Number to round.
 * @param digits Decimal places handed to `toFixed`; defaults to 4.
 * @returns The rounded value, or 0 for non-finite input.
 */
const round = (value: number, digits = 4): number => {
  if (!Number.isFinite(value)) {
    return 0;
  }
  const rounded = Number(value.toFixed(digits));
  // `-0 === 0` is true, so this branch maps both zeros onto +0 and leaves other values alone.
  return rounded === 0 ? 0 : rounded;
};
|
||||
|
||||
/**
 * Builds the comparison signature for an event.
 *
 * The signature keeps the identifying and scoring fields of the event. `suppressed_reasons` is
 * copied and sorted so its order does not matter, and each score's probability is rounded to
 * 6 decimal places. The order of `profile_scores` is kept as it appears on the event.
 *
 * @param event Event to summarize; it is not mutated.
 * @returns The signature used by `compareSmartMoneyReplayOutputs`.
 */
export const smartMoneyEventSignature = (event: SmartMoneyEvent): SmartMoneyEventSignature => {
  const sortedReasons = Array.from(event.suppressed_reasons).sort();
  const scores = event.profile_scores.map(({ profile_id, probability, confidence_band, direction }) => ({
    profile_id,
    probability: round(probability, 6),
    confidence_band,
    direction
  }));

  return {
    event_id: event.event_id,
    primary_profile_id: event.primary_profile_id,
    primary_direction: event.primary_direction,
    abstained: event.abstained,
    suppressed_reasons: sortedReasons,
    profile_scores: scores
  };
};
|
||||
|
||||
/**
 * Scores a batch of packets in a deterministic order.
 *
 * Packets are copied, then ordered by `source_ts`, then `seq`, then `id` (via `localeCompare`),
 * and each one is passed to `buildSmartMoneyEventFromPacket` together with the options
 * registered under its id, if any.
 *
 * @param packets           Packets to score; the input array is not reordered.
 * @param optionsByPacketId Optional per-packet options keyed by packet id.
 * @returns One event per packet, in the sorted order.
 */
export const buildSmartMoneyEventsForReplay = (
  packets: FlowPacket[],
  optionsByPacketId: Record<string, SmartMoneyParentEventOptions | undefined> = {}
): SmartMoneyEvent[] => {
  const ordered = [...packets];
  ordered.sort((left, right) => {
    // Truthiness checks (not `!== 0`) so a NaN difference falls through to the next key.
    const byTimestamp = left.source_ts - right.source_ts;
    if (byTimestamp) {
      return byTimestamp;
    }
    const bySequence = left.seq - right.seq;
    if (bySequence) {
      return bySequence;
    }
    return left.id.localeCompare(right.id);
  });

  const events: SmartMoneyEvent[] = [];
  for (const packet of ordered) {
    events.push(buildSmartMoneyEventFromPacket(packet, optionsByPacketId[packet.id]));
  }
  return events;
};
|
||||
|
||||
/**
 * Compares the events of a live run with those of a batch run.
 *
 * Both inputs are indexed by `event_id` as signatures (a later event with the same id replaces
 * an earlier one). Every id seen on either side is then checked in sorted order: an id missing
 * on one side, or present on both with different JSON-serialized signatures, is a mismatch.
 *
 * @param liveEvents  Events from the live path.
 * @param batchEvents Events from the batch/replay path.
 * @returns Counts, the mismatch list, and `consistent`, which is true when there are none.
 */
export const compareSmartMoneyReplayOutputs = (
  liveEvents: SmartMoneyEvent[],
  batchEvents: SmartMoneyEvent[]
): ReplayConsistencyReport => {
  const indexSignatures = (events: SmartMoneyEvent[]) => {
    const index = new Map<SmartMoneyEvent["event_id"], SmartMoneyEventSignature>();
    for (const event of events) {
      index.set(event.event_id, smartMoneyEventSignature(event));
    }
    return index;
  };

  const liveIndex = indexSignatures(liveEvents);
  const batchIndex = indexSignatures(batchEvents);
  const allIds = Array.from(new Set([...liveIndex.keys(), ...batchIndex.keys()])).sort();

  const mismatches: ReplayConsistencyMismatch[] = [];
  for (const eventId of allIds) {
    const live = liveIndex.get(eventId);
    const batch = batchIndex.get(eventId);

    if (live && batch) {
      if (JSON.stringify(live) !== JSON.stringify(batch)) {
        mismatches.push({ event_id: eventId, field: "signature", live, batch });
      }
    } else if (!live) {
      mismatches.push({ event_id: eventId, field: "missing_live", batch });
    } else {
      mismatches.push({ event_id: eventId, field: "missing_batch", live });
    }
  }

  const mismatchCount = mismatches.length;
  return {
    live_count: liveEvents.length,
    batch_count: batchEvents.length,
    matched_count: allIds.length - mismatchCount,
    mismatches,
    consistent: mismatchCount === 0
  };
};
|
||||
|
||||
/**
 * Scores a set of events against reference labels.
 *
 * Events are joined to labels by `event_id`; events without a label still count toward
 * `sample_count`, `emitted_count` and the abstention figures, but not toward precision,
 * recall, calibration or economic sanity.
 *
 * Per profile, "predicted" means a labeled event whose `primary_profile_id` equals the profile
 * (the `abstained` flag is not consulted here), and "actual" means a labeled event whose label
 * names the profile. Precision and recall are `null` when their denominator is zero.
 *
 * @param events      Events to evaluate.
 * @param labels      Reference labels; when several share an `event_id` the last one wins.
 * @param bucketCount Number of calibration buckets. Floored and raised to at least 1; a
 *                    non-finite value (NaN, +/-Infinity) falls back to the default of 5 instead
 *                    of reaching the calibration step, where it would throw.
 * @returns The evaluation report.
 */
export const evaluateSmartMoneyEvents = (
  events: SmartMoneyEvent[],
  labels: SmartMoneyLabel[],
  bucketCount = 5
): SmartMoneyEvaluationReport => {
  const labelsById = new Map(labels.map((label) => [label.event_id, label]));

  const labeledEvents: Array<{ event: SmartMoneyEvent; label: SmartMoneyLabel }> = [];
  for (const event of events) {
    const label = labelsById.get(event.event_id);
    if (label) {
      labeledEvents.push({ event, label });
    }
  }

  const emittedCount = events.filter((event) => !event.abstained && Boolean(event.primary_profile_id)).length;
  // Computed once and reused for both the count and the rate.
  const abstainedCount = events.filter((event) => event.abstained).length;

  const profilePrecision: SmartMoneyEvaluationReport["profile_precision"] = {};
  const profileRecall: SmartMoneyEvaluationReport["profile_recall"] = {};

  for (const profile of PROFILES) {
    const predicted = labeledEvents.filter((entry) => entry.event.primary_profile_id === profile);
    const actual = labeledEvents.filter((entry) => entry.label.profile_id === profile);
    const truePositive = predicted.filter((entry) => entry.label.profile_id === profile).length;
    profilePrecision[profile] = predicted.length > 0 ? round(truePositive / predicted.length) : null;
    profileRecall[profile] = actual.length > 0 ? round(truePositive / actual.length) : null;
  }

  // Math.max(1, Math.floor(NaN)) is NaN and Math.floor(Infinity) is Infinity; neither is a
  // usable array length, so non-finite requests use the default bucket count.
  const safeBucketCount = Number.isFinite(bucketCount) ? Math.max(1, Math.floor(bucketCount)) : 5;

  const calibration = buildCalibration(labeledEvents, safeBucketCount);
  const economic = buildEconomicSanity(labeledEvents);

  return {
    sample_count: events.length,
    labeled_count: labeledEvents.length,
    emitted_count: emittedCount,
    abstained_count: abstainedCount,
    abstention_rate: events.length > 0 ? round(abstainedCount / events.length) : 0,
    profile_precision: profilePrecision,
    profile_recall: profileRecall,
    calibration,
    economic_sanity: economic
  };
};
|
||||
|
||||
/**
 * Builds an equal-width calibration table over [0, 1].
 *
 * Each labeled event is placed in a bucket by the probability of its primary profile (looked
 * up in `profile_scores`; 0 when there is no matching score). An event counts as correct when
 * it is not abstained and its `primary_profile_id` equals the label's `profile_id`.
 *
 * Robustness: the bucket index is clamped to `[0, bucketCount - 1]`, so probabilities below 0
 * or above 1 land in the first or last bucket instead of indexing outside the table. A
 * non-finite probability is treated as 0. `bucketCount` is floored and raised to at least 1;
 * a non-finite `bucketCount` yields a single bucket.
 *
 * @param entries     Labeled events.
 * @param bucketCount Requested number of buckets.
 * @returns One `CalibrationBucket` per bucket, in ascending probability order.
 */
const buildCalibration = (
  entries: Array<{ event: SmartMoneyEvent; label: SmartMoneyLabel }>,
  bucketCount: number
): CalibrationBucket[] => {
  const size = Number.isFinite(bucketCount) ? Math.max(1, Math.floor(bucketCount)) : 1;

  const buckets = Array.from({ length: size }, (_, index) => ({
    min_probability: round(index / size),
    max_probability: round((index + 1) / size),
    probabilities: [] as number[],
    correct: 0
  }));

  for (const { event, label } of entries) {
    const rawProbability =
      event.profile_scores.find((entry) => entry.profile_id === event.primary_profile_id)?.probability ?? 0;
    const probability = Number.isFinite(rawProbability) ? rawProbability : 0;
    // Clamp on both sides: only the upper bound was guarded before, so a negative value
    // produced index -1 and a crash on the undefined bucket.
    const index = Math.min(size - 1, Math.max(0, Math.floor(probability * size)));
    const bucket = buckets[index];
    bucket.probabilities.push(probability);
    if (!event.abstained && event.primary_profile_id === label.profile_id) {
      bucket.correct += 1;
    }
  }

  return buckets.map((bucket) => {
    const count = bucket.probabilities.length;
    return {
      min_probability: bucket.min_probability,
      max_probability: bucket.max_probability,
      count,
      average_probability:
        count > 0 ? round(bucket.probabilities.reduce((sum, value) => sum + value, 0) / count) : 0,
      accuracy: count > 0 ? round(bucket.correct / count) : null
    };
  });
};
|
||||
|
||||
/**
 * Checks whether predicted directions agree with realized returns.
 *
 * Only entries whose event direction maps to a non-zero sign (bullish or bearish) and whose
 * label carries a finite `realized_return_bps` are used. For those, the signed return is
 * `sign * realized_return_bps`; a hit is a signed return strictly greater than zero.
 *
 * @param entries Labeled events.
 * @returns The number of usable entries, the hit rate, and the mean signed return rounded to
 *          2 decimal places. Both rates are `null` when no entry qualifies.
 */
const buildEconomicSanity = (
  entries: Array<{ event: SmartMoneyEvent; label: SmartMoneyLabel }>
): SmartMoneyEvaluationReport["economic_sanity"] => {
  const signedReturns: number[] = [];
  for (const { event, label } of entries) {
    const sign = directionalSign(event.primary_direction);
    const realized = label.realized_return_bps;
    // Number.isFinite is false for any non-number, so the typeof check only aids narrowing.
    if (sign === 0 || typeof realized !== "number" || !Number.isFinite(realized)) {
      continue;
    }
    signedReturns.push(sign * realized);
  }

  const usable = signedReturns.length;
  if (usable === 0) {
    return {
      directional_count: 0,
      direction_hit_rate: null,
      average_signed_return_bps: null
    };
  }

  let hits = 0;
  let total = 0;
  for (const signedReturn of signedReturns) {
    if (signedReturn > 0) {
      hits += 1;
    }
    total += signedReturn;
  }

  return {
    directional_count: usable,
    direction_hit_rate: round(hits / usable),
    average_signed_return_bps: round(total / usable, 2)
  };
};
|
||||
153
services/compute/tests/smart-money-evaluation.test.ts
Normal file
153
services/compute/tests/smart-money-evaluation.test.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
import { describe, expect, it } from "bun:test";
|
||||
import { buildSmartMoneyEventFromPacket } from "../src/parent-events";
|
||||
import {
|
||||
buildSmartMoneyEventsForReplay,
|
||||
compareSmartMoneyReplayOutputs,
|
||||
evaluateSmartMoneyEvents
|
||||
} from "../src/smart-money-evaluation";
|
||||
import { buildFlowPacket } from "./helpers";
|
||||
|
||||
// Fixture packet (seq 2, 15:00:01Z). The evaluation test below labels the event derived from
// it as "institutional_directional" / "bullish" and expects precision 1 for that profile.
const institutionalPacket = buildFlowPacket({
|
||||
id: "flowpacket:eval-institutional",
|
||||
seq: 2,
|
||||
source_ts: Date.parse("2025-01-15T15:00:01Z"),
|
||||
features: {
|
||||
option_contract_id: "SPY-2025-02-21-450-C",
|
||||
underlying_id: "SPY",
|
||||
count: 8,
|
||||
window_ms: 450,
|
||||
total_size: 2200,
|
||||
total_premium: 180_000,
|
||||
total_notional: 18_000_000,
|
||||
nbbo_coverage_ratio: 0.92,
|
||||
nbbo_aggressive_ratio: 0.82,
|
||||
nbbo_aggressive_buy_ratio: 0.78,
|
||||
nbbo_aggressive_sell_ratio: 0.04,
|
||||
nbbo_inside_ratio: 0.08,
|
||||
underlying_mid: 448
|
||||
}
|
||||
});
|
||||
|
||||
// Fixture packet with the earliest timestamp and lowest seq of the three (seq 1, 15:00:00Z).
// It is the only packet with an entry in `calendarOptions`; the evaluation test labels the
// event derived from it as "event_driven" / "bullish" and expects recall 1 for that profile.
const eventDrivenPacket = buildFlowPacket({
|
||||
id: "flowpacket:eval-event-driven",
|
||||
seq: 1,
|
||||
source_ts: Date.parse("2025-01-15T15:00:00Z"),
|
||||
features: {
|
||||
option_contract_id: "AAPL-2025-02-07-225-C",
|
||||
underlying_id: "AAPL",
|
||||
count: 1,
|
||||
window_ms: 450,
|
||||
total_size: 1800,
|
||||
total_premium: 160_000,
|
||||
total_notional: 16_000_000,
|
||||
nbbo_coverage_ratio: 0.5,
|
||||
nbbo_aggressive_ratio: 0.4,
|
||||
nbbo_aggressive_buy_ratio: 0.4,
|
||||
nbbo_aggressive_sell_ratio: 0.1,
|
||||
nbbo_inside_ratio: 0.08,
|
||||
underlying_mid: 224
|
||||
}
|
||||
});
|
||||
|
||||
// Fixture packet with the latest timestamp (seq 3, 15:00:02Z), low NBBO coverage (0.1) and
// 8 missing-NBBO trades. The evaluation test labels it with `profile_id: null` and expects
// exactly one abstained event overall — presumably this one; confirm against the scorer.
const stalePacket = buildFlowPacket({
|
||||
id: "flowpacket:eval-stale",
|
||||
seq: 3,
|
||||
source_ts: Date.parse("2025-01-15T15:00:02Z"),
|
||||
features: {
|
||||
option_contract_id: "SPY-2025-02-21-450-C",
|
||||
underlying_id: "SPY",
|
||||
count: 8,
|
||||
window_ms: 450,
|
||||
total_size: 2200,
|
||||
total_premium: 180_000,
|
||||
nbbo_coverage_ratio: 0.1,
|
||||
nbbo_missing_count: 8
|
||||
}
|
||||
});
|
||||
|
||||
// Per-packet scoring options keyed by packet id. Only the event-driven fixture has an entry:
// an AAPL earnings calendar match. Passed both directly to `buildSmartMoneyEventFromPacket`
// and as the `optionsByPacketId` argument of `buildSmartMoneyEventsForReplay`.
const calendarOptions = {
|
||||
"flowpacket:eval-event-driven": {
|
||||
eventCalendarMatch: {
|
||||
underlying_id: "AAPL",
|
||||
event_ts: Date.parse("2025-01-31T21:00:00Z"),
|
||||
event_kind: "earnings",
|
||||
announced_ts: Date.parse("2024-12-20T21:00:00Z"),
|
||||
days_to_event: 16.25
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Test suite for the evaluation harness. Three cases:
//  1. Scoring the fixtures one by one ("live") and through buildSmartMoneyEventsForReplay with
//     a different input order ("batch") yields a fully consistent comparison report.
//  2. Changing `primary_profile_id` on one side is reported as a single "signature" mismatch.
//  3. evaluateSmartMoneyEvents over the three fixtures with 4 buckets produces the expected
//     counts, abstention rate, precision/recall, calibration size and economic-sanity figures.
describe("smart money evaluation utilities", () => {
|
||||
it("compares replay-style live and batch outputs with stable event signatures", () => {
|
||||
const liveEvents = [institutionalPacket, eventDrivenPacket, stalePacket].map((packet) =>
|
||||
buildSmartMoneyEventFromPacket(packet, calendarOptions[packet.id])
|
||||
);
|
||||
const batchEvents = buildSmartMoneyEventsForReplay(
|
||||
[stalePacket, institutionalPacket, eventDrivenPacket],
|
||||
calendarOptions
|
||||
);
|
||||
|
||||
const report = compareSmartMoneyReplayOutputs(liveEvents, batchEvents);
|
||||
expect(report.consistent).toBe(true);
|
||||
expect(report.live_count).toBe(3);
|
||||
expect(report.batch_count).toBe(3);
|
||||
expect(report.matched_count).toBe(3);
|
||||
expect(report.mismatches).toEqual([]);
|
||||
});
|
||||
|
||||
it("reports signature mismatches when live and batch scoring diverge", () => {
|
||||
const liveEvent = buildSmartMoneyEventFromPacket(institutionalPacket);
|
||||
const batchEvent = {
|
||||
...liveEvent,
|
||||
primary_profile_id: "retail_whale" as const
|
||||
};
|
||||
|
||||
const report = compareSmartMoneyReplayOutputs([liveEvent], [batchEvent]);
|
||||
expect(report.consistent).toBe(false);
|
||||
expect(report.mismatches).toHaveLength(1);
|
||||
expect(report.mismatches[0]?.field).toBe("signature");
|
||||
});
|
||||
|
||||
it("summarizes precision, recall, calibration, abstention rate, and economic sanity", () => {
|
||||
const events = buildSmartMoneyEventsForReplay(
|
||||
[institutionalPacket, eventDrivenPacket, stalePacket],
|
||||
calendarOptions
|
||||
);
|
||||
const report = evaluateSmartMoneyEvents(
|
||||
events,
|
||||
[
|
||||
{
|
||||
event_id: "smartmoney:single_leg_event:flowpacket:eval-institutional",
|
||||
profile_id: "institutional_directional",
|
||||
direction: "bullish",
|
||||
realized_return_bps: 42
|
||||
},
|
||||
{
|
||||
event_id: "smartmoney:single_leg_event:flowpacket:eval-event-driven",
|
||||
profile_id: "event_driven",
|
||||
direction: "bullish",
|
||||
realized_return_bps: 18
|
||||
},
|
||||
{
|
||||
event_id: "smartmoney:single_leg_event:flowpacket:eval-stale",
|
||||
profile_id: null,
|
||||
realized_return_bps: -12
|
||||
}
|
||||
],
|
||||
4
|
||||
);
|
||||
|
||||
expect(report.sample_count).toBe(3);
|
||||
expect(report.labeled_count).toBe(3);
|
||||
expect(report.emitted_count).toBe(2);
|
||||
expect(report.abstained_count).toBe(1);
|
||||
expect(report.abstention_rate).toBeCloseTo(1 / 3);
|
||||
expect(report.profile_precision.institutional_directional).toBe(1);
|
||||
expect(report.profile_recall.event_driven).toBe(1);
|
||||
expect(report.calibration).toHaveLength(4);
|
||||
expect(report.calibration.reduce((sum, bucket) => sum + bucket.count, 0)).toBe(3);
|
||||
expect(report.economic_sanity.directional_count).toBe(2);
|
||||
expect(report.economic_sanity.direction_hit_rate).toBe(1);
|
||||
expect(report.economic_sanity.average_signed_return_bps).toBe(30);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue