add alpaca news wire across ingest api and web
This commit is contained in:
parent
62aae70878
commit
906fe411c9
31 changed files with 1407 additions and 50 deletions
|
|
@ -9,6 +9,7 @@ import {
|
|||
SUBJECT_EQUITY_QUOTES,
|
||||
SUBJECT_INFERRED_DARK,
|
||||
SUBJECT_FLOW_PACKETS,
|
||||
SUBJECT_NEWS,
|
||||
SUBJECT_SMART_MONEY_EVENTS,
|
||||
SUBJECT_OPTION_NBBO,
|
||||
SUBJECT_OPTION_SIGNAL_PRINTS,
|
||||
|
|
@ -20,6 +21,7 @@ import {
|
|||
STREAM_EQUITY_QUOTES,
|
||||
STREAM_INFERRED_DARK,
|
||||
STREAM_FLOW_PACKETS,
|
||||
STREAM_NEWS,
|
||||
STREAM_SMART_MONEY_EVENTS,
|
||||
STREAM_OPTION_NBBO,
|
||||
STREAM_OPTION_SIGNAL_PRINTS,
|
||||
|
|
@ -35,6 +37,7 @@ import {
|
|||
import {
|
||||
createClickHouseClient,
|
||||
ensureAlertsTable,
|
||||
ensureNewsTable,
|
||||
ensureClassifierHitsTable,
|
||||
ensureEquityCandlesTable,
|
||||
ensureEquityPrintJoinsTable,
|
||||
|
|
@ -48,6 +51,8 @@ import {
|
|||
fetchAlertsAfter,
|
||||
fetchAlertsBefore,
|
||||
fetchAlertContextByTraceId,
|
||||
fetchNewsAfter,
|
||||
fetchNewsBefore,
|
||||
fetchClassifierHitsAfter,
|
||||
fetchClassifierHitsBefore,
|
||||
fetchSmartMoneyEventsAfter,
|
||||
|
|
@ -58,6 +63,7 @@ import {
|
|||
fetchFlowPacketsByMemberTraceIds,
|
||||
fetchFlowPacketsBefore,
|
||||
fetchRecentAlerts,
|
||||
fetchRecentNews,
|
||||
fetchRecentClassifierHits,
|
||||
fetchRecentSmartMoneyEvents,
|
||||
fetchRecentEquityPrintJoins,
|
||||
|
|
@ -99,6 +105,7 @@ import {
|
|||
EquityQuoteSchema,
|
||||
FeedSnapshot,
|
||||
InferredDarkEventSchema,
|
||||
NewsStorySchema,
|
||||
LiveClientMessageSchema,
|
||||
LiveServerMessage,
|
||||
LiveSubscription,
|
||||
|
|
@ -676,7 +683,8 @@ const run = async () => {
|
|||
STREAM_FLOW_PACKETS,
|
||||
STREAM_SMART_MONEY_EVENTS,
|
||||
STREAM_CLASSIFIER_HITS,
|
||||
STREAM_ALERTS
|
||||
STREAM_ALERTS,
|
||||
STREAM_NEWS
|
||||
],
|
||||
{ logger }
|
||||
);
|
||||
|
|
@ -719,6 +727,7 @@ const run = async () => {
|
|||
await ensureSmartMoneyEventsTable(clickhouse);
|
||||
await ensureClassifierHitsTable(clickhouse);
|
||||
await ensureAlertsTable(clickhouse);
|
||||
await ensureNewsTable(clickhouse);
|
||||
});
|
||||
|
||||
let redis: ReturnType<typeof createClient> | null = null;
|
||||
|
|
@ -843,6 +852,11 @@ const run = async () => {
|
|||
subject: SUBJECT_ALERTS,
|
||||
stream: STREAM_ALERTS,
|
||||
durableName: "api-alerts"
|
||||
},
|
||||
{
|
||||
subject: SUBJECT_NEWS,
|
||||
stream: STREAM_NEWS,
|
||||
durableName: "api-news"
|
||||
}
|
||||
] as const;
|
||||
|
||||
|
|
@ -991,10 +1005,16 @@ const run = async () => {
|
|||
consumerBindings[10].durableName
|
||||
);
|
||||
|
||||
const newsSubscription = await subscribeWithReset(
|
||||
consumerBindings[11].subject,
|
||||
consumerBindings[11].stream,
|
||||
consumerBindings[11].durableName
|
||||
);
|
||||
|
||||
const fanoutLive = async (
|
||||
subscription: LiveSubscription,
|
||||
item: unknown,
|
||||
ingestChannel: "options" | "nbbo" | "equities" | "equity-quotes" | "equity-candles" | "equity-overlay" | "equity-joins" | "flow" | "classifier-hits" | "alerts" | "inferred-dark"
|
||||
ingestChannel: "options" | "nbbo" | "equities" | "equity-quotes" | "equity-candles" | "equity-overlay" | "equity-joins" | "flow" | "classifier-hits" | "alerts" | "inferred-dark" | "news"
|
||||
) => {
|
||||
const watermark = await liveState.ingest(ingestChannel, item);
|
||||
|
||||
|
|
@ -1252,6 +1272,21 @@ const run = async () => {
|
|||
}
|
||||
};
|
||||
|
||||
// Consumes the durable news subscription: each message is decoded, validated
// against NewsStorySchema, fanned out on the live "news" channel and then acked.
// Any failure along that path is logged and the message is ended with msg.term().
const pumpNews = async () => {
  for await (const msg of newsSubscription.messages) {
    try {
      const decoded = newsSubscription.decode(msg);
      const story = NewsStorySchema.parse(decoded);
      await fanoutLive({ channel: "news" }, story, "news");
      msg.ack();
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error("failed to process news story", { error: reason });
      msg.term();
    }
  }
};
|
||||
|
||||
void pumpOptions();
|
||||
void pumpOptionNbbo();
|
||||
void pumpEquities();
|
||||
|
|
@ -1263,6 +1298,7 @@ const run = async () => {
|
|||
void pumpSmartMoney();
|
||||
void pumpClassifierHits();
|
||||
void pumpAlerts();
|
||||
void pumpNews();
|
||||
|
||||
const buildSyntheticStatusBody = () => {
|
||||
const derived =
|
||||
|
|
@ -1490,6 +1526,12 @@ const run = async () => {
|
|||
return jsonResponse({ data });
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/news") {
|
||||
const limit = parseLimit(url.searchParams.get("limit") ?? "100");
|
||||
const data = await fetchRecentNews(clickhouse, limit);
|
||||
return jsonResponse({ data });
|
||||
}
|
||||
|
||||
if (req.method === "GET" && isAlertContextPath(url.pathname)) {
|
||||
try {
|
||||
const traceId = parseAlertContextTraceIdPath(url.pathname);
|
||||
|
|
@ -1607,6 +1649,14 @@ const run = async () => {
|
|||
);
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/history/news") {
|
||||
const { beforeTs, beforeSeq, limit } = parseBeforeParams(url);
|
||||
const data = await fetchNewsBefore(clickhouse, beforeTs, beforeSeq, limit);
|
||||
return jsonResponse(
|
||||
buildHistoryResponse(data, (item) => ({ ts: item.published_ts, seq: item.seq }))
|
||||
);
|
||||
}
|
||||
|
||||
if (req.method === "GET" && /^\/flow\/packets\/[^/]+$/.test(url.pathname)) {
|
||||
const id = decodeURIComponent(url.pathname.slice("/flow/packets/".length));
|
||||
const data = await fetchFlowPacketById(clickhouse, id);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import {
|
|||
fetchRecentEquityQuotes,
|
||||
fetchRecentFlowPackets,
|
||||
fetchRecentInferredDark,
|
||||
fetchRecentNews,
|
||||
fetchRecentOptionNBBO,
|
||||
fetchRecentSmartMoneyEvents,
|
||||
type ClickHouseClient
|
||||
|
|
@ -25,6 +26,7 @@ import {
|
|||
FeedSnapshot,
|
||||
FlowPacketSchema,
|
||||
InferredDarkEventSchema,
|
||||
NewsStorySchema,
|
||||
LiveChannelHealth,
|
||||
LiveGenericChannel,
|
||||
LiveHotChannel,
|
||||
|
|
@ -40,6 +42,7 @@ import {
|
|||
type EquityCandle,
|
||||
type EquityPrint,
|
||||
type LiveChannel,
|
||||
type NewsStory,
|
||||
type OptionPrint
|
||||
} from "@islandflow/types";
|
||||
import { createMetrics } from "@islandflow/observability";
|
||||
|
|
@ -63,7 +66,8 @@ const GENERIC_LIMIT_ENV_KEYS: Record<LiveGenericChannel, string> = {
|
|||
"smart-money": "LIVE_LIMIT_SMART_MONEY",
|
||||
"classifier-hits": "LIVE_LIMIT_CLASSIFIER_HITS",
|
||||
alerts: "LIVE_LIMIT_ALERTS",
|
||||
"inferred-dark": "LIVE_LIMIT_INFERRED_DARK"
|
||||
"inferred-dark": "LIVE_LIMIT_INFERRED_DARK",
|
||||
news: "LIVE_LIMIT_NEWS"
|
||||
};
|
||||
|
||||
const CHART_LIMITS = {
|
||||
|
|
@ -81,7 +85,8 @@ const DEFAULT_LIVE_LIMITS: GenericLiveLimits = {
|
|||
"smart-money": 300,
|
||||
"classifier-hits": 300,
|
||||
alerts: 300,
|
||||
"inferred-dark": 300
|
||||
"inferred-dark": 300,
|
||||
news: 100
|
||||
};
|
||||
|
||||
const DEFAULT_SCOPED_CACHE_MAX_KEYS = 32;
|
||||
|
|
@ -196,16 +201,28 @@ export const resolveGenericLiveLimits = (env: NodeJS.ProcessEnv = process.env):
|
|||
env,
|
||||
"inferred-dark",
|
||||
env.LIVE_LIMIT_DEFAULT ? liveLimitDefault : DEFAULT_LIVE_LIMITS["inferred-dark"]
|
||||
)
|
||||
),
|
||||
news: parseGenericLimit(env, "news", env.LIVE_LIMIT_DEFAULT ? liveLimitDefault : DEFAULT_LIVE_LIMITS.news)
|
||||
};
|
||||
};
|
||||
|
||||
const parsePositiveInt = (value: string | undefined, fallback: number): number => {
|
||||
const parsed = Number(value);
|
||||
if (!Number.isFinite(parsed)) {
|
||||
return fallback;
|
||||
const extractFreshnessTs = (channel: LiveGenericChannel, item: any): number | null => {
|
||||
switch (channel) {
|
||||
case "options":
|
||||
case "nbbo":
|
||||
case "equities":
|
||||
case "equity-quotes":
|
||||
return typeof item.ts === "number" ? item.ts : null;
|
||||
case "flow":
|
||||
case "classifier-hits":
|
||||
case "alerts":
|
||||
case "inferred-dark":
|
||||
return typeof item.source_ts === "number" ? item.source_ts : null;
|
||||
case "news":
|
||||
return typeof item.published_ts === "number" ? item.published_ts : null;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
return Math.max(1, Math.floor(parsed));
|
||||
};
|
||||
|
||||
export const resolveLiveStateConfig = (env: NodeJS.ProcessEnv = process.env): LiveStateConfig => ({
|
||||
|
|
@ -217,6 +234,13 @@ export const resolveLiveStateConfig = (env: NodeJS.ProcessEnv = process.env): Li
|
|||
),
|
||||
redisFlushMaxItems: parsePositiveInt(env.LIVE_REDIS_FLUSH_MAX_ITEMS, DEFAULT_REDIS_FLUSH_MAX_ITEMS)
|
||||
});
|
||||
/**
 * Parses a numeric setting.
 *
 * @param value - Raw string value (may be undefined).
 * @param fallback - Returned when `value` does not convert to a finite number.
 * @returns The value floored to an integer and clamped to a minimum of 1, or `fallback`.
 */
const parsePositiveInt = (value: string | undefined, fallback: number): number => {
  const numeric = Number(value);
  return Number.isFinite(numeric) ? Math.max(1, Math.floor(numeric)) : fallback;
};
|
||||
|
||||
type RedisLike = Pick<
|
||||
RedisClientType,
|
||||
|
|
@ -318,6 +342,14 @@ const getGenericConfig = (limits: GenericLiveLimits): {
|
|||
parse: (value) => InferredDarkEventSchema.parse(value),
|
||||
cursor: (item) => ({ ts: item.source_ts, seq: item.seq }),
|
||||
fetchRecent: fetchRecentInferredDark
|
||||
},
|
||||
news: {
|
||||
redisKey: "live:news",
|
||||
cursorField: "news",
|
||||
limit: limits.news,
|
||||
parse: (value) => NewsStorySchema.parse(value),
|
||||
cursor: (item) => ({ ts: item.published_ts, seq: item.seq }),
|
||||
fetchRecent: fetchRecentNews
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -371,23 +403,6 @@ const normalizeGenericItems = <T>(
|
|||
return sortGenericItems(items, config.cursor).slice(0, config.limit);
|
||||
};
|
||||
|
||||
const extractFreshnessTs = (channel: LiveGenericChannel, item: any): number | null => {
|
||||
switch (channel) {
|
||||
case "options":
|
||||
case "nbbo":
|
||||
case "equities":
|
||||
case "equity-quotes":
|
||||
return typeof item.ts === "number" ? item.ts : null;
|
||||
case "flow":
|
||||
case "classifier-hits":
|
||||
case "alerts":
|
||||
case "inferred-dark":
|
||||
return typeof item.source_ts === "number" ? item.source_ts : null;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
const isWithinLiveFeedLookback = (
|
||||
channel: LiveGenericChannel,
|
||||
item: unknown,
|
||||
|
|
|
|||
16
services/ingest-news/package.json
Normal file
16
services/ingest-news/package.json
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"name": "@islandflow/ingest-news",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "bun run src/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@islandflow/bus": "workspace:*",
|
||||
"@islandflow/config": "workspace:*",
|
||||
"@islandflow/observability": "workspace:*",
|
||||
"@islandflow/types": "workspace:*",
|
||||
"ws": "^8.18.3",
|
||||
"zod": "^3.23.8"
|
||||
}
|
||||
}
|
||||
216
services/ingest-news/src/index.ts
Normal file
216
services/ingest-news/src/index.ts
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
import { readEnv } from "@islandflow/config";
|
||||
import { createLogger } from "@islandflow/observability";
|
||||
import {
|
||||
SUBJECT_NEWS,
|
||||
STREAM_NEWS,
|
||||
connectJetStreamWithRetry,
|
||||
ensureKnownStreams,
|
||||
publishJson
|
||||
} from "@islandflow/bus";
|
||||
import { NewsStorySchema, type NewsStory } from "@islandflow/types";
|
||||
import WebSocket from "ws";
|
||||
import { z } from "zod";
|
||||
import { resolveNewsSymbols } from "./symbols";
|
||||
|
||||
const service = "ingest-news";
|
||||
const logger = createLogger({ service });
|
||||
|
||||
const envSchema = z.object({
|
||||
NATS_URL: z.string().default("nats://127.0.0.1:4222"),
|
||||
ALPACA_API_KEY: z.string().default(""),
|
||||
ALPACA_REST_URL: z.string().default("https://data.alpaca.markets"),
|
||||
ALPACA_WS_BASE_URL: z.string().default("wss://stream.data.alpaca.markets"),
|
||||
ALPACA_NEWS_BACKFILL_LIMIT: z.coerce.number().int().positive().max(200).default(100),
|
||||
ALPACA_NEWS_WEBSOCKET_PATH: z.string().default("/v1beta1/news")
|
||||
});
|
||||
|
||||
const env = readEnv(envSchema);
|
||||
|
||||
type AlpacaNewsItem = {
|
||||
id?: number;
|
||||
headline?: string;
|
||||
summary?: string;
|
||||
content?: string;
|
||||
author?: string;
|
||||
created_at?: string;
|
||||
updated_at?: string;
|
||||
url?: string;
|
||||
symbols?: string[];
|
||||
source?: string;
|
||||
};
|
||||
|
||||
type AlpacaNewsResponse = {
|
||||
news?: AlpacaNewsItem[];
|
||||
};
|
||||
|
||||
/** Builds the request headers used for Alpaca calls: the API key sent as a bearer token. */
const buildHeaders = (): Record<string, string> => {
  const bearer = `Bearer ${env.ALPACA_API_KEY}`;
  return { Authorization: bearer };
};
|
||||
|
||||
/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 *
 * @param value - Date string; may be undefined or empty.
 * @returns The parsed timestamp, or the current time when the value is
 *          missing or cannot be parsed.
 */
const parseTimestamp = (value: string | undefined): number => {
  if (value) {
    const millis = Date.parse(value);
    if (Number.isFinite(millis)) {
      return millis;
    }
  }
  return Date.now();
};
|
||||
|
||||
/**
 * Maps a raw Alpaca news item onto the NewsStory shape.
 *
 * @param item - Raw item from the REST backfill or the websocket stream.
 * @param seq - Sequence number assigned by the caller.
 * @returns The story validated by `NewsStorySchema.parse`, or `null` when the
 *          item's id is not a finite, non-negative number.
 */
const toStory = (item: AlpacaNewsItem, seq: number): NewsStory | null => {
  const storyId = Number(item.id);
  const hasUsableId = Number.isFinite(storyId) && storyId >= 0;
  if (!hasUsableId) {
    return null;
  }

  const provider = "alpaca";
  const contentHtml = item.content ?? "";
  const { provider_symbols, resolved_symbols, symbol_resolution } = resolveNewsSymbols(
    item.symbols ?? [],
    contentHtml
  );

  // A missing or unparseable created_at falls back to "now" inside parseTimestamp;
  // updated_at falls back to created_at before parsing.
  const publishedTs = parseTimestamp(item.created_at);
  const updatedTs = parseTimestamp(item.updated_at ?? item.created_at);

  const candidate = {
    source_ts: publishedTs,
    ingest_ts: Date.now(),
    seq,
    trace_id: `${provider}:${storyId}`,
    story_id: storyId,
    provider,
    source: item.source?.trim() || item.author?.trim() || "Alpaca News",
    headline: item.headline?.trim() || `Story ${storyId}`,
    summary: item.summary?.trim() || "",
    content_html: contentHtml,
    url: item.url?.trim() || "",
    published_ts: publishedTs,
    updated_ts: updatedTs,
    provider_symbols,
    resolved_symbols,
    symbol_resolution
  };

  return NewsStorySchema.parse(candidate);
};
|
||||
|
||||
/**
 * Requests the most recent stories from the REST endpoint `/v1beta1/news`
 * (sorted descending, capped at ALPACA_NEWS_BACKFILL_LIMIT).
 *
 * @returns The `news` array from the response body, or an empty array when
 *          that field is not an array.
 * @throws Error when the HTTP response status is not OK.
 */
const fetchBackfill = async (): Promise<AlpacaNewsItem[]> => {
  const endpoint = new URL("/v1beta1/news", env.ALPACA_REST_URL);
  endpoint.searchParams.set("sort", "desc");
  endpoint.searchParams.set("limit", env.ALPACA_NEWS_BACKFILL_LIMIT.toString());

  const response = await fetch(endpoint.toString(), { headers: buildHeaders() });
  if (!response.ok) {
    throw new Error(`alpaca news backfill failed (${response.status})`);
  }

  const body = (await response.json()) as AlpacaNewsResponse;
  const stories = body.news;
  return Array.isArray(stories) ? stories : [];
};
|
||||
|
||||
/**
 * Decodes a raw websocket frame into parsed JSON.
 *
 * Handles every shape the `ws` RawData type allows: a single binary buffer
 * (typed-array view or ArrayBuffer) and an array of fragment buffers. The
 * string branch is kept for callers that pass text directly.
 *
 * @param data - Raw frame payload.
 * @returns The parsed JSON value (unvalidated).
 * @throws SyntaxError when the decoded text is not valid JSON.
 */
const decodePayload = (data: WebSocket.RawData): unknown => {
  if (typeof data === "string") {
    return JSON.parse(data) as unknown;
  }

  const decoder = new TextDecoder();

  if (Array.isArray(data)) {
    // Fragmented frame: decode in streaming mode so a multi-byte UTF-8 sequence
    // split across two fragments is still decoded correctly.
    let text = "";
    for (const fragment of data) {
      text += decoder.decode(fragment, { stream: true });
    }
    text += decoder.decode();
    return JSON.parse(text) as unknown;
  }

  if (ArrayBuffer.isView(data)) {
    // Respect the view's window; the backing buffer may be larger than the payload.
    const bytes = new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
    return JSON.parse(decoder.decode(bytes)) as unknown;
  }

  return JSON.parse(decoder.decode(new Uint8Array(data as ArrayBuffer))) as unknown;
};
|
||||
|
||||
/**
 * Service entry point.
 *
 * 1. Requires ALPACA_API_KEY, connects to NATS JetStream and ensures the news stream exists.
 * 2. Publishes the REST backfill oldest-first.
 * 3. Opens the Alpaca news websocket, authenticates, subscribes to all news
 *    once authenticated, and publishes every incoming story.
 *
 * Socket failures are logged through the service logger. An unexpected close
 * drains NATS and exits with a non-zero code so a supervisor can restart the
 * service, mirroring the crash handler around `run()`.
 *
 * @throws Error when ALPACA_API_KEY is empty; other startup failures propagate
 *         from the called helpers.
 */
const run = async () => {
  if (!env.ALPACA_API_KEY) {
    throw new Error("ALPACA_API_KEY is required for ingest-news.");
  }

  const { nc, js, jsm } = await connectJetStreamWithRetry(
    {
      servers: env.NATS_URL,
      name: service
    },
    { attempts: 120, delayMs: 500 }
  );

  await ensureKnownStreams(jsm, [STREAM_NEWS], { logger });

  // Process-local sequence counter; incremented for every item handed to publishStory.
  let seq = 0;
  const publishStory = async (item: AlpacaNewsItem) => {
    seq += 1;
    const story = toStory(item, seq);
    if (!story) {
      return;
    }
    await publishJson(js, SUBJECT_NEWS, story);
  };

  // The backfill is requested newest-first; reverse it so stories are published oldest-first.
  const backfill = await fetchBackfill();
  for (const item of backfill.reverse()) {
    await publishStory(item);
  }

  const wsUrl = new URL(env.ALPACA_NEWS_WEBSOCKET_PATH, env.ALPACA_WS_BASE_URL).toString();
  const ws = new WebSocket(wsUrl, {
    headers: buildHeaders()
  });

  // Set once a signal-driven shutdown starts so the close handler can tell an
  // intentional close from a dropped connection.
  let shuttingDown = false;

  ws.on("open", () => {
    ws.send(
      JSON.stringify({
        action: "auth",
        key: env.ALPACA_API_KEY,
        secret: ""
      })
    );
  });

  ws.on("message", (raw) => {
    let payload: unknown;
    try {
      payload = decodePayload(raw);
    } catch (error) {
      logger.warn("failed to decode alpaca news message", {
        error: error instanceof Error ? error.message : String(error)
      });
      return;
    }

    if (!Array.isArray(payload)) {
      return;
    }

    for (const entry of payload) {
      if (!entry || typeof entry !== "object") {
        continue;
      }
      const message = entry as Record<string, unknown>;

      if (message.T === "success") {
        const msg = typeof message.msg === "string" ? message.msg : "";
        if (msg === "authenticated") {
          ws.send(JSON.stringify({ action: "subscribe", news: ["*"] }));
        }
        continue;
      }

      if (message.T === "error") {
        // Surface provider-side failures instead of dropping them silently.
        logger.error("alpaca news stream reported an error", {
          code: typeof message.code === "number" ? message.code : null,
          msg: typeof message.msg === "string" ? message.msg : ""
        });
        continue;
      }

      if (message.T === "subscription") {
        continue;
      }

      void publishStory(message as AlpacaNewsItem).catch((error) => {
        logger.error("failed to publish alpaca news story", {
          error: error instanceof Error ? error.message : String(error)
        });
      });
    }
  });

  // Without an "error" listener the emitter would throw the error as an
  // uncaught exception, bypassing the structured logger.
  ws.on("error", (error) => {
    logger.error("alpaca news websocket error", {
      error: error instanceof Error ? error.message : String(error)
    });
  });

  ws.on("close", (code, reason) => {
    if (shuttingDown) {
      return;
    }
    // There is no reconnect logic here, so a closed socket would leave the
    // process alive but idle. Fail fast instead.
    logger.error("alpaca news websocket closed unexpectedly", {
      code,
      reason: reason.toString()
    });
    void nc
      .drain()
      .catch(() => undefined)
      .finally(() => process.exit(1));
  });

  const shutdown = async (signal: string) => {
    if (shuttingDown) {
      // A second signal must not drain the connection twice.
      return;
    }
    shuttingDown = true;
    logger.info("shutting down", { signal });
    ws.close();
    await nc.drain();
    process.exit(0);
  };

  process.on("SIGINT", () => void shutdown("SIGINT"));
  process.on("SIGTERM", () => void shutdown("SIGTERM"));
};
|
||||
|
||||
void run().catch((error) => {
|
||||
logger.error("service crashed", {
|
||||
error: error instanceof Error ? error.message : String(error)
|
||||
});
|
||||
process.exit(1);
|
||||
});
|
||||
70
services/ingest-news/src/symbols.ts
Normal file
70
services/ingest-news/src/symbols.ts
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import type { NewsSymbolResolution } from "@islandflow/types";
|
||||
|
||||
const TICKER_ANCHOR_RE = />\s*([A-Z]{1,5})\s*<\/a>/g;
|
||||
const EXCHANGE_TICKER_RE = /\b(?:NASDAQ|NYSE|NYSEAMERICAN|AMEX|OTC|CBOE):([A-Z]{1,5})\b/g;
|
||||
const DOLLAR_TICKER_RE = /\$([A-Z]{1,5})\b/g;
|
||||
|
||||
/**
 * Trims and uppercases each entry, keeps only 1-5 letter A-Z tickers, and
 * removes duplicates while preserving first-seen order.
 */
const normalizeSymbols = (symbols: string[]): string[] => {
  const unique = new Set<string>();

  for (const raw of symbols) {
    const candidate = raw.trim().toUpperCase();
    // The pattern also rejects the empty string, so no separate emptiness check is needed.
    if (/^[A-Z]{1,5}$/.test(candidate)) {
      unique.add(candidate);
    }
  }

  // A Set iterates in insertion order, i.e. order of first occurrence.
  return [...unique];
};
|
||||
|
||||
/**
 * Runs `regex` repeatedly over `value` and returns the first capture group of
 * every match (an empty string when the group did not participate).
 *
 * The regex is rewound to index 0 first, so a shared global pattern can be
 * reused safely between calls.
 */
const collectMatches = (value: string, regex: RegExp): string[] => {
  regex.lastIndex = 0;
  const captured: string[] = [];
  for (let hit = regex.exec(value); hit !== null; hit = regex.exec(value)) {
    captured.push(hit[1] ?? "");
  }
  return captured;
};
|
||||
|
||||
/**
 * Combines provider-supplied tickers with tickers derived from the story HTML
 * (anchor text, `EXCHANGE:TICKER` and `$TICKER` patterns).
 *
 * `symbol_resolution` reports where the result came from:
 * - "provider": provider symbols only, nothing derived from the content
 * - "mixed": provider symbols plus at least one ticker found in the content
 * - "derived": no provider symbols, tickers found in the content
 * - "none": no symbols from either source
 *
 * @param providerSymbols - Symbols attached to the story by the provider.
 * @param contentHtml - Story body scanned for ticker patterns.
 */
export const resolveNewsSymbols = (
  providerSymbols: string[],
  contentHtml: string
): {
  provider_symbols: string[];
  resolved_symbols: string[];
  symbol_resolution: NewsSymbolResolution;
} => {
  const fromProvider = normalizeSymbols(providerSymbols);
  const fromContent = normalizeSymbols(
    [TICKER_ANCHOR_RE, EXCHANGE_TICKER_RE, DOLLAR_TICKER_RE].flatMap((pattern) =>
      collectMatches(contentHtml, pattern)
    )
  );
  const hasDerived = fromContent.length > 0;

  if (fromProvider.length === 0) {
    return {
      provider_symbols: [],
      resolved_symbols: hasDerived ? fromContent : [],
      symbol_resolution: hasDerived ? "derived" : "none"
    };
  }

  // Provider symbols come first; derived tickers are appended and de-duplicated.
  return {
    provider_symbols: fromProvider,
    resolved_symbols: normalizeSymbols([...fromProvider, ...fromContent]),
    symbol_resolution: hasDerived ? "mixed" : "provider"
  };
};
|
||||
30
services/ingest-news/tests/symbols.test.ts
Normal file
30
services/ingest-news/tests/symbols.test.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { describe, expect, it } from "bun:test";
|
||||
import { resolveNewsSymbols } from "../src/symbols";
|
||||
|
||||
// Covers the four resolution outcomes exercised here: provider-only, derived
// from anchors, derived from exchange/dollar patterns, and merged with de-duplication.
describe("resolveNewsSymbols", () => {
  it("prefers provider symbols when present", () => {
    const { provider_symbols, resolved_symbols, symbol_resolution } = resolveNewsSymbols(
      ["tsla", "aapl"],
      "<p>No extra tickers here.</p>"
    );
    expect(provider_symbols).toEqual(["TSLA", "AAPL"]);
    expect(resolved_symbols).toEqual(["TSLA", "AAPL"]);
    expect(symbol_resolution).toBe("provider");
  });

  it("falls back to ticker anchors", () => {
    const { resolved_symbols, symbol_resolution } = resolveNewsSymbols(
      [],
      '<a href="/quote/TSLA">TSLA</a>'
    );
    expect(resolved_symbols).toEqual(["TSLA"]);
    expect(symbol_resolution).toBe("derived");
  });

  it("falls back to exchange and dollar patterns", () => {
    const { resolved_symbols, symbol_resolution } = resolveNewsSymbols(
      [],
      "<p>NASDAQ:TSLA met with $IBM executives.</p>"
    );
    expect(resolved_symbols).toEqual(["TSLA", "IBM"]);
    expect(symbol_resolution).toBe("derived");
  });

  it("dedupes and uppercases merged symbols", () => {
    const { provider_symbols, resolved_symbols, symbol_resolution } = resolveNewsSymbols(
      ["tsla"],
      "<p>$TSLA and NASDAQ:TSLA</p>"
    );
    expect(provider_symbols).toEqual(["TSLA"]);
    expect(resolved_symbols).toEqual(["TSLA"]);
    expect(symbol_resolution).toBe("mixed");
  });
});
|
||||
7
services/ingest-news/tsconfig.json
Normal file
7
services/ingest-news/tsconfig.json
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"types": []
|
||||
},
|
||||
"include": ["src/**/*.ts", "tests/**/*.ts"]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue