From 93b9152345bda8fcd9d055d1927cb4d834f25c20 Mon Sep 17 00:00:00 2001 From: dirtydishes Date: Tue, 19 May 2026 20:02:35 -0400 Subject: [PATCH] persist news stories and request article content --- README.md | 2 +- services/api/src/index.ts | 4 +++- services/ingest-news/src/index.ts | 16 +++++++++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9456d1b..6b3b7fc 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,7 @@ All runtime configuration comes from `.env`. | `ALPACA_MONEYNESS_FALLBACK_PCT` | `0.1` | Wider fallback moneyness filter if candidate set is too sparse. | | `ALPACA_MAX_QUOTES` | `200` | Upper bound on selected Alpaca options contracts/quotes per cycle. | | `ALPACA_EQUITIES_FEED` | `iex` | Alpaca equities feed: `iex` or `sip`. | -| `ALPACA_NEWS_BACKFILL_LIMIT` | `100` | Alpaca news stories fetched on startup, capped at 200. | +| `ALPACA_NEWS_BACKFILL_LIMIT` | `50` | Alpaca news stories fetched on startup, capped at 50 by the Alpaca News API. | | `ALPACA_NEWS_WEBSOCKET_PATH` | `/v1beta1/news` | Alpaca news websocket path. 
| ### Databento replay adapter configuration diff --git a/services/api/src/index.ts b/services/api/src/index.ts index f481626..562fb6b 100644 --- a/services/api/src/index.ts +++ b/services/api/src/index.ts @@ -92,7 +92,8 @@ import { fetchNearestOptionNBBOForPrints, fetchSmartMoneyEventsByPacketIds, fetchClassifierHitsByPacketIds, - fetchRecentOptionPrints + fetchRecentOptionPrints, + insertNewsStory } from "@islandflow/storage"; import type { EquityPrintQueryFilters } from "@islandflow/storage"; import { @@ -1277,6 +1278,7 @@ const run = async () => { for await (const msg of newsSubscription.messages) { try { const payload = NewsStorySchema.parse(newsSubscription.decode(msg)); + await insertNewsStory(clickhouse, payload); await fanoutLive({ channel: "news" }, payload, "news"); msg.ack(); } catch (error) { diff --git a/services/ingest-news/src/index.ts b/services/ingest-news/src/index.ts index c73cfe0..95cca42 100644 --- a/services/ingest-news/src/index.ts +++ b/services/ingest-news/src/index.ts @@ -30,13 +30,21 @@ const envSchema = z.object({ ALPACA_SECRET_KEY: z.string().default(""), ALPACA_REST_URL: z.string().default("https://data.alpaca.markets"), ALPACA_WS_BASE_URL: z.string().default("wss://stream.data.alpaca.markets"), - ALPACA_NEWS_BACKFILL_LIMIT: z.coerce.number().int().positive().max(200).default(100), + ALPACA_NEWS_BACKFILL_LIMIT: z.coerce.number().int().positive().max(50).default(50), ALPACA_NEWS_WEBSOCKET_PATH: z.string().default("/v1beta1/news") }); const env = readEnv(envSchema); const alpacaCredentials = resolveAlpacaCredentials(env); +const escapeHtml = (value: string): string => + value + .replaceAll("&", "&amp;") + .replaceAll("<", "&lt;") + .replaceAll(">", "&gt;") + .replaceAll('"', "&quot;") + .replaceAll("'", "&#39;"); + type AlpacaNewsItem = { id?: number; headline?: string; @@ -66,7 +74,8 @@ const toStory = (item: AlpacaNewsItem, seq: number): NewsStory | null => { } const provider = "alpaca"; - const contentHtml = item.content ?? 
""; + const summary = item.summary?.trim() ?? ""; + const contentHtml = item.content?.trim() || (summary ? `

<p>${escapeHtml(summary)}</p>

` : ""); const symbols = resolveNewsSymbols(item.symbols ?? [], contentHtml); const publishedTs = parseTimestamp(item.created_at); const updatedTs = parseTimestamp(item.updated_at ?? item.created_at); @@ -80,7 +89,7 @@ const toStory = (item: AlpacaNewsItem, seq: number): NewsStory | null => { provider, source: item.source?.trim() || item.author?.trim() || "Alpaca News", headline: item.headline?.trim() || `Story ${storyId}`, - summary: item.summary?.trim() || "", + summary, content_html: contentHtml, url: item.url?.trim() || "", published_ts: publishedTs, @@ -95,6 +104,7 @@ const fetchBackfill = async (): Promise => { const url = new URL("/v1beta1/news", env.ALPACA_REST_URL); url.searchParams.set("sort", "desc"); url.searchParams.set("limit", env.ALPACA_NEWS_BACKFILL_LIMIT.toString()); + url.searchParams.set("include_content", "true"); const response = await fetch(url.toString(), { headers: buildAlpacaAuthHeaders(alpacaCredentials)