add alpaca news wire across ingest api and web

This commit is contained in:
dirtydishes 2026-05-18 16:55:31 -04:00
parent 62aae70878
commit 906fe411c9
31 changed files with 1407 additions and 50 deletions

View file

@ -0,0 +1,16 @@
{
"name": "@islandflow/ingest-news",
"private": true,
"type": "module",
"scripts": {
"dev": "bun run src/index.ts"
},
"dependencies": {
"@islandflow/bus": "workspace:*",
"@islandflow/config": "workspace:*",
"@islandflow/observability": "workspace:*",
"@islandflow/types": "workspace:*",
"ws": "^8.18.3",
"zod": "^3.23.8"
}
}

View file

@ -0,0 +1,216 @@
import { readEnv } from "@islandflow/config";
import { createLogger } from "@islandflow/observability";
import {
SUBJECT_NEWS,
STREAM_NEWS,
connectJetStreamWithRetry,
ensureKnownStreams,
publishJson
} from "@islandflow/bus";
import { NewsStorySchema, type NewsStory } from "@islandflow/types";
import WebSocket from "ws";
import { z } from "zod";
import { resolveNewsSymbols } from "./symbols";
// Service identifier: passed to the logger below and used as the NATS
// connection name in `run`.
const service = "ingest-news";
const logger = createLogger({ service });
// Environment contract for this service. Every field carries a default, so
// schema validation alone never fails on a missing variable.
const envSchema = z.object({
NATS_URL: z.string().default("nats://127.0.0.1:4222"),
// Defaults to the empty string; `run` rejects an empty key at startup.
ALPACA_API_KEY: z.string().default(""),
ALPACA_REST_URL: z.string().default("https://data.alpaca.markets"),
ALPACA_WS_BASE_URL: z.string().default("wss://stream.data.alpaca.markets"),
// Coerced to a positive integer no greater than 200; defaults to 100.
ALPACA_NEWS_BACKFILL_LIMIT: z.coerce.number().int().positive().max(200).default(100),
// Path resolved against ALPACA_WS_BASE_URL to build the streaming URL.
ALPACA_NEWS_WEBSOCKET_PATH: z.string().default("/v1beta1/news")
});
// NOTE(review): presumably `readEnv` validates process.env against the schema
// at import time — confirm against @islandflow/config.
const env = readEnv(envSchema);
/**
 * Loose shape of a single news item as consumed by this service, for both the
 * REST backfill and the streaming feed. Every field is optional because the
 * payloads are cast rather than validated; `toStory` supplies fallbacks.
 */
type AlpacaNewsItem = {
// Converted with `Number(...)` in `toStory`; items without a usable id are dropped.
id?: number;
headline?: string;
summary?: string;
// Stored as `content_html` and scanned for ticker symbols.
content?: string;
author?: string;
// Timestamp strings parsed with `Date.parse`.
created_at?: string;
updated_at?: string;
url?: string;
symbols?: string[];
source?: string;
};
/** Envelope of the REST backfill response; only the `news` array is read. */
type AlpacaNewsResponse = {
news?: AlpacaNewsItem[];
};
/**
 * Builds the HTTP headers sent with both the REST backfill request and the
 * WebSocket handshake.
 *
 * @returns A header map carrying the configured API key as a bearer token.
 */
const buildHeaders = (): Record<string, string> => {
  const bearer = `Bearer ${env.ALPACA_API_KEY}`;
  return { Authorization: bearer };
};
/**
 * Converts a timestamp string to epoch milliseconds.
 *
 * @param value - Date string understood by `Date.parse`, or undefined.
 * @returns The parsed epoch milliseconds, or the current time when the value
 *   is missing, empty, or unparseable.
 */
const parseTimestamp = (value: string | undefined): number => {
  if (value) {
    const millis = Date.parse(value);
    if (Number.isFinite(millis)) {
      return millis;
    }
  }
  return Date.now();
};
/**
 * Maps a raw Alpaca news item onto the internal `NewsStory` shape.
 *
 * @param item - Raw item from the backfill response or the live stream.
 * @param seq - Sequence number to stamp on the story.
 * @returns The validated story, or `null` when the item has no usable
 *   (finite, non-negative) numeric id.
 * @throws Whatever `NewsStorySchema.parse` throws when the assembled object
 *   does not satisfy the schema.
 */
const toStory = (item: AlpacaNewsItem, seq: number): NewsStory | null => {
  const storyId = Number(item.id);
  const hasUsableId = Number.isFinite(storyId) && storyId >= 0;
  if (!hasUsableId) {
    return null;
  }

  // Trimmed text, or "" when the field is absent or blank.
  const clean = (text: string | undefined): string => text?.trim() || "";

  const provider = "alpaca";
  const contentHtml = item.content ?? "";
  const { provider_symbols, resolved_symbols, symbol_resolution } = resolveNewsSymbols(
    item.symbols ?? [],
    contentHtml
  );
  const publishedTs = parseTimestamp(item.created_at);
  // Falls back to the creation time when the item carries no update time.
  const updatedTs = parseTimestamp(item.updated_at ?? item.created_at);

  const candidate = {
    source_ts: publishedTs,
    ingest_ts: Date.now(),
    seq,
    trace_id: `${provider}:${storyId}`,
    story_id: storyId,
    provider,
    source: clean(item.source) || clean(item.author) || "Alpaca News",
    headline: clean(item.headline) || `Story ${storyId}`,
    summary: clean(item.summary),
    content_html: contentHtml,
    url: clean(item.url),
    published_ts: publishedTs,
    updated_ts: updatedTs,
    provider_symbols,
    resolved_symbols,
    symbol_resolution
  };
  return NewsStorySchema.parse(candidate);
};
/**
 * Fetches the most recent news items from the Alpaca REST API, newest first.
 *
 * The news endpoint caps a single page at 50 items, while the configured
 * backfill limit may be as high as 200. Requesting the full limit in one call
 * is rejected by the API, so the backfill is collected page by page using
 * `page_token` / `next_page_token` until the limit is reached or the API
 * reports no further pages.
 *
 * NOTE(review): the REST endpoint appears to omit article content unless
 * `include_content=true` is passed — confirm whether backfilled stories are
 * expected to carry `content`.
 *
 * @returns Up to `ALPACA_NEWS_BACKFILL_LIMIT` items in descending time order.
 * @throws Error when any page request returns a non-2xx status.
 */
const fetchBackfill = async (): Promise<AlpacaNewsItem[]> => {
  const maxPageSize = 50;
  const wanted = env.ALPACA_NEWS_BACKFILL_LIMIT;
  const collected: AlpacaNewsItem[] = [];
  let pageToken: string | undefined;

  while (collected.length < wanted) {
    const url = new URL("/v1beta1/news", env.ALPACA_REST_URL);
    url.searchParams.set("sort", "desc");
    url.searchParams.set("limit", Math.min(maxPageSize, wanted - collected.length).toString());
    if (pageToken) {
      url.searchParams.set("page_token", pageToken);
    }

    const response = await fetch(url.toString(), {
      headers: buildHeaders()
    });
    if (!response.ok) {
      throw new Error(`alpaca news backfill failed (${response.status})`);
    }

    const payload = (await response.json()) as AlpacaNewsResponse & {
      next_page_token?: string | null;
    };
    const page = Array.isArray(payload.news) ? payload.news : [];
    collected.push(...page);

    const nextToken = payload.next_page_token;
    pageToken = typeof nextToken === "string" && nextToken.length > 0 ? nextToken : undefined;
    // Stop when the API has nothing more, and never loop on an empty page.
    if (!pageToken || page.length === 0) {
      break;
    }
  }

  return collected.slice(0, wanted);
};
/**
 * Decodes a raw WebSocket frame into parsed JSON.
 *
 * Handles every form `ws` can deliver: a single `Buffer` (or other typed-array
 * view), an `ArrayBuffer`, or an array of `Buffer` fragments. Fragments are
 * concatenated before decoding so a multi-byte UTF-8 character split across
 * two fragments is still decoded correctly.
 *
 * @param data - Raw message payload from the `message` event.
 * @returns The parsed JSON value; the caller must narrow it.
 * @throws SyntaxError when the payload is not valid JSON.
 */
const decodePayload = (data: WebSocket.RawData): unknown => {
  if (typeof data === "string") {
    return JSON.parse(data) as unknown;
  }

  let bytes: Uint8Array;
  if (Array.isArray(data)) {
    // Fragmented message: join the pieces into one contiguous buffer.
    bytes = Buffer.concat(data);
  } else if (data instanceof ArrayBuffer) {
    bytes = new Uint8Array(data);
  } else {
    // Buffer / typed-array view: respect its window into the backing store.
    bytes = new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
  }
  return JSON.parse(new TextDecoder().decode(bytes)) as unknown;
};
/**
 * Service entry point.
 *
 * 1. Verifies an API key is configured and connects to NATS JetStream.
 * 2. Publishes the REST backfill oldest-first so sequence numbers follow
 *    publication order.
 * 3. Opens the Alpaca news WebSocket, authenticates, subscribes to all
 *    symbols, and publishes each incoming story.
 * 4. Installs SIGINT/SIGTERM handlers that close the socket and drain NATS.
 *
 * @throws Error when `ALPACA_API_KEY` is empty, or when connecting to NATS or
 *   fetching the backfill fails.
 */
const run = async () => {
  if (!env.ALPACA_API_KEY) {
    throw new Error("ALPACA_API_KEY is required for ingest-news.");
  }
  const { nc, js, jsm } = await connectJetStreamWithRetry(
    {
      servers: env.NATS_URL,
      name: service
    },
    { attempts: 120, delayMs: 500 }
  );
  await ensureKnownStreams(jsm, [STREAM_NEWS], { logger });

  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  let seq = 0;
  const publishStory = async (item: AlpacaNewsItem) => {
    // Only consume a sequence number once the item is known to be a story, so
    // skipped or malformed items do not leave gaps in the sequence.
    const story = toStory(item, seq + 1);
    if (!story) {
      return;
    }
    seq += 1;
    await publishJson(js, SUBJECT_NEWS, story);
  };

  const backfill = await fetchBackfill();
  // The backfill arrives newest-first; publish oldest-first.
  for (const item of backfill.reverse()) {
    try {
      await publishStory(item);
    } catch (error) {
      // Match the live path: one bad item must not abort the whole service.
      logger.error("failed to publish alpaca news story", {
        error: describeError(error)
      });
    }
  }

  const wsUrl = new URL(env.ALPACA_NEWS_WEBSOCKET_PATH, env.ALPACA_WS_BASE_URL).toString();
  const ws = new WebSocket(wsUrl, {
    headers: buildHeaders()
  });
  let shuttingDown = false;

  ws.on("open", () => {
    ws.send(
      JSON.stringify({
        action: "auth",
        key: env.ALPACA_API_KEY,
        secret: ""
      })
    );
  });

  // Without an "error" listener an emitted error becomes an uncaught exception.
  ws.on("error", (error) => {
    logger.error("alpaca news websocket error", {
      error: describeError(error)
    });
  });

  // NOTE(review): no reconnect is attempted here; after an unexpected close the
  // process stays up but receives no further stories.
  ws.on("close", (code, reason) => {
    if (shuttingDown) {
      return;
    }
    logger.warn("alpaca news websocket closed", {
      code,
      reason: reason.toString()
    });
  });

  ws.on("message", (raw) => {
    let payload: unknown;
    try {
      payload = decodePayload(raw);
    } catch (error) {
      logger.warn("failed to decode alpaca news message", {
        error: describeError(error)
      });
      return;
    }
    if (!Array.isArray(payload)) {
      return;
    }
    for (const entry of payload) {
      if (!entry || typeof entry !== "object") {
        continue;
      }
      const message = entry as Record<string, unknown>;
      if (message.T === "success") {
        const msg = typeof message.msg === "string" ? message.msg : "";
        if (msg === "authenticated") {
          ws.send(JSON.stringify({ action: "subscribe", news: ["*"] }));
        }
        continue;
      }
      if (message.T === "error") {
        // Surface stream-level failures (e.g. rejected auth) instead of
        // dropping them silently.
        logger.error("alpaca news stream error", {
          code: message.code,
          msg: message.msg
        });
        continue;
      }
      if (message.T === "subscription") {
        continue;
      }
      void publishStory(message as AlpacaNewsItem).catch((error) => {
        logger.error("failed to publish alpaca news story", {
          error: describeError(error)
        });
      });
    }
  });

  const shutdown = async (signal: string) => {
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;
    logger.info("shutting down", { signal });
    ws.close();
    try {
      await nc.drain();
    } catch (error) {
      logger.error("failed to drain nats connection", {
        error: describeError(error)
      });
    }
    process.exit(0);
  };
  process.on("SIGINT", () => void shutdown("SIGINT"));
  process.on("SIGTERM", () => void shutdown("SIGTERM"));
};
// Start the service; any startup failure is logged and ends the process with
// a non-zero exit code.
void run().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error);
  logger.error("service crashed", { error: reason });
  process.exit(1);
});

View file

@ -0,0 +1,70 @@
import type { NewsSymbolResolution } from "@islandflow/types";
// Heuristics for pulling ticker symbols out of article HTML. All three are
// global regexes with one capture group holding a 1-5 letter uppercase symbol.
// They are shared module-level objects, so callers must reset `lastIndex`
// before use (see `collectMatches`).

// Text of a link whose entire content is 1-5 uppercase letters,
// e.g. `<a href="/quote/TSLA">TSLA</a>`.
const TICKER_ANCHOR_RE = />\s*([A-Z]{1,5})\s*<\/a>/g;
// Exchange-qualified tickers such as `NASDAQ:TSLA`.
const EXCHANGE_TICKER_RE = /\b(?:NASDAQ|NYSE|NYSEAMERICAN|AMEX|OTC|CBOE):([A-Z]{1,5})\b/g;
// Cashtags such as `$IBM`.
const DOLLAR_TICKER_RE = /\$([A-Z]{1,5})\b/g;
/**
 * Canonicalizes a list of ticker symbols.
 *
 * Each entry is trimmed and uppercased; entries that are not 1-5 ASCII letters
 * are discarded, and duplicates are removed while keeping first-seen order.
 *
 * @param symbols - Raw symbols in any case, possibly padded or repeated.
 * @returns Unique, uppercase symbols in order of first appearance.
 */
const normalizeSymbols = (symbols: string[]): string[] => {
  const candidates = symbols
    .map((raw) => raw.trim().toUpperCase())
    .filter((ticker) => /^[A-Z]{1,5}$/.test(ticker));
  // A Set iterates in insertion order, so this dedupes without reordering.
  return Array.from(new Set(candidates));
};
/**
 * Collects the first capture group of every match of a global regex.
 *
 * @param value - Text to scan.
 * @param regex - Global regex with at least one capture group. Its `lastIndex`
 *   is reset first because the regex object may be shared between calls.
 * @returns Captured strings in match order; a group that did not participate
 *   in a match contributes an empty string.
 */
const collectMatches = (value: string, regex: RegExp): string[] => {
  const captured: string[] = [];
  regex.lastIndex = 0;
  for (let hit = regex.exec(value); hit !== null; hit = regex.exec(value)) {
    captured.push(hit[1] ?? "");
  }
  return captured;
};
/**
 * Determines which ticker symbols a news story refers to.
 *
 * Provider-supplied symbols are normalized, and further symbols are derived
 * from the article HTML (ticker links, exchange-qualified tickers, cashtags).
 *
 * @param providerSymbols - Symbols attached to the story by the provider.
 * @param contentHtml - Article body scanned for additional tickers.
 * @returns The normalized provider symbols, the final resolved list (provider
 *   symbols first, then derived ones, deduplicated), and how it was obtained:
 *   - `"provider"`: provider symbols only, nothing found in the content
 *   - `"mixed"`: provider symbols present and the content yielded symbols too
 *   - `"derived"`: no provider symbols, content yielded symbols
 *   - `"none"`: no symbols from either source
 */
export const resolveNewsSymbols = (
  providerSymbols: string[],
  contentHtml: string
): {
  provider_symbols: string[];
  resolved_symbols: string[];
  symbol_resolution: NewsSymbolResolution;
} => {
  const fromProvider = normalizeSymbols(providerSymbols);
  const fromContent = normalizeSymbols(
    [TICKER_ANCHOR_RE, EXCHANGE_TICKER_RE, DOLLAR_TICKER_RE].flatMap((pattern) =>
      collectMatches(contentHtml, pattern)
    )
  );
  const contentHasSymbols = fromContent.length > 0;

  if (fromProvider.length === 0) {
    // Nothing from the provider: rely on the content alone, if it has any.
    return contentHasSymbols
      ? { provider_symbols: [], resolved_symbols: fromContent, symbol_resolution: "derived" }
      : { provider_symbols: [], resolved_symbols: [], symbol_resolution: "none" };
  }

  return {
    provider_symbols: fromProvider,
    resolved_symbols: normalizeSymbols([...fromProvider, ...fromContent]),
    symbol_resolution: contentHasSymbols ? "mixed" : "provider"
  };
};

View file

@ -0,0 +1,30 @@
import { describe, expect, it } from "bun:test";
import { resolveNewsSymbols } from "../src/symbols";
// Covers each resolution mode of resolveNewsSymbols: provider-only, derived
// from content (links, exchange prefixes, cashtags), and mixed with dedupe.
describe("resolveNewsSymbols", () => {
  it("prefers provider symbols when present", () => {
    const { provider_symbols, resolved_symbols, symbol_resolution } = resolveNewsSymbols(
      ["tsla", "aapl"],
      "<p>No extra tickers here.</p>"
    );
    expect(provider_symbols).toEqual(["TSLA", "AAPL"]);
    expect(resolved_symbols).toEqual(["TSLA", "AAPL"]);
    expect(symbol_resolution).toBe("provider");
  });

  it("falls back to ticker anchors", () => {
    const html = '<a href="/quote/TSLA">TSLA</a>';
    const { resolved_symbols, symbol_resolution } = resolveNewsSymbols([], html);
    expect(resolved_symbols).toEqual(["TSLA"]);
    expect(symbol_resolution).toBe("derived");
  });

  it("falls back to exchange and dollar patterns", () => {
    const html = "<p>NASDAQ:TSLA met with $IBM executives.</p>";
    const { resolved_symbols, symbol_resolution } = resolveNewsSymbols([], html);
    expect(resolved_symbols).toEqual(["TSLA", "IBM"]);
    expect(symbol_resolution).toBe("derived");
  });

  it("dedupes and uppercases merged symbols", () => {
    const { provider_symbols, resolved_symbols, symbol_resolution } = resolveNewsSymbols(
      ["tsla"],
      "<p>$TSLA and NASDAQ:TSLA</p>"
    );
    expect(provider_symbols).toEqual(["TSLA"]);
    expect(resolved_symbols).toEqual(["TSLA"]);
    expect(symbol_resolution).toBe("mixed");
  });
});

View file

@ -0,0 +1,7 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"types": []
},
"include": ["src/**/*.ts", "tests/**/*.ts"]
}