Implement native public edge cutover
This commit is contained in:
parent
d589858c03
commit
bdb9d9a95a
29 changed files with 1215 additions and 31 deletions
|
|
@ -1,4 +1,4 @@
|
|||
{"_type":"issue","id":"islandflow-9rc","title":"Implement native fast iterative deploy plan","description":"Implement the checked-in plan at plans/2026-05-18-native-fast-iterative-deploy-plan.md. Cover deploy-phase timing instrumentation, native deployment operational assets, deploy guardrails, validation/cutover documentation, and any required live VPS remediation that is safely actionable from this session. Track follow-up items separately if anything cannot be completed in-repo or on the live host.","status":"in_progress","priority":1,"issue_type":"feature","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-18T07:15:19Z","created_by":"dirtydishes","updated_at":"2026-05-18T07:15:25Z","started_at":"2026-05-18T07:15:25Z","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-9rc","title":"Implement native fast iterative deploy plan","description":"Implement the checked-in plan at plans/2026-05-18-native-fast-iterative-deploy-plan.md. Cover deploy-phase timing instrumentation, native deployment operational assets, deploy guardrails, validation/cutover documentation, and any required live VPS remediation that is safely actionable from this session. Track follow-up items separately if anything cannot be completed in-repo or on the live host.","status":"closed","priority":1,"issue_type":"feature","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-18T07:15:19Z","created_by":"dirtydishes","updated_at":"2026-05-18T07:34:03Z","started_at":"2026-05-18T07:15:25Z","closed_at":"2026-05-18T07:34:03Z","close_reason":"Implemented the native fast iterative deploy plan with deploy timing summaries, worker-only native fast mode, edge-cutover guardrails, local-on-server execution support, checked-in native ops assets, live audit findings, and turn documentation. Remaining cutover work is tracked in islandflow-vvw.","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-8kj","title":"Configure persistent beads Dolt remote on deltaisland server","description":"Install the beads and Dolt CLIs on the server, configure a persistent Dolt sync remote backed by the server-hosted Forgejo repository, verify refs/dolt/data publication, and document Nginx Proxy Manager / firewall considerations.","status":"closed","priority":1,"issue_type":"task","assignee":"delta","created_at":"2026-05-17T10:31:31Z","created_by":"delta","updated_at":"2026-05-17T10:37:47Z","started_at":"2026-05-17T10:32:16Z","closed_at":"2026-05-17T10:37:47Z","close_reason":"Installed bd and dolt on the server, configured the Forgejo-backed Dolt remote, published refs/dolt/data, and documented the setup.","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-200","title":"Implement durable options tape history","description":"Implement the plan from docs/plans/2026-05-16-1711-durable-options-tape-history.html: durable ClickHouse-backed options history, signal/all prints view selection, preserved execution context, stale semantics limited to live health, reset runbook, tests, and turn documentation.","status":"closed","priority":1,"issue_type":"feature","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-16T21:21:30Z","created_by":"dirtydishes","updated_at":"2026-05-16T21:26:51Z","started_at":"2026-05-16T21:21:33Z","closed_at":"2026-05-16T21:26:51Z","close_reason":"Implemented durable options tape history, signal/raw view selection, reset runbook, tests, and turn documentation.","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-k4f","title":"Gate deploy script on docker workspace snapshot sync","description":"Prevent frozen-lockfile build failures during deploy by adding a local preflight in scripts/deploy.ts that runs bun run check:docker-workspace and aborts with a clear sync+commit remediation message when stale.","status":"closed","priority":1,"issue_type":"task","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-15T23:01:44Z","created_by":"dirtydishes","updated_at":"2026-05-15T23:04:11Z","started_at":"2026-05-15T23:01:48Z","closed_at":"2026-05-15T23:04:11Z","close_reason":"Closed","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
|
|
@ -13,7 +13,8 @@
|
|||
{"_type":"issue","id":"islandflow-ayo","title":"Drop stale backlog events from live fanout","description":"Follow-up to live freshness rollout: /ws/live was still fanning out stale backlog events for freshness-gated channels, which kept tape panes in Live feed behind despite active synthetic ingest. Gate fanout and cache ingest by freshness for options/nbbo/equities/flow.","status":"closed","priority":1,"issue_type":"bug","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-04-28T21:26:39Z","created_by":"dirtydishes","updated_at":"2026-04-28T21:26:44Z","started_at":"2026-04-28T21:26:44Z","closed_at":"2026-04-28T21:26:44Z","close_reason":"Completed","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-0v6","title":"Fix tape freshness, NBBO coverage, pause controls, and filter popup","description":"Implement the tape fixes requested for synthetic options notional sizing, strict live freshness, live-mode pause/resume behavior, stronger NBBO snapshot coverage, and moving flow filters behind a popup. Includes server-side live cache changes, web terminal state/UI changes, and tests for synthetic pricing, live snapshot freshness/NBBO retention, and live pause/filter interactions.","status":"closed","priority":1,"issue_type":"task","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-04-28T21:02:52Z","created_by":"dirtydishes","updated_at":"2026-04-28T21:13:38Z","started_at":"2026-04-28T21:02:57Z","closed_at":"2026-04-28T21:13:38Z","close_reason":"Completed","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-e4r","title":"Implement smart-money flow filtering and synthetic firehose modes","description":"Implement the approved multi-surface plan for named synthetic market profiles, options raw-vs-signal filtering, live/API filter contracts, Tape page client-side flow filters, firehose-readiness improvements, tests, and README updates.","status":"closed","priority":1,"issue_type":"feature","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-04-28T20:10:49Z","created_by":"dirtydishes","updated_at":"2026-04-28T20:29:29Z","started_at":"2026-04-28T20:10:53Z","closed_at":"2026-04-28T20:29:29Z","close_reason":"Implemented synthetic market profiles, options signal-path filtering, signal-aware API/replay contracts, Tape page filters, tests, and README updates. Follow-up tracked in islandflow-biq.","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-vvw","title":"Stage native public-edge cutover after worker soak","description":"Why this issue exists and what needs to be done:\\n- The native deploy path is now provisioned for worker-first iteration, with checked-in user units, rollback helpers, and edge guardrails\\n- Remaining work is to enable and soak native worker units, validate duplicate-processing behavior, then deliberately cut over the public web/api edge if warranted\\n- Final acceptance should include deciding whether Docker or native becomes the default runtime after operational evidence","status":"open","priority":2,"issue_type":"task","owner":"dishes@dpdrm.com","created_at":"2026-05-18T07:32:35Z","created_by":"dirtydishes","updated_at":"2026-05-18T07:32:35Z","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-fl5","title":"Decide final public posture for api.flow.deltaisland.io after native cutover","description":"Why this issue exists and what needs to be done:\\n- Native cutover now works end-to-end through Nginx Proxy Manager and the public API hostname now resolves directly to the VPS\\n- The API hostname was left DNS-only in Cloudflare during incident resolution, while the web hostname still uses the Cloudflare proxy\\n- We need to decide whether api.flow.deltaisland.io should remain direct-to-origin or be re-proxied through Cloudflare, then validate TLS, websocket, and operational behavior for the chosen posture","status":"open","priority":2,"issue_type":"task","owner":"dishes@dpdrm.com","created_at":"2026-05-18T23:51:21Z","created_by":"dirtydishes","updated_at":"2026-05-18T23:51:21Z","dependencies":[{"issue_id":"islandflow-fl5","depends_on_id":"islandflow-vvw","type":"discovered-from","created_at":"2026-05-18T19:52:32Z","created_by":"dirtydishes","metadata":"{}"}],"dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-vvw","title":"Stage native public-edge cutover after worker soak","description":"Why this issue exists and what needs to be done:\\n- The native deploy path is now provisioned for worker-first iteration, with checked-in user units, rollback helpers, and edge guardrails\\n- Remaining work is to enable and soak native worker units, validate duplicate-processing behavior, then deliberately cut over the public web/api edge if warranted\\n- Final acceptance should include deciding whether Docker or native becomes the default runtime after operational evidence","notes":"2026-05-18: native infra, native app services, NPM public-edge retargeting, Docker rollback helpers, and Cloudflare/DNS API hostname recovery were implemented and verified. Public checks now pass for flow.deltaisland.io and api.flow.deltaisland.io. Remaining follow-up: decide whether api.flow.deltaisland.io should remain DNS-only or be re-proxied through Cloudflare under islandflow-fl5.","status":"in_progress","priority":2,"issue_type":"task","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-18T07:32:35Z","created_by":"dirtydishes","updated_at":"2026-05-18T23:52:32Z","started_at":"2026-05-18T23:51:20Z","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-bsg","title":"Fix public /replay/options proxy regression","description":"Restore correct public routing for GET /replay/options on flow.deltaisland.io. The app currently serves HTML for that API path, which indicates edge/proxy routing drift. Update the live proxy topology or deployment assets as needed, then validate with bun run scripts/check-public-api-routes.ts.","status":"closed","priority":2,"issue_type":"bug","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-18T07:15:19Z","created_by":"dirtydishes","updated_at":"2026-05-18T07:32:51Z","started_at":"2026-05-18T07:15:24Z","closed_at":"2026-05-18T07:32:51Z","close_reason":"Audited the live VPS and reverse proxy on 2026-05-18: public /replay/options now returns JSON, bun run scripts/check-public-api-routes.ts passes, and the active Nginx Proxy Manager config includes /replay in the API route matcher. No in-repo app code change was required.","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-9j5","title":"Prepare PR for deploy allowlist cleanup","description":"Why this issue exists and what needs to be done:\\n- Package current deploy allowlist cleanup into a reviewable PR with multiple commits\\n- Add required turn documentation in docs/turns\\n- Run validation and push all artifacts","status":"closed","priority":2,"issue_type":"task","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-17T15:44:12Z","created_by":"dirtydishes","updated_at":"2026-05-17T15:53:55Z","started_at":"2026-05-17T15:44:22Z","closed_at":"2026-05-17T15:53:55Z","close_reason":"Packaged deploy allowlist cleanup into multi-commit PR branch with required turn documentation and push workflow.","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
{"_type":"issue","id":"islandflow-0sa","title":"Fix live tape auto-hold, history seam, and remove manual pause control","description":"The live tape should automatically hold when the user scrolls away from the top, resume when they return to the top or use Jump to top, and keep older prints available seamlessly beyond the hot window. Manual Pause/Resume control is now redundant and should be removed from live tape panes. This work should also fix the current regression where paused/held tapes still mutate, and align the options tape with a strict 100-row hot head backed by ClickHouse history.","notes":"Implemented live scroll-hold with no live pause button, demand-loaded ClickHouse history, a 100-row options hot head, and cache-first scoped snapshots. Validated with bun test apps/web/app/terminal.test.ts services/api/tests/live.test.ts and bun --cwd=apps/web run build.","status":"closed","priority":2,"issue_type":"bug","assignee":"dirtydishes","owner":"dishes@dpdrm.com","created_at":"2026-05-16T18:12:51Z","created_by":"dirtydishes","updated_at":"2026-05-16T18:23:43Z","started_at":"2026-05-16T18:12:54Z","closed_at":"2026-05-16T18:23:43Z","close_reason":"Closed","dependency_count":0,"dependent_count":0,"comment_count":0}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
"scripts": {
|
||||
"dev": "bun run scripts/dev.ts",
|
||||
"build": "next build",
|
||||
"start": "next start -p 3000"
|
||||
"start": "next start"
|
||||
},
|
||||
"dependencies": {
|
||||
"@islandflow/types": "workspace:*",
|
||||
|
|
|
|||
23
deployment/docker/.dockerignore
Normal file
23
deployment/docker/.dockerignore
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Build-context exclusions for the Islandflow Docker images.
#
# .dockerignore patterns are matched relative to the build-context root, so a
# bare name such as `node_modules` only excludes the top-level entry. Entries
# that can appear at any depth in the workspace use a `**/` prefix, which also
# matches at the root.
#
# NOTE(review): Docker reads `.dockerignore` from the build-context root, or
# `<Dockerfile-name>.dockerignore` next to the Dockerfile. Confirm this file
# sits at one of those locations for the context the deploy build uses.
.git
.github
**/.DS_Store
.bun
.tmp
**/node_modules
dist
coverage
logs
apps/web/.next

# Never ship local secrets, wherever they live in the workspace.
**/.env
**/.env.*

session-ses_*.md
token-usage-output.txt
signal-cli-*.tar.gz
*.tar
*.tar.gz
*.tgz
*.zip
**/__pycache__
**/.pytest_cache

# Re-include the checked-in env templates. These must stay after the
# `.env.*` exclusions because the last matching pattern wins.
!.env.example
!**/.env.example
|
||||
|
|
@ -4,8 +4,10 @@ NATS_URL=nats://nats:4222
|
|||
CLICKHOUSE_URL=http://clickhouse:8123
|
||||
CLICKHOUSE_DATABASE=default
|
||||
REDIS_URL=redis://redis:6379
|
||||
ISLANDFLOW_DATA_ROOT=/var/lib/islandflow
|
||||
|
||||
API_PORT=4000
|
||||
API_HOST=0.0.0.0
|
||||
API_BIND_IP=127.0.0.1
|
||||
API_HOST_PORT=4000
|
||||
WEB_BIND_IP=127.0.0.1
|
||||
|
|
|
|||
|
|
@ -59,4 +59,4 @@ COPY --from=build /app/packages ./packages
|
|||
|
||||
EXPOSE 3000
|
||||
|
||||
CMD ["bun", "run", "--cwd", "apps/web", "start"]
|
||||
CMD ["bun", "run", "--cwd", "apps/web", "start", "--", "-H", "0.0.0.0", "-p", "3000"]
|
||||
|
|
|
|||
|
|
@ -2,12 +2,12 @@
|
|||
|
||||
This directory contains the Docker runtime for Islandflow VPS deployments.
|
||||
|
||||
Docker remains the default and recommended server rollout path, but the repo-root `deploy` helper can now target either:
|
||||
Docker remains the default rollout path before native cutover and the rollback path after cutover. The repo-root `deploy` helper can target either:
|
||||
|
||||
- `--runtime docker` for this Docker Compose stack
|
||||
- `--runtime native` for an experimental host-native Bun + systemd rollout described in `deployment/native/README.md`
|
||||
- `--runtime native` for the host-native Bun + systemd rollout described in `deployment/native/README.md`
|
||||
|
||||
The repo no longer ships or supports a separate `deployment/npm` stack. If you want a reverse proxy, point it at the host ports published by this stack.
|
||||
The public VPS edge remains Nginx Proxy Manager. Docker fallback can be reached either through the shared Docker network service names or the host ports published by this stack.
|
||||
|
||||
It is separate from the repo-root `docker-compose.yml`, which remains the lightweight local infra stack for development.
|
||||
|
||||
|
|
@ -17,7 +17,7 @@ Do not run the repo-root `docker-compose.yml` on the VPS. On the live server tha
|
|||
|
||||
- Builds and runs the full Islandflow stack with Docker Compose.
|
||||
- Publishes `web` and `api` to host ports, bound to loopback by default.
|
||||
- Runs ClickHouse, Redis, and NATS JetStream with persistent Docker volumes.
|
||||
- Runs ClickHouse, Redis, and NATS JetStream with persistent host data under `ISLANDFLOW_DATA_ROOT`.
|
||||
- Runs the core runtime services: `ingest-options`, `ingest-equities`, `compute`, `candles`, `api`, and `web`.
|
||||
- Keeps `replay` opt-in through a Compose profile, because the current replay service starts immediately when the container is enabled.
|
||||
|
||||
|
|
@ -56,6 +56,7 @@ cp .env.example .env
|
|||
Important defaults:
|
||||
|
||||
- `NATS_URL`, `CLICKHOUSE_URL`, and `REDIS_URL` should stay on the internal container hostnames unless you intentionally split infra out.
|
||||
- `ISLANDFLOW_DATA_ROOT=/var/lib/islandflow` matches the native infra data root used by the VPS cutover helpers.
|
||||
- `OPTIONS_INGEST_ADAPTER=synthetic` and `EQUITIES_INGEST_ADAPTER=synthetic` are the safest first-boot settings.
|
||||
- `WEB_BIND_IP=127.0.0.1` and `API_BIND_IP=127.0.0.1` keep the published ports local to the host by default.
|
||||
- `WEB_HOST_PORT=3000` and `API_HOST_PORT=4000` control the host-side published ports.
|
||||
|
|
@ -213,7 +214,7 @@ BuildKit cache mounts require a modern Docker Engine with Dockerfile frontend su
|
|||
|
||||
## Safe rollouts on `152.53.80.229`
|
||||
|
||||
The current live VPS uses Nginx Proxy Manager on the shared Docker network and routes public traffic to the Docker `web` and `api` containers by container name. Because of that, this Docker path remains the operationally correct default for the live server today.
|
||||
The current live VPS uses Nginx Proxy Manager as the outer edge. Before native cutover, NPM routes Islandflow traffic to Docker service names. During cutover, `deployment/native/switch-npm-edge.sh native` retargets only the Islandflow proxy hosts to the NPM bridge gateway IP so NPM can reach native host ports. If needed, override the detected target with `ISLANDFLOW_NATIVE_HOST=<host-ip>`.
|
||||
|
||||
The deploy helper also warns if it detects a second compose project named `islandflow` on the server, because that usually means the repo-root local-infra stack was started on the VPS by mistake.
|
||||
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@ services:
|
|||
init: true
|
||||
expose:
|
||||
- "3000"
|
||||
ports:
|
||||
- "${WEB_BIND_IP:-127.0.0.1}:${WEB_HOST_PORT:-3000}:3000"
|
||||
networks:
|
||||
- default
|
||||
- shared
|
||||
|
|
@ -64,8 +66,13 @@ services:
|
|||
api:
|
||||
<<: *service-common
|
||||
command: ["services/api/src/index.ts"]
|
||||
environment:
|
||||
LOG_LEVEL: ${LOG_LEVEL:-warn}
|
||||
API_HOST: 0.0.0.0
|
||||
expose:
|
||||
- "4000"
|
||||
ports:
|
||||
- "${API_BIND_IP:-127.0.0.1}:${API_HOST_PORT:-4000}:4000"
|
||||
networks:
|
||||
- default
|
||||
- shared
|
||||
|
|
@ -128,7 +135,7 @@ services:
|
|||
soft: 262144
|
||||
hard: 262144
|
||||
volumes:
|
||||
- clickhouse-data:/var/lib/clickhouse
|
||||
- ${ISLANDFLOW_DATA_ROOT:-/var/lib/islandflow}/clickhouse:/var/lib/clickhouse
|
||||
- ./clickhouse/listen.xml:/etc/clickhouse-server/config.d/listen.xml:ro
|
||||
healthcheck:
|
||||
test:
|
||||
|
|
@ -146,7 +153,7 @@ services:
|
|||
restart: unless-stopped
|
||||
command: ["redis-server", "--appendonly", "yes"]
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
- ${ISLANDFLOW_DATA_ROOT:-/var/lib/islandflow}/redis:/data
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
|
|
@ -164,14 +171,9 @@ services:
|
|||
restart: unless-stopped
|
||||
command: ["-js", "-sd", "/data"]
|
||||
volumes:
|
||||
- nats-data:/data
|
||||
- ${ISLANDFLOW_DATA_ROOT:-/var/lib/islandflow}/nats:/data
|
||||
|
||||
networks:
|
||||
shared:
|
||||
external: true
|
||||
name: ${NPM_SHARED_NETWORK:-npm-shared}
|
||||
|
||||
volumes:
|
||||
clickhouse-data:
|
||||
redis-data:
|
||||
nats-data:
|
||||
|
|
|
|||
|
|
@ -9,12 +9,14 @@ This directory documents the host-native Islandflow rollout path used by:
|
|||
|
||||
## Current operating model
|
||||
|
||||
Native runtime is now intended for **fast iterative backend deploys first**, while Docker remains the supported public production edge until a deliberate cutover is completed.
|
||||
Native runtime is now intended for a phased VPS cutover. Docker remains the supported rollback runtime, but Docker and native app services must not own the same Islandflow scope at the same time because the workers and API use durable JetStream consumers.
|
||||
|
||||
Today, the recommended split is:
|
||||
|
||||
- **Docker runtime** for the live public `web` + `api` path
|
||||
- **Native runtime** for worker-only iteration (`compute`, `candles`, `ingest-options`, `ingest-equities`)
|
||||
- **Nginx Proxy Manager** remains the public `:80/:443` edge
|
||||
- **Native system services** own NATS, Redis, and ClickHouse after infra cutover
|
||||
- **Native user services** own `web`, `api`, and workers after app cutover
|
||||
- **Docker Compose** remains available as the rollback runtime
|
||||
- local development stays:
|
||||
- Docker infra: `bun run dev:infra`
|
||||
- native backend services: `bun run dev:services`
|
||||
|
|
@ -47,6 +49,38 @@ That means native worker deploy support is now provisioned on the host, but nati
|
|||
|
||||
## Checked-in native ops assets
|
||||
|
||||
### Infra system units
|
||||
|
||||
Checked-in system service units and config live under:
|
||||
|
||||
- `deployment/native/systemd/system/islandflow-nats.service`
|
||||
- `deployment/native/systemd/system/islandflow-redis.service`
|
||||
- `deployment/native/systemd/system/islandflow-clickhouse.service`
|
||||
- `deployment/native/config/redis.conf`
|
||||
- `deployment/native/config/clickhouse-listen.xml`
|
||||
|
||||
Install and start them on the VPS with:
|
||||
|
||||
```bash
|
||||
./deployment/native/bootstrap-infra.sh
|
||||
```
|
||||
|
||||
Or install and start manually:
|
||||
|
||||
```bash
|
||||
sudo ./deployment/native/install-infra-units.sh
|
||||
sudo ./deployment/native/start-infra.sh
|
||||
./deployment/native/check-native-infra.sh
|
||||
```
|
||||
|
||||
The native infra services bind to loopback and use stable host data paths:
|
||||
|
||||
- NATS JetStream: `/var/lib/islandflow/nats`
|
||||
- Redis: `/var/lib/islandflow/redis`
|
||||
- ClickHouse: `/var/lib/islandflow/clickhouse`
|
||||
|
||||
The Docker fallback compose file uses the same `ISLANDFLOW_DATA_ROOT` default of `/var/lib/islandflow`, so rollback can preserve durable state when only one runtime is active.
|
||||
|
||||
### User unit templates
|
||||
|
||||
Checked-in unit files live under:
|
||||
|
|
@ -89,10 +123,29 @@ Install script behavior:
|
|||
|
||||
This validates:
|
||||
|
||||
- native infra health for `full`, `api`, `services`, and `workers`
|
||||
- `systemctl --user is-active` for the selected units
|
||||
- local API health at `http://127.0.0.1:4000/health` when API scope is included
|
||||
- local web health at `http://127.0.0.1:3000/` when web scope is included
|
||||
|
||||
### App cutover and edge switch helpers
|
||||
|
||||
```bash
|
||||
./deployment/native/cutover.sh full
|
||||
./deployment/native/switch-npm-edge.sh native
|
||||
./deployment/native/full-rollback.sh
|
||||
```
|
||||
|
||||
The edge switch helper updates the Nginx Proxy Manager database entries for `flow.deltaisland.io` and `api.flow.deltaisland.io`, preserving the same-origin Islandflow API location matcher:
|
||||
|
||||
```nginx
|
||||
^/(ws|replay|prints|joins|nbbo|dark|flow|candles|history)/
|
||||
```
|
||||
|
||||
For native cutover, the helper targets the NPM bridge gateway IP by default, not `host.docker.internal`. NPM generates `proxy_pass` with a runtime-resolved `$server` variable, so Docker's `/etc/hosts` alias is not sufficient for these proxy hosts. On the current VPS that native target resolves to `172.18.0.1`, which reaches the host-native `3000` and `4000` listeners from the NPM container.
|
||||
|
||||
Switching back to Docker restores upstreams to the Compose service names `web:3000` and `api:4000`.
|
||||
|
||||
### Rollback helper
|
||||
|
||||
```bash
|
||||
|
|
@ -184,7 +237,7 @@ Without that variable, these commands are refused:
|
|||
- `./deploy main --runtime native --api-only`
|
||||
- `./deploy main --runtime native --services-only`
|
||||
|
||||
This keeps the native path focused on safe worker iteration until proxy routing and public unit ownership are switched deliberately.
|
||||
This keeps native app ownership explicit until infra, app health, and proxy routing are switched deliberately.
|
||||
|
||||
## Running deploy from the VPS itself
|
||||
|
||||
|
|
|
|||
24
deployment/native/bootstrap-infra.sh
Executable file
24
deployment/native/bootstrap-infra.sh
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env bash
# Bootstraps native infra on the host: installs the infra units, stops the
# Docker Islandflow services, starts native infra, then runs the infra check.
set -euo pipefail

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
native_dir="$repo_root/deployment/native"

# Runs the given command directly when already root, otherwise through sudo.
as_root() {
  if [[ "${EUID}" -eq 0 ]]; then
    "$@"
  else
    sudo "$@"
  fi
}

as_root "$native_dir/install-infra-units.sh"

echo "Stopping Docker Islandflow services before native infra opens durable data."
docker_services=(
  web api compute candles ingest-options ingest-equities
  nats redis clickhouse
)
# Subshell keeps the directory change local to the compose invocation.
(cd "$repo_root/deployment/docker" && docker compose stop "${docker_services[@]}")

as_root "$native_dir/start-infra.sh"

"$native_dir/check-native-infra.sh"
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
set -euo pipefail
|
||||
|
||||
scope="${1:-full}"
|
||||
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||
units=()
|
||||
|
||||
case "$scope" in
|
||||
|
|
@ -27,6 +28,12 @@ case "$scope" in
|
|||
;;
|
||||
esac
|
||||
|
||||
case "$scope" in
|
||||
full|api|services|workers)
|
||||
"$repo_root/deployment/native/check-native-infra.sh"
|
||||
;;
|
||||
esac
|
||||
|
||||
for unit in "${units[@]}"; do
|
||||
systemctl --user is-active --quiet "$unit"
|
||||
echo "ok $unit"
|
||||
|
|
|
|||
24
deployment/native/check-native-infra.sh
Executable file
24
deployment/native/check-native-infra.sh
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env bash
# Health check for the native infra services (NATS, Redis, ClickHouse).
#
# Exits non-zero at the first failing probe (set -e) and prints one "ok ..."
# line per passing probe. Every network probe is time-bounded so a wedged
# service fails the check instead of hanging the caller.
set -euo pipefail

# Upper bound, in seconds, for the Redis and ClickHouse protocol probes.
probe_timeout=5

# 1. systemd must report each infra unit as active.
for unit in islandflow-nats.service islandflow-redis.service islandflow-clickhouse.service; do
  systemctl is-active --quiet "$unit"
  echo "ok $unit"
done

# 2. Redis: prefer a real PING; fall back to a TCP connect when redis-cli is
#    not installed on the host.
if command -v redis-cli >/dev/null 2>&1; then
  timeout "$probe_timeout" redis-cli -h 127.0.0.1 -p 6379 ping | grep -q PONG
else
  timeout 2 bash -c '</dev/tcp/127.0.0.1/6379'
fi
echo "ok redis-ping"

# 3. ClickHouse HTTP interface must answer /ping with "Ok".
curl -fksS --max-time "$probe_timeout" http://127.0.0.1:8123/ping | grep -q Ok
echo "ok clickhouse-ping"

# 4. NATS: a TCP connect to the client port is the only probe performed here.
timeout 2 bash -c '</dev/tcp/127.0.0.1/4222'
echo "ok nats-port"
|
||||
6
deployment/native/config/clickhouse-listen.xml
Normal file
6
deployment/native/config/clickhouse-listen.xml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
<clickhouse>
|
||||
<listen_host>127.0.0.1</listen_host>
|
||||
<path>/var/lib/islandflow/clickhouse/</path>
|
||||
<tmp_path>/var/lib/islandflow/clickhouse/tmp/</tmp_path>
|
||||
<user_files_path>/var/lib/islandflow/clickhouse/user_files/</user_files_path>
|
||||
</clickhouse>
|
||||
10
deployment/native/config/redis.conf
Normal file
10
deployment/native/config/redis.conf
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
bind 127.0.0.1
|
||||
protected-mode yes
|
||||
port 6379
|
||||
dir /var/lib/islandflow/redis
|
||||
appendonly yes
|
||||
save 900 1
|
||||
save 300 10
|
||||
save 60 10000
|
||||
loglevel notice
|
||||
databases 16
|
||||
34
deployment/native/cutover.sh
Executable file
34
deployment/native/cutover.sh
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env bash
# Cuts an Islandflow scope over from the Docker runtime to the native systemd
# user units.
#
# Usage: deployment/native/cutover.sh [full|services|workers|api|web]
#   scope defaults to "full".
#
# Steps:
#   1. Validate the scope and resolve the user units it owns.
#   2. Preflight native infra BEFORE touching Docker, so a failed preflight
#      leaves the currently running Docker services untouched.
#   3. Stop the Docker-owned Islandflow app services.
#   4. Restart the native user units for the scope.
#   5. Run check-native-health.sh for the scope.
set -euo pipefail

scope="${1:-full}"
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

worker_units=(
  islandflow-compute.service
  islandflow-candles.service
  islandflow-ingest-options.service
  islandflow-ingest-equities.service
)

# An array (rather than an unquoted command substitution) keeps the unit list
# free of word-splitting and globbing surprises.
case "$scope" in
  full)
    units=(islandflow-web.service islandflow-api.service "${worker_units[@]}")
    ;;
  services)
    units=(islandflow-api.service "${worker_units[@]}")
    ;;
  workers)
    units=("${worker_units[@]}")
    ;;
  api)
    units=(islandflow-api.service)
    ;;
  web)
    units=(islandflow-web.service)
    ;;
  *)
    echo "Usage: deployment/native/cutover.sh [full|services|workers|api|web]" >&2
    exit 1
    ;;
esac

# Every scope is preflighted. The final check-native-health.sh run requires
# native infra for the worker scope as well, so checking it up front only
# surfaces that failure before Docker services are stopped.
"$repo_root/deployment/native/check-native-infra.sh"

echo "Stopping Docker-owned Islandflow app services before native ownership starts."
(
  cd "$repo_root/deployment/docker"
  docker compose stop web api compute candles ingest-options ingest-equities
)

systemctl --user restart "${units[@]}"

"$repo_root/deployment/native/check-native-health.sh" "$scope"
|
||||
27
deployment/native/full-rollback.sh
Executable file
27
deployment/native/full-rollback.sh
Executable file
|
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/env bash
# Rolls Islandflow back from the native runtime to the Docker runtime.
#
# Steps:
#   1. Stop native app units (user scope) and native infra units (system scope).
#   2. Refuse to continue if any of those units is still active, so Docker is
#      never started alongside a native runtime that still holds the data.
#   3. Point the NPM Islandflow upstreams back at the Docker service names.
#   4. Start the Docker app services.
#   5. Validate the public app URL and the public API health URL.
#
# Environment:
#   DEPLOY_PUBLIC_APP_URL         default https://flow.deltaisland.io
#   DEPLOY_PUBLIC_API_HEALTH_URL  default https://api.flow.deltaisland.io/health
set -euo pipefail

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

app_units=(
  islandflow-web.service
  islandflow-api.service
  islandflow-compute.service
  islandflow-candles.service
  islandflow-ingest-options.service
  islandflow-ingest-equities.service
)
infra_units=(
  islandflow-nats.service
  islandflow-redis.service
  islandflow-clickhouse.service
)

# Stopping is best-effort: systemctl also fails for units that were never
# installed, which must not block a rollback. Whether anything is actually
# still running is verified separately below.
echo "Stopping native app services."
systemctl --user stop "${app_units[@]}" || true

echo "Stopping native infra before Docker reopens durable data."
if [[ "${EUID}" -eq 0 ]]; then
  systemctl stop "${infra_units[@]}" || true
else
  sudo systemctl stop "${infra_units[@]}" || true
fi

# Guard: only one runtime may own the durable data and consumers at a time.
still_active=()
for unit in "${app_units[@]}"; do
  if systemctl --user is-active --quiet "$unit"; then
    still_active+=("$unit")
  fi
done
for unit in "${infra_units[@]}"; do
  if systemctl is-active --quiet "$unit"; then
    still_active+=("$unit")
  fi
done
if (( ${#still_active[@]} > 0 )); then
  echo "Refusing to start Docker runtime; native units still active: ${still_active[*]}" >&2
  exit 1
fi

echo "Switching NPM Islandflow upstreams back to Docker service names."
"$repo_root/deployment/native/switch-npm-edge.sh" docker

echo "Restarting Docker Islandflow runtime."
(
  cd "$repo_root/deployment/docker"
  docker compose up -d web api compute candles ingest-options ingest-equities
)

# `docker compose up -d` returns before the services are necessarily ready to
# serve, so retry transient failures for a short, bounded period instead of
# failing the rollback on the first early response.
curl_opts=(-fksS --max-time 10 --retry 5 --retry-delay 2)
curl -I "${curl_opts[@]}" "${DEPLOY_PUBLIC_APP_URL:-https://flow.deltaisland.io}" >/dev/null
curl "${curl_opts[@]}" "${DEPLOY_PUBLIC_API_HEALTH_URL:-https://api.flow.deltaisland.io/health}" >/dev/null
echo "Rollback validation passed."
|
||||
72
deployment/native/install-infra-units.sh
Executable file
72
deployment/native/install-infra-units.sh
Executable file
|
|
@ -0,0 +1,72 @@
|
|||
#!/usr/bin/env bash
# Installs the native infra systemd units and their configuration files.
# Must be run as root.
set -euo pipefail

# Repository root is two levels above this script's directory.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
native_assets_dir="$repo_root/deployment/native"
system_unit_source_dir="$native_assets_dir/systemd/system"
config_source_dir="$native_assets_dir/config"

# Refuse to continue without root privileges.
if (( EUID != 0 )); then
  printf '%s\n' "Run as root: sudo $0" >&2
  exit 1
fi
|
||||
|
||||
# Locate an executable by name.
#
# Arguments:
#   $1  Command name to look up.
# Outputs:
#   Prints the resolved location on stdout when found.
# Returns:
#   0 when found, 1 otherwise.
resolve_binary() {
  local name="$1"
  local path=""
  # Declared local so the loop variable does not leak into the caller's scope.
  local candidate

  # First choice: whatever the current PATH resolves.
  path="$(command -v "$name" 2>/dev/null || true)"
  if [[ -n "$path" ]]; then
    printf '%s\n' "$path"
    return 0
  fi

  # Fallback: common install locations that may be absent from PATH
  # (for example the sbin directories).
  for candidate in "/usr/bin/$name" "/usr/sbin/$name" "/usr/local/bin/$name" "/usr/local/sbin/$name"; do
    if [[ -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  return 1
}
|
||||
|
||||
# Collect every required infra binary that cannot be resolved, then report
# them all at once instead of stopping at the first one.
missing=()
for binary in nats-server redis-server clickhouse-server; do
  resolve_binary "$binary" >/dev/null || missing+=("$binary")
done

if (( ${#missing[@]} > 0 )); then
  {
    echo "Missing native infra binaries: ${missing[*]}"
    echo "Install NATS Server, Redis Server, and ClickHouse Server before bootstrapping native infra."
    echo "On Debian, Redis is usually available as redis-server; ClickHouse and NATS may require their vendor repositories or packaged binaries."
  } >&2
  exit 1
fi
|
||||
|
||||
# Create a system group and a matching system user if they do not exist yet.
#
# Arguments:
#   $1  Name used for both the group and the user.
#   $2  Home directory recorded for the user.
ensure_system_user() {
  local account="$1"
  local home_dir="$2"

  if ! getent group "$account" >/dev/null; then
    groupadd --system "$account"
  fi

  # The account gets a nologin shell.
  if ! getent passwd "$account" >/dev/null; then
    useradd --system --gid "$account" --home-dir "$home_dir" --shell /usr/sbin/nologin "$account"
  fi
}
|
||||
|
||||
# Service accounts, one per infra service, each homed in its data directory.
for service in nats redis clickhouse; do
  ensure_system_user "$service" "/var/lib/islandflow/$service"
done

# Configuration files.
install -d -m 0755 /etc/islandflow
install -m 0644 "$config_source_dir/redis.conf" /etc/islandflow/redis.conf
install -d -m 0755 /etc/clickhouse-server/config.d
install -m 0644 "$config_source_dir/clickhouse-listen.xml" /etc/clickhouse-server/config.d/islandflow-listen.xml

# Data directories, owned by the matching service account and closed to others.
for service in nats redis clickhouse; do
  install -d -o "$service" -g "$service" -m 0750 "/var/lib/islandflow/$service"
done

# Unit files; systemd must reload to pick them up.
install -m 0644 "$system_unit_source_dir"/islandflow-*.service /etc/systemd/system/
systemctl daemon-reload

printf '%s\n' "Installed native infra system units and config."
printf '%s\n' "Start infra with: sudo deployment/native/start-infra.sh"
|
||||
17
deployment/native/start-infra.sh
Executable file
17
deployment/native/start-infra.sh
Executable file
|
|
@ -0,0 +1,17 @@
|
|||
#!/usr/bin/env bash
# Starts the Islandflow native infra units (NATS, Redis, ClickHouse) and then
# runs the native infra health check. Must be run as root.
set -euo pipefail

if (( EUID != 0 )); then
  printf '%s\n' "Run as root: sudo $0" >&2
  exit 1
fi

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
islandflow_units=(islandflow-nats.service islandflow-redis.service islandflow-clickhouse.service)

# Disable and stop the stock package units first.
# NOTE(review): presumably so they do not compete with the islandflow-* units
# for the same ports and data — confirm.
for unit in redis-server.service nats-server.service clickhouse-server.service; do
  systemctl list-unit-files "$unit" >/dev/null 2>&1 || continue
  systemctl disable --now "$unit" >/dev/null 2>&1 || true
done

# Clear any earlier failed state (best effort), then enable and start.
systemctl reset-failed "${islandflow_units[@]}" || true
systemctl enable --now "${islandflow_units[@]}"

"$script_dir/check-native-infra.sh"
|
||||
9
deployment/native/stop-infra.sh
Executable file
9
deployment/native/stop-infra.sh
Executable file
|
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env bash
# Stops the Islandflow native infra units (NATS, Redis, ClickHouse).
# Must be run as root.
set -euo pipefail

if (( EUID != 0 )); then
  printf '%s\n' "Run as root: sudo $0" >&2
  exit 1
fi

infra_units=(islandflow-nats.service islandflow-redis.service islandflow-clickhouse.service)
systemctl stop "${infra_units[@]}"
|
||||
285
deployment/native/switch-npm-edge.sh
Executable file
285
deployment/native/switch-npm-edge.sh
Executable file
|
|
@ -0,0 +1,285 @@
|
|||
#!/usr/bin/env bash
# Points the Nginx Proxy Manager (NPM) proxy hosts for Islandflow at either the
# native host services or the Docker services.
#
# Usage: deployment/native/switch-npm-edge.sh [native|docker]   (default: native)
#
# Every setting below can be overridden through the named environment variable.
set -euo pipefail

target="${1:-native}"

# NPM installation directory and its SQLite database.
npm_root="${NPM_ROOT:-/home/delta/nginx-proxy-manager}"
db_path="${NPM_DB_PATH:-$npm_root/data/database.sqlite}"

# Public domains whose proxy hosts are rewritten.
app_domain="${ISLANDFLOW_APP_DOMAIN:-flow.deltaisland.io}"
api_domain="${ISLANDFLOW_API_DOMAIN:-api.flow.deltaisland.io}"

# Upstream hosts. An empty native host means "detect it from the NPM container".
native_host="${ISLANDFLOW_NATIVE_HOST:-}"
docker_web_host="${ISLANDFLOW_DOCKER_WEB_HOST:-web}"
docker_api_host="${ISLANDFLOW_DOCKER_API_HOST:-api}"

# Upstream ports, shared by both targets.
web_port="${ISLANDFLOW_WEB_PORT:-3000}"
api_port="${ISLANDFLOW_API_PORT:-4000}"

# NPM container handling; NPM_RESTART=0 skips the container restart.
restart_npm="${NPM_RESTART:-1}"
npm_container="${NPM_CONTAINER_NAME:-nginx-proxy-manager}"

# Command prefix used when the database needs elevated access; empty by default.
sudo_cmd=()

# Only the two known targets are accepted.
if [[ "$target" != "native" && "$target" != "docker" ]]; then
  echo "Usage: deployment/native/switch-npm-edge.sh [native|docker]" >&2
  exit 1
fi
|
||||
|
||||
# Print the host that NPM should use to reach the native services.
#
# Uses $native_host when it is already set. Otherwise, if the $npm_container
# container is running, uses the first non-empty network gateway reported by
# `docker inspect` for that container.
#
# Globals:
#   native_host    (read/write) explicit override, or the detected gateway.
#   npm_container  (read) name of the NPM container.
# Outputs:
#   The host on stdout; an explanation on stderr on failure.
# Exits:
#   1 when no host can be determined.
resolve_native_host() {
  local running_names gateways

  if [[ -n "$native_host" ]]; then
    printf '%s\n' "$native_host"
    return
  fi

  if command -v docker >/dev/null 2>&1; then
    # Capture the list first rather than piping into `grep -q`: under
    # `pipefail`, grep exiting early can kill `docker ps` with SIGPIPE and
    # turn a successful match into a failure.
    running_names="$(docker ps --format '{{.Names}}' || true)"

    # -F: compare the container name literally, not as a regular expression.
    if grep -Fqx -- "$npm_container" <<<"$running_names"; then
      gateways="$(docker inspect "$npm_container" --format '{{range .NetworkSettings.Networks}}{{println .Gateway}}{{end}}' || true)"
      # First non-blank gateway line.
      native_host="$(awk 'NF { print; exit }' <<<"$gateways")"
      if [[ -n "$native_host" ]]; then
        printf '%s\n' "$native_host"
        return
      fi
    fi
  fi

  echo "Unable to determine the native upstream host for NPM." >&2
  echo "Set ISLANDFLOW_NATIVE_HOST explicitly or start the $npm_container container first." >&2
  exit 1
}
|
||||
|
||||
# The native upstream host is only needed when switching to native.
if [[ "$target" == "native" ]]; then
  native_host="$(resolve_native_host)"
fi

# Check existence before writability so a missing database is reported as
# missing rather than as "not writable and sudo is unavailable".
if [[ ! -f "$db_path" ]]; then
  echo "NPM database not found: $db_path" >&2
  exit 1
fi

# Both the database file and its directory must be writable. When they are
# not, run the database commands through sudo unless already root.
if [[ ! -w "$db_path" || ! -w "$(dirname "$db_path")" ]]; then
  if [[ "${EUID}" -eq 0 ]]; then
    sudo_cmd=()
  elif command -v sudo >/dev/null 2>&1; then
    sudo_cmd=(sudo)
  else
    echo "NPM database path is not writable and sudo is unavailable: $db_path" >&2
    exit 1
  fi
fi

# Keep a timestamped copy of the database before modifying it.
backup="$db_path.before-islandflow-$target-$(date +%Y%m%d%H%M%S)"
"${sudo_cmd[@]}" cp "$db_path" "$backup"
echo "Backed up NPM database to $backup"
|
||||
|
||||
# Rewrite the two Islandflow proxy_host rows in the NPM database: the app host
# is pointed at the web upstream and given an advanced_config that routes API
# paths to the API upstream; the API host is pointed at the API upstream.
"${sudo_cmd[@]}" python3 - "$db_path" "$target" "$app_domain" "$api_domain" "$native_host" "$docker_web_host" "$docker_api_host" "$web_port" "$api_port" <<'PY'
import json
import sqlite3
import sys

(
    db_path,
    target,
    app_domain,
    api_domain,
    native_host,
    docker_web_host,
    docker_api_host,
    web_port,
    api_port,
) = sys.argv[1:]

# Native mode sends both upstreams to the same host; Docker mode uses the
# per-service names.
if target == "native":
    web_host = api_host = native_host
else:
    web_host, api_host = docker_web_host, docker_api_host

# Location block stored on the app host so API paths reach the API upstream.
advanced_config = f"""location ~ ^/(ws|replay|prints|joins|nbbo|dark|flow|candles|history)/ {{
set $forward_scheme http;
set $server "{api_host}";
set $port {api_port};

proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $http_connection;
proxy_http_version 1.1;

include conf.d/include/proxy.conf;
}}"""


def has_domain(raw, domain):
    """Return whether `domain` is in the stored domain list.

    `raw` is decoded as JSON first; if anything fails, fall back to a plain
    containment test on the raw value.
    """
    try:
        names = json.loads(raw)
        return domain in names
    except Exception:
        return domain in raw


con = sqlite3.connect(db_path)
cur = con.cursor()
rows = list(cur.execute("select id, domain_names from proxy_host where is_deleted = 0"))


def ids_for(domain):
    """Ids of the live proxy hosts that serve `domain`."""
    return [row_id for row_id, domains in rows if has_domain(domains, domain)]


app_ids = ids_for(app_domain)
api_ids = ids_for(api_domain)

# Refuse to guess when a domain has no host or several hosts.
if len(app_ids) != 1 or len(api_ids) != 1:
    raise SystemExit(f"Expected one app and one API proxy host, found app={app_ids} api={api_ids}")

cur.execute(
    "update proxy_host set forward_scheme = 'http', forward_host = ?, forward_port = ?, allow_websocket_upgrade = 1, advanced_config = ?, modified_on = datetime('now') where id = ?",
    (web_host, int(web_port), advanced_config, app_ids[0]),
)
cur.execute(
    "update proxy_host set forward_scheme = 'http', forward_host = ?, forward_port = ?, allow_websocket_upgrade = 1, modified_on = datetime('now') where id = ?",
    (api_host, int(api_port), api_ids[0]),
)
con.commit()

print(f"Updated {app_domain} -> {web_host}:{web_port}")
print(f"Updated {api_domain} -> {api_host}:{api_port}")
PY
|
||||
|
||||
# Synchronize the generated nginx proxy_host config files with the new
# upstreams by rewriting their `set $server` / `set $port` lines in place.
if command -v python3 >/dev/null 2>&1; then
  "${sudo_cmd[@]}" python3 - "$npm_root" "$db_path" "$target" "$app_domain" "$api_domain" "$native_host" "$docker_web_host" "$docker_api_host" "$web_port" "$api_port" <<'PY'
import json
import re
import sqlite3
import sys
from pathlib import Path

(
    npm_root,
    db_path,
    target,
    app_domain,
    api_domain,
    native_host,
    docker_web_host,
    docker_api_host,
    web_port,
    api_port,
) = sys.argv[1:]

if target == "native":
    web_host = api_host = native_host
else:
    web_host, api_host = docker_web_host, docker_api_host

server_pattern = re.compile(r'^(?P<prefix>\s*set \$server\s+)".*?";\s*$', re.M)
port_pattern = re.compile(r'^(?P<prefix>\s*set \$port\s+)\d+;\s*$', re.M)


def has_domain(raw, domain):
    """Return whether `domain` is in the stored domain list (JSON first, raw fallback)."""
    try:
        names = json.loads(raw)
        return domain in names
    except Exception:
        return domain in raw


def substitute(text, pattern, value, index):
    """Replace the value of the `index`-th (1-based) match of `pattern`, keeping its prefix."""
    hits = list(pattern.finditer(text))
    if index > len(hits):
        raise SystemExit(f"Unable to rewrite generated proxy config; expected match {index} for {pattern.pattern!r}")
    hit = hits[index - 1]
    return f'{text[:hit.start()]}{hit.group("prefix")}{value};{text[hit.end():]}'


def retarget(text, host, port, index):
    """Point the `index`-th `set $server` / `set $port` pair at host:port."""
    text = substitute(text, server_pattern, f'"{host}"', index)
    return substitute(text, port_pattern, int(port), index)


con = sqlite3.connect(db_path)
rows = list(con.execute("select id, domain_names from proxy_host where is_deleted = 0"))
app_ids = [row_id for row_id, domains in rows if has_domain(domains, app_domain)]
api_ids = [row_id for row_id, domains in rows if has_domain(domains, api_domain)]
if len(app_ids) != 1 or len(api_ids) != 1:
    raise SystemExit(f"Expected one app and one API proxy host, found app={app_ids} api={api_ids}")

conf_dir = Path(npm_root) / "data/nginx/proxy_host"
api_conf = conf_dir / f"{api_ids[0]}.conf"
app_conf = conf_dir / f"{app_ids[0]}.conf"

# API host config: only the first server/port pair is rewritten.
if api_conf.exists():
    api_conf.write_text(retarget(api_conf.read_text(), api_host, api_port, 1))
    print(f"Synchronized {api_conf.name} -> {api_host}:{api_port}")

# App host config: the first pair goes to the web upstream and the second
# pair goes to the API upstream.
if app_conf.exists():
    text = retarget(app_conf.read_text(), web_host, web_port, 1)
    text = retarget(text, api_host, api_port, 2)
    app_conf.write_text(text)
    print(f"Synchronized {app_conf.name} -> {web_host}:{web_port} and API matcher -> {api_host}:{api_port}")
PY
fi
|
||||
|
||||
# Restart the NPM container unless NPM_RESTART=0 was requested.
# The container is addressed through $npm_container (NPM_CONTAINER_NAME) so a
# non-default container name is honored here as it is elsewhere in the script.
if [[ "$restart_npm" == "0" ]]; then
  echo "NPM container restart skipped because NPM_RESTART=0."
elif command -v docker >/dev/null 2>&1 && grep -Fqx -- "$npm_container" <<<"$(docker ps --format '{{.Names}}' || true)"; then
  # The name list is captured before matching: piping `docker ps` straight
  # into `grep -q` can fail spuriously under `pipefail`.
  docker restart "$npm_container" >/dev/null
  echo "Restarted $npm_container"
else
  echo "NPM container restart skipped; restart it manually if it is not managed by Docker on this host."
fi
|
||||
|
||||
# When the NPM container is running, rewrite the generated proxy_host configs
# once more and then ask nginx inside the container to reload them.
if command -v docker >/dev/null 2>&1 && docker ps --format '{{.Names}}' | grep -qx "$npm_container"; then
  "${sudo_cmd[@]}" python3 - "$npm_root" "$db_path" "$target" "$app_domain" "$api_domain" "$native_host" "$docker_web_host" "$docker_api_host" "$web_port" "$api_port" <<'PY'
import json
import re
import sqlite3
import sys
from pathlib import Path

(
    npm_root,
    db_path,
    target,
    app_domain,
    api_domain,
    native_host,
    docker_web_host,
    docker_api_host,
    web_port,
    api_port,
) = sys.argv[1:]

if target == "native":
    web_host = api_host = native_host
else:
    web_host, api_host = docker_web_host, docker_api_host

server_pattern = re.compile(r'^(?P<prefix>\s*set \$server\s+)".*?";\s*$', re.M)
port_pattern = re.compile(r'^(?P<prefix>\s*set \$port\s+)\d+;\s*$', re.M)


def has_domain(raw, domain):
    """Return whether `domain` is in the stored domain list (JSON first, raw fallback)."""
    try:
        names = json.loads(raw)
        return domain in names
    except Exception:
        return domain in raw


def substitute(text, pattern, value, index):
    """Replace the value of the `index`-th (1-based) match of `pattern`, keeping its prefix."""
    hits = list(pattern.finditer(text))
    if index > len(hits):
        raise SystemExit(f"Unable to rewrite generated proxy config; expected match {index} for {pattern.pattern!r}")
    hit = hits[index - 1]
    return f'{text[:hit.start()]}{hit.group("prefix")}{value};{text[hit.end():]}'


def retarget(text, host, port, index):
    """Point the `index`-th `set $server` / `set $port` pair at host:port."""
    text = substitute(text, server_pattern, f'"{host}"', index)
    return substitute(text, port_pattern, int(port), index)


con = sqlite3.connect(db_path)
rows = list(con.execute("select id, domain_names from proxy_host where is_deleted = 0"))
app_ids = [row_id for row_id, domains in rows if has_domain(domains, app_domain)]
api_ids = [row_id for row_id, domains in rows if has_domain(domains, api_domain)]
if len(app_ids) != 1 or len(api_ids) != 1:
    raise SystemExit(f"Expected one app and one API proxy host, found app={app_ids} api={api_ids}")

conf_dir = Path(npm_root) / "data/nginx/proxy_host"
api_conf = conf_dir / f"{api_ids[0]}.conf"
app_conf = conf_dir / f"{app_ids[0]}.conf"

# API host config: only the first server/port pair is rewritten.
if api_conf.exists():
    api_conf.write_text(retarget(api_conf.read_text(), api_host, api_port, 1))

# App host config: first pair -> web upstream, second pair -> API upstream.
if app_conf.exists():
    text = retarget(app_conf.read_text(), web_host, web_port, 1)
    text = retarget(text, api_host, api_port, 2)
    app_conf.write_text(text)
PY

  # Up to five reload attempts, pausing one second after each failure.
  reloaded=0
  for attempt in 1 2 3 4 5; do
    if ! docker exec "$npm_container" nginx -s reload >/dev/null 2>&1; then
      sleep 1
      continue
    fi
    reloaded=1
    break
  done

  if [[ "$reloaded" != "1" ]]; then
    echo "Warning: nginx-proxy-manager reload did not succeed after restart; verify the container is healthy." >&2
  else
    echo "Reloaded nginx-proxy-manager"
  fi
fi
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
# systemd system unit that runs ClickHouse Server for Islandflow as the
# clickhouse user, using /etc/clickhouse-server/config.xml.
[Unit]
|
||||
Description=Islandflow ClickHouse
|
||||
# Start only after the network is reported online.
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
# clickhouse-server is resolved through the service PATH via env.
ExecStart=/usr/bin/env clickhouse-server --config-file=/etc/clickhouse-server/config.xml
|
||||
# Restart unconditionally, five seconds after each exit.
Restart=always
|
||||
RestartSec=5
|
||||
User=clickhouse
|
||||
Group=clickhouse
|
||||
# NOTE(review): StateDirectory=clickhouse manages /var/lib/clickhouse, while
# the NATS and Redis units use islandflow/<name> under /var/lib — confirm this
# matches the data path in the ClickHouse configuration.
StateDirectory=clickhouse
|
||||
LimitNOFILE=262144
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
18
deployment/native/systemd/system/islandflow-nats.service
Normal file
18
deployment/native/systemd/system/islandflow-nats.service
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# systemd system unit that runs the NATS server with JetStream for Islandflow.
[Unit]
Description=Islandflow NATS JetStream
# Start only after the network is reported online.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# nats-server is resolved through the service PATH via env, as the Redis and
# ClickHouse units do, so an install under /usr/local/bin or /usr/bin also
# works instead of failing on a hard-coded /usr/sbin path.
# Flags: -js enables JetStream, -sd sets its storage directory, -a/-p bind the
# client listener to 127.0.0.1:4222, -m serves HTTP monitoring on port 8222.
ExecStart=/usr/bin/env nats-server -js -sd /var/lib/islandflow/nats -a 127.0.0.1 -p 4222 -m 8222
# Restart unconditionally, two seconds after each exit.
Restart=always
RestartSec=2
User=nats
Group=nats
RuntimeDirectory=islandflow-nats
# Managed state directory: /var/lib/islandflow/nats, matching -sd above.
StateDirectory=islandflow/nats
LimitNOFILE=1048576

[Install]
WantedBy=multi-user.target
|
||||
18
deployment/native/systemd/system/islandflow-redis.service
Normal file
18
deployment/native/systemd/system/islandflow-redis.service
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# systemd system unit that runs Redis for Islandflow as the redis user, using
# /etc/islandflow/redis.conf.
[Unit]
|
||||
Description=Islandflow Redis
|
||||
# Start only after the network is reported online.
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
# Type=notify pairs with "--supervised systemd" below: the unit counts as
# started once Redis signals readiness to systemd.
Type=notify
|
||||
# redis-server is resolved through the service PATH via env and kept in the
# foreground (--daemonize no).
ExecStart=/usr/bin/env redis-server /etc/islandflow/redis.conf --supervised systemd --daemonize no
|
||||
# Restart unconditionally, two seconds after each exit.
Restart=always
|
||||
RestartSec=2
|
||||
User=redis
|
||||
Group=redis
|
||||
RuntimeDirectory=islandflow-redis
|
||||
# Managed state directory: /var/lib/islandflow/redis.
StateDirectory=islandflow/redis
|
||||
LimitNOFILE=65535
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
@ -6,6 +6,8 @@ Wants=network-online.target
|
|||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/delta/islandflow
|
||||
Environment=API_HOST=0.0.0.0
|
||||
Environment=API_PORT=4000
|
||||
EnvironmentFile=/home/delta/islandflow/.env
|
||||
ExecStart=/home/delta/.bun/bin/bun services/api/src/index.ts
|
||||
Restart=always
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Wants=network-online.target
|
|||
Type=simple
|
||||
WorkingDirectory=/home/delta/islandflow
|
||||
EnvironmentFile=/home/delta/islandflow/.env
|
||||
Environment=OPTIONS_INGEST_ADAPTER=synthetic
|
||||
ExecStart=/home/delta/.bun/bin/bun services/ingest-options/src/index.ts
|
||||
Restart=always
|
||||
RestartSec=2
|
||||
|
|
|
|||
|
|
@ -6,8 +6,10 @@ Wants=network-online.target
|
|||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/delta/islandflow
|
||||
Environment=WEB_HOST=0.0.0.0
|
||||
Environment=WEB_PORT=3000
|
||||
EnvironmentFile=/home/delta/islandflow/.env
|
||||
ExecStart=/home/delta/.bun/bin/bun --cwd apps/web run start
|
||||
ExecStart=/bin/sh -lc 'cd /home/delta/islandflow/apps/web && exec /home/delta/.bun/bin/bun x next start -H "$WEB_HOST" -p "$WEB_PORT"'
|
||||
Restart=always
|
||||
RestartSec=2
|
||||
KillSignal=SIGINT
|
||||
|
|
|
|||
521
docs/turns/2026-05-18-native-public-edge-cutover.html
Normal file
521
docs/turns/2026-05-18-native-public-edge-cutover.html
Normal file
|
|
@ -0,0 +1,521 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Turn Document - Native Public Edge Cutover</title>
|
||||
<style>
|
||||
:root {
|
||||
color-scheme: dark;
|
||||
--bg-core: #06080b;
|
||||
--bg-elevated: #0b1016;
|
||||
--bg-pane: #111820;
|
||||
--bg-pane-2: #0d141b;
|
||||
--bg-soft: rgba(255, 255, 255, 0.03);
|
||||
--border-subtle: rgba(255, 255, 255, 0.12);
|
||||
--border-strong: rgba(245, 166, 35, 0.32);
|
||||
--text-primary: #e6edf4;
|
||||
--text-dim: #90a0b2;
|
||||
--text-faint: #6e7b8c;
|
||||
--signal-amber: #f5a623;
|
||||
--signal-amber-soft: rgba(245, 166, 35, 0.12);
|
||||
--confirm-green: #25c17a;
|
||||
--confirm-green-soft: rgba(37, 193, 122, 0.14);
|
||||
--risk-red: #ff6b5f;
|
||||
--risk-red-soft: rgba(255, 107, 95, 0.12);
|
||||
--info-blue: #4da3ff;
|
||||
--info-blue-soft: rgba(77, 163, 255, 0.12);
|
||||
--shadow: 0 24px 60px rgba(0, 0, 0, 0.35);
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
font-family: "IBM Plex Sans", "Segoe UI", sans-serif;
|
||||
background:
|
||||
radial-gradient(circle at top right, rgba(245, 166, 35, 0.12), transparent 28%),
|
||||
linear-gradient(180deg, #06080b 0%, #0a1117 100%);
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
main {
|
||||
width: min(1080px, calc(100vw - 32px));
|
||||
margin: 0 auto;
|
||||
padding: 28px 0 48px;
|
||||
}
|
||||
|
||||
.hero {
|
||||
background:
|
||||
linear-gradient(140deg, rgba(245, 166, 35, 0.1), transparent 42%),
|
||||
linear-gradient(180deg, rgba(255, 255, 255, 0.02), transparent 100%),
|
||||
var(--bg-pane);
|
||||
border: 1px solid var(--border-strong);
|
||||
border-radius: 16px;
|
||||
box-shadow: var(--shadow);
|
||||
padding: 26px 28px;
|
||||
margin-bottom: 18px;
|
||||
}
|
||||
|
||||
.eyebrow,
|
||||
h2,
|
||||
.meta-label,
|
||||
th {
|
||||
font-family: "IBM Plex Mono", monospace;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.12em;
|
||||
}
|
||||
|
||||
.eyebrow {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
color: var(--signal-amber);
|
||||
font-size: 0.72rem;
|
||||
margin-bottom: 14px;
|
||||
}
|
||||
|
||||
h1 {
|
||||
margin: 0 0 10px;
|
||||
font-family: "Quantico", "IBM Plex Sans", sans-serif;
|
||||
font-size: clamp(2rem, 4vw, 3rem);
|
||||
line-height: 1.05;
|
||||
letter-spacing: 0.06em;
|
||||
}
|
||||
|
||||
.lead {
|
||||
margin: 0;
|
||||
max-width: 72ch;
|
||||
color: var(--text-dim);
|
||||
line-height: 1.65;
|
||||
}
|
||||
|
||||
.meta-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
||||
gap: 10px;
|
||||
margin-top: 18px;
|
||||
}
|
||||
|
||||
.meta-card {
|
||||
padding: 12px 14px;
|
||||
border-radius: 12px;
|
||||
background: var(--bg-soft);
|
||||
border: 1px solid var(--border-subtle);
|
||||
}
|
||||
|
||||
.meta-label {
|
||||
color: var(--text-faint);
|
||||
font-size: 0.68rem;
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.meta-value {
|
||||
color: var(--text-primary);
|
||||
font-size: 0.95rem;
|
||||
}
|
||||
|
||||
section {
|
||||
background: var(--bg-pane);
|
||||
border: 1px solid var(--border-subtle);
|
||||
border-radius: 16px;
|
||||
padding: 22px 24px;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
h2 {
|
||||
margin: 0 0 14px;
|
||||
font-size: 0.76rem;
|
||||
color: var(--signal-amber);
|
||||
}
|
||||
|
||||
p,
|
||||
li {
|
||||
line-height: 1.65;
|
||||
color: var(--text-dim);
|
||||
}
|
||||
|
||||
ul {
|
||||
margin: 0;
|
||||
padding-left: 20px;
|
||||
}
|
||||
|
||||
li + li {
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
strong {
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
code {
|
||||
font-family: "IBM Plex Mono", monospace;
|
||||
font-size: 0.92em;
|
||||
color: var(--signal-amber);
|
||||
}
|
||||
|
||||
pre {
|
||||
margin: 12px 0 0;
|
||||
padding: 14px 16px;
|
||||
border-radius: 12px;
|
||||
background: var(--bg-pane-2);
|
||||
border: 1px solid var(--border-subtle);
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
pre code {
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.status-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.status-card {
|
||||
border-radius: 12px;
|
||||
border: 1px solid var(--border-subtle);
|
||||
padding: 14px;
|
||||
background: var(--bg-pane-2);
|
||||
}
|
||||
|
||||
.status-card.good {
|
||||
border-color: rgba(37, 193, 122, 0.32);
|
||||
background: linear-gradient(180deg, var(--confirm-green-soft), transparent), var(--bg-pane-2);
|
||||
}
|
||||
|
||||
.status-card.warn {
|
||||
border-color: rgba(77, 163, 255, 0.28);
|
||||
background: linear-gradient(180deg, var(--info-blue-soft), transparent), var(--bg-pane-2);
|
||||
}
|
||||
|
||||
.status-title {
|
||||
margin: 0 0 6px;
|
||||
color: var(--text-primary);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.status-copy {
|
||||
margin: 0;
|
||||
color: var(--text-dim);
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
th,
|
||||
td {
|
||||
text-align: left;
|
||||
padding: 10px 0;
|
||||
border-bottom: 1px solid var(--border-subtle);
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
th {
|
||||
color: var(--text-faint);
|
||||
font-size: 0.68rem;
|
||||
}
|
||||
|
||||
td {
|
||||
color: var(--text-dim);
|
||||
}
|
||||
|
||||
.pill {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
border-radius: 999px;
|
||||
padding: 4px 9px;
|
||||
font-family: "IBM Plex Mono", monospace;
|
||||
font-size: 0.7rem;
|
||||
letter-spacing: 0.08em;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.pill.good {
|
||||
color: var(--confirm-green);
|
||||
background: var(--confirm-green-soft);
|
||||
}
|
||||
|
||||
.pill.warn {
|
||||
color: var(--info-blue);
|
||||
background: var(--info-blue-soft);
|
||||
}
|
||||
|
||||
.pill.risk {
|
||||
color: var(--risk-red);
|
||||
background: var(--risk-red-soft);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<section class="hero">
|
||||
<div class="eyebrow">Islandflow Turn Document</div>
|
||||
<h1>Native Public Edge Cutover</h1>
|
||||
<p class="lead">
|
||||
Completed the VPS native-first cutover for Islandflow infrastructure and app services while keeping Nginx
|
||||
Proxy Manager as the outer edge and Docker as the rollback path. The final state now serves
|
||||
<code>flow.deltaisland.io</code> and <code>api.flow.deltaisland.io</code> from the native web and API
|
||||
processes, with verified public routing and a documented follow-up for the long-term API Cloudflare posture.
|
||||
</p>
|
||||
<div class="meta-grid">
|
||||
<div class="meta-card">
|
||||
<div class="meta-label">Generated</div>
|
||||
<div class="meta-value">2026-05-18 19:52 EDT</div>
|
||||
</div>
|
||||
<div class="meta-card">
|
||||
<div class="meta-label">Primary Issue</div>
|
||||
<div class="meta-value"><code>islandflow-vvw</code></div>
|
||||
</div>
|
||||
<div class="meta-card">
|
||||
<div class="meta-label">Follow-up</div>
|
||||
<div class="meta-value"><code>islandflow-fl5</code></div>
|
||||
</div>
|
||||
<div class="meta-card">
|
||||
<div class="meta-label">Runtime State</div>
|
||||
<div class="meta-value">Native active, Docker retained for rollback</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Summary</h2>
|
||||
<p>
|
||||
The repository now contains the native infra units, native cutover scripts, Docker fallback adjustments, and
|
||||
public-edge retargeting logic required to run Islandflow natively on the VPS. During validation, the live NPM
|
||||
edge was switched from Docker container-name upstreams to native host ports, the host firewall was adjusted so
|
||||
the NPM bridge could reach the native API, and the separate public API TLS problem was resolved by correcting
|
||||
the Cloudflare DNS state for <code>api.flow.deltaisland.io</code>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Changes Made</h2>
|
||||
<ul>
|
||||
<li>
|
||||
Added checked-in native infra operations under <code>deployment/native/</code>, including
|
||||
<code>bootstrap-infra.sh</code>, <code>check-native-infra.sh</code>, <code>cutover.sh</code>,
|
||||
<code>full-rollback.sh</code>, <code>start-infra.sh</code>, and the native system units for NATS, Redis,
|
||||
and ClickHouse.
|
||||
</li>
|
||||
<li>
|
||||
Extended native app runtime units so the web and API bind on host-reachable interfaces, and forced the
|
||||
native options ingest service to use the synthetic adapter during the cutover.
|
||||
</li>
|
||||
<li>
|
||||
Updated <code>services/api</code> to support explicit host binding through <code>API_HOST</code>, and fixed
|
||||
JetStream retention conversion in <code>packages/bus</code> so native services can start cleanly with the
|
||||
configured max-age values.
|
||||
</li>
|
||||
<li>
|
||||
Updated the Docker fallback assets to publish loopback web/API ports, share durable host data under
|
||||
<code>/var/lib/islandflow</code>, and document the native-to-Docker rollback path.
|
||||
</li>
|
||||
<li>
|
||||
Reworked <code>deployment/native/switch-npm-edge.sh</code> so it targets the NPM bridge gateway IP instead
|
||||
of <code>host.docker.internal</code>, handles the root-owned NPM SQLite database, synchronizes generated
|
||||
<code>proxy_host</code> configs, and reloads NPM deterministically after the edge switch.
|
||||
</li>
|
||||
<li>
|
||||
Created Beads follow-up issue <code>islandflow-fl5</code> for the remaining decision about whether
|
||||
<code>api.flow.deltaisland.io</code> should remain DNS-only or be re-proxied through Cloudflare.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Context</h2>
|
||||
<p>
|
||||
The migration started from a Docker-owned production baseline where NATS, Redis, ClickHouse, API, workers, and
|
||||
web all ran in Compose, while NPM routed Islandflow traffic to Docker service names. That setup blocked a safe
|
||||
native cutover for two reasons: the native services could not reach Docker-only infra reliably, and NPM could
|
||||
not send public traffic to host-native processes without a deliberate upstream retarget.
|
||||
</p>
|
||||
<p>
|
||||
The runtime model for this work is exclusive ownership. Native and Docker are not allowed to run the same API
|
||||
or worker scopes in parallel because JetStream durable consumers would conflict. The objective was therefore a
|
||||
phased handoff, not a mixed soak for the same queues.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Important Implementation Details</h2>
|
||||
<div class="status-grid">
|
||||
<article class="status-card good">
|
||||
<p class="status-title">NPM edge targeting</p>
|
||||
<p class="status-copy">
|
||||
NPM generates <code>proxy_pass</code> from a runtime-resolved <code>$server</code> variable, so the
|
||||
Docker <code>/etc/hosts</code> alias for <code>host.docker.internal</code> was not sufficient. The switch
|
||||
helper now detects the NPM bridge gateway and uses that IP for native upstreams.
|
||||
</p>
|
||||
</article>
|
||||
<article class="status-card good">
|
||||
<p class="status-title">Firewall path</p>
|
||||
<p class="status-copy">
|
||||
The host UFW policy already allowed port <code>3000</code> but not <code>4000</code>. The live fix was a
|
||||
source-scoped allow for the NPM bridge subnet so the containerized edge could reach the native API.
|
||||
</p>
|
||||
</article>
|
||||
<article class="status-card warn">
|
||||
<p class="status-title">Cloudflare API hostname</p>
|
||||
<p class="status-copy">
|
||||
The API hostname failure was separate from the native cutover. The hostname is now a DNS-only
|
||||
<code>A</code> record pointing at the VPS, which restored public TLS and health responses.
|
||||
</p>
|
||||
</article>
|
||||
</div>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Area</th>
|
||||
<th>Implementation detail</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><strong>Native API</strong></td>
|
||||
<td>
|
||||
<code>services/api/src/index.ts</code> now accepts <code>API_HOST</code> and passes it to
|
||||
<code>Bun.serve</code>. The native unit sets <code>API_HOST=0.0.0.0</code> and
|
||||
<code>API_PORT=4000</code>.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Native web</strong></td>
|
||||
<td>
|
||||
The native web unit now starts from <code>apps/web</code> with
|
||||
<code>bun x next start -H "$WEB_HOST" -p "$WEB_PORT"</code>, avoiding the earlier repo-root startup
|
||||
failure and binding the service on <code>0.0.0.0:3000</code>.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>JetStream retention</strong></td>
|
||||
<td>
|
||||
Native startup exposed a retention-unit bug. The shared bus layer now converts stream max-age values with
|
||||
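<code>nanos(...)</code> from configured milliseconds to the nanoseconds JetStream expects, and converts them back with <code>millis(...)</code> when formatting retention reports.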
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Docker fallback</strong></td>
|
||||
<td>
|
||||
Docker Compose now uses <code>ISLANDFLOW_DATA_ROOT=/var/lib/islandflow</code>, publishes loopback
|
||||
ports, and keeps the fallback runtime compatible with the same durable data directories as the native
|
||||
services.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>NPM switch helper</strong></td>
|
||||
<td>
|
||||
The helper now updates both the NPM database and the generated
|
||||
<code>/data/nginx/proxy_host/*.conf</code> files, because a DB-only restart did not reliably rewrite the
|
||||
live configs for Islandflow.
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<pre><code>sudo ufw allow proto tcp from 172.18.0.0/16 to any port 4000 comment 'npm bridge to native api'</code></pre>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Expected Impact for End-Users</h2>
|
||||
<ul>
|
||||
<li>
|
||||
Public web and API traffic now reaches the native Islandflow services, which removes Docker from the primary
|
||||
live request path while keeping the outer edge unchanged.
|
||||
</li>
|
||||
<li>
|
||||
Same-origin public API routes such as <code>/prints</code>, <code>/history</code>, <code>/replay</code>,
|
||||
<code>/nbbo</code>, and <code>/ws/live</code> continue to resolve correctly through the main app hostname.
|
||||
</li>
|
||||
<li>
|
||||
Rollback remains fast and explicit: NPM can be pointed back at Docker service names and the Docker runtime
|
||||
can reclaim the same durable data directories if native operation needs to be abandoned.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Validation</h2>
|
||||
<div class="status-grid">
|
||||
<article class="status-card good">
|
||||
<div class="pill good">Static checks</div>
|
||||
<ul>
|
||||
<li><code>bun run check:docker-workspace</code></li>
|
||||
<li><code>docker compose -f deployment/docker/docker-compose.yml config --quiet</code></li>
|
||||
<li><code>docker compose -f /home/delta/nginx-proxy-manager/docker-compose.yml config --quiet</code></li>
|
||||
<li><code>bash -n deployment/native/*.sh</code></li>
|
||||
<li><code>systemd-analyze verify deployment/native/systemd/user/*.service deployment/native/systemd/system/*.service</code></li>
|
||||
<li><code>bun build services/api/src/index.ts --target=bun</code></li>
|
||||
<li><code>bun build scripts/deploy.ts --target=bun</code></li>
|
||||
</ul>
|
||||
</article>
|
||||
<article class="status-card good">
|
||||
<div class="pill good">Native runtime</div>
|
||||
<ul>
|
||||
<li><code>./deployment/native/check-native-health.sh full</code></li>
|
||||
<li><code>curl http://127.0.0.1:4000/health</code></li>
|
||||
<li><code>curl -I http://127.0.0.1:3000/</code></li>
|
||||
</ul>
|
||||
</article>
|
||||
<article class="status-card good">
|
||||
<div class="pill good">Public edge</div>
|
||||
<ul>
|
||||
<li><code>curl -I -fksS https://flow.deltaisland.io</code></li>
|
||||
<li><code>curl -fksS https://api.flow.deltaisland.io/health</code></li>
|
||||
<li><code>bun run scripts/check-public-api-routes.ts https://flow.deltaisland.io</code></li>
|
||||
</ul>
|
||||
</article>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Issues, Limitations, and Mitigations</h2>
|
||||
<ul>
|
||||
<li>
|
||||
The native ingest-options service required an explicit synthetic-adapter override because the environment file
|
||||
still pointed at an Alpaca adapter that was returning <code>401</code> responses. The service now starts
|
||||
cleanly for native cutover, but production adapter selection remains an operational decision.
|
||||
</li>
|
||||
<li>
|
||||
The NPM helper still relies on direct config synchronization because NPM did not reliably regenerate the
|
||||
Islandflow proxy files from SQLite changes alone. This is mitigated by keeping the synchronization logic
|
||||
checked in and by reloading NPM as part of the helper itself.
|
||||
</li>
|
||||
<li>
|
||||
The final public API recovery currently leaves <code>api.flow.deltaisland.io</code> as a DNS-only hostname.
|
||||
That restored service, but it changes the edge posture relative to the web hostname and should be reviewed
|
||||
deliberately.
|
||||
</li>
|
||||
<li>
|
||||
A temporary Cloudflare API token was used to inspect and correct zone state during validation. That token
|
||||
should be revoked or rotated outside this repository workflow.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Follow-up Work</h2>
|
||||
<ul>
|
||||
<li>
|
||||
<code>islandflow-fl5</code>: decide whether <code>api.flow.deltaisland.io</code> should remain DNS-only or
|
||||
be re-proxied through Cloudflare, then re-validate TLS, websocket, and operational behavior for the chosen
|
||||
posture.
|
||||
</li>
|
||||
<li>
|
||||
After an operational soak period, decide whether native should become the default production runtime or remain a
|
||||
supported alternative with Docker as the preferred steady-state runtime.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -9,7 +9,9 @@ import {
|
|||
type StreamUpdateConfig,
|
||||
JSONCodec,
|
||||
type JsMsg,
|
||||
createInbox
|
||||
createInbox,
|
||||
nanos,
|
||||
millis
|
||||
} from "nats";
|
||||
import { getKnownStreamDefinitions, getStreamDefinition, type StreamRetentionClass } from "./streams";
|
||||
|
||||
|
|
@ -164,13 +166,13 @@ export const resolveStreamRetention = (
|
|||
): Pick<StreamConfig, "max_bytes" | "max_age"> => {
|
||||
if (streamClass === "raw") {
|
||||
return {
|
||||
max_age: parseBoundedNumber(env.STREAM_RAW_MAX_AGE_MS, 3_600_000),
|
||||
max_age: nanos(parseBoundedNumber(env.STREAM_RAW_MAX_AGE_MS, 3_600_000)),
|
||||
max_bytes: parseBoundedNumber(env.STREAM_RAW_MAX_BYTES, 536_870_912)
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
max_age: parseBoundedNumber(env.STREAM_DERIVED_MAX_AGE_MS, 43_200_000),
|
||||
max_age: nanos(parseBoundedNumber(env.STREAM_DERIVED_MAX_AGE_MS, 43_200_000)),
|
||||
max_bytes: parseBoundedNumber(env.STREAM_DERIVED_MAX_BYTES, 268_435_456)
|
||||
};
|
||||
};
|
||||
|
|
@ -417,7 +419,7 @@ const formatBytes = (value: number): string => {
|
|||
};
|
||||
|
||||
const formatRetentionSummary = (config: StreamConfig): string => {
|
||||
return `age=${formatDurationMs(Number(config.max_age))} bytes=${formatBytes(config.max_bytes)} replicas=${config.num_replicas} retention=${config.retention} discard=${config.discard}`;
|
||||
return `age=${formatDurationMs(millis(Number(config.max_age)))} bytes=${formatBytes(config.max_bytes)} replicas=${config.num_replicas} retention=${config.retention} discard=${config.discard}`;
|
||||
};
|
||||
|
||||
const formatReportLine = (
|
||||
|
|
@ -442,12 +444,12 @@ const formatReportLine = (
|
|||
const details = report.retentionDrift
|
||||
.map((delta) => {
|
||||
const desiredValue = delta.field === "max_age"
|
||||
? formatDurationMs(Number(delta.desired))
|
||||
? formatDurationMs(millis(Number(delta.desired)))
|
||||
: delta.field === "max_bytes"
|
||||
? formatBytes(Number(delta.desired))
|
||||
: formatStructuredValue(delta.desired);
|
||||
const currentValue = delta.field === "max_age"
|
||||
? formatDurationMs(Number(delta.current))
|
||||
? formatDurationMs(millis(Number(delta.current)))
|
||||
: delta.field === "max_bytes"
|
||||
? formatBytes(Number(delta.current))
|
||||
: formatStructuredValue(delta.current);
|
||||
|
|
|
|||
|
|
@ -920,6 +920,10 @@ function remoteNativeVerification(scope: DeployScope, fast: boolean): void {
|
|||
const units = nativeUnitsForScope(scope).map((value) => shellEscape(value)).join(" ");
|
||||
const checks: string[] = [];
|
||||
|
||||
if (scope === "full" || scope === "api" || scope === "services" || scope === "workers") {
|
||||
checks.push("./deployment/native/check-native-infra.sh");
|
||||
}
|
||||
|
||||
if (scopeIncludesApi(scope)) {
|
||||
checks.push('curl -fksS http://127.0.0.1:4000/health');
|
||||
}
|
||||
|
|
@ -954,10 +958,10 @@ function remoteVerification(runtime: DeployRuntime, scope: DeployScope, fast: bo
|
|||
|
||||
function publicVerification(scope: DeployScope, fast: boolean): void {
|
||||
section("Public Verification");
|
||||
if (!fast || scopeIncludesWeb(scope)) {
|
||||
if (scopeIncludesWeb(scope)) {
|
||||
runChecked("curl", ["-I", "-fksS", PUBLIC_APP_URL]);
|
||||
} else {
|
||||
console.log("[deploy] Fast mode: skipping public app HEAD check because web scope is not included.");
|
||||
console.log("[deploy] Skipping public app HEAD check because web scope is not included.");
|
||||
}
|
||||
|
||||
if (scopeIncludesApi(scope) && PUBLIC_API_HEALTH_URL) {
|
||||
|
|
|
|||
|
|
@ -138,6 +138,7 @@ const DeliverPolicySchema = z.enum(["new", "all", "last", "last_per_subject"]);
|
|||
|
||||
const envSchema = z.object({
|
||||
API_PORT: z.coerce.number().int().positive().default(4000),
|
||||
API_HOST: z.string().min(1).default("127.0.0.1"),
|
||||
NATS_URL: z.string().default("nats://127.0.0.1:4222"),
|
||||
CLICKHOUSE_URL: z.string().default("http://127.0.0.1:8123"),
|
||||
CLICKHOUSE_DATABASE: z.string().default("default"),
|
||||
|
|
@ -1313,6 +1314,7 @@ const run = async () => {
|
|||
};
|
||||
|
||||
const server = Bun.serve<WsData | LiveWsData>({
|
||||
hostname: env.API_HOST,
|
||||
port: env.API_PORT,
|
||||
fetch: async (req: Request, serverRef: any) => {
|
||||
const url = new URL(req.url);
|
||||
|
|
@ -1995,7 +1997,7 @@ const run = async () => {
|
|||
}
|
||||
});
|
||||
|
||||
logger.info("api listening", { port: server.port });
|
||||
logger.info("api listening", { host: env.API_HOST, port: server.port });
|
||||
|
||||
const shutdown = async (signal: string) => {
|
||||
if (state.shutdownPromise) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue