From a5dcb56f7d22ac68d03ec777a561492c8ecbc8b2 Mon Sep 17 00:00:00 2001 From: albert Date: Sun, 29 Mar 2026 21:58:02 -0400 Subject: [PATCH] Initial commit: FLM proxy server for AMD NPU --- .gitignore | 33 +++++++ CLAUDE.md | 88 +++++++++++++++++ flm-proxy.js | 160 ++++++++++++++++++++++++++++++ flm-service-install.js | 27 ++++++ flm-service-uninstall.js | 12 +++ flm-start.bat | 4 + flm-stop.bat | 3 + package-lock.json | 205 +++++++++++++++++++++++++++++++++++++++ package.json | 5 + 9 files changed, 537 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 flm-proxy.js create mode 100644 flm-service-install.js create mode 100644 flm-service-uninstall.js create mode 100644 flm-start.bat create mode 100644 flm-stop.bat create mode 100644 package-lock.json create mode 100644 package.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..845972f --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +node_modules/ +daemon/ +.env +FastFlowLM/ +miniforge3/ +ImageModerationService/ +VisionScannerService/ +Desktop/ +Documents/ +Downloads/ +Music/ +Pictures/ +Videos/ +Favorites/ +Links/ +AppData/ +"Application Data" +Cookies/ +"Local Settings" +"My Documents" +NetHood/ +PrintHood/ +Recent/ +"Saved Games" +SendTo/ +"Start Menu" +Templates/ +NTUSER* +ntuser* +*.dat +*.LOG* +*.blf +*.regtrans-ms diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..81fa3b6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,88 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Overview + +This is a FastFlowLM proxy server setup that runs LLM models on an AMD NPU (Neural Processing Unit). The proxy auto-starts the model on first request and stops it after idle timeout to free RAM. + +## Architecture + +- **`flm-proxy.js`** — Node.js HTTP proxy (port 8000) that sits in front of FastFlowLM (port 8001). 
It lazily spawns `flm.exe`, polls until the model is ready, proxies all requests, and kills the process after 5 minutes of inactivity. Exposes `/status` and `/stop` control endpoints. +- **`FastFlowLM/flm.exe`** — Pre-built binary that serves OpenAI-compatible API (`/v1/models`, `/v1/chat/completions`, etc.) using NPU-accelerated models. Not source code — do not modify. +- **`flm-service-install.js` / `flm-service-uninstall.js`** — Install/uninstall the proxy as a Windows service via `node-windows`. +- **`daemon/`** — Windows service wrapper files generated by `node-windows` (exe, logs, config). +- **`flm-start.bat` / `flm-stop.bat`** — Simple batch scripts to run FLM directly (bypassing the proxy). + +## Commands + +```bash +# Run the proxy (foreground) +node flm-proxy.js + +# Install as Windows service +node flm-service-install.js + +# Uninstall Windows service +node flm-service-uninstall.js + +# Install dependencies +npm install + +# Check service logs +cat ~/daemon/flmvisionproxy.out.log +cat ~/daemon/flmvisionproxy.err.log +``` + +## Key Configuration (in flm-proxy.js) + +- `MODEL` — currently `qwen2.5vl-it:3b` (Qwen2.5 Vision-Language 3B) +- `PROXY_PORT` — 8000 (external-facing) +- `FLM_PORT` — 8001 (internal FLM server) +- `IDLE_TIMEOUT_MS` — 5 minutes +- `HOST` — `0.0.0.0` (listens on all interfaces) + +## Available Models + +See `FastFlowLM/model_list.json` for the full catalog. Model identifiers use the format `family:size` (e.g., `qwen3:4b`, `llama3.2:3b`). Vision models have `"vlm": true`. Thinking models have `"think": true`. 
+ +## Services + +All services are TypeScript/Express apps with the same build pattern: + +```bash +cd <service-dir> # e.g. ImageModerationService or VisionScannerService +npm install # install deps +npm run build # tsc → dist/ +npm start # node dist/server.js +npm run dev # tsx watch (hot-reload) + +# Windows service management +node service-install.js +node service-uninstall.js +``` + +### ImageModerationService (port 8100) + +Checks uploaded images for NSFW/explicit content using the local vision LLM. When an image is flagged unsafe, fires callbacks to the upload service (to replace the image) and to Parochia (to flag the user). + +- **Endpoints:** `POST /moderate` (multipart: `file`, `context`, `imagePath`, `userId`, `siteId`), `GET /health` +- **Vision model:** `gemma3:4b` via FLM proxy at `localhost:8000` +- **Callbacks:** Configurable in `.env` — upload service replace URL + Parochia moderation callback +- **Source:** `src/moderate.ts` (moderation logic), `src/server.ts` (Express app) + +### VisionScannerService (port 8002) + +Scans shelf/pantry photos to extract product information and prices using the vision LLM. Uses ChromaDB for embeddings storage and Ollama for embedding generation. Supports image tiling for high-res photos. 
+
+- **Endpoints:** `POST /scan/shelf` (multipart: `image`, `store_name`), `POST /scan/pantry` (multipart: `image`), `GET /health`
+- **Vision model:** `qwen2.5vl-it:3b` via FLM proxy at `localhost:8000`
+- **External deps:** Ollama (`192.168.0.15:11434`, `nomic-embed-text`), ChromaDB (`192.168.0.15:8000`), optional Gemini API
+- **Source:** `src/vision.ts` (LLM calls), `src/tiling.ts` (image tiling), `src/shelf.ts` / `src/pantry.ts` (scan logic), `src/embeddings.ts` + `src/chroma.ts` (vector storage), `src/matching.ts` (product matching), `src/parsing.ts` (response parsing), `src/gemini.ts` (Gemini fallback), `src/config.ts`
+
+## Environment
+
+- Windows 11, AMD NPU hardware
+- Node.js with `node-windows` dependency
+- FLM binary path: `C:\Users\sshuser\FastFlowLM\flm.exe`
+- All paths are hardcoded to `C:\Users\sshuser\`
diff --git a/flm-proxy.js b/flm-proxy.js
new file mode 100644
index 0000000..26eb28b
--- /dev/null
+++ b/flm-proxy.js
@@ -0,0 +1,235 @@
+const http = require("http");
+const { spawn, execSync } = require("child_process");
+
+const FLM_PATH = "C:\\Users\\sshuser\\FastFlowLM\\flm.exe";
+const MODEL = "qwen2.5vl-it:3b";
+const HOST = "0.0.0.0";
+const PROXY_PORT = 8000;
+const FLM_PORT = 8001;
+const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
+const START_TIMEOUT_MS = 60 * 1000; // how long the model may take to come up
+const POLL_INTERVAL_MS = 1000; // readiness probe period
+
+let flmProcess = null;
+let idleTimer = null;
+let starting = false;
+let ready = false;
+let startPromise = null; // shared by every request that arrives during startup
+let abortStart = null; // rejects the pending startup; set only while one runs
+let activeRequests = 0; // proxied requests that have not finished yet
+
+function log(msg) {
+  console.log(`[${new Date().toLocaleTimeString()}] ${msg}`);
+}
+
+// (Re)arm the idle shutdown timer. The timer only runs while no request is in
+// flight, so a generation that takes longer than the timeout is not killed.
+function resetIdleTimer() {
+  if (idleTimer) clearTimeout(idleTimer);
+  idleTimer = null;
+  if (activeRequests > 0) return;
+  idleTimer = setTimeout(() => {
+    log("Idle timeout reached. Stopping model...");
+    stopFlm();
+  }, IDLE_TIMEOUT_MS);
+}
+
+// Kill the model process (if any) and reset the lifecycle state.
+function stopFlm() {
+  ready = false;
+  starting = false;
+  if (idleTimer) clearTimeout(idleTimer);
+  idleTimer = null;
+  const child = flmProcess;
+  flmProcess = null;
+  // Fail a startup that is still pending so its callers are not left waiting.
+  if (abortStart) abortStart(new Error("Model stopped during startup"));
+  // pid is undefined when the spawn itself failed: nothing to kill then.
+  if (!child || child.pid === undefined) return;
+  try {
+    // Kill only the process tree this proxy spawned, not every flm.exe.
+    execSync(`taskkill /PID ${child.pid} /T /F`, { stdio: "ignore" });
+  } catch {
+    // taskkill also fails when the process is already gone; best effort only.
+    child.kill();
+  }
+  log("Model stopped. RAM freed.");
+}
+
+// Start flm.exe if needed and resolve once it answers on FLM_PORT. Concurrent
+// callers share one promise, so they all resolve or reject together.
+function startFlm() {
+  if (ready) return Promise.resolve();
+  if (startPromise) return startPromise;
+
+  starting = true;
+  log("Starting model on NPU...");
+
+  startPromise = new Promise((resolve, reject) => {
+    const child = spawn(FLM_PATH, [
+      "serve", MODEL,
+      "--host", "127.0.0.1",
+      "--port", String(FLM_PORT),
+      "--pmode", "performance"
+    ], { stdio: ["pipe", "pipe", "pipe"] });
+    flmProcess = child;
+
+    let check = null;
+    let deadline = null;
+    let settled = false;
+
+    // Settle the startup exactly once: resolve without `err`, reject with it.
+    const finish = (err) => {
+      if (settled) return;
+      settled = true;
+      abortStart = null;
+      startPromise = null;
+      starting = false;
+      clearInterval(check);
+      clearTimeout(deadline);
+      if (!err) {
+        ready = true;
+        log("Model ready!");
+        resolve();
+        return;
+      }
+      // Do not leave a half-started process behind after a failed startup.
+      if (flmProcess === child) stopFlm();
+      reject(err);
+    };
+    abortStart = finish;
+
+    // Drain stdout: a pipe nobody reads fills up and blocks the child.
+    child.stdout?.resume();
+    child.stderr?.on("data", (d) => {
+      const s = d.toString();
+      if (s.includes("ERROR")) log("FLM: " + s.trim());
+    });
+
+    // A failed spawn (e.g. flm.exe missing) is reported as an "error" event;
+    // without a listener it would crash the proxy.
+    child.on("error", (e) => {
+      finish(new Error("Failed to run FLM: " + e.message));
+    });
+
+    child.on("exit", (code) => {
+      log(`FLM exited (code ${code})`);
+      // Only reset state that still belongs to this child, not a newer one.
+      if (flmProcess === child) {
+        flmProcess = null;
+        ready = false;
+        starting = false;
+      }
+      finish(new Error(`FLM exited during startup (code ${code})`));
+    });
+
+    // Poll until the server responds
+    check = setInterval(() => {
+      const req = http.get(`http://127.0.0.1:${FLM_PORT}/v1/models`, (res) => {
+        res.resume(); // discard the body so the socket is released
+        if (res.statusCode === 200) finish();
+      });
+      req.on("error", () => {}); // refused connections are expected until FLM is up
+      req.setTimeout(POLL_INTERVAL_MS, () => req.destroy());
+    }, POLL_INTERVAL_MS);
+
+    deadline = setTimeout(() => {
+      finish(new Error(`Model failed to start within ${START_TIMEOUT_MS / 1000}s`));
+    }, START_TIMEOUT_MS);
+  });
+
+  return startPromise;
+}
+
+// Forward one client request to the FLM backend and stream the reply back.
+function proxy(clientReq, clientRes) {
+  const options = {
+    hostname: "127.0.0.1",
+    port: FLM_PORT,
+    path: clientReq.url,
+    method: clientReq.method,
+    headers: clientReq.headers
+  };
+
+  const proxyReq = http.request(options, (proxyRes) => {
+    clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);
+    proxyRes.pipe(clientRes);
+    // pipe() does not end the client response when the backend connection
+    // drops mid-reply, so close it here instead of leaving the client hanging.
+    proxyRes.on("close", () => {
+      if (!proxyRes.complete) clientRes.destroy();
+    });
+  });
+
+  proxyReq.on("error", (e) => {
+    // writeHead() throws once headers are out, so a late failure can only
+    // terminate the response.
+    if (clientRes.headersSent) {
+      clientRes.destroy();
+      return;
+    }
+    clientRes.writeHead(502, { "Content-Type": "application/json" });
+    clientRes.end(JSON.stringify({ error: "Model backend error: " + e.message }));
+  });
+
+  // Abort the backend request when the client disconnects before the reply ends.
+  clientRes.on("close", () => {
+    if (!clientRes.writableFinished) proxyReq.destroy();
+  });
+
+  clientReq.pipe(proxyReq);
+}
+
+const server = http.createServer(async (req, res) => {
+  // CORS headers
+  res.setHeader("Access-Control-Allow-Origin", "*");
+  res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
+  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
+  if (req.method === "OPTIONS") { res.writeHead(204); res.end(); return; }
+
+  // Status endpoint
+  if (req.url === "/status") {
+    res.writeHead(200, { "Content-Type": "application/json" });
+    res.end(JSON.stringify({ model: MODEL, ready, starting, pid: flmProcess?.pid || null }));
+    return;
+  }
+
+  // Stop endpoint
+  if (req.url === "/stop") {
+    stopFlm();
+    res.writeHead(200, { "Content-Type": "application/json" });
+    res.end(JSON.stringify({ status: "stopped" }));
+    return;
+  }
+
+  // Keep the idle timer off while this request is in flight; re-arm on close.
+  let closed = false;
+  activeRequests++;
+  resetIdleTimer();
+  res.on("close", () => {
+    closed = true;
+    activeRequests--;
+    resetIdleTimer();
+  });
+
+  try {
+    if (!ready) {
+      log(`Request received. Waking up model...`);
+      await startFlm();
+    }
+    if (closed) return; // client gave up while the model was starting
+    proxy(req, res);
+  } catch (e) {
+    res.writeHead(503, { "Content-Type": "application/json" });
+    res.end(JSON.stringify({ error: e.message }));
+  }
+});
+
+server.listen(PROXY_PORT, HOST, () => {
+  log(`Proxy listening on ${HOST}:${PROXY_PORT}`);
+  log(`Model will auto-start on first request, auto-stop after ${IDLE_TIMEOUT_MS / 60000}m idle`);
+  log(`Endpoints: /status, /stop`);
+});
+
+process.on("SIGINT", () => { stopFlm(); process.exit(); });
+process.on("SIGTERM", () => { stopFlm(); process.exit(); });
diff --git a/flm-service-install.js b/flm-service-install.js
new file mode 100644
index 0000000..ca23449
--- /dev/null
+++ b/flm-service-install.js
@@ -0,0 +1,27 @@
+const Service = require("node-windows").Service;
+
+const svc = new Service({
+  name: "FLM Vision Proxy",
+  description: "Auto-start/stop proxy for FastFlowLM vision model on NPU",
+  script: "C:\\Users\\sshuser\\flm-proxy.js",
+  nodeOptions: [],
+  env: [{
+    name: "PATH",
+    value: process.env.PATH
+  }]
+});
+
+svc.on("install", () => {
+  console.log("Service installed. Starting...");
+  svc.start();
+});
+
+svc.on("start", () => {
+  console.log("Service started!");
+});
+
+svc.on("error", (err) => {
+  console.error("Error:", err);
+});
+
+svc.install();
diff --git a/flm-service-uninstall.js b/flm-service-uninstall.js
new file mode 100644
index 0000000..781bb15
--- /dev/null
+++ b/flm-service-uninstall.js
@@ -0,0 +1,12 @@
+const Service = require("node-windows").Service;
+
+const svc = new Service({
+  name: "FLM Vision Proxy",
+  script: "C:\\Users\\sshuser\\flm-proxy.js"
+});
+
+svc.on("uninstall", () => {
+  console.log("Service uninstalled.");
+});
+
+svc.uninstall();
diff --git a/flm-start.bat b/flm-start.bat
new file mode 100644
index 0000000..a0f907d
--- /dev/null
+++ b/flm-start.bat
@@ -0,0 +1,4 @@
+@echo off
+echo Starting Vision AI on port 8000...
+start /B "" "C:\Users\sshuser\FastFlowLM\flm.exe" serve qwen2.5vl-it:3b --host 0.0.0.0 --port 8000 --pmode performance
+echo Vision AI started. Access at http://192.168.0.208:8000
diff --git a/flm-stop.bat b/flm-stop.bat
new file mode 100644
index 0000000..1b34639
--- /dev/null
+++ b/flm-stop.bat
@@ -0,0 +1,3 @@
+@echo off
+taskkill /IM flm.exe /F >nul 2>&1
+echo AI stopped.
diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..cadff55 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,205 @@ +{ + "name": "sshuser", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "node-windows": "^1.0.0-beta.8" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": 
"sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "license": "MIT" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/node-windows": { + "version": "1.0.0-beta.8", + "resolved": "https://registry.npmjs.org/node-windows/-/node-windows-1.0.0-beta.8.tgz", + "integrity": "sha512-uLekXnSeem3nW5escID224Fd0U/1VtvE796JpSpOY+c73Cslz/Qn2WUHRJyPQJEMrNGAy/FMRFjjhh4z1alZTA==", + "license": "MIT", + "dependencies": { + "xml": "1.0.1", + "yargs": "^17.5.1" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + 
"license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/xml": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/xml/-/xml-1.0.1.tgz", + "integrity": "sha512-huCv9IH9Tcf95zuYCsQraZtWnJvBtLVE0QHMOs8bWyZAFZNDcYjsPq1nEx8jKA9y+Beo9v+7OBPRisQTjinQMw==", + "license": "MIT" + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": 
"sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "license": "ISC", + "engines": { + "node": ">=12" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..e917195 --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "dependencies": { + "node-windows": "^1.0.0-beta.8" + } +}