Initial commit: FLM proxy server for AMD NPU

2026-03-29 21:58:02 -04:00
commit a5dcb56f7d
9 changed files with 537 additions and 0 deletions
--- a/flm-proxy.js
+++ b/flm-proxy.js
@@ -0,0 +1,160 @@
+const http = require("http");
+const { spawn, execSync } = require("child_process");
+
+const FLM_PATH = "C:\\Users\\sshuser\\FastFlowLM\\flm.exe"; // executable that startFlm() spawns
+const MODEL = "qwen2.5vl-it:3b"; // model name passed to `flm serve`; also reported by /status
+const HOST = "0.0.0.0"; // address the proxy's HTTP server binds to
+const PROXY_PORT = 8000; // port the proxy's HTTP server listens on
+const FLM_PORT = 8001; // port FLM is told to serve on (127.0.0.1 only); requests are forwarded here
+const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes without a proxied request before the model is stopped
+
+let flmProcess = null; // ChildProcess returned by spawn(), or null when no model process is tracked
+let idleTimer = null; // handle of the pending idle-shutdown timeout, if one has been scheduled
+let starting = false; // true while a model start is in progress
+let ready = false; // set once the backend's /v1/models check returns 200; cleared on stop or exit
+
+/**
+ * Prints a message to stdout, prefixed with the current local time in
+ * square brackets.
+ *
+ * @param {string} msg - Text to print after the timestamp.
+ */
+function log(msg) {
+  const stamp = new Date().toLocaleTimeString();
+  console.log(`[${stamp}] ${msg}`);
+}
+
+/**
+ * Restarts the idle countdown: cancels any pending idle timeout and schedules
+ * a new one that stops the model after IDLE_TIMEOUT_MS milliseconds.
+ */
+function resetIdleTimer() {
+  const onIdle = () => {
+    log("Idle timeout reached. Stopping model...");
+    stopFlm();
+  };
+
+  if (idleTimer) {
+    clearTimeout(idleTimer);
+  }
+  idleTimer = setTimeout(onIdle, IDLE_TIMEOUT_MS);
+}
+
+/**
+ * Stops the FLM process that this proxy spawned (if any) and resets the
+ * ready/starting flags and the idle timer.
+ *
+ * The process is terminated by PID rather than by image name so that other
+ * flm.exe instances running on the same machine are left alone. On Windows
+ * `taskkill /T` is used so that any child processes of FLM go down with it.
+ * Safe to call when nothing is running.
+ */
+function stopFlm() {
+  ready = false;
+  starting = false;
+  if (idleTimer) {
+    clearTimeout(idleTimer);
+    idleTimer = null;
+  }
+  if (!flmProcess) return;
+
+  const child = flmProcess;
+  flmProcess = null;
+
+  try {
+    if (process.platform === "win32" && child.pid) {
+      // child.pid is a number assigned by the OS, so interpolating it is safe.
+      execSync(`taskkill /PID ${child.pid} /T /F`, { stdio: "ignore" });
+    } else {
+      child.kill();
+    }
+  } catch (err) {
+    // taskkill exits non-zero when, for example, the process is already gone.
+    // Report it and fall back to a direct kill instead of failing silently.
+    log(`taskkill failed for pid ${child.pid}: ${err.message}`);
+    child.kill();
+  }
+  log("Model stopped. RAM freed.");
+}
+
+// Promise of the start attempt that is currently in flight, or null.
+// Concurrent callers share it instead of each polling the `ready` flag.
+let flmStartPromise = null;
+
+/**
+ * Ensures the FLM server is running and answering HTTP requests.
+ *
+ * Resolves immediately when the model is already ready. Otherwise it spawns
+ * `flm serve` (or joins a start that is already in progress) and resolves once
+ * GET /v1/models on the backend returns 200.
+ *
+ * @returns {Promise<void>} Resolves when the backend is ready. Rejects with an
+ *   Error when the executable cannot be launched, when the process exits
+ *   before becoming ready, or when it does not answer within 60 seconds.
+ */
+function startFlm() {
+  if (ready) return Promise.resolve();
+  if (flmStartPromise) return flmStartPromise;
+
+  const START_TIMEOUT_MS = 60000;
+  const POLL_INTERVAL_MS = 1000;
+
+  starting = true;
+  log("Starting model on NPU...");
+
+  const attempt = new Promise((resolve, reject) => {
+    let settled = false;
+    let pollTimer = null;
+    let deadlineTimer = null;
+
+    // Single exit point: stops polling, clears the deadline and settles the
+    // promise exactly once, whichever of ready/exit/error/timeout comes first.
+    const settle = (err) => {
+      if (settled) return;
+      settled = true;
+      clearInterval(pollTimer);
+      clearTimeout(deadlineTimer);
+      starting = false;
+      if (err) {
+        reject(err);
+        return;
+      }
+      ready = true;
+      log("Model ready!");
+      resolve();
+    };
+
+    let child;
+    try {
+      child = spawn(FLM_PATH, [
+        "serve", MODEL,
+        "--host", "127.0.0.1",
+        "--port", String(FLM_PORT),
+        "--pmode", "performance"
+      ], { stdio: ["pipe", "pipe", "pipe"] });
+    } catch (err) {
+      settle(err);
+      return;
+    }
+    flmProcess = child;
+
+    // Only reset shared state if it still refers to this child; a late event
+    // from an old process must not wipe out a newer one.
+    const forgetChild = () => {
+      if (flmProcess !== child) return;
+      flmProcess = null;
+      ready = false;
+      starting = false;
+    };
+
+    // stdout is piped but unused; drain it so the child never blocks on a
+    // full pipe buffer.
+    child.stdout.resume();
+
+    child.stderr.on("data", (d) => {
+      const s = d.toString();
+      if (s.includes("ERROR")) log("FLM: " + s.trim());
+    });
+
+    // Without an 'error' listener a failed spawn (e.g. missing executable)
+    // would be thrown as an uncaught exception and take the proxy down.
+    child.on("error", (err) => {
+      log(`FLM process error: ${err.message}`);
+      if (child.pid === undefined) {
+        // No pid means the process never launched.
+        forgetChild();
+        settle(new Error(`Failed to launch FLM: ${err.message}`, { cause: err }));
+      }
+    });
+
+    child.on("exit", (code) => {
+      log(`FLM exited (code ${code})`);
+      forgetChild();
+      // No-op when the start already succeeded; otherwise fail fast instead
+      // of waiting for the deadline.
+      settle(new Error(`FLM exited before becoming ready (code ${code})`));
+    });
+
+    // Poll until the server responds
+    pollTimer = setInterval(() => {
+      const req = http.get(`http://127.0.0.1:${FLM_PORT}/v1/models`, (res) => {
+        res.resume(); // discard the body so the socket is released
+        if (res.statusCode === 200) settle();
+      });
+      // Connection failures are expected until the server is listening.
+      req.on("error", () => {});
+      req.setTimeout(1000, () => req.destroy());
+    }, POLL_INTERVAL_MS);
+
+    deadlineTimer = setTimeout(() => {
+      settle(new Error(`Model failed to start within ${START_TIMEOUT_MS / 1000}s`));
+      // Do not leave a half-started process behind; the next request retries.
+      stopFlm();
+    }, START_TIMEOUT_MS);
+  });
+
+  flmStartPromise = attempt;
+  const clearAttempt = () => {
+    if (flmStartPromise === attempt) flmStartPromise = null;
+  };
+  // Handles both outcomes so this bookkeeping chain never rejects unhandled;
+  // callers still observe the rejection on `attempt` itself.
+  attempt.then(clearAttempt, clearAttempt);
+
+  return attempt;
+}
+
+/**
+ * Forwards a client request to the FLM backend on 127.0.0.1:FLM_PORT and
+ * streams the backend's response back to the client.
+ *
+ * The method, URL and headers are passed through unchanged. If the backend
+ * fails before any response headers were sent, the client receives a 502 with
+ * a JSON error body; if it fails mid-response, the client connection is
+ * closed. If the client disconnects first, the upstream request is cancelled.
+ *
+ * @param {http.IncomingMessage} clientReq - Request received by the proxy.
+ * @param {http.ServerResponse} clientRes - Response to write the result to.
+ */
+function proxy(clientReq, clientRes) {
+  const options = {
+    hostname: "127.0.0.1",
+    port: FLM_PORT,
+    path: clientReq.url,
+    method: clientReq.method,
+    headers: clientReq.headers
+  };
+
+  const proxyReq = http.request(options, (proxyRes) => {
+    clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);
+    proxyRes.pipe(clientRes);
+    // pipe() does not end the destination when the source errors, so close
+    // the client connection explicitly rather than leaving it hanging.
+    proxyRes.on("error", () => clientRes.destroy());
+  });
+
+  proxyReq.on("error", (e) => {
+    if (clientRes.headersSent) {
+      // Too late for a status code; writeHead() would throw here.
+      clientRes.destroy();
+      return;
+    }
+    clientRes.writeHead(502, { "Content-Type": "application/json" });
+    clientRes.end(JSON.stringify({ error: "Model backend error: " + e.message }));
+  });
+
+  // If the client goes away before the response is complete, stop the
+  // upstream request so the backend does not keep working for nobody.
+  clientRes.on("close", () => {
+    if (!clientRes.writableFinished) proxyReq.destroy();
+  });
+
+  clientReq.pipe(proxyReq);
+}
+
+/**
+ * HTTP front end of the proxy.
+ *
+ * - Every response carries permissive CORS headers; OPTIONS is answered 204.
+ * - /status returns the model name, the ready/starting flags and the FLM pid.
+ * - /stop stops the model and returns {"status":"stopped"}.
+ * - Anything else resets the idle timer, starts the model if it is not ready,
+ *   and is then forwarded to the backend. If the model cannot be started the
+ *   client gets a 503 with a JSON error body.
+ *
+ * NOTE(review): /stop is matched for any HTTP method and has no
+ * authentication — confirm that is intended given the "*" CORS origin.
+ */
+const server = http.createServer(async (req, res) => {
+  // CORS headers
+  res.setHeader("Access-Control-Allow-Origin", "*");
+  res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
+  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
+  if (req.method === "OPTIONS") { res.writeHead(204); res.end(); return; }
+
+  // Status endpoint
+  if (req.url === "/status") {
+    res.writeHead(200, { "Content-Type": "application/json" });
+    res.end(JSON.stringify({ model: MODEL, ready, starting, pid: flmProcess?.pid || null }));
+    return;
+  }
+
+  // Stop endpoint
+  if (req.url === "/stop") {
+    stopFlm();
+    res.writeHead(200, { "Content-Type": "application/json" });
+    res.end(JSON.stringify({ status: "stopped" }));
+    return;
+  }
+
+  try {
+    resetIdleTimer();
+    if (!ready) {
+      log(`Request received. Waking up model...`);
+      await startFlm();
+    }
+    proxy(req, res);
+  } catch (e) {
+    log(`Request failed: ${e.message}`);
+    if (res.headersSent) {
+      // writeHead() would throw inside this async handler and surface as an
+      // unhandled rejection; all that is left to do is drop the connection.
+      res.destroy();
+      return;
+    }
+    res.writeHead(503, { "Content-Type": "application/json" });
+    res.end(JSON.stringify({ error: e.message }));
+  }
+});
+
+// Bind the proxy and, once it is listening, print a short usage banner.
+server.listen(PROXY_PORT, HOST, () => {
+  const idleMinutes = IDLE_TIMEOUT_MS / 60000;
+  const banner = [
+    `Proxy listening on ${HOST}:${PROXY_PORT}`,
+    `Model will auto-start on first request, auto-stop after ${idleMinutes}m idle`,
+    `Endpoints: /status, /stop`,
+  ];
+  for (const line of banner) {
+    log(line);
+  }
+});
+
+// On interrupt or termination, stop the model first and then exit.
+for (const signal of ["SIGINT", "SIGTERM"]) {
+  process.on(signal, () => {
+    stopFlm();
+    process.exit();
+  });
+}