// flm-proxy.js

const http = require("http");
const { spawn, execSync } = require("child_process");

// Absolute path of the FLM executable that startFlm() spawns.
const FLM_PATH = "C:\\Users\\sshuser\\FastFlowLM\\flm.exe";
// Model identifier passed to `flm serve` and reported by the /status endpoint.
const MODEL = "qwen2.5vl-it:3b";
// Interface and port this proxy itself listens on.
const HOST = "0.0.0.0";
const PROXY_PORT = 8000;
// Loopback port the FLM backend is told to serve on; the proxy polls and
// forwards requests to 127.0.0.1 on this port.
const FLM_PORT = 8001;
// Delay after the most recent proxied request before the backend is stopped.
const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes

// Child process handle returned by spawn(), or null when no backend is tracked.
let flmProcess = null;
// Handle of the pending idle-shutdown timeout (see resetIdleTimer).
let idleTimer = null;
// True from the moment a backend launch begins until it is ready, exits, or is stopped.
let starting = false;
// True once the backend has answered the /v1/models probe with HTTP 200.
let ready = false;

/**
 * Write a message to stdout, prefixed with the current local time in
 * square brackets.
 *
 * @param {*} msg - Value to print; it is interpolated into the output line.
 */
function log(msg) {
  const stamp = new Date().toLocaleTimeString();
  const line = `[${stamp}] ${msg}`;
  console.log(line);
}

/**
 * Restart the idle countdown.
 *
 * Cancels any countdown already pending and schedules a new one that,
 * after IDLE_TIMEOUT_MS, logs the timeout and stops the backend via stopFlm().
 */
function resetIdleTimer() {
  const onIdle = () => {
    log("Idle timeout reached. Stopping model...");
    stopFlm();
  };

  if (idleTimer) {
    clearTimeout(idleTimer);
  }
  idleTimer = setTimeout(onIdle, IDLE_TIMEOUT_MS);
}

/**
 * Stop the FLM backend spawned by this proxy and reset the proxy state.
 *
 * Clears the `ready`/`starting` flags and the idle timer, then terminates
 * the tracked child process. Does nothing beyond the state reset when no
 * child is tracked. Runs synchronously so it can be used from signal
 * handlers immediately before process.exit().
 */
function stopFlm() {
  ready = false;
  starting = false;
  if (idleTimer) {
    clearTimeout(idleTimer);
    idleTimer = null; // do not keep a handle to a timer that can no longer fire
  }
  if (!flmProcess) return;

  const child = flmProcess;
  flmProcess = null;
  try {
    // Kill only the process tree we started (/PID + /T) instead of every
    // flm.exe on the machine, which `taskkill /IM flm.exe` would do.
    execSync(`taskkill /PID ${child.pid} /T /F`, { stdio: "ignore" });
    log("Model stopped. RAM freed.");
  } catch (err) {
    // taskkill exits non-zero when the process is already gone or the
    // command is unavailable; fall back to signalling the child directly
    // and report the problem instead of silently claiming success.
    child.kill();
    log(`taskkill failed (${err.message}); sent kill signal to FLM directly.`);
  }
}

// Startup attempt currently in flight, shared by concurrent callers of
// startFlm(); null when no attempt is running.
let pendingStart = null;

/**
 * Ensure the FLM backend is running and answering HTTP requests.
 *
 * Resolves immediately when the backend is already `ready`. Otherwise it
 * spawns `flm serve` and polls GET /v1/models on FLM_PORT once per second
 * until it returns 200. Concurrent callers share the same attempt, so they
 * all resolve or reject together.
 *
 * @returns {Promise<void>} Resolves once the backend is ready.
 *   Rejects when the process cannot be launched, exits before becoming
 *   ready, is stopped while starting, or is not ready within 60 seconds.
 *   After a rejection the state flags are reset so a later call can retry.
 */
function startFlm() {
  const STARTUP_TIMEOUT_MS = 60000;
  const POLL_INTERVAL_MS = 1000;

  if (ready) return Promise.resolve();
  // stopFlm() clears `starting`, so a pending attempt is only reused while
  // it is still the live one.
  if (pendingStart && starting) return pendingStart;

  starting = true;
  log("Starting model on NPU...");

  const attempt = new Promise((resolve, reject) => {
    let child;
    try {
      child = spawn(FLM_PATH, [
        "serve", MODEL,
        "--host", "127.0.0.1",
        "--port", String(FLM_PORT),
        "--pmode", "performance"
      ], { stdio: ["pipe", "pipe", "pipe"] });
    } catch (err) {
      starting = false;
      reject(err);
      return;
    }
    flmProcess = child;

    let settled = false;
    let pollTimer = null;
    let deadlineTimer = null;

    // Single exit point for this attempt: stops the timers and settles the
    // shared promise exactly once. Pass an Error to fail, null to succeed.
    const settle = (err) => {
      if (settled) return;
      settled = true;
      clearInterval(pollTimer);
      clearTimeout(deadlineTimer);
      if (pendingStart === attempt) pendingStart = null;

      if (!err) {
        ready = true;
        starting = false;
        log("Model ready!");
        resolve();
        return;
      }

      // Only reset shared state if it still belongs to this child; a newer
      // attempt may already have replaced it.
      if (flmProcess === child) {
        flmProcess = null;
        ready = false;
        starting = false;
      }
      // Do not leave a half-started backend behind holding the port.
      child.kill();
      reject(err);
    };

    // Drain stdout: an unread pipe eventually fills up and blocks the child.
    child.stdout.resume();

    child.stderr.on("data", (d) => {
      const s = d.toString();
      if (s.includes("ERROR")) log("FLM: " + s.trim());
    });

    // Without this listener a failed spawn (e.g. missing executable) would
    // be thrown as an unhandled 'error' event and crash the proxy.
    child.on("error", (err) => {
      log(`FLM process error: ${err.message}`);
      settle(err);
    });

    child.on("exit", (code) => {
      log(`FLM exited (code ${code})`);
      if (flmProcess === child) {
        flmProcess = null;
        ready = false;
        starting = false;
      }
      // No effect once the attempt has already succeeded.
      settle(new Error(`FLM exited before becoming ready (code ${code})`));
    });

    // Poll until the server responds
    pollTimer = setInterval(() => {
      if (flmProcess !== child || !starting) {
        // stopFlm() ran (or another attempt took over) while we were waiting.
        settle(new Error("Model startup was cancelled"));
        return;
      }
      const req = http.get(`http://127.0.0.1:${FLM_PORT}/v1/models`, (res) => {
        res.resume(); // discard the body so the socket is released
        if (res.statusCode === 200 && flmProcess === child) settle(null);
      });
      // Connection errors are expected until the backend starts listening.
      req.on("error", () => {});
      req.setTimeout(1000, () => req.destroy());
    }, POLL_INTERVAL_MS);

    // Timeout after 60s
    deadlineTimer = setTimeout(() => {
      settle(new Error("Model failed to start within 60s"));
    }, STARTUP_TIMEOUT_MS);
  });

  pendingStart = attempt;
  return attempt;
}

/**
 * Forward an incoming HTTP request to the FLM backend on 127.0.0.1:FLM_PORT
 * and stream the backend's response back to the client.
 *
 * The method, URL and headers are passed through unchanged and the request
 * body is piped to the backend.
 *
 * @param {http.IncomingMessage} clientReq - Request received by the proxy.
 * @param {http.ServerResponse} clientRes - Response to write the backend's
 *   answer to. If the backend fails before any headers were sent, a 502 with
 *   a JSON error body is written; if it fails later, the response is
 *   destroyed so the client sees the truncation.
 */
function proxy(clientReq, clientRes) {
  const options = {
    hostname: "127.0.0.1",
    port: FLM_PORT,
    path: clientReq.url,
    method: clientReq.method,
    headers: clientReq.headers
  };

  const proxyReq = http.request(options, (proxyRes) => {
    clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);
    proxyRes.pipe(clientRes);
    // pipe() does not close the destination when the source fails, so a
    // backend that dies mid-stream would otherwise leave the client hanging.
    proxyRes.on("error", () => clientRes.destroy());
  });

  proxyReq.on("error", (e) => {
    // Once headers are out (or the client is gone) a 502 can no longer be
    // sent; calling writeHead again would throw ERR_HTTP_HEADERS_SENT.
    if (clientRes.headersSent || clientRes.destroyed) {
      clientRes.destroy();
      return;
    }
    clientRes.writeHead(502, { "Content-Type": "application/json" });
    clientRes.end(JSON.stringify({ error: "Model backend error: " + e.message }));
  });

  // If the client disconnects before the response completed, cancel the
  // upstream request so the backend does not keep working for nobody.
  clientRes.on("close", () => {
    if (!clientRes.writableFinished) proxyReq.destroy();
  });

  clientReq.pipe(proxyReq);
}

/**
 * HTTP front end of the proxy.
 *
 * Every response carries permissive CORS headers. OPTIONS requests are
 * answered with 204. `/status` reports the model name and backend state,
 * `/stop` shuts the backend down. Any other request restarts the idle
 * countdown, starts the backend if it is not ready, and is then forwarded
 * to it; a failure along that path is answered with a 503 JSON error.
 */
const server = http.createServer(async (req, res) => {
  // CORS headers
  const corsHeaders = [
    ["Access-Control-Allow-Origin", "*"],
    ["Access-Control-Allow-Methods", "GET, POST, OPTIONS"],
    ["Access-Control-Allow-Headers", "Content-Type, Authorization"],
  ];
  for (const [headerName, headerValue] of corsHeaders) {
    res.setHeader(headerName, headerValue);
  }

  // Preflight requests need nothing beyond the CORS headers.
  if (req.method === "OPTIONS") {
    res.writeHead(204);
    res.end();
    return;
  }

  switch (req.url) {
    // Status endpoint
    case "/status": {
      const snapshot = { model: MODEL, ready, starting, pid: flmProcess?.pid || null };
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify(snapshot));
      return;
    }
    // Stop endpoint
    case "/stop": {
      stopFlm();
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ status: "stopped" }));
      return;
    }
    default:
      break;
  }

  // Everything else is traffic for the model backend.
  try {
    resetIdleTimer();
    if (!ready) {
      log(`Request received. Waking up model...`);
      await startFlm();
    }
    proxy(req, res);
  } catch (failure) {
    res.writeHead(503);
    res.end(JSON.stringify({ error: failure.message }));
  }
});

// Bind the proxy to HOST:PROXY_PORT and print a short startup banner once
// the socket is listening.
server.listen(PROXY_PORT, HOST, () => {
  const idleMinutes = IDLE_TIMEOUT_MS / 60000;
  const banner = [
    `Proxy listening on ${HOST}:${PROXY_PORT}`,
    `Model will auto-start on first request, auto-stop after ${idleMinutes}m idle`,
    `Endpoints: /status, /stop`,
  ];
  for (const line of banner) {
    log(line);
  }
});

// On SIGINT or SIGTERM, stop the backend before the proxy itself exits.
for (const signalName of ["SIGINT", "SIGTERM"]) {
  process.on(signalName, () => {
    stopFlm();
    process.exit();
  });
}
Initial commit: FLM proxy server for AMD NPU 2026-03-29 21:58:02 -04:00			`const http = require("http");`
			`const { spawn, execSync } = require("child_process");`

			`const FLM_PATH = "C:\\Users\\sshuser\\FastFlowLM\\flm.exe";`
			`const MODEL = "qwen2.5vl-it:3b";`
			`const HOST = "0.0.0.0";`
			`const PROXY_PORT = 8000;`
			`const FLM_PORT = 8001;`
			`const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes`

			`let flmProcess = null;`
			`let idleTimer = null;`
			`let starting = false;`
			`let ready = false;`

			`function log(msg) {`
			console.log(`[${new Date().toLocaleTimeString()}] ${msg}`);
			`}`

			`function resetIdleTimer() {`
			`if (idleTimer) clearTimeout(idleTimer);`
			`idleTimer = setTimeout(() => {`
			`log("Idle timeout reached. Stopping model...");`
			`stopFlm();`
			`}, IDLE_TIMEOUT_MS);`
			`}`

			`function stopFlm() {`
			`ready = false;`
			`starting = false;`
			`if (idleTimer) clearTimeout(idleTimer);`
			`if (flmProcess) {`
			`try { execSync('taskkill /IM flm.exe /F', { stdio: 'ignore' }); } catch {}`
			`flmProcess = null;`
			`log("Model stopped. RAM freed.");`
			`}`
			`}`

			`function startFlm() {`
			`return new Promise((resolve, reject) => {`
			`if (ready) return resolve();`
			`if (starting) {`
			`const wait = setInterval(() => {`
			`if (ready) { clearInterval(wait); resolve(); }`
			`}, 500);`
			`return;`
			`}`

			`starting = true;`
			`log("Starting model on NPU...");`

			`flmProcess = spawn(FLM_PATH, [`
			`"serve", MODEL,`
			`"--host", "127.0.0.1",`
			`"--port", String(FLM_PORT),`
			`"--pmode", "performance"`
			`], { stdio: ["pipe", "pipe", "pipe"] });`

			`flmProcess.stderr.on("data", (d) => {`
			`const s = d.toString();`
			`if (s.includes("ERROR")) log("FLM: " + s.trim());`
			`});`

			`flmProcess.on("exit", (code) => {`
			log(`FLM exited (code ${code})`);
			`flmProcess = null;`
			`ready = false;`
			`starting = false;`
			`});`

			`// Poll until the server responds`
			`const check = setInterval(() => {`
			const req = http.get(`http://127.0.0.1:${FLM_PORT}/v1/models`, (res) => {
			`if (res.statusCode === 200) {`
			`clearInterval(check);`
			`ready = true;`
			`starting = false;`
			`log("Model ready!");`
			`resolve();`
			`}`
			`});`
			`req.on("error", () => {});`
			`req.setTimeout(1000, () => req.destroy());`
			`}, 1000);`

			`// Timeout after 60s`
			`setTimeout(() => {`
			`if (!ready) {`
			`clearInterval(check);`
			`reject(new Error("Model failed to start within 60s"));`
			`}`
			`}, 60000);`
			`});`
			`}`

			`function proxy(clientReq, clientRes) {`
			`const options = {`
			`hostname: "127.0.0.1",`
			`port: FLM_PORT,`
			`path: clientReq.url,`
			`method: clientReq.method,`
			`headers: clientReq.headers`
			`};`

			`const proxyReq = http.request(options, (proxyRes) => {`
			`clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);`
			`proxyRes.pipe(clientRes);`
			`});`

			`proxyReq.on("error", (e) => {`
			`clientRes.writeHead(502);`
			`clientRes.end(JSON.stringify({ error: "Model backend error: " + e.message }));`
			`});`

			`clientReq.pipe(proxyReq);`
			`}`

			`const server = http.createServer(async (req, res) => {`
			`// CORS headers`
			`res.setHeader("Access-Control-Allow-Origin", "*");`
			`res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");`
			`res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");`
			`if (req.method === "OPTIONS") { res.writeHead(204); res.end(); return; }`

			`// Status endpoint`
			`if (req.url === "/status") {`
			`res.writeHead(200, { "Content-Type": "application/json" });`
			`res.end(JSON.stringify({ model: MODEL, ready, starting, pid: flmProcess?.pid \|\| null }));`
			`return;`
			`}`

			`// Stop endpoint`
			`if (req.url === "/stop") {`
			`stopFlm();`
			`res.writeHead(200, { "Content-Type": "application/json" });`
			`res.end(JSON.stringify({ status: "stopped" }));`
			`return;`
			`}`

			`try {`
			`resetIdleTimer();`
			`if (!ready) {`
			log(`Request received. Waking up model...`);
			`await startFlm();`
			`}`
			`proxy(req, res);`
			`} catch (e) {`
			`res.writeHead(503);`
			`res.end(JSON.stringify({ error: e.message }));`
			`}`
			`});`

			`server.listen(PROXY_PORT, HOST, () => {`
			log(`Proxy listening on ${HOST}:${PROXY_PORT}`);
			log(`Model will auto-start on first request, auto-stop after ${IDLE_TIMEOUT_MS / 60000}m idle`);
			log(`Endpoints: /status, /stop`);
			`});`

			`process.on("SIGINT", () => { stopFlm(); process.exit(); });`
			`process.on("SIGTERM", () => { stopFlm(); process.exit(); });`