// File: flm-proxy/flm-proxy.js
// (161 lines, 4.2 KiB, JavaScript)

const http = require("http");
const { spawn, execSync } = require("child_process");
// ---- Configuration ---------------------------------------------------------

// Absolute path of the FLM executable that is spawned to serve the model.
const FLM_PATH = "C:\\Users\\sshuser\\FastFlowLM\\flm.exe";
// Model identifier passed to `flm serve` and reported by the /status endpoint.
const MODEL = "qwen2.5vl-it:3b";
// Interface and port this proxy itself listens on.
const HOST = "0.0.0.0";
const PROXY_PORT = 8000;
// Loopback port the FLM backend is told to listen on; proxied requests go here.
const FLM_PORT = 8001;
// How long the proxy may go without a proxied request before the model is stopped.
const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes

// ---- Mutable lifecycle state -----------------------------------------------

// Handle of the spawned FLM child process, or null when none is running.
let flmProcess = null;
// Handle of the pending idle-shutdown timer, or null before the first request.
let idleTimer = null;
// True while a spawn has been issued but the backend has not yet answered.
let starting = false;
// True once the backend has answered its readiness probe with HTTP 200.
let ready = false;
/**
 * Writes a message to stdout, prefixed with the current local time in
 * square brackets.
 *
 * @param {*} msg - Value to print; it is interpolated into the output line.
 */
function log(msg) {
  const stamp = new Date().toLocaleTimeString();
  const line = `[${stamp}] ${msg}`;
  console.log(line);
}
/**
 * Restarts the idle countdown. Any pending countdown is cancelled and a new
 * one is armed; when it elapses after IDLE_TIMEOUT_MS the model is stopped.
 */
function resetIdleTimer() {
  if (idleTimer) {
    clearTimeout(idleTimer);
  }

  const onIdle = () => {
    log("Idle timeout reached. Stopping model...");
    stopFlm();
  };

  idleTimer = setTimeout(onIdle, IDLE_TIMEOUT_MS);
}
/**
 * Stops the FLM process spawned by this proxy and resets the lifecycle flags.
 *
 * Only the process this proxy started is targeted (by PID), so other
 * `flm.exe` instances running on the machine are left alone. Safe to call
 * when nothing is running: the flags and idle timer are still reset.
 */
function stopFlm() {
  ready = false;
  starting = false;
  if (idleTimer) {
    clearTimeout(idleTimer);
    idleTimer = null;
  }
  if (!flmProcess) return;

  // Detach the handle first so state is consistent even if the kill throws.
  const child = flmProcess;
  flmProcess = null;

  try {
    if (process.platform === "win32" && child.pid !== undefined) {
      // /T also terminates processes started by the child; /F forces termination.
      execSync(`taskkill /PID ${child.pid} /T /F`, { stdio: "ignore" });
    } else {
      child.kill();
    }
  } catch (err) {
    // taskkill exits non-zero when the process is already gone (among other
    // reasons). Report it rather than hiding it, then try the built-in kill.
    log(`taskkill failed (${err.message}); falling back to kill()`);
    child.kill();
  }
  log("Model stopped. RAM freed.");
}
// Start attempt currently in flight, shared by every concurrent caller of
// startFlm(); null when no start is in progress.
let pendingStart = null;

/**
 * Ensures the FLM backend is running and answering HTTP requests.
 *
 * If the backend is already ready this resolves immediately. If a start is
 * already in progress, the same promise is returned so that every waiting
 * caller observes the same outcome (success or failure). Otherwise the FLM
 * process is spawned and `GET /v1/models` is polled once per second.
 *
 * @returns {Promise<void>} Resolves once the backend answers with HTTP 200.
 *   Rejects if the process cannot be spawned, exits before becoming ready,
 *   or does not become ready within 60 seconds (in which case the hung
 *   process is stopped so a later request can retry).
 */
function startFlm() {
  if (ready) return Promise.resolve();
  if (pendingStart) return pendingStart;

  const START_TIMEOUT_MS = 60000;
  const POLL_INTERVAL_MS = 1000;

  starting = true;
  log("Starting model on NPU...");

  const attempt = new Promise((resolve, reject) => {
    let pollTimer = null;
    let deadlineTimer = null;
    let settled = false;

    // Single exit point: stops polling, cancels the deadline and settles the
    // promise exactly once. Call with an Error to reject, with nothing to resolve.
    const settle = (err) => {
      if (settled) return;
      settled = true;
      clearInterval(pollTimer);
      clearTimeout(deadlineTimer);
      starting = false;
      if (err) {
        reject(err);
        return;
      }
      ready = true;
      log("Model ready!");
      resolve();
    };

    let child;
    try {
      child = spawn(FLM_PATH, [
        "serve", MODEL,
        "--host", "127.0.0.1",
        "--port", String(FLM_PORT),
        "--pmode", "performance"
      ], { stdio: ["pipe", "pipe", "pipe"] });
    } catch (err) {
      // spawn() can throw synchronously on invalid arguments.
      settle(err);
      return;
    }
    flmProcess = child;

    // stdout is piped but unused; drain it so a full pipe buffer can never
    // block the child process.
    child.stdout.resume();
    child.stderr.on("data", (d) => {
      const s = d.toString();
      if (s.includes("ERROR")) log("FLM: " + s.trim());
    });

    // Without an 'error' listener a failed spawn (e.g. wrong FLM_PATH) would
    // surface as an uncaught exception and take the whole proxy down.
    child.on("error", (err) => {
      log(`FLM process error: ${err.message}`);
      if (child.pid === undefined) {
        // No pid means the process never started; there will be no 'exit' event.
        if (flmProcess === child) {
          flmProcess = null;
          ready = false;
        }
        settle(err);
      }
    });

    child.on("exit", (code) => {
      log(`FLM exited (code ${code})`);
      // Only reset shared state if it still describes this child; a newer
      // process may already have been started after a stop.
      if (flmProcess === child) {
        flmProcess = null;
        ready = false;
        starting = false;
      }
      settle(new Error(`FLM exited before becoming ready (code ${code})`));
    });

    // Poll until the server responds
    pollTimer = setInterval(() => {
      const req = http.get(`http://127.0.0.1:${FLM_PORT}/v1/models`, (res) => {
        // The response body must be consumed or the socket is never released.
        res.resume();
        // Ignore a late answer if this child has been stopped in the meantime.
        if (res.statusCode === 200 && flmProcess === child) settle();
      });
      // Connection errors are expected while the backend is still booting.
      req.on("error", () => {});
      req.setTimeout(POLL_INTERVAL_MS, () => req.destroy());
    }, POLL_INTERVAL_MS);

    // Give up after the deadline and stop the hung process so the next
    // request starts from a clean state instead of waiting forever.
    deadlineTimer = setTimeout(() => {
      if (settled) return;
      settle(new Error(`Model failed to start within ${START_TIMEOUT_MS / 1000}s`));
      if (flmProcess === child) stopFlm();
    }, START_TIMEOUT_MS);
  });

  pendingStart = attempt;
  // Forget the attempt once it has settled, whatever the outcome. Handling
  // the rejection here does not hide it from callers awaiting `attempt`.
  const forget = () => {
    if (pendingStart === attempt) pendingStart = null;
  };
  attempt.then(forget, forget);

  return attempt;
}
/**
 * Forwards an incoming HTTP request to the FLM backend on 127.0.0.1:FLM_PORT
 * and streams the backend's response back to the client.
 *
 * Method, URL and headers are passed through unchanged. If the backend cannot
 * be reached before any response has started, the client receives a 502 with
 * a JSON error body. If the failure happens after the response has started,
 * the client connection is dropped, since a second status line cannot be sent.
 *
 * @param {http.IncomingMessage} clientReq - Request received by the proxy.
 * @param {http.ServerResponse} clientRes - Response to write back to the client.
 */
function proxy(clientReq, clientRes) {
  const options = {
    hostname: "127.0.0.1",
    port: FLM_PORT,
    path: clientReq.url,
    method: clientReq.method,
    headers: clientReq.headers
  };

  const failClient = (message) => {
    // writeHead() throws once headers are sent, so a started (or already
    // closed) response can only be torn down.
    if (clientRes.headersSent || clientRes.destroyed) {
      clientRes.destroy();
      return;
    }
    clientRes.writeHead(502, { "Content-Type": "application/json" });
    clientRes.end(JSON.stringify({ error: message }));
  };

  const proxyReq = http.request(options, (proxyRes) => {
    // If the backend dies mid-stream, pipe() never ends the client response;
    // drop the connection so the client does not hang.
    proxyRes.on("error", () => clientRes.destroy());
    clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);
    proxyRes.pipe(clientRes);
  });

  proxyReq.on("error", (e) => {
    failClient("Model backend error: " + e.message);
  });

  // If the client goes away before the response is complete, abort the
  // upstream request so the backend stops working on it.
  clientRes.on("close", () => {
    if (!clientRes.writableFinished) proxyReq.destroy();
  });

  clientReq.pipe(proxyReq);
}
/**
 * Handles one request to the proxy.
 *
 * - `OPTIONS` (any path): answers the CORS preflight with 204.
 * - `/status`: reports the model name, lifecycle flags and backend pid.
 * - `/stop`: stops the backend immediately.
 * - anything else: resets the idle timer, starts the backend if needed and
 *   forwards the request to it; a failed start yields a 503 JSON error.
 *
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 */
async function handleRequest(req, res) {
  // CORS headers
  res.setHeader("Access-Control-Allow-Origin", "*");
  res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
  if (req.method === "OPTIONS") { res.writeHead(204); res.end(); return; }

  // Match control endpoints on the path only, so "/status?x=1" still works.
  const path = req.url.split("?")[0];

  // Status endpoint
  if (path === "/status") {
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ model: MODEL, ready, starting, pid: flmProcess?.pid || null }));
    return;
  }

  // Stop endpoint
  if (path === "/stop") {
    stopFlm();
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ status: "stopped" }));
    return;
  }

  try {
    resetIdleTimer();
    if (!ready) {
      log(`Request received. Waking up model...`);
      await startFlm();
    }
    proxy(req, res);
  } catch (e) {
    // A second writeHead() would throw if the response has already started.
    if (res.headersSent) {
      res.destroy();
      return;
    }
    // Same content type as the other JSON responses of this server.
    res.writeHead(503, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ error: e.message }));
  }
}

const server = http.createServer(handleRequest);
// Bind the proxy and print a short startup banner once it is accepting
// connections.
const announceListening = () => {
  const idleMinutes = IDLE_TIMEOUT_MS / 60000;
  const banner = [
    `Proxy listening on ${HOST}:${PROXY_PORT}`,
    `Model will auto-start on first request, auto-stop after ${idleMinutes}m idle`,
    `Endpoints: /status, /stop`,
  ];
  for (const line of banner) {
    log(line);
  }
};

server.listen(PROXY_PORT, HOST, announceListening);
// On an interrupt or termination signal, stop the model before the proxy
// process exits.
for (const signal of ["SIGINT", "SIGTERM"]) {
  process.on(signal, () => {
    stopFlm();
    process.exit();
  });
}