Initial commit: FLM proxy server for AMD NPU
This commit is contained in:
160
flm-proxy.js
Normal file
160
flm-proxy.js
Normal file
@@ -0,0 +1,160 @@
|
||||
const http = require("http");
|
||||
const { spawn, execSync } = require("child_process");
|
||||
|
||||
// ---- Static configuration ----

// Executable that is spawned to serve the model.
const FLM_PATH = "C:\\Users\\sshuser\\FastFlowLM\\flm.exe";
// Model tag handed to `flm serve`.
const MODEL = "qwen2.5vl-it:3b";
// Address the proxy's HTTP server listens on.
const HOST = "0.0.0.0";
// Port the proxy's HTTP server listens on.
const PROXY_PORT = 8000;
// Loopback port the FLM backend is asked to serve on.
const FLM_PORT = 8001;
// How long the proxy may sit without requests before the model is stopped (5 minutes).
const IDLE_TIMEOUT_MS = 5 * 60 * 1000;

// ---- Mutable runtime state ----

// Child process handle of the running backend, or null when none is tracked.
let flmProcess = null;
// Pending idle-shutdown timer handle, or null when none has been armed.
let idleTimer = null;
// True while a backend start-up is in progress.
let starting = false;
// True once the backend has answered a readiness probe.
let ready = false;
|
||||
|
||||
/**
 * Write a message to the console, prefixed with the current local time
 * in square brackets.
 *
 * @param {string} msg - Text to print after the timestamp.
 */
function log(msg) {
  const stamp = new Date().toLocaleTimeString();
  const line = `[${stamp}] ${msg}`;
  console.log(line);
}
|
||||
|
||||
/**
 * (Re)arm the idle-shutdown timer.
 *
 * Any timer that is already pending is cancelled, then a new one is
 * scheduled that logs a notice and calls stopFlm() after IDLE_TIMEOUT_MS.
 */
function resetIdleTimer() {
  const onIdle = () => {
    log("Idle timeout reached. Stopping model...");
    stopFlm();
  };

  if (idleTimer) {
    clearTimeout(idleTimer);
  }
  idleTimer = setTimeout(onIdle, IDLE_TIMEOUT_MS);
}
|
||||
|
||||
/**
 * Stop the FLM backend (if one is tracked) and reset lifecycle state.
 *
 * Always clears `ready`/`starting` and cancels the idle timer. When a child
 * process is tracked it is terminated and `flmProcess` is set to null.
 *
 * Only the process this proxy spawned is targeted: on Windows the child's
 * process tree is killed by PID via `taskkill /PID <pid> /T /F`, rather than
 * killing every `flm.exe` on the machine by image name. If taskkill cannot be
 * used or fails, the failure is logged and `child.kill()` is used instead.
 */
function stopFlm() {
  ready = false;
  starting = false;

  if (idleTimer) {
    clearTimeout(idleTimer);
    // Drop the stale handle so later checks do not see a dead timer.
    idleTimer = null;
  }

  if (!flmProcess) return;

  const child = flmProcess;
  flmProcess = null;

  let treeKilled = false;
  // pid is undefined when the spawn itself failed; taskkill only exists on Windows.
  if (child.pid !== undefined && process.platform === "win32") {
    try {
      execSync(`taskkill /PID ${child.pid} /T /F`, { stdio: "ignore" });
      treeKilled = true;
    } catch (err) {
      log(`taskkill failed for PID ${child.pid}: ${err.message}`);
    }
  }
  if (!treeKilled) {
    child.kill();
  }

  log("Model stopped. RAM freed.");
}
|
||||
|
||||
// Start-up attempt currently in flight. Shared so that concurrent callers
// wait on the same attempt instead of polling or spawning a second backend.
let startPromise = null;

/**
 * Ensure the FLM backend is running and answering HTTP requests.
 *
 * Spawns `flm serve MODEL` bound to 127.0.0.1:FLM_PORT, then polls
 * `GET /v1/models` once per second until it returns HTTP 200.
 *
 * Concurrent callers share one attempt. The attempt fails as soon as the
 * child reports a spawn error or exits, or after 60 seconds without a
 * successful probe; in every failure case `starting` is cleared so that a
 * later call can retry.
 *
 * @returns {Promise<void>} Resolves once the backend is ready. Rejects with
 *   an Error if the process cannot be spawned, exits before becoming ready,
 *   or does not become ready within 60 seconds.
 */
function startFlm() {
  if (ready) return Promise.resolve();
  if (startPromise) return startPromise;

  starting = true;
  log("Starting model on NPU...");

  const attempt = new Promise((resolve, reject) => {
    let settled = false;
    let pollTimer = null;
    let deadline = null;
    let child = null;

    // Single exit point: stops polling, clears the deadline, updates flags.
    const settle = (err) => {
      if (settled) return;
      settled = true;
      clearInterval(pollTimer);
      clearTimeout(deadline);
      starting = false;
      if (err) {
        ready = false;
        reject(err);
        return;
      }
      ready = true;
      log("Model ready!");
      resolve();
    };

    try {
      child = spawn(FLM_PATH, [
        "serve", MODEL,
        "--host", "127.0.0.1",
        "--port", String(FLM_PORT),
        "--pmode", "performance"
      ], { stdio: ["pipe", "pipe", "pipe"] });
    } catch (err) {
      settle(err);
      return;
    }
    flmProcess = child;

    // Only clear shared state if this child is still the tracked one, so a
    // late event from an old process cannot clobber a newer one.
    const forgetChild = () => {
      if (flmProcess === child) {
        flmProcess = null;
        ready = false;
        starting = false;
      }
    };

    // Drain stdout: a piped stream nobody reads eventually blocks the child.
    child.stdout?.resume();

    child.stderr?.on("data", (d) => {
      const s = d.toString();
      if (s.includes("ERROR")) log("FLM: " + s.trim());
    });

    // Without this listener a failed spawn (e.g. ENOENT) would be an
    // unhandled 'error' event and crash the whole proxy.
    child.on("error", (err) => {
      log(`FLM process error: ${err.message}`);
      forgetChild();
      settle(err);
    });

    child.on("exit", (code) => {
      log(`FLM exited (code ${code})`);
      forgetChild();
      // No-op when start-up already finished; otherwise fail fast.
      settle(new Error(`FLM exited before becoming ready (code ${code})`));
    });

    // Poll until the server responds
    pollTimer = setInterval(() => {
      const req = http.get(`http://127.0.0.1:${FLM_PORT}/v1/models`, (res) => {
        res.resume(); // discard the body so the socket is released
        if (res.statusCode === 200) settle();
      });
      req.on("error", () => {}); // connection errors are expected until the server is up
      req.setTimeout(1000, () => req.destroy());
    }, 1000);

    // Timeout after 60s
    deadline = setTimeout(() => {
      if (settled) return;
      settle(new Error("Model failed to start within 60s"));
      // Do not leave a half-started backend behind.
      stopFlm();
    }, 60000);
  });

  startPromise = attempt;
  const release = () => {
    if (startPromise === attempt) startPromise = null;
  };
  attempt.then(release, release);
  return attempt;
}
|
||||
|
||||
/**
 * Forward a client request to the FLM backend on 127.0.0.1:FLM_PORT and
 * stream the backend's response back to the client.
 *
 * The method, URL and headers are forwarded as received, and the request
 * body is piped through.
 *
 * Failure handling:
 *  - If the backend cannot be reached before any response was started, the
 *    client receives a 502 with a JSON `{ error }` body.
 *  - If the failure happens after headers were already sent, the client
 *    connection is destroyed, because a second status line cannot be written.
 *  - If the client disconnects before the response finished, the upstream
 *    request is destroyed.
 *
 * @param {import("http").IncomingMessage} clientReq - Incoming client request.
 * @param {import("http").ServerResponse} clientRes - Response to write to.
 */
function proxy(clientReq, clientRes) {
  const options = {
    hostname: "127.0.0.1",
    port: FLM_PORT,
    path: clientReq.url,
    method: clientReq.method,
    headers: clientReq.headers
  };

  const proxyReq = http.request(options, (proxyRes) => {
    // An upstream reset mid-stream surfaces here; without a listener it
    // would be an unhandled 'error' event.
    proxyRes.on("error", () => clientRes.destroy());
    clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);
    proxyRes.pipe(clientRes);
  });

  proxyReq.on("error", (e) => {
    if (clientRes.destroyed) return; // client is already gone
    if (clientRes.headersSent) {
      // Too late for a 502: writeHead() would throw once headers are out.
      clientRes.destroy();
      return;
    }
    clientRes.writeHead(502, { "Content-Type": "application/json" });
    clientRes.end(JSON.stringify({ error: `Model backend error: ${e.message}` }));
  });

  // Cancel the upstream request when the client leaves early.
  clientRes.on("close", () => {
    if (!clientRes.writableFinished) proxyReq.destroy();
  });

  clientReq.pipe(proxyReq);
}
|
||||
|
||||
/**
 * HTTP entry point of the proxy.
 *
 * Every response gets permissive CORS headers; OPTIONS requests are answered
 * with 204 directly. Two control endpoints are handled locally:
 *   /status - reports model name, ready/starting flags and backend pid
 *   /stop   - stops the backend immediately
 * Control endpoints are matched on the path only, so a query string does not
 * turn them into a model request.
 *
 * Every other request re-arms the idle timer, starts the backend if it is
 * not ready, and is then forwarded to it. If start-up fails the client
 * receives a 503 with a JSON `{ error }` body.
 */
const server = http.createServer(async (req, res) => {
  // CORS headers
  res.setHeader("Access-Control-Allow-Origin", "*");
  res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
  if (req.method === "OPTIONS") {
    res.writeHead(204);
    res.end();
    return;
  }

  // Ignore any query string when routing to the control endpoints.
  const [pathname] = req.url.split("?", 1);

  // Status endpoint
  if (pathname === "/status") {
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ model: MODEL, ready, starting, pid: flmProcess?.pid ?? null }));
    return;
  }

  // Stop endpoint
  if (pathname === "/stop") {
    stopFlm();
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ status: "stopped" }));
    return;
  }

  try {
    resetIdleTimer();
    if (!ready) {
      log(`Request received. Waking up model...`);
      await startFlm();
    }
    proxy(req, res);
  } catch (e) {
    if (res.headersSent) {
      // A status line is already out; a 503 can no longer be written.
      res.destroy();
      return;
    }
    res.writeHead(503, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ error: e.message }));
  }
});
|
||||
|
||||
// Start accepting connections and print a short usage banner once listening.
const announceListening = () => {
  const idleMinutes = IDLE_TIMEOUT_MS / 60000;
  log(`Proxy listening on ${HOST}:${PROXY_PORT}`);
  log(`Model will auto-start on first request, auto-stop after ${idleMinutes}m idle`);
  log(`Endpoints: /status, /stop`);
};

server.listen(PROXY_PORT, HOST, announceListening);
|
||||
|
||||
// On SIGINT or SIGTERM, stop the backend first and then exit the proxy.
for (const signal of ["SIGINT", "SIGTERM"]) {
  process.on(signal, () => {
    stopFlm();
    process.exit();
  });
}
|
||||
Reference in New Issue
Block a user