-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.ts
More file actions
111 lines (95 loc) · 2.87 KB
/
server.ts
File metadata and controls
111 lines (95 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/**
* Custom dev server — wraps Next.js and handles /api/voice WebSocket upgrades
* directly on the HTTP server, which Next.js API routes can't do.
*
* Usage: tsx watch server.ts (via `pnpm dev`)
*/
import { writeFile } from "node:fs/promises";
import { createServer } from "node:http";
import { parse } from "node:url";
import next from "next";
import { WebSocketServer } from "ws";
import { createFrameAnalyzer } from "./src/lib/gemini-session";
// Runtime configuration. Next.js runs in development mode for any NODE_ENV
// other than "production".
const dev = process.env.NODE_ENV !== "production";
const hostname = "localhost";
// `||` rather than `??` on purpose: an unset, empty or non-numeric PORT
// evaluates to 0/NaN and must fall back to 3000.
const port = Number(process.env.PORT) || 3000;
const app = next({ dev, hostname, port });
const handle = app.getRequestHandler();

app
  .prepare()
  .then(() => {
    // Plain HTTP requests are delegated to the Next.js request handler.
    const server = createServer(async (req, res) => {
      try {
        await handle(req, res, parse(req.url ?? "/", true));
      } catch (err) {
        // A failing request must not become an unhandled rejection that
        // takes the whole server down; answer with a 500 where still possible.
        console.error("[server] error handling", req.url, err);
        if (!res.writableEnded) {
          if (res.headersSent) {
            res.end();
          } else {
            res.statusCode = 500;
            res.end("Internal Server Error");
          }
        }
      }
    });

    // `noServer` mode: the WebSocket server never listens by itself, upgrades
    // are routed to it manually below.
    const wss = new WebSocketServer({ noServer: true });

    server.on("upgrade", (req, socket, head) => {
      const { pathname } = parse(req.url ?? "/", true);
      if (pathname === "/api/voice") {
        wss.handleUpgrade(req, socket, head, (ws) => {
          handleVoiceConnection(ws);
        });
      }
      // Other upgrade requests (e.g. Next.js HMR) are left alone
    });

    // Report server-level failures (e.g. the port is already in use) with a
    // clear message instead of a raw uncaught 'error' event.
    server.once("error", (err) => {
      console.error("[server] HTTP server error:", err);
      process.exit(1);
    });

    server.listen(port, () => {
      console.log(`> Ready on http://${hostname}:${port}`);
    });
  })
  .catch((err: unknown) => {
    // Next.js failed to prepare (or server setup threw): nothing to serve.
    console.error("[server] failed to start:", err);
    process.exit(1);
  });
/**
 * Serves one `/api/voice` WebSocket client.
 *
 * Each incoming message is treated as one image frame and passed to the
 * frame analyzer created from `GEMINI_API_KEY`. The analyzer's event is sent
 * back as JSON, or `{ type: "no_dialog" }` when it yields nothing. Only one
 * frame is analyzed at a time: frames that arrive while an analysis is still
 * running are dropped and not counted.
 *
 * If `GEMINI_API_KEY` is not set, the socket is closed with code 1011 and no
 * handlers are registered.
 *
 * @param ws - The already-upgraded WebSocket connection.
 */
function handleVoiceConnection(ws: import("ws").WebSocket) {
  console.log("[voice-api] new WebSocket connection");

  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error("[voice-api] GEMINI_API_KEY missing from env");
    ws.close(1011, "GEMINI_API_KEY not configured");
    return;
  }

  const analyzer = createFrameAnalyzer(apiKey);
  let frameCount = 0;
  let analyzing = false;

  ws.on("message", async (data: Buffer | ArrayBuffer | Buffer[]) => {
    // Skip if still processing the previous frame
    if (analyzing) {
      return;
    }

    // Normalize every payload shape `ws` can deliver into a single Buffer.
    // Fragment arrays must be concatenated: Buffer.from(Buffer[]) would
    // coerce each fragment to a single zero byte and corrupt the frame.
    let buf: Buffer;
    if (Buffer.isBuffer(data)) {
      buf = data;
    } else if (Array.isArray(data)) {
      buf = Buffer.concat(data);
    } else {
      buf = Buffer.from(data);
    }

    // A Buffer may be a view into a larger ArrayBuffer, so copy out exactly
    // this frame's byte range for the analyzer.
    const arrayBuffer = buf.buffer.slice(
      buf.byteOffset,
      buf.byteOffset + buf.byteLength
    ) as ArrayBuffer;

    frameCount++;

    // Dump the first frame of the connection to disk as a debugging aid.
    // Best-effort: a failed write is logged and must never crash the server.
    if (frameCount === 1) {
      writeFile("debug-frame.jpg", buf)
        .then(() =>
          console.log(
            `[voice-api] wrote debug-frame.jpg (${buf.byteLength} bytes)`
          )
        )
        .catch((err: unknown) =>
          console.error("[voice-api] failed to write debug-frame.jpg:", err)
        );
    }

    // Log the first three frames, then only every tenth, to limit noise.
    if (frameCount <= 3 || frameCount % 10 === 0) {
      console.log(
        `[voice-api] analyzing frame #${frameCount} (${buf.byteLength} bytes)`
      );
    }

    analyzing = true;
    try {
      const event = await analyzer.analyzeFrame(arrayBuffer);
      // The client may have disconnected while the analysis was in flight.
      if (ws.readyState === ws.OPEN) {
        if (event) {
          console.log("[dialogue]", JSON.stringify(event));
          ws.send(JSON.stringify(event));
        } else {
          ws.send(JSON.stringify({ type: "no_dialog" }));
        }
      }
    } catch (err) {
      console.error("[voice-api] Gemini error:", err);
    } finally {
      // Always release the gate so later frames can be analyzed.
      analyzing = false;
    }
  });

  ws.on("close", () => {
    console.log(
      `[voice-api] client disconnected after ${frameCount} frames`
    );
  });

  ws.on("error", (err) => {
    console.error("[voice-api] WebSocket error:", err);
  });
}