import { GoogleGenAI } from "@google/genai";

/** A single chat turn exchanged with the SiteMente assistant. */
export type SiteMenteMessage = {
  role: "user" | "assistant" | "system";
  content: string;
};

/** Decoded audio returned by speech generation, with its MIME type. */
export type SiteMenteSpeechResult = {
  audioBytes: Uint8Array;
  mimeType: string;
};

const TEXT_MODEL = "gemini-2.5-flash";
const SPEECH_MODEL = "gemini-2.5-flash-preview-tts";

/**
 * Creates a Gemini client from the `GEMINI_API_KEY` environment variable.
 *
 * @throws Error when `GEMINI_API_KEY` is unset or empty.
 */
const getClient = () => {
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    throw new Error("GEMINI_API_KEY is not set.");
  }
  return new GoogleGenAI({ apiKey });
};

/**
 * Concatenates the text parts of the first candidate in a response.
 *
 * @param response - Raw response object; only `candidates[0].content.parts`
 *   is inspected.
 * @returns The joined, trimmed text.
 * @throws Error when no non-empty text is present.
 */
const extractText = (response: unknown): string => {
  const typed = response as {
    candidates?: Array<{
      content?: { parts?: Array<{ text?: string }> };
    }>;
  };

  // Parts without a `text` field contribute an empty string.
  const text =
    typed?.candidates?.[0]?.content?.parts
      ?.map((part) => part.text ?? "")
      .join("")
      .trim() ?? "";

  if (!text) {
    throw new Error("Gemini returned an empty response.");
  }
  return text;
};

/**
 * Decodes the first inline-data part of the first candidate into raw bytes.
 *
 * @param response - Raw response object; only `candidates[0].content.parts`
 *   is inspected.
 * @returns The base64-decoded bytes and the MIME type reported alongside them.
 * @throws Error when no part carries both `data` and `mimeType`.
 */
const extractAudio = (response: unknown): SiteMenteSpeechResult => {
  const typed = response as {
    candidates?: Array<{
      content?: {
        parts?: Array<{
          inlineData?: { data?: string; mimeType?: string };
        }>;
      };
    }>;
  };

  const inlineData =
    typed?.candidates?.[0]?.content?.parts?.find((part) => part.inlineData)
      ?.inlineData ?? null;

  if (!inlineData?.data || !inlineData?.mimeType) {
    throw new Error("Gemini did not return audio data.");
  }

  // Copy out of the Node Buffer so callers receive a plain Uint8Array.
  const audioBytes = Uint8Array.from(Buffer.from(inlineData.data, "base64"));
  return { audioBytes, mimeType: inlineData.mimeType };
};

/**
 * Converts the non-system messages into request contents, mapping the
 * "assistant" role to "model" and every other role to "user".
 */
const buildContents = (messages: SiteMenteMessage[]) => {
  return messages
    .filter((message) => message.role !== "system")
    .map((message) => ({
      role: message.role === "assistant" ? "model" : "user",
      parts: [{ text: message.content }],
    }));
};

/**
 * Joins all system messages (newline-separated, trimmed) into a single
 * system instruction.
 *
 * @returns The instruction content, or `undefined` when there is no
 *   non-empty system text.
 */
const buildSystemInstruction = (messages: SiteMenteMessage[]) => {
  const systemText = messages
    .filter((message) => message.role === "system")
    .map((message) => message.content)
    .join("\n")
    .trim();

  if (!systemText) {
    return undefined;
  }
  return { parts: [{ text: systemText }] };
};

/**
 * Generates a text reply for the given conversation.
 *
 * System messages are sent as the system instruction; all other messages
 * are sent as conversation contents.
 *
 * @param messages - Conversation history, including optional system messages.
 * @returns The trimmed text of the first candidate.
 * @throws Error when the API key is missing, the request fails, or the
 *   response contains no text. Failures are logged before being rethrown.
 */
export const generateSiteMenteText = async (
  messages: SiteMenteMessage[]
): Promise<string> => {
  try {
    const client = getClient();
    const systemInstruction = buildSystemInstruction(messages);

    const response = await client.models.generateContent({
      model: TEXT_MODEL,
      contents: buildContents(messages),
      // The SDK reads the system instruction from `config`; only attach it
      // when there is one.
      ...(systemInstruction ? { config: { systemInstruction } } : {}),
    });

    return extractText(response);
  } catch (error) {
    console.error("[SiteMente][Gemini] Text generation failed", error);
    throw error;
  }
};

/**
 * Synthesizes speech for the given text using the prebuilt "Kore" voice.
 *
 * @param text - The text to speak.
 * @returns The decoded audio bytes and their MIME type.
 * @throws Error when the API key is missing, the request fails, or the
 *   response contains no audio. Failures are logged before being rethrown.
 */
export const generateSiteMenteSpeech = async (
  text: string
): Promise<SiteMenteSpeechResult> => {
  try {
    const client = getClient();

    const response = await client.models.generateContent({
      model: SPEECH_MODEL,
      contents: [{ role: "user", parts: [{ text }] }],
      config: {
        responseModalities: ["AUDIO"],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName: "Kore",
            },
          },
        },
      },
    });

    return extractAudio(response);
  } catch (error) {
    console.error("[SiteMente][Gemini] Speech generation failed", error);
    throw error;
  }
};