// Files
//
// 134 lines
// 3.3 KiB
// TypeScript

import { GoogleGenAI } from "@google/genai";
/**
 * A single conversation message passed to the text-generation entry point.
 */
export type SiteMenteMessage = {
/**
 * Who authored the message. Within this module, "system" messages are
 * removed from the conversation turns and merged into one system
 * instruction, and "assistant" is sent to Gemini under the role "model".
 */
role: "user" | "assistant" | "system";
/** Plain-text body of the message. */
content: string;
};
/**
 * Result of a speech-generation request.
 */
export type SiteMenteSpeechResult = {
/** Audio payload, decoded from the base64 inline data in the response. */
audioBytes: Uint8Array;
/** MIME type string copied from the response's inline data. */
mimeType: string;
};
// Model identifier passed as `model` by generateSiteMenteText.
const TEXT_MODEL = "gemini-2.5-flash";
// Model identifier passed as `model` by generateSiteMenteSpeech.
const SPEECH_MODEL = "gemini-2.5-flash-preview-tts";
/**
 * Creates a new GoogleGenAI client using the key in the GEMINI_API_KEY
 * environment variable.
 *
 * @returns A freshly constructed client (a new instance on every call).
 * @throws Error when GEMINI_API_KEY is unset or empty.
 */
const getClient = () => {
  const { GEMINI_API_KEY: apiKey } = process.env;
  if (apiKey) {
    return new GoogleGenAI({ apiKey });
  }
  throw new Error("GEMINI_API_KEY is not set.");
};
/**
 * Reads the generated text out of a generateContent response.
 *
 * Only the first candidate is considered. The `text` of each of its parts is
 * concatenated in order (parts without text contribute nothing) and the
 * result is trimmed.
 *
 * @param response Raw response object; its shape is assumed, not validated.
 * @returns The trimmed, concatenated text.
 * @throws Error when no text is present or it is whitespace only.
 */
const extractText = (response: unknown): string => {
  type TextPart = { text?: string };
  type TextResponse = {
    candidates?: Array<{ content?: { parts?: TextPart[] } }>;
  };

  const parts =
    (response as TextResponse | null | undefined)?.candidates?.[0]?.content
      ?.parts ?? [];

  const pieces: string[] = [];
  for (const part of parts) {
    pieces.push(part.text ?? "");
  }

  const combined = pieces.join("").trim();
  if (combined) {
    return combined;
  }
  throw new Error("Gemini returned an empty response.");
};
/**
 * Reads the audio payload out of a generateContent response.
 *
 * Only the first candidate is considered, and within it the first part that
 * carries `inlineData`. The base64 `data` string is decoded into bytes.
 *
 * @param response Raw response object; its shape is assumed, not validated.
 * @returns The decoded bytes together with the reported MIME type.
 * @throws Error when no inline data is found, or when the first inline data
 *   entry lacks `data` or `mimeType`.
 */
const extractAudio = (response: unknown): SiteMenteSpeechResult => {
  type InlinePayload = { data?: string; mimeType?: string };
  type AudioResponse = {
    candidates?: Array<{
      content?: { parts?: Array<{ inlineData?: InlinePayload }> };
    }>;
  };

  const parts =
    (response as AudioResponse | null | undefined)?.candidates?.[0]?.content
      ?.parts ?? [];

  // Stop at the first part that has inline data, even if it is incomplete.
  let payload: InlinePayload | undefined;
  for (const part of parts) {
    if (part.inlineData) {
      payload = part.inlineData;
      break;
    }
  }

  const encoded = payload?.data;
  const mimeType = payload?.mimeType;
  if (!encoded || !mimeType) {
    throw new Error("Gemini did not return audio data.");
  }

  // Copy into a plain Uint8Array so callers do not receive a Node Buffer.
  return {
    audioBytes: new Uint8Array(Buffer.from(encoded, "base64")),
    mimeType,
  };
};
/**
 * Converts chat messages into the `contents` turns sent to Gemini.
 *
 * System messages are left out. "assistant" becomes the role "model"; every
 * other remaining role becomes "user". Message order is preserved.
 *
 * @param messages Conversation history, possibly including system messages.
 * @returns One `{ role, parts }` entry per non-system message.
 */
const buildContents = (messages: SiteMenteMessage[]) => {
  const turns: Array<{ role: string; parts: Array<{ text: string }> }> = [];
  for (const { role, content } of messages) {
    if (role === "system") {
      continue;
    }
    const mappedRole = role === "assistant" ? "model" : "user";
    turns.push({ role: mappedRole, parts: [{ text: content }] });
  }
  return turns;
};
/**
 * Merges every system message into one system-instruction object.
 *
 * The contents of all "system" messages are joined with newlines, in order,
 * and the result is trimmed.
 *
 * @param messages Conversation history, possibly including system messages.
 * @returns `{ parts: [{ text }] }` holding the merged text, or `undefined`
 *   when there is no system text (none present, or whitespace only).
 */
const buildSystemInstruction = (messages: SiteMenteMessage[]) => {
  const systemLines: string[] = [];
  for (const message of messages) {
    if (message.role === "system") {
      systemLines.push(message.content);
    }
  }
  const merged = systemLines.join("\n").trim();
  return merged ? { parts: [{ text: merged }] } : undefined;
};
/**
 * Generates a text reply from Gemini for the given conversation.
 *
 * Non-system messages are sent as the conversation turns; system messages are
 * merged and sent as the system instruction.
 *
 * @param messages Conversation history, possibly including system messages.
 * @returns The trimmed text of the first candidate in the response.
 * @throws Error when GEMINI_API_KEY is not set, when the request fails, or
 *   when the response contains no text. Failures are logged and rethrown
 *   unchanged.
 */
export const generateSiteMenteText = async (
  messages: SiteMenteMessage[]
): Promise<string> => {
  try {
    const client = getClient();
    const systemInstruction = buildSystemInstruction(messages);
    const response = await client.models.generateContent({
      model: TEXT_MODEL,
      contents: buildContents(messages),
      // The SDK reads the system prompt from `config.systemInstruction`; a
      // top-level `systemInstruction` property is not part of the request
      // parameters and would be dropped. Omit `config` when there is none.
      ...(systemInstruction ? { config: { systemInstruction } } : {}),
    });
    return extractText(response);
  } catch (error) {
    console.error("[SiteMente][Gemini] Text generation failed", error);
    throw error;
  }
};
/**
 * Synthesizes speech for the given text with the Gemini speech model.
 *
 * The request asks for the AUDIO response modality and the prebuilt voice
 * "Kore".
 *
 * @param text Text to be spoken, sent as a single user turn.
 * @returns The decoded audio bytes and their MIME type.
 * @throws Error when GEMINI_API_KEY is not set, when the request fails, or
 *   when the response carries no audio data. Failures are logged and
 *   rethrown unchanged.
 */
export const generateSiteMenteSpeech = async (
  text: string
): Promise<SiteMenteSpeechResult> => {
  const voiceName = "Kore";
  try {
    const speechResponse = await getClient().models.generateContent({
      model: SPEECH_MODEL,
      contents: [{ role: "user", parts: [{ text }] }],
      config: {
        responseModalities: ["AUDIO"],
        speechConfig: {
          voiceConfig: { prebuiltVoiceConfig: { voiceName } },
        },
      },
    });
    return extractAudio(speechResponse);
  } catch (failure) {
    console.error("[SiteMente][Gemini] Speech generation failed", failure);
    throw failure;
  }
};