134 lines
3.3 KiB
TypeScript
134 lines
3.3 KiB
TypeScript
import { GoogleGenAI } from "@google/genai";
|
|
|
|
/** One turn of a SiteMente conversation. */
export type SiteMenteMessage = {
  /** Author of the turn: the end user, the assistant, or a system prompt. */
  role: "user" | "assistant" | "system";
  /** Plain-text body of the turn. */
  content: string;
};
|
|
|
|
/** Audio produced by a speech-generation request. */
export type SiteMenteSpeechResult = {
  /** Raw audio payload as bytes. */
  audioBytes: Uint8Array;
  /** MIME type describing `audioBytes`. */
  mimeType: string;
};
|
|
|
|
/** Model identifier sent with text-generation requests. */
const TEXT_MODEL = "gemini-2.5-flash";

/** Model identifier sent with speech-generation requests. */
const SPEECH_MODEL = "gemini-2.5-flash-preview-tts";
|
|
|
|
/**
 * Builds a Gemini SDK client from the `GEMINI_API_KEY` environment variable.
 *
 * A new `GoogleGenAI` instance is constructed on every call.
 *
 * @returns A client configured with the API key read from the environment.
 * @throws Error when `GEMINI_API_KEY` is missing or empty.
 */
const getClient = () => {
  const { GEMINI_API_KEY: key } = process.env;

  if (key) {
    return new GoogleGenAI({ apiKey: key });
  }

  throw new Error("GEMINI_API_KEY is not set.");
};
|
|
|
|
/**
 * Reads the generated text out of a Gemini `generateContent` response.
 *
 * Only the first candidate is inspected. The `text` of each of its parts is
 * concatenated in order (parts without `text` contribute nothing) and the
 * result is trimmed.
 *
 * @param response - Raw response object; its shape is not trusted, so every
 *   level is probed with optional chaining.
 * @returns The trimmed, non-empty text.
 * @throws Error when the response carries no text or only whitespace.
 */
const extractText = (response: unknown): string => {
  type TextPart = { text?: string };
  type TextResponse = {
    candidates?: Array<{ content?: { parts?: TextPart[] } }>;
  };

  const pieces = (response as TextResponse)?.candidates?.[0]?.content?.parts;
  const combined =
    pieces == null
      ? ""
      : pieces
          .map((piece) => piece.text ?? "")
          .join("")
          .trim();

  if (combined) {
    return combined;
  }

  throw new Error("Gemini returned an empty response.");
};
|
|
|
|
/**
 * Reads the inline audio payload out of a Gemini `generateContent` response.
 *
 * Only the first candidate is inspected, and within it the first part that
 * carries `inlineData`. The payload's `data` string is decoded from base64.
 *
 * @param response - Raw response object; its shape is not trusted, so every
 *   level is probed with optional chaining.
 * @returns The decoded bytes together with the MIME type from the payload.
 * @throws Error when no inline payload is found, or when the payload lacks
 *   `data` or `mimeType`.
 */
const extractAudio = (response: unknown): SiteMenteSpeechResult => {
  type InlinePayload = { data?: string; mimeType?: string };
  type AudioResponse = {
    candidates?: Array<{
      content?: { parts?: Array<{ inlineData?: InlinePayload }> };
    }>;
  };

  const pieces = (response as AudioResponse)?.candidates?.[0]?.content?.parts;
  const payload = pieces?.find((piece) => piece.inlineData)?.inlineData;

  if (payload?.data && payload.mimeType) {
    const decoded = Buffer.from(payload.data, "base64");
    // Copy into a plain Uint8Array so callers do not receive a Node Buffer.
    return { audioBytes: new Uint8Array(decoded), mimeType: payload.mimeType };
  }

  throw new Error("Gemini did not return audio data.");
};
|
|
|
|
/**
 * Converts conversation turns into the `contents` array sent to Gemini.
 *
 * `system` turns are left out. `assistant` turns are sent with role `model`,
 * and every other turn with role `user`. Order is preserved.
 *
 * @param messages - Conversation history in chronological order.
 * @returns One `{ role, parts }` entry per non-system message.
 */
const buildContents = (messages: SiteMenteMessage[]) => {
  const turns: Array<{ role: string; parts: Array<{ text: string }> }> = [];

  for (const entry of messages) {
    if (entry.role === "system") {
      continue;
    }
    const role = entry.role === "assistant" ? "model" : "user";
    turns.push({ role, parts: [{ text: entry.content }] });
  }

  return turns;
};
|
|
|
|
/**
 * Merges all `system` turns into a single system-instruction object.
 *
 * The contents of the system messages are joined with newlines, in order, and
 * the joined text is trimmed.
 *
 * @param messages - Conversation history; non-system turns are ignored.
 * @returns `{ parts: [{ text }] }` holding the merged text, or `undefined`
 *   when there is no system text (none present, or only whitespace).
 */
const buildSystemInstruction = (messages: SiteMenteMessage[]) => {
  const prompts: string[] = [];

  for (const entry of messages) {
    if (entry.role === "system") {
      prompts.push(entry.content);
    }
  }

  const merged = prompts.join("\n").trim();
  return merged ? { parts: [{ text: merged }] } : undefined;
};
|
|
|
|
/**
 * Generates a text reply for a conversation using the Gemini text model.
 *
 * `system` messages are merged into the request's system instruction; all
 * other messages are sent as conversation contents.
 *
 * @param messages - Conversation history in chronological order.
 * @returns The trimmed text of the first response candidate.
 * @throws Error when `GEMINI_API_KEY` is not set, when the SDK call fails, or
 *   when the response contains no text. Failures are logged and rethrown
 *   unchanged.
 */
export const generateSiteMenteText = async (
  messages: SiteMenteMessage[]
): Promise<string> => {
  try {
    const client = getClient();
    const systemInstruction = buildSystemInstruction(messages);

    const response = await client.models.generateContent({
      model: TEXT_MODEL,
      contents: buildContents(messages),
      // The SDK reads the system prompt from `config.systemInstruction`; a
      // top-level `systemInstruction` field is not part of the request
      // parameters and would be dropped. Omit `config` when there is no
      // system text.
      ...(systemInstruction ? { config: { systemInstruction } } : {}),
    });

    return extractText(response);
  } catch (error) {
    console.error("[SiteMente][Gemini] Text generation failed", error);
    throw error;
  }
};
|
|
|
|
/**
 * Synthesizes speech for the given text using the Gemini speech model.
 *
 * The request asks for audio output only and uses the prebuilt voice "Kore".
 *
 * @param text - Text to be spoken; sent as a single user turn.
 * @returns The decoded audio bytes and their MIME type.
 * @throws Error when `GEMINI_API_KEY` is not set, when the SDK call fails, or
 *   when the response carries no audio payload. Failures are logged and
 *   rethrown unchanged.
 */
export const generateSiteMenteSpeech = async (
  text: string
): Promise<SiteMenteSpeechResult> => {
  const speechConfig = {
    voiceConfig: {
      prebuiltVoiceConfig: { voiceName: "Kore" },
    },
  };

  try {
    const spoken = await getClient().models.generateContent({
      model: SPEECH_MODEL,
      contents: [{ role: "user", parts: [{ text }] }],
      config: { responseModalities: ["AUDIO"], speechConfig },
    });

    return extractAudio(spoken);
  } catch (failure) {
    console.error("[SiteMente][Gemini] Speech generation failed", failure);
    throw failure;
  }
};
|