fdc7a4223a
* fix(voice): enforce upload size limit before buffering (issue #7) The previous implementation called request.formData() and audio.arrayBuffer() before checking MAX_VOICE_UPLOAD_BYTES, meaning oversized uploads were fully buffered into memory before rejection — a DoS/OOM risk. Changes: - Check Content-Length header early and return 413 if it exceeds the limit, preventing any request body from being read into memory for oversized uploads - Export MAX_VOICE_UPLOAD_BYTES for use in tests - Switch from instanceof File to duck-typing (checking .arrayBuffer method) to avoid cross-realm failures in jsdom test environments - Return HTTP 413 Payload Too Large for oversized uploads (was 400 before) - Retain a secondary post-buffer size check to catch missing/spoofed Content-Length headers Tests added (tests/unit/voiceTranscribe.test.ts): - Content-Length exceeding limit → 413 before any buffering - Content-Length at exactly the limit → proceeds normally - No Content-Length header, small file → proceeds normally (200) - No Content-Length header, oversized body → 413 after buffering - Missing audio field → 400 - Empty audio file (0 bytes) → 400 - Malformed Content-Length header → falls through gracefully Fixes: issue #7 * fix(issue-7): account for multipart overhead in Content-Length early check The early Content-Length guard was comparing total multipart request size against MAX_VOICE_UPLOAD_BYTES, but multipart/form-data includes boundary and header overhead (~200-500 bytes). A valid file at exactly the 20 MB limit was being rejected with 413. Fix: add a 1 KB MULTIPART_OVERHEAD_ALLOWANCE to the early check threshold. The post-buffer check remains the authoritative limit and measures actual audio bytes. Updated tests to reflect the corrected early-check boundary. --------- Co-authored-by: Neo (subagent) <neo@openclaw.local> Co-authored-by: Neo <neo@openclaw.ai>
199 lines
7.9 KiB
TypeScript
199 lines
7.9 KiB
TypeScript
/**
|
|
* Tests for the voice transcription API route — focusing on the upload size
|
|
* limit that must be enforced BEFORE the request body is buffered into memory
|
|
* (issue #7 fix).
|
|
*/
|
|
|
|
import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Module mocks — must be hoisted before the route import
|
|
// ---------------------------------------------------------------------------
|
|
|
|
vi.mock("@/lib/openclaw/voiceTranscription", () => ({
|
|
transcribeVoiceWithOpenClaw: vi.fn().mockResolvedValue({
|
|
transcript: "hello world",
|
|
provider: "openai",
|
|
model: "whisper-1",
|
|
decision: { outcome: "success" },
|
|
ignored: false,
|
|
}),
|
|
}));
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
const { MAX_VOICE_UPLOAD_BYTES, POST } = await import(
|
|
"@/app/api/office/voice/transcribe/route"
|
|
);
|
|
|
|
/** Build a minimal multipart/form-data Request with an audio file blob. */
|
|
function buildAudioRequest(
|
|
fileSizeBytes: number,
|
|
options: { contentLengthOverride?: number | null } = {},
|
|
): Request {
|
|
const audioBlob = new Blob([new Uint8Array(fileSizeBytes)], { type: "audio/webm" });
|
|
const formData = new FormData();
|
|
formData.append("audio", audioBlob, "voice.webm");
|
|
|
|
// Build headers
|
|
const headers: Record<string, string> = {};
|
|
if (options.contentLengthOverride !== undefined && options.contentLengthOverride !== null) {
|
|
headers["content-length"] = String(options.contentLengthOverride);
|
|
}
|
|
|
|
return new Request("http://localhost/api/office/voice/transcribe", {
|
|
method: "POST",
|
|
body: formData,
|
|
headers,
|
|
});
|
|
}
|
|
|
|
/** Build a Request with no audio field in the form. */
|
|
function buildNoAudioRequest(): Request {
|
|
const formData = new FormData();
|
|
return new Request("http://localhost/api/office/voice/transcribe", {
|
|
method: "POST",
|
|
body: formData,
|
|
});
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Tests
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe("POST /api/office/voice/transcribe — size limit enforcement (issue #7)", () => {
|
|
beforeEach(() => {
|
|
vi.clearAllMocks();
|
|
});
|
|
|
|
// ── Content-Length early rejection ────────────────────────────────────────
|
|
|
|
// The early Content-Length check uses MAX_VOICE_UPLOAD_BYTES + 1024 as its
|
|
// threshold because multipart/form-data requests include boundary/header
|
|
// overhead on top of the raw audio bytes. A request at exactly
|
|
// MAX_VOICE_UPLOAD_BYTES + 1 could still contain a valid audio file — the
|
|
// post-buffer check (which measures actual bytes) is the authoritative limit.
|
|
// The early check only rejects requests that are obviously too large.
|
|
const MULTIPART_OVERHEAD_ALLOWANCE = 1024;
|
|
|
|
it("returns 413 immediately when Content-Length clearly exceeds the limit + overhead allowance", async () => {
|
|
const oversizeBytes = MAX_VOICE_UPLOAD_BYTES + MULTIPART_OVERHEAD_ALLOWANCE + 1;
|
|
const request = buildAudioRequest(1, {
|
|
// Lie about size — we want to confirm the header check fires even when
|
|
// the actual payload is small (verifying header-based early rejection).
|
|
contentLengthOverride: oversizeBytes,
|
|
});
|
|
|
|
const response = await POST(request);
|
|
|
|
expect(response.status).toBe(413);
|
|
const body = await response.json();
|
|
expect(body.error).toMatch(/exceeds/i);
|
|
});
|
|
|
|
it("does NOT reject early when Content-Length is MAX + 1 (within multipart overhead allowance)", async () => {
|
|
// MAX_VOICE_UPLOAD_BYTES + 1 is within the multipart overhead window —
|
|
// the actual audio file may still be within the limit. The early check
|
|
// should pass; the post-buffer check is the authoritative limit.
|
|
const request = buildAudioRequest(1, {
|
|
contentLengthOverride: MAX_VOICE_UPLOAD_BYTES + 1,
|
|
});
|
|
const response = await POST(request);
|
|
// Should NOT return 413 from the early header check (body is 1 byte, fine).
|
|
expect(response.status).not.toBe(413);
|
|
});
|
|
|
|
it("does NOT reject when Content-Length equals MAX_VOICE_UPLOAD_BYTES exactly", async () => {
|
|
// The actual body is tiny; we're testing the header path only here.
|
|
const request = buildAudioRequest(1, {
|
|
contentLengthOverride: MAX_VOICE_UPLOAD_BYTES,
|
|
});
|
|
const response = await POST(request);
|
|
// Should not be a 413 from the header check (actual body is 1 byte, fine).
|
|
expect(response.status).not.toBe(413);
|
|
});
|
|
|
|
// ── No Content-Length header — handled gracefully ─────────────────────────
|
|
|
|
it("proceeds normally when Content-Length header is absent and file is within limit", async () => {
|
|
// Small valid audio; no content-length header at all.
|
|
const request = buildAudioRequest(1024 /* 1 KB */);
|
|
|
|
const response = await POST(request);
|
|
// Should succeed (mocked transcription returns 200).
|
|
expect(response.status).toBe(200);
|
|
const body = await response.json();
|
|
expect(body.transcript).toBe("hello world");
|
|
});
|
|
|
|
it("returns 413 after buffering when Content-Length is absent but body exceeds limit", async () => {
|
|
// Build a real oversized body with no content-length header.
|
|
// We use MAX_VOICE_UPLOAD_BYTES + 1 bytes to trigger the post-buffer check.
|
|
const oversizeBytes = MAX_VOICE_UPLOAD_BYTES + 1;
|
|
const audioBlob = new Blob([new Uint8Array(oversizeBytes)], { type: "audio/webm" });
|
|
const formData = new FormData();
|
|
formData.append("audio", audioBlob, "big.webm");
|
|
|
|
const request = new Request("http://localhost/api/office/voice/transcribe", {
|
|
method: "POST",
|
|
body: formData,
|
|
// No content-length header — the post-buffer check must catch this.
|
|
});
|
|
|
|
const response = await POST(request);
|
|
expect(response.status).toBe(413);
|
|
const body = await response.json();
|
|
expect(body.error).toMatch(/exceeds/i);
|
|
});
|
|
|
|
// ── Normal happy path ─────────────────────────────────────────────────────
|
|
|
|
it("returns 200 with transcript for a valid upload within the size limit", async () => {
|
|
const request = buildAudioRequest(4096 /* 4 KB */);
|
|
const response = await POST(request);
|
|
|
|
expect(response.status).toBe(200);
|
|
const body = await response.json();
|
|
expect(body).toMatchObject({
|
|
transcript: "hello world",
|
|
provider: "openai",
|
|
model: "whisper-1",
|
|
});
|
|
});
|
|
|
|
// ── Edge cases ────────────────────────────────────────────────────────────
|
|
|
|
it("returns 400 when no audio field is present in the form", async () => {
|
|
const response = await POST(buildNoAudioRequest());
|
|
expect(response.status).toBe(400);
|
|
const body = await response.json();
|
|
expect(body.error).toMatch(/audio file is required/i);
|
|
});
|
|
|
|
it("returns 400 for an empty audio file (0 bytes)", async () => {
|
|
const request = buildAudioRequest(0);
|
|
const response = await POST(request);
|
|
expect(response.status).toBe(400);
|
|
const body = await response.json();
|
|
expect(body.error).toMatch(/empty/i);
|
|
});
|
|
|
|
it("ignores a malformed (non-numeric) Content-Length header and falls through", async () => {
|
|
const audioBlob = new Blob([new Uint8Array(512)], { type: "audio/webm" });
|
|
const formData = new FormData();
|
|
formData.append("audio", audioBlob, "voice.webm");
|
|
|
|
const request = new Request("http://localhost/api/office/voice/transcribe", {
|
|
method: "POST",
|
|
body: formData,
|
|
headers: { "content-length": "not-a-number" },
|
|
});
|
|
|
|
// Should NOT blow up; header is NaN so we skip the early check and proceed.
|
|
const response = await POST(request);
|
|
expect(response.status).toBe(200);
|
|
});
|
|
});
|