diff --git a/apps/api/package.json b/apps/api/package.json index bb4ea268..aebd90a5 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -81,7 +81,6 @@ "escape-html": "^1.0.3", "express-rate-limit": "^7.3.1", "express-ws": "^5.0.2", - "form-data": "^4.0.0", "glob": "^10.4.2", "gpt3-tokenizer": "^1.1.5", "ioredis": "^5.4.1", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 3350c74e..f98055fb 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -101,9 +101,6 @@ importers: express-ws: specifier: ^5.0.2 version: 5.0.2(express@4.19.2) - form-data: - specifier: ^4.0.0 - version: 4.0.0 glob: specifier: ^10.4.2 version: 10.4.2 @@ -3932,8 +3929,8 @@ packages: engines: {node: '>=14.17'} hasBin: true - typescript@5.6.2: - resolution: {integrity: sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==} + typescript@5.6.3: + resolution: {integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==} engines: {node: '>=14.17'} hasBin: true @@ -7742,7 +7739,7 @@ snapshots: csv-parse: 5.5.6 gpt3-tokenizer: 1.1.5 openai: 3.3.0 - typescript: 5.6.2 + typescript: 5.6.3 uuid: 9.0.1 zod: 3.23.8 transitivePeerDependencies: @@ -8320,7 +8317,7 @@ snapshots: typescript@5.4.5: {} - typescript@5.6.2: {} + typescript@5.6.3: {} typesense@1.8.2(@babel/runtime@7.24.6): dependencies: diff --git a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts index ea44b051..8b42ee71 100644 --- a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts @@ -1,5 +1,4 @@ import { createReadStream, promises as fs } from "node:fs"; -import FormData from "form-data"; import { Meta } from "../.."; import { EngineScrapeResult } from ".."; import * as marked from "marked"; @@ -16,10 +15,26 @@ async function scrapePDFWithLlamaParse(meta: Meta, tempFilePath: string): Promis meta.logger.debug("Processing PDF document with LlamaIndex", { tempFilePath }); const uploadForm = new FormData(); - uploadForm.append("file", createReadStream(tempFilePath), { - filename: tempFilePath, - contentType: "application/pdf", // NOTE: request.headers["Content-Type"]? - }); + + // This is utterly stupid but it works! - mogery + uploadForm.append("file", { + [Symbol.toStringTag]: "Blob", + name: tempFilePath, + stream() { + return createReadStream(tempFilePath) as unknown as ReadableStream + }, + arrayBuffer() { + throw Error("Unimplemented in mock Blob: arrayBuffer") + }, + size: (await fs.stat(tempFilePath)).size, + text() { + throw Error("Unimplemented in mock Blob: text") + }, + slice(start, end, contentType) { + throw Error("Unimplemented in mock Blob: slice") + }, + type: "application/pdf", + } as Blob); const upload = await robustFetch({ url: "https://api.cloud.llamaindex.ai/api/parsing/upload", diff --git a/apps/api/src/scraper/scrapeURL/lib/fetch.ts b/apps/api/src/scraper/scrapeURL/lib/fetch.ts index 03bbd80c..738e240e 100644 --- a/apps/api/src/scraper/scrapeURL/lib/fetch.ts +++ b/apps/api/src/scraper/scrapeURL/lib/fetch.ts @@ -2,7 +2,6 @@ import { Logger } from "winston"; import { z, ZodError } from "zod"; import { v4 as uuid } from "uuid"; import * as Sentry from "@sentry/node"; -import FormData from "form-data"; export type RobustFetchParams> = { url: string; @@ -38,14 +37,14 @@ export async function robustFetch, Output = z.infer method, headers: { ...(body instanceof FormData - ? body.getHeaders() + ? ({}) : body !== undefined ? ({ "Content-Type": "application/json", }) : {}), ...(headers !== undefined ? headers : {}), }, ...(body instanceof FormData ? ({ - body: body.getBuffer(), + body, }) : body !== undefined ? ({ body: JSON.stringify(body), }) : {}),