diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js
index b4ee7747..5698a017 100644
--- a/apps/js-sdk/example.js
+++ b/apps/js-sdk/example.js
@@ -29,5 +29,21 @@ if (job.data) {
   console.log(job.data[0].markdown);
 }
 
+// Map a website:
 const mapResult = await app.map('https://firecrawl.dev');
 console.log(mapResult)
+
+// Crawl a website with WebSockets:
+const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
+
+watch.addEventListener("document", doc => {
+  console.log("DOC", doc.detail);
+});
+
+watch.addEventListener("error", err => {
+  console.error("ERR", err.detail.error);
+});
+
+watch.addEventListener("done", state => {
+  console.log("DONE", state.detail.status);
+});
diff --git a/apps/js-sdk/example.ts b/apps/js-sdk/example.ts
index f8d7d5d9..80589f5a 100644
--- a/apps/js-sdk/example.ts
+++ b/apps/js-sdk/example.ts
@@ -32,8 +32,24 @@ const main = async () => {
     console.log(checkStatus.data[0].markdown);
   }
 
+  // Map a website:
   const mapResult = await app.mapUrl('https://firecrawl.dev');
   console.log(mapResult)
+
+  // Crawl a website with WebSockets:
+  const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
+
+  watch.addEventListener("document", doc => {
+    console.log("DOC", doc.detail);
+  });
+
+  watch.addEventListener("error", err => {
+    console.error("ERR", err.detail.error);
+  });
+
+  watch.addEventListener("done", state => {
+    console.log("DONE", state.detail.status);
+  });
 }
 
 main()
\ No newline at end of file
diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json
index 4d9254ac..7f25babc 100644
--- a/apps/js-sdk/firecrawl/package-lock.json
+++ b/apps/js-sdk/firecrawl/package-lock.json
@@ -1,16 +1,18 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "0.0.36",
+  "version": "1.0.3",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@mendable/firecrawl-js",
-      "version": "0.0.36",
+      "version": "1.0.3",
       "license": "MIT",
       "dependencies": {
         "axios": "^1.6.8",
         "dotenv": "^16.4.5",
+        "isows": "^1.0.4",
+        "typescript-event-target": "^1.1.1",
         "uuid": "^9.0.1",
         "zod": "^3.23.8",
         "zod-to-json-schema": "^3.23.0"
@@ -2137,6 +2139,20 @@
       "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
       "dev": true
     },
+    "node_modules/isows": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/isows/-/isows-1.0.4.tgz",
+      "integrity": "sha512-hEzjY+x9u9hPmBom9IIAqdJCwNLax+xrPb51vEPpERoFlIxgmZcHzsT5jKG06nvInKOBGvReAVz80Umed5CczQ==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/wagmi-dev"
+        }
+      ],
+      "peerDependencies": {
+        "ws": "*"
+      }
+    },
     "node_modules/istanbul-lib-coverage": {
       "version": "3.2.2",
       "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
@@ -3733,6 +3749,11 @@
         "node": ">=14.17"
       }
     },
+    "node_modules/typescript-event-target": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz",
+      "integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg=="
+    },
     "node_modules/undici-types": {
       "version": "5.26.5",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
@@ -3855,6 +3876,27 @@
         "node": "^12.13.0 || ^14.15.0 || >=16.0.0"
       }
     },
+    "node_modules/ws": {
+      "version": "8.18.0",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
+      "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
+      "peer": true,
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/y18n": {
       "version": "5.0.8",
       "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json
index 6eb37a22..cadd1eaf 100644
--- a/apps/js-sdk/firecrawl/package.json
+++ b/apps/js-sdk/firecrawl/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "build/cjs/index.js",
   "types": "types/index.d.ts",
@@ -30,6 +30,8 @@
   "dependencies": {
     "axios": "^1.6.8",
     "dotenv": "^16.4.5",
+    "isows": "^1.0.4",
+    "typescript-event-target": "^1.1.1",
     "uuid": "^9.0.1",
     "zod": "^3.23.8",
     "zod-to-json-schema": "^3.23.0"
diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index cb2a0e4f..7f4eb1e4 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -1,6 +1,8 @@
 import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
 import { z } from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";
+import { WebSocket } from "isows";
+import { TypedEventTarget } from "typescript-event-target";
 
 /**
  * Configuration interface for FirecrawlApp.
@@ -315,8 +317,8 @@ export interface SearchResponseV0 {
  * Provides methods for scraping, searching, crawling, and mapping web content.
  */
 export default class FirecrawlApp {
-  private apiKey: string;
-  private apiUrl: string;
+  public apiKey: string;
+  public apiUrl: string;
   public version: T;
 
   /**
@@ -561,6 +563,33 @@ export default class FirecrawlApp {
     } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
   }
 
+  /**
+   * Starts a crawl through crawlUrl and returns a CrawlWatcher bound to the new job.
+   * Only available when the client targets v1.
+   * @param url - The URL to crawl; forwarded to crawlUrl.
+   * @param params - Crawl parameters; forwarded to crawlUrl.
+   * @param idempotencyKey - Optional idempotency key; forwarded to crawlUrl.
+   * @returns A CrawlWatcher that dispatches "document", "done" and "error" events.
+   * @throws If the client targets v0, or if the crawl response carries no job ID.
+   */
+  async crawlUrlAndWatch(
+    url: string,
+    params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
+    idempotencyKey?: string,
+  ): Promise<CrawlWatcher> {
+    if (this.version === 'v0') {
+      throw new Error("crawlUrlAndWatch is only available on v1");
+    }
+
+    const crawl = await this.crawlUrl(url, params, false, 0, idempotencyKey);
+    const id = (crawl as CrawlResponse).id;
+    if (typeof id !== "string" || id === "") {
+      throw new Error("crawlUrlAndWatch could not start the crawl: no job ID was returned");
+    }
+
+    return new CrawlWatcher(id, this as FirecrawlApp<"v1">);
+  }
+
   async mapUrl(url: string, params?: MapParams): Promise {
     if (this.version == 'v0') {
       throw new Error("Map is not supported in v0");
@@ -696,3 +725,149 @@ export default class FirecrawlApp {
     }
   }
 }
+
+/**
+ * Events dispatched by CrawlWatcher, keyed by event name.
+ */
+interface CrawlWatcherEvents {
+  document: CustomEvent<FirecrawlDocument>,
+  done: CustomEvent<{
+    status: CrawlStatusResponse["status"];
+    data: FirecrawlDocument[];
+  }>,
+  error: CustomEvent<{
+    status: CrawlStatusResponse["status"],
+    data: FirecrawlDocument[],
+    error: string,
+  }>,
+}
+
+/**
+ * Follows a crawl job over a WebSocket and re-dispatches the messages it receives as
+ * typed events: "document" per document, "done" on completion and "error" on failure.
+ */
+export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
+  private ws: WebSocket;
+  /** Documents received so far (initial catch-up plus streamed ones). */
+  public data: FirecrawlDocument[];
+  /** Most recent crawl status seen by this watcher. */
+  public status: CrawlStatusResponse["status"];
+
+  /**
+   * @param id - ID of the crawl job to follow.
+   * @param app - Client whose apiUrl and apiKey are used to open the socket.
+   */
+  constructor(id: string, app: FirecrawlApp<"v1">) {
+    super();
+    // WebSocket endpoints use the ws(s):// scheme, so derive it from the http(s) API URL.
+    const wsUrl = app.apiUrl.replace(/^http/, "ws");
+    this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
+    this.status = "scraping";
+    this.data = [];
+
+    type ErrorMessage = {
+      type: "error",
+      error: string,
+    }
+
+    type CatchupMessage = {
+      type: "catchup",
+      data: CrawlStatusResponse,
+    }
+
+    type DocumentMessage = {
+      type: "document",
+      data: FirecrawlDocument,
+    }
+
+    type DoneMessage = { type: "done" }
+
+    type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
+
+    // Parses a raw payload; yields undefined unless it is a JSON object with a string `type`.
+    const parseMessage = (raw: string): Message | undefined => {
+      try {
+        const parsed: unknown = JSON.parse(raw);
+        if (typeof parsed === "object" && parsed !== null
+          && typeof (parsed as { type?: unknown }).type === "string") {
+          return parsed as Message;
+        }
+      } catch (_) {
+        // Not JSON (for example an empty close reason): treated as "no message".
+      }
+      return undefined;
+    };
+
+    const messageHandler = (msg: Message) => {
+      if (msg.type === "done") {
+        this.status = "completed";
+        this.dispatchTypedEvent("done", new CustomEvent("done", {
+          detail: {
+            status: this.status,
+            data: this.data,
+          },
+        }));
+      } else if (msg.type === "error") {
+        this.status = "failed";
+        this.dispatchTypedEvent("error", new CustomEvent("error", {
+          detail: {
+            status: this.status,
+            data: this.data,
+            error: msg.error,
+          },
+        }));
+      } else if (msg.type === "catchup") {
+        this.status = msg.data.status;
+        // Announce only the documents carried by this message, not ones already dispatched.
+        const caughtUp = msg.data.data ?? [];
+        this.data.push(...caughtUp);
+        for (const doc of caughtUp) {
+          this.dispatchTypedEvent("document", new CustomEvent("document", {
+            detail: doc,
+          }));
+        }
+      } else if (msg.type === "document") {
+        // Record streamed documents so the "done"/"error" payloads include them.
+        this.data.push(msg.data);
+        this.dispatchTypedEvent("document", new CustomEvent("document", {
+          detail: msg.data,
+        }));
+      }
+    };
+
+    this.ws.onmessage = (ev: MessageEvent) => {
+      // Unexpected frames (non-text or unparseable) end the watch by closing the socket.
+      const msg = typeof ev.data === "string" ? parseMessage(ev.data) : undefined;
+      if (msg === undefined) {
+        this.ws.close();
+        return;
+      }
+
+      messageHandler(msg);
+    };
+
+    this.ws.onclose = (ev: CloseEvent) => {
+      // The close reason may be empty or plain text; only forward it when it parses as a message.
+      const msg = parseMessage(ev.reason);
+      if (msg !== undefined) {
+        messageHandler(msg);
+      }
+    };
+
+    this.ws.onerror = (_: Event) => {
+      this.status = "failed";
+      this.dispatchTypedEvent("error", new CustomEvent("error", {
+        detail: {
+          status: this.status,
+          data: this.data,
+          error: "WebSocket error",
+        },
+      }));
+    };
+  }
+
+  /** Closes the underlying WebSocket. */
+  close() {
+    this.ws.close();
+  }
+}