mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
feat(js-sdk): add crawlUrlAndWatch
This commit is contained in:
parent
d4001e4528
commit
53018a683f
|
@ -29,5 +29,21 @@ if (job.data) {
|
|||
console.log(job.data[0].markdown);
|
||||
}
|
||||
|
||||
// Map a website.
// The SDK method added alongside this example is `mapUrl`; `app.map` is not
// defined in the visible SDK code and would fail with "app.map is not a function".
const mapResult = await app.mapUrl('https://firecrawl.dev');
console.log(mapResult);

// Crawl a website with WebSockets.
// `crawlUrlAndWatch` starts the crawl and resolves to a watcher object that
// emits "document", "error" and "done" events.
const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5 });

// One event per scraped document; the document itself is in `detail`.
watch.addEventListener("document", doc => {
  console.log("DOC", doc.detail);
});

// Crawl failure or WebSocket error; `detail.error` holds the message.
watch.addEventListener("error", err => {
  console.error("ERR", err.detail.error);
});

// Crawl finished; `detail.status` holds the final status.
watch.addEventListener("done", state => {
  console.log("DONE", state.detail.status);
});
|
||||
|
|
|
@ -32,8 +32,24 @@ const main = async () => {
|
|||
console.log(checkStatus.data[0].markdown);
|
||||
}
|
||||
|
||||
// Map a website:
|
||||
const mapResult = await app.mapUrl('https://firecrawl.dev');
|
||||
console.log(mapResult)
|
||||
|
||||
// Crawl a website with WebSockets:
|
||||
const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
|
||||
|
||||
watch.addEventListener("document", doc => {
|
||||
console.log("DOC", doc.detail);
|
||||
});
|
||||
|
||||
watch.addEventListener("error", err => {
|
||||
console.error("ERR", err.detail.error);
|
||||
});
|
||||
|
||||
watch.addEventListener("done", state => {
|
||||
console.log("DONE", state.detail.status);
|
||||
});
|
||||
}
|
||||
|
||||
main()
|
46
apps/js-sdk/firecrawl/package-lock.json
generated
46
apps/js-sdk/firecrawl/package-lock.json
generated
|
@ -1,16 +1,18 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.36",
|
||||
"version": "1.0.3",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.36",
|
||||
"version": "1.0.3",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"isows": "^1.0.4",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
|
@ -2137,6 +2139,20 @@
|
|||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/isows": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/isows/-/isows-1.0.4.tgz",
|
||||
"integrity": "sha512-hEzjY+x9u9hPmBom9IIAqdJCwNLax+xrPb51vEPpERoFlIxgmZcHzsT5jKG06nvInKOBGvReAVz80Umed5CczQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wagmi-dev"
|
||||
}
|
||||
],
|
||||
"peerDependencies": {
|
||||
"ws": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/istanbul-lib-coverage": {
|
||||
"version": "3.2.2",
|
||||
"resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
|
||||
|
@ -3733,6 +3749,11 @@
|
|||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript-event-target": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz",
|
||||
"integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg=="
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
|
@ -3855,6 +3876,27 @@
|
|||
"node": "^12.13.0 || ^14.15.0 || >=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.18.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
|
||||
"integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/y18n": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.0.3",
|
||||
"version": "1.0.4",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/cjs/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
|
@ -30,6 +30,8 @@
|
|||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"isows": "^1.0.4",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import { WebSocket } from "isows";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
|
||||
/**
|
||||
* Configuration interface for FirecrawlApp.
|
||||
|
@ -315,8 +317,8 @@ export interface SearchResponseV0 {
|
|||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||
*/
|
||||
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
||||
private apiKey: string;
|
||||
private apiUrl: string;
|
||||
public apiKey: string;
|
||||
public apiUrl: string;
|
||||
public version: T;
|
||||
|
||||
/**
|
||||
|
@ -561,6 +563,21 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
||||
}
|
||||
|
||||
/**
 * Starts a crawl job and returns a watcher that reports its progress over a WebSocket.
 *
 * @param url - The URL to crawl.
 * @param params - Crawl parameters, forwarded unchanged to `crawlUrl`.
 * @param idempotencyKey - Optional idempotency key, forwarded unchanged to `crawlUrl`.
 * @returns A `CrawlWatcher` bound to the id of the started crawl job.
 * @throws Error when called on a v0 client, or when the crawl response carries no job id.
 */
async crawlUrlAndWatch(
  url: string,
  params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
  idempotencyKey?: string,
): Promise<CrawlWatcher> {
  if (this.version === 'v0') {
    throw new Error("crawlUrlAndWatch is only available on v1");
  }

  // NOTE(review): `false, 0` are passed through as in the original call —
  // presumably "do not wait until done" and "no poll interval"; confirm against crawlUrl.
  const crawl = await this.crawlUrl(url, params, false, 0, idempotencyKey);

  // v0 was rejected above, so only the v1 response shape (`id`) can apply here.
  const id: unknown = (crawl as CrawlResponse).id;

  // Without a job id the watcher would connect to `/v1/crawl/undefined`;
  // fail loudly instead of opening a socket that can never deliver events.
  if (typeof id !== "string" || id.length === 0) {
    throw new Error("Crawl job failed to start: response did not include a job id");
  }

  return new CrawlWatcher(id, this as FirecrawlApp<"v1">);
}
|
||||
|
||||
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
|
||||
if (this.version == 'v0') {
|
||||
throw new Error("Map is not supported in v0");
|
||||
|
@ -696,3 +713,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Event map for `CrawlWatcher`: event name -> event object delivered to listeners.
 */
interface CrawlWatcherEvents {
  /** A single document; `detail` is the document itself. */
  document: CustomEvent<FirecrawlDocument>;

  /** Completion; `detail` carries the watcher's status and collected documents. */
  done: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
  }>;

  /** Failure; same payload as `done` plus a human-readable `error` message. */
  error: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
    error: string;
  }>;
}
|
||||
|
||||
/**
 * Follows a v1 crawl job over a WebSocket and re-emits server messages as typed events.
 *
 * Events:
 * - `document`: dispatched for every document received (`detail` is the document).
 * - `done`: dispatched when the server sends a `done` message.
 * - `error`: dispatched when the server sends an `error` message or the socket errors.
 *
 * `data` accumulates every document received so far and `status` mirrors the
 * most recently known crawl status; both are included in `done` / `error` details.
 */
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private readonly ws: WebSocket;
  public data: FirecrawlDocument[];
  public status: CrawlStatusResponse["status"];

  /**
   * Opens the WebSocket for the given crawl job and wires up message handling.
   *
   * @param id - Id of the crawl job to watch.
   * @param app - Client whose `apiUrl` and `apiKey` are used for the connection.
   */
  constructor(id: string, app: FirecrawlApp<"v1">) {
    super();

    // WebSocket URLs use ws:// / wss://. Map http -> ws and https -> wss so the
    // connection also works on runtimes that reject http(s) URLs here.
    // A URL that is already ws(s):// is left untouched.
    const wsUrl = app.apiUrl.replace(/^http/, "ws");

    // The API key is passed as the second (protocols) argument, as in the original code.
    this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
    this.status = "scraping";
    this.data = [];

    type ErrorMessage = {
      type: "error";
      error: string;
    };

    type CatchupMessage = {
      type: "catchup";
      data: CrawlStatusResponse;
    };

    type DocumentMessage = {
      type: "document";
      data: FirecrawlDocument;
    };

    type DoneMessage = { type: "done" };

    type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;

    // Parses raw text into a Message. Returns undefined when the text is not
    // JSON or is not an object with a string `type`, so callers never throw
    // from inside a socket event handler.
    const parseMessage = (raw: string): Message | undefined => {
      let parsed: unknown;
      try {
        parsed = JSON.parse(raw);
      } catch (_err) {
        return undefined;
      }

      if (
        typeof parsed !== "object" ||
        parsed === null ||
        typeof (parsed as { type?: unknown }).type !== "string"
      ) {
        return undefined;
      }

      return parsed as Message;
    };

    // Applies one server message to the watcher state and dispatches the matching event.
    const messageHandler = (msg: Message) => {
      if (msg.type === "done") {
        this.status = "completed";
        this.dispatchTypedEvent("done", new CustomEvent("done", {
          detail: {
            status: this.status,
            data: this.data,
          },
        }));
      } else if (msg.type === "error") {
        this.status = "failed";
        this.dispatchTypedEvent("error", new CustomEvent("error", {
          detail: {
            status: this.status,
            data: this.data,
            error: msg.error,
          },
        }));
      } else if (msg.type === "catchup") {
        this.status = msg.data.status;

        // Dispatch only the documents delivered by this message. Iterating
        // `this.data` would re-announce documents that were already emitted.
        const batch = msg.data.data ?? [];
        this.data.push(...batch);
        for (const doc of batch) {
          this.dispatchTypedEvent("document", new CustomEvent("document", {
            detail: doc,
          }));
        }
      } else if (msg.type === "document") {
        // Record the document so the `done` / `error` payloads include it.
        this.data.push(msg.data);
        this.dispatchTypedEvent("document", new CustomEvent("document", {
          detail: msg.data,
        }));
      }
    };

    this.ws.onmessage = (ev: MessageEvent) => {
      // Only text frames are expected; anything else ends the session.
      if (typeof ev.data !== "string") {
        this.ws.close();
        return;
      }

      const msg = parseMessage(ev.data);
      if (msg === undefined) {
        // Malformed text frame: handled like a non-text frame.
        this.ws.close();
        return;
      }

      messageHandler(msg);
    };

    this.ws.onclose = (ev: CloseEvent) => {
      // The close reason is treated as a final JSON message when it parses as
      // one. A local close() or a dropped connection leaves it empty or
      // non-JSON, in which case there is nothing to dispatch.
      const msg = parseMessage(ev.reason);
      if (msg !== undefined) {
        messageHandler(msg);
      }
    };

    this.ws.onerror = (_: Event) => {
      this.status = "failed";
      this.dispatchTypedEvent("error", new CustomEvent("error", {
        detail: {
          status: this.status,
          data: this.data,
          error: "WebSocket error",
        },
      }));
    };
  }

  /** Closes the underlying WebSocket. */
  close() {
    this.ws.close();
  }
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user