mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
feat(js-sdk): type-safe LLM extract
This commit is contained in:
parent
eec22a56d3
commit
a2903e75cf
|
@ -1,5 +1,5 @@
|
||||||
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
|
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
|
||||||
import type { ZodSchema } from "zod";
|
import type { infer as ZodInfer, ZodSchema } from "zod";
|
||||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||||
import { WebSocket } from "isows";
|
import { WebSocket } from "isows";
|
||||||
import { TypedEventTarget } from "typescript-event-target";
|
import { TypedEventTarget } from "typescript-event-target";
|
||||||
|
@ -58,13 +58,13 @@ export interface FirecrawlDocumentMetadata {
|
||||||
* Document interface for Firecrawl.
|
* Document interface for Firecrawl.
|
||||||
* Represents a document retrieved or processed by Firecrawl.
|
* Represents a document retrieved or processed by Firecrawl.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlDocument {
|
export interface FirecrawlDocument<T> {
|
||||||
url?: string;
|
url?: string;
|
||||||
markdown?: string;
|
markdown?: string;
|
||||||
html?: string;
|
html?: string;
|
||||||
rawHtml?: string;
|
rawHtml?: string;
|
||||||
links?: string[];
|
links?: string[];
|
||||||
extract?: Record<any, any>;
|
extract?: T;
|
||||||
screenshot?: string;
|
screenshot?: string;
|
||||||
metadata?: FirecrawlDocumentMetadata;
|
metadata?: FirecrawlDocumentMetadata;
|
||||||
}
|
}
|
||||||
|
@ -73,26 +73,29 @@ export interface FirecrawlDocument {
|
||||||
* Parameters for scraping operations.
|
* Parameters for scraping operations.
|
||||||
* Defines the options and configurations available for scraping web content.
|
* Defines the options and configurations available for scraping web content.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeParams {
|
export interface CrawlScrapeOptions {
|
||||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
includeTags?: string[];
|
includeTags?: string[];
|
||||||
excludeTags?: string[];
|
excludeTags?: string[];
|
||||||
onlyMainContent?: boolean;
|
onlyMainContent?: boolean;
|
||||||
extract?: {
|
|
||||||
prompt?: string;
|
|
||||||
schema?: ZodSchema | any;
|
|
||||||
systemPrompt?: string;
|
|
||||||
};
|
|
||||||
waitFor?: number;
|
waitFor?: number;
|
||||||
timeout?: number;
|
timeout?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface ScrapeParams<LLMSchema extends ZodSchema> extends CrawlScrapeOptions {
|
||||||
|
extract?: {
|
||||||
|
prompt?: string;
|
||||||
|
schema?: LLMSchema;
|
||||||
|
systemPrompt?: string;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response interface for scraping operations.
|
* Response interface for scraping operations.
|
||||||
* Defines the structure of the response received after a scraping operation.
|
* Defines the structure of the response received after a scraping operation.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeResponse extends FirecrawlDocument {
|
export interface ScrapeResponse<LLMResult> extends FirecrawlDocument<LLMResult> {
|
||||||
success: true;
|
success: true;
|
||||||
warning?: string;
|
warning?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
|
@ -110,7 +113,7 @@ export interface CrawlParams {
|
||||||
allowBackwardLinks?: boolean;
|
allowBackwardLinks?: boolean;
|
||||||
allowExternalLinks?: boolean;
|
allowExternalLinks?: boolean;
|
||||||
ignoreSitemap?: boolean;
|
ignoreSitemap?: boolean;
|
||||||
scrapeOptions?: ScrapeParams;
|
scrapeOptions?: CrawlScrapeOptions;
|
||||||
webhook?: string;
|
webhook?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,7 +140,7 @@ export interface CrawlStatusResponse {
|
||||||
creditsUsed: number;
|
creditsUsed: number;
|
||||||
expiresAt: Date;
|
expiresAt: Date;
|
||||||
next?: string;
|
next?: string;
|
||||||
data: FirecrawlDocument[];
|
data: FirecrawlDocument<undefined>[];
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -197,10 +200,10 @@ export default class FirecrawlApp {
|
||||||
* @param params - Additional parameters for the scrape request.
|
* @param params - Additional parameters for the scrape request.
|
||||||
* @returns The response from the scrape operation.
|
* @returns The response from the scrape operation.
|
||||||
*/
|
*/
|
||||||
async scrapeUrl(
|
async scrapeUrl<T extends ZodSchema>(
|
||||||
url: string,
|
url: string,
|
||||||
params?: ScrapeParams
|
params?: ScrapeParams<T>
|
||||||
): Promise<ScrapeResponse | ErrorResponse> {
|
): Promise<ScrapeResponse<ZodInfer<T>> | ErrorResponse> {
|
||||||
const headers: AxiosRequestHeaders = {
|
const headers: AxiosRequestHeaders = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
|
@ -528,21 +531,21 @@ export default class FirecrawlApp {
|
||||||
}
|
}
|
||||||
|
|
||||||
interface CrawlWatcherEvents {
|
interface CrawlWatcherEvents {
|
||||||
document: CustomEvent<FirecrawlDocument>,
|
document: CustomEvent<FirecrawlDocument<undefined>>,
|
||||||
done: CustomEvent<{
|
done: CustomEvent<{
|
||||||
status: CrawlStatusResponse["status"];
|
status: CrawlStatusResponse["status"];
|
||||||
data: FirecrawlDocument[];
|
data: FirecrawlDocument<undefined>[];
|
||||||
}>,
|
}>,
|
||||||
error: CustomEvent<{
|
error: CustomEvent<{
|
||||||
status: CrawlStatusResponse["status"],
|
status: CrawlStatusResponse["status"],
|
||||||
data: FirecrawlDocument[],
|
data: FirecrawlDocument<undefined>[],
|
||||||
error: string,
|
error: string,
|
||||||
}>,
|
}>,
|
||||||
}
|
}
|
||||||
|
|
||||||
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
||||||
private ws: WebSocket;
|
private ws: WebSocket;
|
||||||
public data: FirecrawlDocument[];
|
public data: FirecrawlDocument<undefined>[];
|
||||||
public status: CrawlStatusResponse["status"];
|
public status: CrawlStatusResponse["status"];
|
||||||
|
|
||||||
constructor(id: string, app: FirecrawlApp) {
|
constructor(id: string, app: FirecrawlApp) {
|
||||||
|
@ -563,7 +566,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
||||||
|
|
||||||
type DocumentMessage = {
|
type DocumentMessage = {
|
||||||
type: "document",
|
type: "document",
|
||||||
data: FirecrawlDocument,
|
data: FirecrawlDocument<undefined>,
|
||||||
}
|
}
|
||||||
|
|
||||||
type DoneMessage = { type: "done" }
|
type DoneMessage = { type: "done" }
|
||||||
|
|
Loading…
Reference in New Issue
Block a user