feat(js-sdk): type-safe LLM extract

This commit is contained in:
Gergő Móricz 2024-09-12 18:48:19 +02:00
parent eec22a56d3
commit a2903e75cf

View File

@@ -1,5 +1,5 @@
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios"; import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
import type { ZodSchema } from "zod"; import type { infer as ZodInfer, ZodSchema } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema"; import { zodToJsonSchema } from "zod-to-json-schema";
import { WebSocket } from "isows"; import { WebSocket } from "isows";
import { TypedEventTarget } from "typescript-event-target"; import { TypedEventTarget } from "typescript-event-target";
@@ -58,13 +58,13 @@ export interface FirecrawlDocumentMetadata {
* Document interface for Firecrawl. * Document interface for Firecrawl.
* Represents a document retrieved or processed by Firecrawl. * Represents a document retrieved or processed by Firecrawl.
*/ */
export interface FirecrawlDocument { export interface FirecrawlDocument<T> {
url?: string; url?: string;
markdown?: string; markdown?: string;
html?: string; html?: string;
rawHtml?: string; rawHtml?: string;
links?: string[]; links?: string[];
extract?: Record<any, any>; extract?: T;
screenshot?: string; screenshot?: string;
metadata?: FirecrawlDocumentMetadata; metadata?: FirecrawlDocumentMetadata;
} }
@@ -73,26 +73,29 @@ export interface FirecrawlDocument {
* Parameters for scraping operations. * Parameters for scraping operations.
* Defines the options and configurations available for scraping web content. * Defines the options and configurations available for scraping web content.
*/ */
export interface ScrapeParams { export interface CrawlScrapeOptions {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[]; formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
headers?: Record<string, string>; headers?: Record<string, string>;
includeTags?: string[]; includeTags?: string[];
excludeTags?: string[]; excludeTags?: string[];
onlyMainContent?: boolean; onlyMainContent?: boolean;
extract?: {
prompt?: string;
schema?: ZodSchema | any;
systemPrompt?: string;
};
waitFor?: number; waitFor?: number;
timeout?: number; timeout?: number;
} }
export interface ScrapeParams<LLMSchema extends ZodSchema> extends CrawlScrapeOptions {
extract?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
}
/** /**
* Response interface for scraping operations. * Response interface for scraping operations.
* Defines the structure of the response received after a scraping operation. * Defines the structure of the response received after a scraping operation.
*/ */
export interface ScrapeResponse extends FirecrawlDocument { export interface ScrapeResponse<LLMResult> extends FirecrawlDocument<LLMResult> {
success: true; success: true;
warning?: string; warning?: string;
error?: string; error?: string;
@@ -110,7 +113,7 @@ export interface CrawlParams {
allowBackwardLinks?: boolean; allowBackwardLinks?: boolean;
allowExternalLinks?: boolean; allowExternalLinks?: boolean;
ignoreSitemap?: boolean; ignoreSitemap?: boolean;
scrapeOptions?: ScrapeParams; scrapeOptions?: CrawlScrapeOptions;
webhook?: string; webhook?: string;
} }
@@ -137,7 +140,7 @@ export interface CrawlStatusResponse {
creditsUsed: number; creditsUsed: number;
expiresAt: Date; expiresAt: Date;
next?: string; next?: string;
data: FirecrawlDocument[]; data: FirecrawlDocument<undefined>[];
}; };
/** /**
@@ -197,10 +200,10 @@ export default class FirecrawlApp {
* @param params - Additional parameters for the scrape request. * @param params - Additional parameters for the scrape request.
* @returns The response from the scrape operation. * @returns The response from the scrape operation.
*/ */
async scrapeUrl( async scrapeUrl<T extends ZodSchema>(
url: string, url: string,
params?: ScrapeParams params?: ScrapeParams<T>
): Promise<ScrapeResponse | ErrorResponse> { ): Promise<ScrapeResponse<ZodInfer<T>> | ErrorResponse> {
const headers: AxiosRequestHeaders = { const headers: AxiosRequestHeaders = {
"Content-Type": "application/json", "Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`, Authorization: `Bearer ${this.apiKey}`,
@@ -528,21 +531,21 @@ export default class FirecrawlApp {
} }
interface CrawlWatcherEvents { interface CrawlWatcherEvents {
document: CustomEvent<FirecrawlDocument>, document: CustomEvent<FirecrawlDocument<undefined>>,
done: CustomEvent<{ done: CustomEvent<{
status: CrawlStatusResponse["status"]; status: CrawlStatusResponse["status"];
data: FirecrawlDocument[]; data: FirecrawlDocument<undefined>[];
}>, }>,
error: CustomEvent<{ error: CustomEvent<{
status: CrawlStatusResponse["status"], status: CrawlStatusResponse["status"],
data: FirecrawlDocument[], data: FirecrawlDocument<undefined>[],
error: string, error: string,
}>, }>,
} }
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> { export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
private ws: WebSocket; private ws: WebSocket;
public data: FirecrawlDocument[]; public data: FirecrawlDocument<undefined>[];
public status: CrawlStatusResponse["status"]; public status: CrawlStatusResponse["status"];
constructor(id: string, app: FirecrawlApp) { constructor(id: string, app: FirecrawlApp) {
@@ -563,7 +566,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
type DocumentMessage = { type DocumentMessage = {
type: "document", type: "document",
data: FirecrawlDocument, data: FirecrawlDocument<undefined>,
} }
type DoneMessage = { type: "done" } type DoneMessage = { type: "done" }