removed v0 support

This commit is contained in:
rafaelsideguide 2024-08-29 17:40:43 -03:00
parent 5f11275fe7
commit 377e8ded34
2 changed files with 90 additions and 306 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.0.4",
"version": "1.1.4",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/cjs/index.js",
"types": "types/index.d.ts",

View File

@ -8,12 +8,10 @@ import { TypedEventTarget } from "typescript-event-target";
* Configuration interface for FirecrawlApp.
* @param apiKey - Optional API key for authentication.
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
* @param version - API version, either 'v0' or 'v1'.
*/
export interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
version?: "v0" | "v1";
}
/**
@ -56,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
[key: string]: any; // Allows for additional metadata properties not explicitly defined.
}
/**
* Metadata for a Firecrawl document on v0.
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
*/
export interface FirecrawlDocumentMetadataV0 {
// Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
pageStatusCode?: number;
pageError?: string;
[key: string]: any;
}
/**
* Document interface for Firecrawl.
* Represents a document retrieved or processed by Firecrawl.
@ -78,28 +65,7 @@ export interface FirecrawlDocument {
rawHtml?: string;
links?: string[];
screenshot?: string;
metadata: FirecrawlDocumentMetadata;
}
/**
* Document interface for Firecrawl on v0.
* Represents a document specifically for API version v0 with additional properties.
*/
export interface FirecrawlDocumentV0 {
id?: string;
url?: string;
content: string;
markdown?: string;
html?: string;
llm_extraction?: Record<string, any>;
createdAt?: Date;
updatedAt?: Date;
type?: string;
metadata: FirecrawlDocumentMetadataV0;
childrenLinks?: string[];
provider?: string;
warning?: string;
index?: number;
metadata?: FirecrawlDocumentMetadata;
}
/**
@ -107,38 +73,12 @@ export interface FirecrawlDocumentV0 {
* Defines the options and configurations available for scraping web content.
*/
export interface ScrapeParams {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];
onlyMainContent?: boolean;
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
waitFor?: number;
timeout?: number;
}
/**
* Parameters for scraping operations on v0.
* Includes page and extractor options specific to API version v0.
*/
export interface ScrapeParamsV0 {
pageOptions?: {
headers?: Record<string, string>;
includeHtml?: boolean;
includeRawHtml?: boolean;
onlyIncludeTags?: string[];
onlyMainContent?: boolean;
removeTags?: string[];
replaceAllPathsWithAbsolutePaths?: boolean;
screenshot?: boolean;
fullPageScreenshot?: boolean;
waitFor?: number;
};
extractorOptions?: {
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
extractionPrompt?: string;
extractionSchema?: Record<string, any> | z.ZodSchema | any;
};
timeout?: number;
}
@ -147,21 +87,11 @@ export interface ScrapeParamsV0 {
* Defines the structure of the response received after a scraping operation.
*/
export interface ScrapeResponse extends FirecrawlDocument {
success: boolean;
success: true;
warning?: string;
error?: string;
}
/**
* Response interface for scraping operations on v0.
* Similar to ScrapeResponse but tailored for responses from API version v0.
*/
export interface ScrapeResponseV0 {
success: boolean;
data?: FirecrawlDocumentV0;
error?: string;
}
/**
* Parameters for crawling operations.
* Includes options for both scraping and mapping during a crawl.
@ -177,37 +107,6 @@ export interface CrawlParams {
scrapeOptions?: ScrapeParams;
}
/**
* Parameters for crawling operations on v0.
* Tailored for API version v0, includes specific options for crawling.
*/
export interface CrawlParamsV0 {
crawlerOptions?: {
includes?: string[];
excludes?: string[];
generateImgAltText?: boolean;
returnOnlyUrls?: boolean;
maxDepth?: number;
mode?: "default" | "fast";
ignoreSitemap?: boolean;
limit?: number;
allowBackwardCrawling?: boolean;
allowExternalContentLinks?: boolean;
};
pageOptions?: {
headers?: Record<string, string>;
includeHtml?: boolean;
includeRawHtml?: boolean;
onlyIncludeTags?: string[];
onlyMainContent?: boolean;
removeTags?: string[];
replaceAllPathsWithAbsolutePaths?: boolean;
screenshot?: boolean;
fullPageScreenshot?: boolean;
waitFor?: number;
};
}
/**
* Response interface for crawling operations.
* Defines the structure of the response received after initiating a crawl.
@ -215,17 +114,7 @@ export interface CrawlParamsV0 {
export interface CrawlResponse {
id?: string;
url?: string;
success: boolean;
error?: string;
}
/**
* Response interface for crawling operations on v0.
* Similar to CrawlResponse but tailored for responses from API version v0.
*/
export interface CrawlResponseV0 {
jobId?: string;
success: boolean;
success: true;
error?: string;
}
@ -234,7 +123,7 @@ export interface CrawlResponseV0 {
* Provides detailed status of a crawl job including progress and results.
*/
export interface CrawlStatusResponse {
success: boolean;
success: true;
total: number;
completed: number;
creditsUsed: number;
@ -245,23 +134,6 @@ export interface CrawlStatusResponse {
error?: string;
}
/**
* Response interface for job status checks on v0.
* Tailored for API version v0, provides status and partial data of a crawl job.
*/
export interface CrawlStatusResponseV0 {
success: boolean;
status: string;
current?: number;
current_url?: string;
current_step?: string;
total?: number;
data?: FirecrawlDocumentV0[];
partial_data?: FirecrawlDocumentV0[];
error?: string;
}
/**
* Parameters for mapping operations.
* Defines options for mapping URLs during a crawl.
@ -278,57 +150,35 @@ export interface MapParams {
* Defines the structure of the response received after a mapping operation.
*/
export interface MapResponse {
success: boolean;
success: true;
links?: string[];
error?: string;
}
/**
* Parameters for searching operations on v0.
* Tailored for API version v0, includes specific options for searching content.
* Error response interface.
* Defines the structure of the response received when an error occurs.
*/
export interface SearchParamsV0 {
pageOptions?: {
onlyMainContent?: boolean;
fetchPageContent?: boolean;
includeHtml?: boolean;
includeRawHtml?: boolean;
};
searchOptions?: {
limit?: number;
};
}
/**
* Response interface for searching operations on v0.
* Defines the structure of the response received after a search operation on v0.
*/
export interface SearchResponseV0 {
success: boolean;
data?: FirecrawlDocumentV0[];
error?: string;
export interface ErrorResponse {
// Always the literal `false`, so a caller can tell this failure shape apart from a response whose `success` is `true` by checking this field.
success: false;
// Message describing what went wrong; unlike the optional `error` on the success-shaped responses, it is required here.
error: string;
}
/**
* Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content.
*/
export default class FirecrawlApp<T extends "v0" | "v1"> {
export default class FirecrawlApp {
public apiKey: string;
public apiUrl: string;
public version: T;
/**
* Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
this.apiKey = apiKey || "";
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
this.version = version as T;
if (!this.apiKey) {
throw new Error("No API key provided");
}
}
/**
@ -339,8 +189,8 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
*/
async scrapeUrl(
url: string,
params?: ScrapeParams | ScrapeParamsV0
): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
params?: ScrapeParams
): Promise<ScrapeResponse | ErrorResponse> {
const headers: AxiosRequestHeaders = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
@ -363,19 +213,19 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
}
try {
const response: AxiosResponse = await axios.post(
this.apiUrl + `/${this.version}/scrape`,
this.apiUrl + `/v1/scrape`,
jsonData,
{ headers }
);
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
return {
success: true,
warning: responseData.warning,
error: responseData.error,
...responseData.data
}) as ScrapeResponse;
};
} else {
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
}
@ -385,100 +235,47 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
} catch (error: any) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
return { success: false, error: "Internal server error." };
}
/**
* Searches for a query using the Firecrawl API.
* @param query - The query to search for.
* @param params - Additional parameters for the search request.
* @returns The response from the search operation.
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
* @param query - The search query string.
* @param params - Additional parameters for the search.
* @throws Always throws an error stating that search is not supported in v1 and advising the use of v0.
*/
async search(
query: string,
params?: SearchParamsV0
): Promise<SearchResponseV0> {
if (this.version === "v1") {
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
}
const headers: AxiosRequestHeaders = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders;
let jsonData: any = { query };
if (params) {
jsonData = { ...jsonData, ...params };
}
try {
const response: AxiosResponse = await axios.post(
this.apiUrl + "/v0/search",
jsonData,
{ headers }
);
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return responseData;
} else {
throw new Error(`Failed to search. Error: ${responseData.error}`);
}
} else {
this.handleError(response, "search");
}
} catch (error: any) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." };
params?: any
): Promise<any> {
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
}
/**
* Initiates a crawl job for a URL using the Firecrawl API.
* @param url - The URL to crawl.
* @param params - Additional parameters for the crawl request.
* @param waitUntilDone - Whether to wait for the crawl job to complete.
* @param pollInterval - Time in seconds for job status checks.
* @param idempotencyKey - Optional idempotency key for the request.
* @returns The response from the crawl operation.
*/
async crawlUrl(
url: string,
params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
waitUntilDone: boolean = true,
params?: CrawlParams,
pollInterval: number = 2,
idempotencyKey?: string
): Promise<
this['version'] extends 'v0'
? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
: CrawlResponse | CrawlStatusResponse
> {
): Promise<CrawlStatusResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { url, ...params };
try {
const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/${this.version}/crawl`,
this.apiUrl + `/v1/crawl`,
jsonData,
headers
);
if (response.status === 200) {
const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
let checkUrl: string | undefined = undefined;
if (waitUntilDone) {
if (this.version === 'v1') { checkUrl = response.data.url }
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
} else {
if (this.version === 'v0') {
return {
success: true,
jobId: id
} as CrawlResponseV0;
} else {
return {
success: true,
id: id
} as CrawlResponse;
}
}
const id: string = response.data.id;
return this.monitorJobStatus(id, headers, pollInterval);
} else {
this.handleError(response, "start crawl job");
}
@ -489,7 +286,35 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
throw new Error(error.message);
}
}
return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
return { success: false, error: "Internal server error." };
}
/**
 * Starts a crawl job for a URL and hands back the start response without polling for completion.
 * Sends a POST to `/v1/crawl` with the URL merged with any crawl parameters.
 * @param url - The URL to crawl.
 * @param params - Additional parameters for the crawl request.
 * @param idempotencyKey - Optional idempotency key, forwarded to `prepareHeaders`.
 * @returns The response body of the crawl request when the HTTP status is 200. The trailing
 * `{ success: false, ... }` value is only reached if `handleError` returns instead of throwing
 * (NOTE(review): `handleError` is not visible here — presumably it always throws; confirm).
 * @throws Error with the status code, API error text and optional details when the caught error
 * carries `response.data.error`; otherwise an Error carrying the caught error's message.
 */
async asyncCrawlUrl(
  url: string,
  params?: CrawlParams,
  idempotencyKey?: string
): Promise<CrawlResponse | ErrorResponse> {
  const requestHeaders = this.prepareHeaders(idempotencyKey);
  const payload: any = { url, ...params };

  try {
    const endpoint = `${this.apiUrl}/v1/crawl`;
    const result: AxiosResponse = await this.postRequest(endpoint, payload, requestHeaders);

    if (result.status !== 200) {
      this.handleError(result, "start crawl job");
    } else {
      return result.data;
    }
  } catch (error: any) {
    const apiError = error.response?.data;

    // No structured API error available: surface the original message unchanged.
    if (!apiError?.error) {
      throw new Error(error.message);
    }

    // Details are appended only when the API supplied them.
    const detailsSuffix = apiError.details ? ` - ${JSON.stringify(apiError.details)}` : '';
    throw new Error(`Request failed with status code ${error.response.status}. Error: ${apiError.error} ${detailsSuffix}`);
  }

  return { success: false, error: "Internal server error." };
}
/**
@ -497,7 +322,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
* @param id - The ID of the crawl operation.
* @returns The response containing the job status.
*/
async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
if (!id) {
throw new Error("No crawl ID provided");
}
@ -505,86 +330,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
const headers: AxiosRequestHeaders = this.prepareHeaders();
try {
const response: AxiosResponse = await this.getRequest(
this.version === 'v1' ?
`${this.apiUrl}/${this.version}/crawl/${id}` :
`${this.apiUrl}/${this.version}/crawl/status/${id}`,
`${this.apiUrl}/v1/crawl/${id}`,
headers
);
if (response.status === 200) {
if (this.version === 'v0') {
return ({
success: true,
status: response.data.status,
current: response.data.current,
current_url: response.data.current_url,
current_step: response.data.current_step,
total: response.data.total,
data: response.data.data,
partial_data: !response.data.data
? response.data.partial_data
: undefined,
} as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
} else {
return ({
success: true,
status: response.data.status,
total: response.data.total,
completed: response.data.completed,
creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt),
next: response.data.next,
data: response.data.data,
error: response.data.error
} as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
}
return ({
success: true,
status: response.data.status,
total: response.data.total,
completed: response.data.completed,
creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt),
next: response.data.next,
data: response.data.data,
error: response.data.error
})
} else {
this.handleError(response, "check crawl status");
}
} catch (error: any) {
throw new Error(error.message);
}
return this.version === 'v0' ?
({
success: false,
status: "unknown",
current: 0,
current_url: "",
current_step: "",
total: 0,
error: "Internal server error.",
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
({
success: false,
error: "Internal server error.",
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
return { success: false, error: "Internal server error." };
}
async crawlUrlAndWatch(
url: string,
params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
params?: CrawlParams,
idempotencyKey?: string,
) {
if (this.version === 'v0') {
throw new Error("crawlUrlAndWatch is only available on v1");
const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
if (crawl.success && crawl.id) {
const id = crawl.id;
return new CrawlWatcher(id, this);
}
const crawl = await this.crawlUrl(url, params, false, 0, idempotencyKey);
const id = this.version === 'v0' ? (crawl as CrawlResponseV0).jobId : (crawl as CrawlResponse).id;
return new CrawlWatcher(id as string, this as FirecrawlApp<"v1">);
throw new Error("Crawl job failed to start");
}
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
if (this.version == 'v0') {
throw new Error("Map is not supported in v0");
}
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
const headers = this.prepareHeaders();
let jsonData: { url: string } & MapParams = { url, ...params };
try {
const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/${this.version}/map`,
this.apiUrl + `/v1/map`,
jsonData,
headers
);
@ -596,7 +387,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
} catch (error: any) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." } as MapResponse;
return { success: false, error: "Internal server error." };
}
/**
@ -651,25 +442,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
async monitorJobStatus(
id: string,
headers: AxiosRequestHeaders,
checkInterval: number,
checkUrl?: string
): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
let apiUrl: string = '';
checkInterval: number
): Promise<CrawlStatusResponse> {
while (true) {
if (this.version === 'v1') {
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
} else if (this.version === 'v0') {
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
}
const statusResponse: AxiosResponse = await this.getRequest(
apiUrl,
`${this.apiUrl}/v1/crawl/${id}`,
headers
);
if (statusResponse.status === 200) {
const statusData = statusResponse.data;
if (statusData.status === "completed") {
if ("data" in statusData) {
return this.version === 'v0' ? statusData.data : statusData;
return statusData;
} else {
throw new Error("Crawl job completed but no data was returned");
}
@ -729,7 +513,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
public data: FirecrawlDocument[];
public status: CrawlStatusResponse["status"];
constructor(id: string, app: FirecrawlApp<"v1">) {
constructor(id: string, app: FirecrawlApp) {
super();
this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
this.status = "scraping";