mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-15 19:22:19 +08:00
Merge pull request #858 from mendableai/nsc/new-actions
Some checks are pending
Deploy Images to GHCR / push-app-image (push) Waiting to run
Some checks are pending
Deploy Images to GHCR / push-app-image (push) Waiting to run
Support for the 2 new actions
This commit is contained in:
commit
45debc9977
|
@ -61,8 +61,14 @@ export type ExtractOptions = z.infer<typeof extractOptions>;
|
|||
export const actionsSchema = z.array(z.union([
|
||||
z.object({
|
||||
type: z.literal("wait"),
|
||||
milliseconds: z.number().int().positive().finite(),
|
||||
}),
|
||||
milliseconds: z.number().int().positive().finite().optional(),
|
||||
selector: z.string().optional(),
|
||||
}).refine(
|
||||
(data) => (data.milliseconds !== undefined || data.selector !== undefined) && !(data.milliseconds !== undefined && data.selector !== undefined),
|
||||
{
|
||||
message: "Either 'milliseconds' or 'selector' must be provided, but not both.",
|
||||
}
|
||||
),
|
||||
z.object({
|
||||
type: z.literal("click"),
|
||||
selector: z.string(),
|
||||
|
@ -83,6 +89,9 @@ export const actionsSchema = z.array(z.union([
|
|||
type: z.literal("scroll"),
|
||||
direction: z.enum(["up", "down"]),
|
||||
}),
|
||||
z.object({
|
||||
type: z.literal("scrape"),
|
||||
}),
|
||||
]));
|
||||
|
||||
export const scrapeOptions = z.object({
|
||||
|
|
|
@ -12,7 +12,8 @@ export interface Progress {
|
|||
|
||||
export type Action = {
|
||||
type: "wait",
|
||||
milliseconds: number,
|
||||
milliseconds?: number,
|
||||
selector?: string,
|
||||
} | {
|
||||
type: "click",
|
||||
selector: string,
|
||||
|
@ -28,7 +29,9 @@ export type Action = {
|
|||
} | {
|
||||
type: "scroll",
|
||||
direction: "up" | "down"
|
||||
};
|
||||
} | {
|
||||
type: "scrape",
|
||||
}
|
||||
|
||||
export type PageOptions = {
|
||||
includeMarkdown?: boolean;
|
||||
|
@ -163,11 +166,17 @@ export class SearchResult {
|
|||
}
|
||||
}
|
||||
|
||||
export interface ScrapeActionContent {
|
||||
url: string;
|
||||
html: string;
|
||||
}
|
||||
|
||||
export interface FireEngineResponse {
|
||||
html: string;
|
||||
screenshots?: string[];
|
||||
pageStatusCode?: number;
|
||||
pageError?: string;
|
||||
scrapeActionContent?: ScrapeActionContent[];
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -200,11 +200,13 @@ export async function scrapWithFireEngine({
|
|||
logParams.html = data.content ?? "";
|
||||
logParams.response_code = data.pageStatusCode;
|
||||
logParams.error_message = data.pageError ?? data.error;
|
||||
|
||||
return {
|
||||
html: data.content ?? "",
|
||||
screenshots: data.screenshots ?? [data.screenshot] ?? [],
|
||||
pageStatusCode: data.pageStatusCode,
|
||||
pageError: data.pageError ?? data.error,
|
||||
scrapeActionContent: data?.actionContent ?? [],
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
|
@ -21,6 +21,7 @@ import { extractLinks } from "./utils/utils";
|
|||
import { Logger } from "../../lib/logger";
|
||||
import { ScrapeEvents } from "../../lib/scrape-events";
|
||||
import { clientSideError } from "../../strings";
|
||||
import { ScrapeActionContent } from "../../lib/entities";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
|
@ -180,7 +181,8 @@ export async function scrapSingleUrl(
|
|||
text: string;
|
||||
screenshot: string;
|
||||
actions?: {
|
||||
screenshots: string[];
|
||||
screenshots?: string[];
|
||||
scrapes?: ScrapeActionContent[];
|
||||
};
|
||||
metadata: { pageStatusCode?: number; pageError?: string | null };
|
||||
} = { text: "", screenshot: "", metadata: {} };
|
||||
|
@ -259,6 +261,7 @@ export async function scrapSingleUrl(
|
|||
if (pageOptions.actions) {
|
||||
scraperResponse.actions = {
|
||||
screenshots: response.screenshots ?? [],
|
||||
scrapes: response.scrapeActionContent ?? [],
|
||||
};
|
||||
}
|
||||
scraperResponse.metadata.pageStatusCode = response.pageStatusCode;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.7.2",
|
||||
"version": "1.7.3",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
|
|
|
@ -90,7 +90,8 @@ export interface CrawlScrapeOptions {
|
|||
|
||||
export type Action = {
|
||||
type: "wait",
|
||||
milliseconds: number,
|
||||
milliseconds?: number,
|
||||
selector?: string,
|
||||
} | {
|
||||
type: "click",
|
||||
selector: string,
|
||||
|
@ -106,6 +107,8 @@ export type Action = {
|
|||
} | {
|
||||
type: "scroll",
|
||||
direction: "up" | "down",
|
||||
} | {
|
||||
type: "scrape",
|
||||
};
|
||||
|
||||
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
||||
|
|
Loading…
Reference in New Issue
Block a user