diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 633bbdf1..229893d1 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -62,6 +62,7 @@ export const actionsSchema = z.array(z.union([ z.object({ type: z.literal("wait"), milliseconds: z.number().int().positive().finite(), + selector: z.string().optional(), }), z.object({ type: z.literal("click"), @@ -83,6 +84,9 @@ export const actionsSchema = z.array(z.union([ type: z.literal("scroll"), direction: z.enum(["up", "down"]), }), + z.object({ + type: z.literal("scrape"), + }), ])); export const scrapeOptions = z.object({ diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 8aa1d004..81bca571 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -12,7 +12,8 @@ export interface Progress { export type Action = { type: "wait", - milliseconds: number, + milliseconds?: number, + selector?: string, } | { type: "click", selector: string, @@ -28,7 +29,9 @@ export type Action = { } | { type: "scroll", direction: "up" | "down" -}; +} | { + type: "scrape", +} export type PageOptions = { includeMarkdown?: boolean; @@ -163,11 +166,17 @@ export class SearchResult { } } +export interface ScrapeActionContent { + url: string; + html: string; +} + export interface FireEngineResponse { html: string; screenshots?: string[]; pageStatusCode?: number; pageError?: string; + scrapeActionContent?: ScrapeActionContent[]; } diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 3bbd74eb..7332874f 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -200,11 +200,13 @@ export async function scrapWithFireEngine({ logParams.html = data.content ?? ""; logParams.response_code = data.pageStatusCode; logParams.error_message = data.pageError ?? data.error; + return { html: data.content ?? "", screenshots: data.screenshots ?? [data.screenshot] ?? [], pageStatusCode: data.pageStatusCode, pageError: data.pageError ?? data.error, + scrapeActionContent: data?.actionContent ?? [], }; } } catch (error) { diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index c7185b79..611a7b5c 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -21,6 +21,7 @@ import { extractLinks } from "./utils/utils"; import { Logger } from "../../lib/logger"; import { ScrapeEvents } from "../../lib/scrape-events"; import { clientSideError } from "../../strings"; +import { ScrapeActionContent } from "../../lib/entities"; dotenv.config(); @@ -180,7 +181,8 @@ export async function scrapSingleUrl( text: string; screenshot: string; actions?: { - screenshots: string[]; + screenshots?: string[]; + scrapes?: ScrapeActionContent[]; }; metadata: { pageStatusCode?: number; pageError?: string | null }; } = { text: "", screenshot: "", metadata: {} }; @@ -259,6 +261,7 @@ export async function scrapSingleUrl( if (pageOptions.actions) { scraperResponse.actions = { screenshots: response.screenshots ?? [], + scrapes: response.scrapeActionContent ?? [], }; } scraperResponse.metadata.pageStatusCode = response.pageStatusCode;