mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 03:32:22 +08:00
Nick:
This commit is contained in:
parent
bfc7f5882e
commit
e098e88ea7
|
@ -129,3 +129,11 @@ export interface FireEngineResponse {
|
|||
pageError?: string;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Optional settings a caller can pass to `scrapWithFireEngine` through its
 * `fireEngineOptions` parameter. Every field is optional.
 *
 * NOTE(review): `scrapWithFireEngine` spreads this object next to
 * `screenshot`, `headers` and `pageOptions`; presumably that object is the
 * Fire Engine request body, so these keys go out unchanged — confirm.
 */
export interface FireEngineOptions{
|
||||
// NOTE(review): presumably asks for a mobile proxy; the sitemap fetch passes `true` — confirm semantics.
mobileProxy?: boolean;
|
||||
// NOTE(review): presumably the HTTP method; the sitemap fetch passes "get" — confirm accepted values.
method?: string;
|
||||
// NOTE(review): presumably selects the Fire Engine backend; the sitemap fetch passes "request" — confirm accepted values.
engine?: string;
|
||||
// NOTE(review): no usage visible here; presumably disables loading of media resources — confirm.
blockMedia?: boolean;
|
||||
// NOTE(review): no usage visible here; presumably enables ad blocking — confirm.
blockAds?: boolean;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,6 @@ import { scrapSingleUrl } from "./single_url";
|
|||
import robotsParser from "robots-parser";
|
||||
import { getURLDepth } from "./utils/maxDepthUtils";
|
||||
import { axiosTimeout } from "../../../src/lib/timeout";
|
||||
import { scrapWithFireEngine } from "./scrapers/fireEngine";
|
||||
|
||||
export class WebCrawler {
|
||||
private initialUrl: string;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import axios from "axios";
|
||||
import { FireEngineResponse } from "../../../lib/entities";
|
||||
import { FireEngineOptions, FireEngineResponse } from "../../../lib/entities";
|
||||
import { logScrape } from "../../../services/logging/scrape_log";
|
||||
import { generateRequestParams } from "../single_url";
|
||||
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
|
||||
|
@ -20,6 +20,7 @@ export async function scrapWithFireEngine({
|
|||
waitFor = 0,
|
||||
screenshot = false,
|
||||
pageOptions = { parsePDF: true },
|
||||
fireEngineOptions = {},
|
||||
headers,
|
||||
options,
|
||||
}: {
|
||||
|
@ -27,6 +28,7 @@ export async function scrapWithFireEngine({
|
|||
waitFor?: number;
|
||||
screenshot?: boolean;
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
|
||||
fireEngineOptions?: FireEngineOptions;
|
||||
headers?: Record<string, string>;
|
||||
options?: any;
|
||||
}): Promise<FireEngineResponse> {
|
||||
|
@ -57,6 +59,7 @@ export async function scrapWithFireEngine({
|
|||
screenshot: screenshotParam,
|
||||
headers: headers,
|
||||
pageOptions: pageOptions,
|
||||
...fireEngineOptions,
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
|
|
|
@ -21,7 +21,7 @@ export async function getLinksFromSitemap(
|
|||
const response = await axios.get(sitemapUrl, { timeout: axiosTimeout });
|
||||
content = response.data;
|
||||
} else if (mode === 'fire-engine') {
|
||||
const response = await scrapWithFireEngine({ url: sitemapUrl });
|
||||
const response = await scrapWithFireEngine({ url: sitemapUrl, fireEngineOptions: { engine: "request", method: "get", mobileProxy: true } });
|
||||
content = response.html;
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user