Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-15 19:22:19 +08:00)
fix crawl option conversion
This commit is contained in:
parent 2a96717f67
commit cd534326ba
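What the fix does: `StoredCrawl.crawlerOptions` is meant to hold the legacy (v0-style) option names, but the conversion sat on the wrong side of the API boundary. The v0 controllers, whose request bodies are already legacy-shaped, were running their options through `fromLegacyCrawlerOptions`, which returns a `{ crawlOptions, internalOptions }` pair rather than plain options; the v1 controller, meanwhile, stored its new-style options unconverted. This commit stores the v0 options as-is and adds a `toLegacyCrawlerOptions` helper so the v1 controller converts before persisting. A minimal sketch of the two option vocabularies, with the field lists taken from the mapping added below (optionality assumed; the real types live in the v1 types module):

// Sketch only, not the repo's actual type declarations.
type V1CrawlerOptions = {
  includePaths?: string[];
  excludePaths?: string[];
  maxDepth?: number;
  limit?: number;
  allowBackwardLinks?: boolean;
  allowExternalLinks?: boolean;
  ignoreSitemap?: boolean;
};

type LegacyCrawlerOptions = {
  includes?: string[];
  excludes?: string[];
  maxCrawledLinks?: number;
  maxDepth?: number;
  limit?: number;
  generateImgAltText?: boolean;
  allowBackwardCrawling?: boolean;
  allowExternalContentLinks?: boolean;
  ignoreSitemap?: boolean;
};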
v0 crawl controller:

@@ -15,7 +15,7 @@ import { getScrapeQueue } from "../../../src/services/queue-service";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 import * as Sentry from "@sentry/node";
 import { getJobPriority } from "../../lib/job-priority";
-import { fromLegacyCrawlerOptions, fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
+import { fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
 import { ZodError } from "zod";

 export async function crawlController(req: Request, res: Response) {
@@ -140,7 +140,7 @@ export async function crawlController(req: Request, res: Response) {

   const sc: StoredCrawl = {
     originUrl: url,
-    crawlerOptions: fromLegacyCrawlerOptions(crawlerOptions),
+    crawlerOptions,
     scrapeOptions,
     internalOptions,
     team_id,
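The removed line above was the core bug: `fromLegacyCrawlerOptions` returns a `{ crawlOptions, internalOptions }` pair (see its signature in the types hunk further down), so the stored `crawlerOptions` was a wrapper object with the actual options nested one level deep. The v0 request options are already legacy-shaped, so they can be stored directly. A self-contained sketch of the difference, using simplified stand-in types:

// Stand-in types; the real ones are CrawlerOptions/InternalOptions in ../v1/types.
type Opts = { includes?: string[]; limit?: number };

// Same return shape as fromLegacyCrawlerOptions: a pair, not plain options.
function convert(x: Opts): { crawlOptions: Opts; internalOptions: {} } {
  return { crawlOptions: x, internalOptions: {} };
}

const legacyFromRequest: Opts = { includes: ["/docs/*"], limit: 100 };

const before = { crawlerOptions: convert(legacyFromRequest) as any };
console.log(before.crawlerOptions.limit);              // undefined: buried one level down
console.log(before.crawlerOptions.crawlOptions.limit); // 100

const after = { crawlerOptions: legacyFromRequest };
console.log(after.crawlerOptions.limit);               // 100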
@@ -177,7 +177,7 @@ export async function crawlController(req: Request, res: Response) {
     data: {
       url,
       mode: "single_urls",
-      crawlerOptions: crawlerOptions,
+      crawlerOptions,
       team_id,
       plan,
       pageOptions: pageOptions,
v0 crawl preview controller:

@@ -8,7 +8,7 @@ import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "..
 import { addScrapeJob } from "../../../src/services/queue-jobs";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 import * as Sentry from "@sentry/node";
-import { fromLegacyCrawlerOptions, fromLegacyScrapeOptions } from "../v1/types";
+import { fromLegacyScrapeOptions } from "../v1/types";

 export async function crawlPreviewController(req: Request, res: Response) {
   try {
@@ -91,7 +91,7 @@ export async function crawlPreviewController(req: Request, res: Response) {

   const sc: StoredCrawl = {
     originUrl: url,
-    crawlerOptions: fromLegacyCrawlerOptions(crawlerOptions),
+    crawlerOptions,
     scrapeOptions,
     internalOptions,
     team_id,
v1 crawl controller:

@@ -5,6 +5,7 @@ import {
   crawlRequestSchema,
   CrawlResponse,
   RequestWithAuth,
+  toLegacyCrawlerOptions,
 } from "./types";
 import {
   addCrawlJob,
@@ -70,7 +71,7 @@ export async function crawlController(

   const sc: StoredCrawl = {
     originUrl: req.body.url,
-    crawlerOptions,
+    crawlerOptions: toLegacyCrawlerOptions(crawlerOptions),
     scrapeOptions,
     internalOptions: {},
     team_id: req.auth.team_id,
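On the v1 side the direction is reversed: the request carries new-style options, and the conversion to the legacy shape now happens once, at the moment the crawl is persisted into `StoredCrawl`. That way everything downstream of the store can keep consuming a single vocabulary, the legacy one, regardless of which API version created the crawl.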
v1 types:

@@ -440,6 +440,20 @@ export interface ResponseWithSentry<
   sentry?: string,
 }

+export function toLegacyCrawlerOptions(x: CrawlerOptions) {
+  return {
+    includes: x.includePaths,
+    excludes: x.excludePaths,
+    maxCrawledLinks: x.limit,
+    maxDepth: x.maxDepth,
+    limit: x.limit,
+    generateImgAltText: false,
+    allowBackwardCrawling: x.allowBackwardLinks,
+    allowExternalContentLinks: x.allowExternalLinks,
+    ignoreSitemap: x.ignoreSitemap,
+  };
+}
+
 export function fromLegacyCrawlerOptions(x: any): { crawlOptions: CrawlerOptions; internalOptions: InternalOptions } {
   return {
     crawlOptions: crawlerOptions.parse({
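A worked example of the mapping (the helper itself is real in this diff; the import path and sample values are illustrative). Two quirks are visible in the implementation: the legacy `maxCrawledLinks` is fed from the v1 `limit`, so both legacy limit fields carry the same value, and `generateImgAltText` is hard-coded to `false` because v1 has no counterpart:

// assuming: import { toLegacyCrawlerOptions } from "./types";
const legacy = toLegacyCrawlerOptions({
  includePaths: ["/blog/**"],
  excludePaths: ["/blog/drafts/**"],
  maxDepth: 3,
  limit: 500,
  allowBackwardLinks: false,
  allowExternalLinks: false,
  ignoreSitemap: true,
} as any); // cast only because this sketch omits the full CrawlerOptions type

// legacy is now:
// {
//   includes: ["/blog/**"],
//   excludes: ["/blog/drafts/**"],
//   maxCrawledLinks: 500,        // mapped from limit
//   maxDepth: 3,
//   limit: 500,
//   generateImgAltText: false,   // hard-coded, no v1 counterpart
//   allowBackwardCrawling: false,
//   allowExternalContentLinks: false,
//   ignoreSitemap: true,
// }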
@@ -493,10 +507,10 @@ export function fromLegacyScrapeOptions(pageOptions: PageOptions, extractorOptio
   }
 }

-export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, crawlOptions: CrawlerOptions, internalOptions: InternalOptions} {
+export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, internalOptions: InternalOptions} {
   const { scrapeOptions, internalOptions: i1 } = fromLegacyScrapeOptions(pageOptions, extractorOptions, timeout);
-  const { crawlOptions, internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
-  return { scrapeOptions, crawlOptions, internalOptions: Object.assign(i1, i2) };
+  const { internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
+  return { scrapeOptions, internalOptions: Object.assign(i1, i2) };
 }

 export function toLegacyDocument(document: Document, internalOptions: InternalOptions): V0Document | { url: string; } {
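Dropping `crawlOptions` from `fromLegacyCombo`'s return type matches the controller changes above: v0 callers no longer want a converted copy, they store their legacy `crawlerOptions` untouched, and the combo helper remains useful only for deriving `scrapeOptions` and the merged `internalOptions`. A hedged sketch of a call site after this change (import path, argument values, and the PageOptions field name are illustrative):

// assuming: import { fromLegacyCombo } from "../v1/types";
const { scrapeOptions, internalOptions } = fromLegacyCombo(
  { onlyMainContent: true } as any, // legacy PageOptions (field name assumed)
  undefined,                        // no ExtractorOptions
  30000,                            // timeout in ms
  { includes: ["/docs/*"] },        // legacy crawler options, consumed for internalOptions only
);
// crawlOptions is no longer part of the result; the caller keeps using
// its own legacy crawlerOptions object (see the StoredCrawl hunks above).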