diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 8a01e502..dd0faff7 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -143,7 +143,6 @@ export const scrapeOptions = z.object({ }).optional(), skipTlsVerification: z.boolean().default(false), removeBase64Images: z.boolean().default(true), - deduplicateSimilarURLs: z.boolean().default(true), }).strict(strictMessage) @@ -200,6 +199,7 @@ const crawlerOptions = z.object({ allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME??? allowExternalLinks: z.boolean().default(false), ignoreSitemap: z.boolean().default(true), + deduplicateSimilarURLs: z.boolean().default(true), }).strict(strictMessage); // export type CrawlerOptions = { diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts index f1c1d956..3d918263 100644 --- a/apps/api/src/lib/crawl-redis.ts +++ b/apps/api/src/lib/crawl-redis.ts @@ -107,7 +107,7 @@ export async function lockURL(id: string, sc: StoredCrawl, url: string): Promise url = normalizeURL(url); let res: boolean; - if (!sc.scrapeOptions.deduplicateSimilarURLs) { + if (!sc.crawlerOptions.deduplicateSimilarURLs) { res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0 } else { const urlO = new URL(url); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 5c83170b..4f68f303 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -88,7 +88,6 @@ export interface CrawlScrapeOptions { }; skipTlsVerification?: boolean; removeBase64Images?: boolean; - deduplicateSimilarURLs?: boolean; } export type Action = { @@ -151,6 +150,7 @@ export interface CrawlParams { ignoreSitemap?: boolean; scrapeOptions?: CrawlScrapeOptions; webhook?: string; + deduplicateSimilarURLs?: boolean; } /**