fix(crawl): further fixing

This commit is contained in:
Móricz Gergő 2024-11-12 16:39:08 +01:00
parent f2eb3a2d9a
commit 1ddace3a0f

View File

@@ -347,7 +347,7 @@ async function processJob(job: Job & { id: string }, token: string) {
const crawler = crawlToCrawler(job.data.crawl_id, sc, doc.metadata?.url ?? doc.metadata?.sourceURL ?? undefined); const crawler = crawlToCrawler(job.data.crawl_id, sc, doc.metadata?.url ?? doc.metadata?.sourceURL ?? undefined);
const links = crawler.filterLinks( const links = crawler.filterLinks(
crawler.extractLinksFromHTML(rawHtml ?? "", sc.originUrl as string), crawler.extractLinksFromHTML(rawHtml ?? "", doc.metadata?.url ?? doc.metadata?.sourceURL ?? sc.originUrl as string),
Infinity, Infinity,
sc.crawlerOptions?.maxDepth ?? 10 sc.crawlerOptions?.maxDepth ?? 10
); );