Merge pull request #779 from mendableai/fix/check-files
Some checks failed
Deploy Images to GHCR / push-app-image (push) Waiting to run
Check Queues / clean-jobs (push) Has been cancelled

[BUG] added check files on crawl
This commit is contained in:
Nicolas 2024-10-14 16:02:01 -03:00 committed by GitHub
commit ca84491ccb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -136,6 +136,10 @@ export class WebCrawler {
return false;
}
if (this.isFile(link)) {
return false;
}
return true;
})
.slice(0, limit);
@@ -478,7 +482,14 @@ export class WebCrawler {
".webp",
".inc"
];
return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
try {
const urlWithoutQuery = url.split('?')[0].toLowerCase();
return fileExtensions.some((ext) => urlWithoutQuery.endsWith(ext));
} catch (error) {
Logger.error(`Error processing URL in isFile: ${error}`);
return false;
}
}
private isSocialMediaOrEmail(url: string): boolean {