fix: check for file links on crawl (skip them when filtering links; ignore query strings in isFile)

This commit is contained in:
rafaelsideguide 2024-10-14 15:44:45 -03:00
parent e40036caf7
commit 180801225b

View File

@ -136,6 +136,10 @@ export class WebCrawler {
return false;
}
if (this.isFile(link)) {
return false;
}
return true;
})
.slice(0, limit);
@ -478,7 +482,14 @@ export class WebCrawler {
".webp",
".inc"
];
return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
try {
const urlWithoutQuery = url.split('?')[0].toLowerCase();
return fileExtensions.some((ext) => urlWithoutQuery.endsWith(ext));
} catch (error) {
Logger.error(`Error processing URL in isFile: ${error}`);
return false;
}
}
private isSocialMediaOrEmail(url: string): boolean {