Update apps/api/src/scraper/WebScraper/crawler.ts

No need for a regex: the URL constructor resolves relative links against the base URL.

Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in:
Rafael Miller 2024-07-24 08:33:00 -03:00 committed by GitHub
parent a684bd3c5d
commit 5e728c1a4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -64,15 +64,7 @@ export class WebCrawler {
private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks
.filter((link) => {
// if link is not a complete url, add the base url
link = link.trim();
const isCompleteUrl = new RegExp('^(?:[a-z+]+:)?//', 'i');
if (!isCompleteUrl.test(link)){
link = this.baseUrl + link;
}
const url = new URL(link);
const url = new URL(link.trim(), this.baseUrl);
const path = url.pathname;
const depth = getURLDepth(url.toString());