From dba1fb2dc818c38c8b6bed9be56ad5b700476087 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:22:56 -0300 Subject: [PATCH] Update removeUnwantedElements.ts --- .../WebScraper/utils/removeUnwantedElements.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/utils/removeUnwantedElements.ts b/apps/api/src/scraper/WebScraper/utils/removeUnwantedElements.ts index 38e4c5a0..7962a4a0 100644 --- a/apps/api/src/scraper/WebScraper/utils/removeUnwantedElements.ts +++ b/apps/api/src/scraper/WebScraper/utils/removeUnwantedElements.ts @@ -8,7 +8,11 @@ export const removeUnwantedElements = ( ) => { const soup = cheerio.load(html); - if (pageOptions.onlyIncludeTags) { + if ( + pageOptions.onlyIncludeTags && + pageOptions.onlyIncludeTags.length > 0 && + pageOptions.onlyIncludeTags[0] !== '' + ) { if (typeof pageOptions.onlyIncludeTags === "string") { pageOptions.onlyIncludeTags = [pageOptions.onlyIncludeTags]; } @@ -26,7 +30,11 @@ export const removeUnwantedElements = ( soup("script, style, iframe, noscript, meta, head").remove(); - if (pageOptions.removeTags) { + if ( + pageOptions.removeTags && + pageOptions.removeTags.length > 0 && + pageOptions.removeTags[0] !== '' + ) { if (typeof pageOptions.removeTags === "string") { pageOptions.removeTags = [pageOptions.removeTags]; }