fix(scrapeURL): includeTags/excludeTags

2024-11-15 19:22:19 +08:00 · 2024-11-07 21:10:27 +01:00 · 2024-11-07 21:10:27 +01:00 · 552d55c8fc
commit 552d55c8fc
parent 8d467c8ca7
1 changed files with 2 additions and 2 deletions
--- a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts
@@ -56,7 +56,7 @@ export const removeUnwantedElements = (
 ) => {
  const soup = load(html);

-  if (scrapeOptions.includeTags && scrapeOptions.includeTags.length > 0) {
+  if (scrapeOptions.includeTags && scrapeOptions.includeTags.filter(x => x.trim().length !== 0).length > 0) {
    // Create a new root element to hold the tags to keep
    const newRoot = load("<div></div>")("div");
    scrapeOptions.includeTags.forEach((tag) => {
@@ -69,7 +69,7 @@ export const removeUnwantedElements = (

  soup("script, style, noscript, meta, head").remove();

-  if (scrapeOptions.excludeTags && scrapeOptions.excludeTags.length > 0) {
+  if (scrapeOptions.excludeTags && scrapeOptions.excludeTags.filter(x => x.trim().length !== 0).length > 0) {
        scrapeOptions.excludeTags.forEach((tag) => {
            let elementsToRemove: Cheerio<AnyNode>;
            if (tag.startsWith("*") && tag.endsWith("*")) {