Merge pull request #685 from devflowinc/main
Some checks are pending
Fly Deploy / Pre-deploy checks (push) Waiting to run
Fly Deploy / Deploy app (push) Blocked by required conditions

bugfix: using onlyIncludeTags and removeTags together
This commit is contained in:
Nicolas 2024-09-30 17:18:30 -03:00 committed by GitHub
commit ff4b7a835b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 27 additions and 9 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,30 +1,31 @@
import cheerio, { AnyNode, Cheerio } from "cheerio";
import { AnyNode, Cheerio, load } from "cheerio";
import { PageOptions } from "../../../lib/entities";
import { excludeNonMainTags } from "./excludeTags";
export const removeUnwantedElements = (
html: string,
pageOptions: PageOptions
pageOptions: PageOptions,
) => {
const soup = cheerio.load(html);
let soup = load(html);
if (
pageOptions.onlyIncludeTags &&
pageOptions.onlyIncludeTags.length > 0 &&
pageOptions.onlyIncludeTags[0] !== ''
pageOptions.onlyIncludeTags[0] !== ""
) {
if (typeof pageOptions.onlyIncludeTags === "string") {
pageOptions.onlyIncludeTags = [pageOptions.onlyIncludeTags];
}
if (pageOptions.onlyIncludeTags.length !== 0) {
// Create a new root element to hold the tags to keep
const newRoot = cheerio.load("<div></div>")("div");
const newRoot = load("<div></div>")("div");
pageOptions.onlyIncludeTags.forEach((tag) => {
soup(tag).each((index, element) => {
newRoot.append(soup(element).clone());
});
});
return newRoot.html();
soup = load(newRoot.html());
}
}
@@ -33,7 +34,7 @@ export const removeUnwantedElements = (
if (
pageOptions.removeTags &&
pageOptions.removeTags.length > 0 &&
pageOptions.removeTags[0] !== ''
pageOptions.removeTags[0] !== ""
) {
if (typeof pageOptions.removeTags === "string") {
pageOptions.removeTags = [pageOptions.removeTags];
@@ -51,11 +52,11 @@ export const removeUnwantedElements = (
const attributes = element.attribs;
const tagNameMatches = regexPattern.test(element.name);
const attributesMatch = Object.keys(attributes).some((attr) =>
regexPattern.test(`${attr}="${attributes[attr]}"`)
regexPattern.test(`${attr}="${attributes[attr]}"`),
);
if (tag.startsWith("*.")) {
classMatch = Object.keys(attributes).some((attr) =>
regexPattern.test(`class="${attributes[attr]}"`)
regexPattern.test(`class="${attributes[attr]}"`),
);
}
return tagNameMatches || attributesMatch || classMatch;