fix: resolve image URLs to absolute form

This commit is contained in:
Yanlong Wang 2024-05-16 00:39:24 +08:00
parent 6f65083f8d
commit 4556954d17
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 18 additions and 3 deletions

View File

@ -190,7 +190,12 @@ export class CrawlerHost extends RPCHost {
}
}
const src = linkPreferredSrc;
let src;
try {
src = new URL(linkPreferredSrc, nominalUrl).toString();
} catch (_err) {
void 0;
}
const alt = cleanAttribute(node.getAttribute('alt'));
if (!src) {
return '';

View File

@ -206,7 +206,7 @@ function briefImgs(elem) {
}
return {
src: linkPreferredSrc,
src: new URL(linkPreferredSrc, document.location.href).toString(),
loaded: x.complete,
width: x.width,
height: x.height,
@ -437,7 +437,17 @@ document.addEventListener('load', handlePageLoad);
const textContent = elem.textContent;
const cleanedText = textContent?.split('\n').map((x: any) => x.trimEnd()).join('\n').replace(/\n{3,}/g, '\n\n');
const imageTags = Array.from(elem.querySelectorAll('img[src],img[data-src]')).map((x: any) => [x.getAttribute('src'), x.getAttribute('data-src')]).flat().filter(Boolean);
const imageTags = Array.from(elem.querySelectorAll('img[src],img[data-src]'))
.map((x: any) => [x.getAttribute('src'), x.getAttribute('data-src')])
.flat()
.map((x) => {
try {
return new URL(x, snapshot.href).toString();
} catch (err) {
return null;
}
})
.filter(Boolean);
const imageSet = new Set(imageTags);