Merge pull request #561 from mendableai/bug/dealing-with-dns-error

[Bug] Added a way to deal with DNS resolution errors (hostnames that do not resolve to an IP)
This commit is contained in:
Nicolas 2024-08-20 16:59:09 -03:00 committed by GitHub
commit 2030ec6031
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -136,27 +136,29 @@ export async function scrapWithFireEngine({
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
} }
if (response.status !== 200) { if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) {
Logger.debug( Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}`
); );
logParams.error_message = response.data?.pageError; logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error;
logParams.response_code = response.data?.pageStatusCode; logParams.response_code = checkStatusResponse.data?.pageStatusCode;
if(response.data && response.data?.pageStatusCode !== 200) { if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
} }
const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
return { return {
html: "", html: "",
screenshot: "", screenshot: "",
pageStatusCode: response.data?.pageStatusCode, pageStatusCode,
pageError: response.data?.pageError, pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error,
}; };
} }
const contentType = response.headers["content-type"]; const contentType = checkStatusResponse.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) { if (contentType && contentType.includes("application/pdf")) {
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
url, url,
@@ -167,18 +169,18 @@ export async function scrapWithFireEngine({
logParams.error_message = pageError; logParams.error_message = pageError;
return { html: content, screenshot: "", pageStatusCode, pageError }; return { html: content, screenshot: "", pageStatusCode, pageError };
} else { } else {
const data = response.data; const data = checkStatusResponse.data;
logParams.success = logParams.success =
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) || (data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
data.pageStatusCode === 404; data.pageStatusCode === 404;
logParams.html = data.content ?? ""; logParams.html = data.content ?? "";
logParams.response_code = data.pageStatusCode; logParams.response_code = data.pageStatusCode;
logParams.error_message = data.pageError; logParams.error_message = data.pageError ?? data.error;
return { return {
html: data.content ?? "", html: data.content ?? "",
screenshot: data.screenshot ?? "", screenshot: data.screenshot ?? "",
pageStatusCode: data.pageStatusCode, pageStatusCode: data.pageStatusCode,
pageError: data.pageError, pageError: data.pageError ?? data.error,
}; };
} }
} catch (error) { } catch (error) {