From f98be7d94e55a54a537cc060f63570da177c1b11 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 20 Aug 2024 16:53:01 -0300 Subject: [PATCH] Update fireEngine.ts --- .../scraper/WebScraper/scrapers/fireEngine.ts | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 10be4a1d..574f1944 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -136,27 +136,29 @@ export async function scrapWithFireEngine({ return { html: "", screenshot: "", pageStatusCode: null, pageError: "" }; } - if (response.status !== 200) { + if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) { Logger.debug( - `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` + `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}` ); - logParams.error_message = response.data?.pageError; - logParams.response_code = response.data?.pageStatusCode; + logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error; + logParams.response_code = checkStatusResponse.data?.pageStatusCode; - if(response.data && response.data?.pageStatusCode !== 200) { + if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) { Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); } + const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined; + return { html: "", screenshot: "", - pageStatusCode: response.data?.pageStatusCode, - pageError: response.data?.pageError, + pageStatusCode, + pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error, }; } - const contentType = response.headers["content-type"]; + const contentType = checkStatusResponse.headers["content-type"]; if (contentType && contentType.includes("application/pdf")) { const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( url, @@ -167,18 +169,18 @@ export async function scrapWithFireEngine({ logParams.error_message = pageError; return { html: content, screenshot: "", pageStatusCode, pageError }; } else { - const data = response.data; + const data = checkStatusResponse.data; logParams.success = (data.pageStatusCode >= 200 && data.pageStatusCode < 300) || data.pageStatusCode === 404; logParams.html = data.content ?? ""; logParams.response_code = data.pageStatusCode; - logParams.error_message = data.pageError; + logParams.error_message = data.pageError ?? data.error; return { html: data.content ?? "", screenshot: data.screenshot ?? "", pageStatusCode: data.pageStatusCode, - pageError: data.pageError, + pageError: data.pageError ?? data.error, }; } } catch (error) {