fixed for testing the crawl algorithm only

This commit is contained in:
rafaelsideguide 2024-05-15 17:16:03 -03:00
parent 95ffaa2236
commit da8d94105d

View File

@ -40,10 +40,10 @@ describe("Crawling Checkup (E2E)", () => {
.post("/v0/crawl") .post("/v0/crawl")
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.send({ url: websiteData.website, pageOptions: { onlyMainContent: true }, crawlerOptions: { limit: 100 }}); .send({ url: websiteData.website, pageOptions: { onlyMainContent: true }, crawlerOptions: { limit: 100, returnOnlyUrls: true }});
const jobId = crawlResponse.body.jobId; const jobId = crawlResponse.body.jobId;
let completedResponse; let completedResponse: any;
let isFinished = false; let isFinished = false;
while (!isFinished) { while (!isFinished) {
@ -58,25 +58,47 @@ describe("Crawling Checkup (E2E)", () => {
} }
} }
console.log('-------------------')
console.log(websiteData.website);
if(!completedResponse) { if(!completedResponse) {
// fail the test // fail the test
console.log('No response'); console.log('No response');
continue; continue;
} }
if (!completedResponse.body.data) { if (!completedResponse.body || completedResponse.body.status !== "completed") {
console.log(completedResponse.body.partial_data.length); errorLog.push({
const urls = completedResponse.body.partial_data.map((page: any) => page.metadata?.sourceURL); website: websiteData.website,
console.log(urls); prompt: 'CRAWL',
} else { expected_output: 'SUCCESS',
console.log(completedResponse.body.data.length); actual_output: 'FAILURE',
const urls = completedResponse.body.data.map((page: any) => page.metadata?.sourceURL); error: `Crawl job did not complete successfully.`
console.log(urls); });
return null;
} }
console.log('-------------------') // check how many webpages were crawled successfully
// compares with expected_num_of_pages
if (completedResponse.body.data.length < websiteData.expected_min_num_of_pages) {
errorLog.push({
website: websiteData.website,
prompt: 'CRAWL',
expected_output: `SUCCESS: ${websiteData.expected_min_num_of_pages}`,
actual_output: `FAILURE: ${completedResponse.body.data.length}`,
error: `Expected at least ${websiteData.expected_min_num_of_pages} webpages, but got ${completedResponse.body.data.length}`
});
return null;
}
// checks if crawled pages contain expected_crawled_pages
if (websiteData.expected_crawled_pages.some(page => !completedResponse.body.data.some((d: { url: string }) => d.url === page))) {
errorLog.push({
website: websiteData.website,
prompt: 'CRAWL',
expected_output: `SUCCESS: ${websiteData.expected_crawled_pages}`,
actual_output: `FAILURE: ${completedResponse.body.data}`,
error: `Expected crawled pages to contain ${websiteData.expected_crawled_pages}, but got ${completedResponse.body.data}`
});
return null;
}
passedTests++; passedTests++;
} catch (error) { } catch (error) {