Merge pull request #323 from mendableai/tests/crawl-limit-unit-tests

[Tests] Added crawl limit unit test
This commit is contained in:
Nicolas 2024-07-01 17:56:04 -03:00 committed by GitHub
commit 6d0c7a9ccd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -188,5 +188,38 @@ describe('WebCrawler', () => {
// Check that the backward link is included if allowBackwardCrawling is true
expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
});
it('should respect the limit parameter by not returning more links than specified', async () => {
const initialUrl = 'http://example.com';
const limit = 2; // Set a limit for the number of links
crawler = new WebCrawler({
initialUrl: initialUrl,
includes: [],
excludes: [],
limit: limit, // Apply the limit
maxCrawledDepth: 10
});
// Mock sitemap fetching function to return more links than the limit
crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
initialUrl,
initialUrl + '/page1',
initialUrl + '/page2',
initialUrl + '/page3'
]);
const filteredLinks = crawler['filterLinks'](
[initialUrl, initialUrl + '/page1', initialUrl + '/page2', initialUrl + '/page3'],
limit,
10
);
expect(filteredLinks.length).toBe(limit); // Check if the number of results respects the limit
expect(filteredLinks).toEqual([
initialUrl,
initialUrl + '/page1'
]);
});
});