Merge pull request #677 from mendableai/fix/js-sdk-full-page-screenshot

[Bug] Fixed screenshot typo and added test for fullpage screenshot
This commit is contained in:
Nicolas 2024-09-17 12:31:57 -04:00 committed by GitHub
commit 6b920aa87f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 46 additions and 9 deletions

View File

@ -1,12 +1,12 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.3.0",
"version": "1.4.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@mendable/firecrawl-js",
"version": "1.3.0",
"version": "1.4.3",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.4.2",
"version": "1.4.3",
"description": "JavaScript SDK for Firecrawl API",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@ -28,14 +28,22 @@ describe('FirecrawlApp E2E Tests', () => {
// E2E: scrapes https://roastmywebsite.ai with the preview-token API key and
// checks that the returned markdown contains "_Roast_".
test.concurrent('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
// NOTE(review): `response` is declared twice below — this looks like the
// removed (cast to ScrapeResponse) and added (uncast) sides of the diff hunk
// shown together; confirm only the uncast call remains in the committed file.
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse;
const response = await app.scrapeUrl('https://roastmywebsite.ai');
// Fail the test with the response's `error` field when the scrape was not successful.
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response?.markdown).toContain("_Roast_");
}, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse;
const response = await app.scrapeUrl('https://roastmywebsite.ai');
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response).not.toHaveProperty('content'); // v0
expect(response).not.toHaveProperty('html');
@ -58,7 +66,11 @@ describe('FirecrawlApp E2E Tests', () => {
onlyMainContent: true,
timeout: 30000,
waitFor: 1000
}) as ScrapeResponse;
});
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response).not.toHaveProperty('content'); // v0
expect(response.markdown).toContain("_Roast_");
@ -86,6 +98,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.metadata).not.toHaveProperty("pageStatusCode");
expect(response.metadata).toHaveProperty("statusCode");
expect(response.metadata).not.toHaveProperty("pageError");
if (response.metadata !== undefined) {
expect(response.metadata.error).toBeUndefined();
expect(response.metadata.title).toBe("Roast My Website");
@ -103,16 +116,40 @@ describe('FirecrawlApp E2E Tests', () => {
}
}, 30000); // 30 seconds timeout
/**
 * E2E: requests only the 'screenshot@fullPage' format for
 * https://roastmywebsite.ai and checks that the response carries a
 * screenshot value containing "https://".
 */
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
  const client = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
  const targetUrl = 'https://roastmywebsite.ai';

  const result = await client.scrapeUrl(targetUrl, { formats: ['screenshot@fullPage'] });

  // Abort with the response's own error text when the scrape did not succeed.
  if (!result.success) {
    throw new Error(result.error);
  }

  expect(result).not.toBeNull();

  // The screenshot field must be present, non-null, and contain "https://".
  const { screenshot } = result;
  expect(screenshot).not.toBeUndefined();
  expect(screenshot).not.toBeNull();
  expect(screenshot).toContain("https://");
}, 30000); // 30 seconds timeout
// E2E: scrapes a PDF URL (explicit .pdf extension) and checks that the
// returned markdown contains a known sentence from the document.
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
// NOTE(review): `response` is declared twice below — this looks like the
// removed (cast to ScrapeResponse) and added (uncast) sides of the diff hunk
// shown together; confirm only the uncast call remains in the committed file.
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse;
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
// Fail the test with the response's `error` field when the scrape was not successful.
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
// E2E: scrapes the same PDF through a URL that has no ".pdf" suffix and
// checks that the returned markdown contains a known sentence from the document.
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
// NOTE(review): `response` is declared twice below — this looks like the
// removed (cast to ScrapeResponse) and added (uncast) sides of the diff hunk
// shown together; confirm only the uncast call remains in the committed file.
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse;
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
// Fail the test with the response's `error` field when the scrape was not successful.
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout

View File

@ -74,7 +74,7 @@ export interface FirecrawlDocument<T> {
* Defines the options and configurations available for scraping web content.
*/
export interface CrawlScrapeOptions {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];