Merge pull request #677 from mendableai/fix/js-sdk-full-page-screenshot

[Bug] Fixed screenshot typo and added test for fullpage screenshot
This commit is contained in:
Nicolas 2024-09-17 12:31:57 -04:00 committed by GitHub
commit 6b920aa87f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 46 additions and 9 deletions

View File

@@ -1,12 +1,12 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.3.0", "version": "1.4.3",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.3.0", "version": "1.4.3",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.4.2", "version": "1.4.3",
"description": "JavaScript SDK for Firecrawl API", "description": "JavaScript SDK for Firecrawl API",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",

View File

@@ -28,14 +28,22 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should return successful response with valid preview token', async () => { test.concurrent('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; const response = await app.scrapeUrl('https://roastmywebsite.ai');
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response?.markdown).toContain("_Roast_"); expect(response?.markdown).toContain("_Roast_");
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape', async () => { test.concurrent('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; const response = await app.scrapeUrl('https://roastmywebsite.ai');
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response).not.toHaveProperty('content'); // v0 expect(response).not.toHaveProperty('content'); // v0
expect(response).not.toHaveProperty('html'); expect(response).not.toHaveProperty('html');
@@ -58,7 +66,11 @@ describe('FirecrawlApp E2E Tests', () => {
onlyMainContent: true, onlyMainContent: true,
timeout: 30000, timeout: 30000,
waitFor: 1000 waitFor: 1000
}) as ScrapeResponse; });
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response).not.toHaveProperty('content'); // v0 expect(response).not.toHaveProperty('content'); // v0
expect(response.markdown).toContain("_Roast_"); expect(response.markdown).toContain("_Roast_");
@@ -86,6 +98,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.metadata).not.toHaveProperty("pageStatusCode"); expect(response.metadata).not.toHaveProperty("pageStatusCode");
expect(response.metadata).toHaveProperty("statusCode"); expect(response.metadata).toHaveProperty("statusCode");
expect(response.metadata).not.toHaveProperty("pageError"); expect(response.metadata).not.toHaveProperty("pageError");
if (response.metadata !== undefined) { if (response.metadata !== undefined) {
expect(response.metadata.error).toBeUndefined(); expect(response.metadata.error).toBeUndefined();
expect(response.metadata.title).toBe("Roast My Website"); expect(response.metadata.title).toBe("Roast My Website");
@@ -103,16 +116,40 @@ describe('FirecrawlApp E2E Tests', () => {
} }
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl(
'https://roastmywebsite.ai', {
formats: ['screenshot@fullPage'],
});
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull();
expect(response.screenshot).not.toBeUndefined();
expect(response.screenshot).not.toBeNull();
expect(response.screenshot).toContain("https://");
}, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape with PDF file', async () => { test.concurrent('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse; const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => { test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse; const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
if (!response.success) {
throw new Error(response.error);
}
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout

View File

@@ -74,7 +74,7 @@ export interface FirecrawlDocument<T> {
* Defines the options and configurations available for scraping web content. * Defines the options and configurations available for scraping web content.
*/ */
export interface CrawlScrapeOptions { export interface CrawlScrapeOptions {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[]; formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
headers?: Record<string, string>; headers?: Record<string, string>;
includeTags?: string[]; includeTags?: string[];
excludeTags?: string[]; excludeTags?: string[];