From 00941d94a40ade640c6dfacbc567af8d4f04d426 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 16 Apr 2024 18:03:48 -0300 Subject: [PATCH 01/96] Added anthropic vision to getImageDescription function --- apps/api/.env.local | 1 + apps/api/package.json | 1 + apps/api/pnpm-lock.yaml | 18 ++++ apps/api/src/scraper/WebScraper/index.ts | 6 +- .../src/scraper/WebScraper/utils/gptVision.ts | 41 -------- .../WebScraper/utils/imageDescription.ts | 98 +++++++++++++++++++ 6 files changed, 122 insertions(+), 43 deletions(-) delete mode 100644 apps/api/src/scraper/WebScraper/utils/gptVision.ts create mode 100644 apps/api/src/scraper/WebScraper/utils/imageDescription.ts diff --git a/apps/api/.env.local b/apps/api/.env.local index 301c64b1..88133b76 100644 --- a/apps/api/.env.local +++ b/apps/api/.env.local @@ -7,6 +7,7 @@ SUPABASE_SERVICE_TOKEN= REDIS_URL= SCRAPING_BEE_API_KEY= OPENAI_API_KEY= +ANTHROPIC_API_KEY= BULL_AUTH_KEY= LOGTAIL_KEY= PLAYWRIGHT_MICROSERVICE_URL= diff --git a/apps/api/package.json b/apps/api/package.json index 9e3a3d8d..a951aaf9 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -39,6 +39,7 @@ "typescript": "^5.4.2" }, "dependencies": { + "@anthropic-ai/sdk": "^0.20.5", "@brillout/import": "^0.2.2", "@bull-board/api": "^5.14.2", "@bull-board/express": "^5.8.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 3539868d..08b1de2e 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -5,6 +5,9 @@ settings: excludeLinksFromLockfile: false dependencies: + '@anthropic-ai/sdk': + specifier: ^0.20.5 + version: 0.20.5 '@brillout/import': specifier: ^0.2.2 version: 0.2.3 @@ -213,6 +216,21 @@ packages: '@jridgewell/trace-mapping': 0.3.25 dev: true + /@anthropic-ai/sdk@0.20.5: + resolution: {integrity: sha512-d0ch+zp6/gHR4+2wqWV7JU1EJ7PpHc3r3F6hebovJTouY+pkaId1FuYYaVsG3l/gyqhOZUwKCMSMqcFNf+ZmWg==} + dependencies: + '@types/node': 18.19.22 + 
'@types/node-fetch': 2.6.11 + abort-controller: 3.0.0 + agentkeepalive: 4.5.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + web-streams-polyfill: 3.3.3 + transitivePeerDependencies: + - encoding + dev: false + /@anthropic-ai/sdk@0.9.1: resolution: {integrity: sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==} dependencies: diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index b54d9e68..62ea16c9 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -4,7 +4,7 @@ import { scrapSingleUrl } from "./single_url"; import { SitemapEntry, fetchSitemapData, getLinksFromSitemap } from "./sitemap"; import { WebCrawler } from "./crawler"; import { getValue, setValue } from "../../services/redis"; -import { getImageDescription } from "./utils/gptVision"; +import { getImageDescription } from "./utils/imageDescription"; export type WebScraperOptions = { urls: string[]; @@ -16,6 +16,7 @@ export type WebScraperOptions = { maxCrawledLinks?: number; limit?: number; generateImgAltText?: boolean; + generateImgAltTextModel?: "gpt-4-turbo" | "anthropic"; }; concurrentRequests?: number; }; @@ -29,6 +30,7 @@ export class WebScraperDataProvider { private limit: number = 10000; private concurrentRequests: number = 20; private generateImgAltText: boolean = false; + private generateImgAltTextModel: "gpt-4-turbo" | "anthropic" = "gpt-4-turbo"; authorize(): void { throw new Error("Method not implemented."); @@ -312,7 +314,7 @@ export class WebScraperDataProvider { let backText = document.content.substring(imageIndex + image.length, Math.min(imageIndex + image.length + 1000, contentLength)); let frontTextStartIndex = Math.max(imageIndex - 1000, 0); let frontText = document.content.substring(frontTextStartIndex, imageIndex); - altText = await getImageDescription(newImageUrl, backText, frontText); + altText = await 
getImageDescription(newImageUrl, backText, frontText, this.generateImgAltTextModel); } document.content = document.content.replace(image, `![${altText}](${newImageUrl})`); diff --git a/apps/api/src/scraper/WebScraper/utils/gptVision.ts b/apps/api/src/scraper/WebScraper/utils/gptVision.ts deleted file mode 100644 index 7458a56e..00000000 --- a/apps/api/src/scraper/WebScraper/utils/gptVision.ts +++ /dev/null @@ -1,41 +0,0 @@ -export async function getImageDescription( - imageUrl: string, - backText: string, - frontText: string -): Promise { - const { OpenAI } = require("openai"); - const openai = new OpenAI(); - - try { - const response = await openai.chat.completions.create({ - model: "gpt-4-turbo", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: - "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " + - backText + - " and the following text: " + - frontText + - ". Be super concise.", - }, - { - type: "image_url", - image_url: { - url: imageUrl, - }, - }, - ], - }, - ], - }); - - return response.choices[0].message.content; - } catch (error) { - console.error("Error generating image alt text:", error?.message); - return ""; - } -} diff --git a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts new file mode 100644 index 00000000..d2db37ba --- /dev/null +++ b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts @@ -0,0 +1,98 @@ +import Anthropic from '@anthropic-ai/sdk'; +import axios from 'axios'; + +export async function getImageDescription( + imageUrl: string, + backText: string, + frontText: string, + model: string = "gpt-4-turbo" +): Promise { + try { + const prompt = "What's in the image? You need to answer with the content for the alt tag of the image. 
To help you with the context, the image is in the following text: " + + backText + + " and the following text: " + + frontText + + ". Be super concise." + + switch (model) { + case 'anthropic': { + if (!process.env.ANTHROPIC_API_KEY) { + throw new Error("No Anthropic API key provided"); + } + const imageRequest = await axios.get(imageUrl, { responseType: 'arraybuffer' }); + const imageMediaType = 'image/png'; + const imageData = Buffer.from(imageRequest.data, 'binary').toString('base64'); + + const anthropic = new Anthropic(); + const response = await anthropic.messages.create({ + model: "claude-3-opus-20240229", + max_tokens: 1024, + messages: [ + { + role: "user", + content: [ + { + type: "image", + source: { + type: "base64", + media_type: imageMediaType, + data: imageData, + }, + }, + { + type: "text", + text: prompt + } + ], + } + ] + }); + + return response.content[0].text; + + // const response = await anthropic.messages.create({ + // messages: [ + // { + // role: "user", + // content: prompt, + // }, + // ], + // }); + + } + default: { + if (!process.env.OPENAI_API_KEY) { + throw new Error("No OpenAI API key provided"); + } + + const { OpenAI } = require("openai"); + const openai = new OpenAI(); + + const response = await openai.chat.completions.create({ + model: "gpt-4-turbo", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: prompt, + }, + { + type: "image_url", + image_url: { + url: imageUrl, + }, + }, + ], + }, + ], + }); + return response.choices[0].message.content; + } + } + } catch (error) { + console.error("Error generating image alt text:", error?.message); + return ""; + } +} From ed5dc808c7f356d7a5f63a38ed42d2d087463d23 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 16 Apr 2024 18:05:07 -0300 Subject: [PATCH 02/96] Update imageDescription.ts --- .../src/scraper/WebScraper/utils/imageDescription.ts | 10 ---------- 1 file changed, 10 deletions(-) diff --git 
a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts index d2db37ba..a01c757a 100644 --- a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts +++ b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts @@ -49,16 +49,6 @@ export async function getImageDescription( }); return response.content[0].text; - - // const response = await anthropic.messages.create({ - // messages: [ - // { - // role: "user", - // content: prompt, - // }, - // ], - // }); - } default: { if (!process.env.OPENAI_API_KEY) { From 27674a624d93de19928f1e89a3db1e134cf300c8 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 17 Apr 2024 10:39:00 -0700 Subject: [PATCH 03/96] Update index.ts --- apps/api/src/scraper/WebScraper/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 62ea16c9..ce9c7bf2 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -16,7 +16,7 @@ export type WebScraperOptions = { maxCrawledLinks?: number; limit?: number; generateImgAltText?: boolean; - generateImgAltTextModel?: "gpt-4-turbo" | "anthropic"; + generateImgAltTextModel?: "gpt-4-turbo" | "claude-3-opus"; }; concurrentRequests?: number; }; @@ -30,7 +30,7 @@ export class WebScraperDataProvider { private limit: number = 10000; private concurrentRequests: number = 20; private generateImgAltText: boolean = false; - private generateImgAltTextModel: "gpt-4-turbo" | "anthropic" = "gpt-4-turbo"; + private generateImgAltTextModel: "gpt-4-turbo" | "claude-3-opus" = "gpt-4-turbo"; authorize(): void { throw new Error("Method not implemented."); From db15724b0c9573ad0c463fca76e96b1239be9df3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 17 Apr 2024 10:39:29 -0700 Subject: [PATCH 04/96] Update imageDescription.ts --- apps/api/src/scraper/WebScraper/utils/imageDescription.ts | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts index a01c757a..3d780ab3 100644 --- a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts +++ b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts @@ -15,7 +15,7 @@ export async function getImageDescription( ". Be super concise." switch (model) { - case 'anthropic': { + case 'claude-3-opus': { if (!process.env.ANTHROPIC_API_KEY) { throw new Error("No Anthropic API key provided"); } From 9ab4cb478846e216c3e7bf310a0ec6013af17a71 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:13:30 -0300 Subject: [PATCH 05/96] [Bugfix] Trim and Lowercase all urls --- apps/api/src/index.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 7198988e..437c9671 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -103,10 +103,11 @@ app.post("/v0/scrape", async (req, res) => { } // authenticate on supabase - const url = req.body.url; + let url = req.body.url; if (!url) { return res.status(400).json({ error: "Url is required" }); } + url = url.trim().toLowerCase(); try { const a = new WebScraperDataProvider(); @@ -164,10 +165,12 @@ app.post("/v0/crawl", async (req, res) => { } // authenticate on supabase - const url = req.body.url; + let url = req.body.url; if (!url) { return res.status(400).json({ error: "Url is required" }); } + + url = url.trim().toLowerCase(); const mode = req.body.mode ?? "crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; @@ -225,10 +228,11 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => { } // authenticate on supabase - const url = req.body.url; + let url = req.body.url; if (!url) { return res.status(400).json({ error: "Url is required" }); } + url = url.trim().toLowerCase(); const mode = req.body.mode ?? 
"crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; const job = await addWebScraperJob({ From 890bde686f5bb7e94137a2a5b5aa51f1d999994d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 19 Apr 2024 19:10:05 -0300 Subject: [PATCH 06/96] added type declarations --- apps/js-sdk/firecrawl/package.json | 3 +- apps/js-sdk/firecrawl/tsconfig.json | 8 +- apps/js-sdk/firecrawl/types/index.d.ts | 107 +++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 apps/js-sdk/firecrawl/types/index.d.ts diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 58aa5aca..811f87fe 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,8 +1,9 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.10", + "version": "0.0.11", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", + "types": "types/index.d.ts", "type": "module", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" diff --git a/apps/js-sdk/firecrawl/tsconfig.json b/apps/js-sdk/firecrawl/tsconfig.json index 5bca86d5..d7764a46 100644 --- a/apps/js-sdk/firecrawl/tsconfig.json +++ b/apps/js-sdk/firecrawl/tsconfig.json @@ -49,7 +49,7 @@ // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */ /* Emit */ - // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ + "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ // "declarationMap": true, /* Create sourcemaps for d.ts files. */ // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ // "sourceMap": true, /* Create source map files for emitted JavaScript files. 
*/ @@ -70,7 +70,7 @@ // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ - // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ + "declarationDir": "./types", /* Specify the output directory for generated declaration files. */ // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ /* Interop Constraints */ @@ -105,5 +105,7 @@ /* Completeness */ // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ "skipLibCheck": true /* Skip type checking all .d.ts files. */ - } + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/__tests__/*"] } diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts new file mode 100644 index 00000000..a9d04ba9 --- /dev/null +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -0,0 +1,107 @@ +import { AxiosResponse, AxiosRequestHeaders } from 'axios'; +/** + * Configuration interface for FirecrawlApp. + */ +export interface FirecrawlAppConfig { + apiKey?: string | null; +} +/** + * Generic parameter interface. + */ +export interface Params { + [key: string]: any; +} +/** + * Response interface for scraping operations. + */ +export interface ScrapeResponse { + success: boolean; + data?: any; + error?: string; +} +/** + * Response interface for crawling operations. + */ +export interface CrawlResponse { + success: boolean; + jobId?: string; + data?: any; + error?: string; +} +/** + * Response interface for job status checks. 
+ */ +export interface JobStatusResponse { + success: boolean; + status: string; + jobId?: string; + data?: any; + error?: string; +} +/** + * Main class for interacting with the Firecrawl API. + */ +export default class FirecrawlApp { + private apiKey; + /** + * Initializes a new instance of the FirecrawlApp class. + * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. + */ + constructor({ apiKey }: FirecrawlAppConfig); + /** + * Scrapes a URL using the Firecrawl API. + * @param {string} url - The URL to scrape. + * @param {Params | null} params - Additional parameters for the scrape request. + * @returns {Promise} The response from the scrape operation. + */ + scrapeUrl(url: string, params?: Params | null): Promise; + /** + * Initiates a crawl job for a URL using the Firecrawl API. + * @param {string} url - The URL to crawl. + * @param {Params | null} params - Additional parameters for the crawl request. + * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. + * @param {number} timeout - Timeout in seconds for job status checks. + * @returns {Promise} The response from the crawl operation. + */ + crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise; + /** + * Checks the status of a crawl job using the Firecrawl API. + * @param {string} jobId - The job ID of the crawl operation. + * @returns {Promise} The response containing the job status. + */ + checkCrawlStatus(jobId: string): Promise; + /** + * Prepares the headers for an API request. + * @returns {AxiosRequestHeaders} The prepared headers. + */ + prepareHeaders(): AxiosRequestHeaders; + /** + * Sends a POST request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {Params} data - The data to send in the request. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the POST request. 
+ */ + postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise; + /** + * Sends a GET request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the GET request. + */ + getRequest(url: string, headers: AxiosRequestHeaders): Promise; + /** + * Monitors the status of a crawl job until completion or failure. + * @param {string} jobId - The job ID of the crawl operation. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @param {number} timeout - Timeout in seconds for job status checks. + * @returns {Promise} The final job status or data. + */ + monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise; + /** + * Handles errors from API responses. + * @param {AxiosResponse} response - The response from the API. + * @param {string} action - The action being performed when the error occurred. + */ + handleError(response: AxiosResponse, action: string): void; +} From 389ac90f51339ad8da2396f170ebbdfcd6914fb7 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sat, 20 Apr 2024 09:19:09 -0700 Subject: [PATCH 07/96] Caleb: fixing some documentation and rebuilding the server --- CONTRIBUTING.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e11dae7d..224eb57b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,8 @@ # Contributing -We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. +We love contributions! Our contribution guide will be coming soon! 
+ + + + From ddf9ff9c9acc9a6d9bc5003b95eafe3c54f25d2c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 11:46:06 -0700 Subject: [PATCH 08/96] Nick: --- apps/api/requests.http | 11 +++++++---- apps/api/src/main/runWebScraper.ts | 3 ++- apps/api/src/scraper/WebScraper/index.ts | 9 +++++++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index 23501369..f8d87c21 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -13,12 +13,15 @@ GET http://localhost:3002/v0/jobs/active HTTP/1.1 ### Scrape Website -POST https://api.firecrawl.dev/v0/scrape HTTP/1.1 +POST http://localhost:3002/v0/crawl HTTP/1.1 Authorization: Bearer content-type: application/json { - "url":"https://www.mendable.ai" + "url":"https://www.mendable.ai", + "crawlerOptions": { + "returnOnlyUrls": true + } } @@ -34,7 +37,7 @@ content-type: application/json ### Check Job Status -GET http://localhost:3002/v0/crawl/status/333ab225-dc3e-418b-9d4b-8fb833cbaf89 HTTP/1.1 +GET http://localhost:3002/v0/crawl/status/4dbf2b62-487d-45d7-a4f7-8f5e883dfecd HTTP/1.1 Authorization: Bearer ### Get Job Result @@ -48,5 +51,5 @@ content-type: application/json } ### Check Job Status -GET https://api.firecrawl.dev/v0/crawl/status/cfcb71ac-23a3-4da5-bd85-d4e58b871d66 +GET https://api.firecrawl.dev/v0/crawl/status/abd12f69-06b2-4378-8753-118b811df59d Authorization: Bearer \ No newline at end of file diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index c43b1b38..1cc5ab06 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -66,6 +66,7 @@ export async function runWebScraper({ inProgress(progress); })) as CrawlResult[]; + if (docs.length === 0) { return { success: true, @@ -75,7 +76,7 @@ export async function runWebScraper({ } // remove docs with empty content - const filteredDocs = docs.filter((doc) => doc.content.trim().length > 0); + const filteredDocs = 
crawlerOptions.returnOnlyUrls ? docs : docs.filter((doc) => doc.content.trim().length > 0); onSuccess(filteredDocs); const { success, credit_usage } = await billTeam( diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index c2146be3..47d18e8e 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -80,11 +80,16 @@ export class WebScraperDataProvider { }); let links = await crawler.start(inProgress, 5, this.limit); if (this.returnOnlyUrls) { + inProgress({ + current: links.length, + total: links.length, + status: "COMPLETED", + currentDocumentUrl: this.urls[0], + }); return links.map((url) => ({ content: "", + markdown: "", metadata: { sourceURL: url }, - provider: "web", - type: "text", })); } From 1a3aa2999d2d88ff8ff8034d22b3a5bcbc39c295 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 11:59:42 -0700 Subject: [PATCH 09/96] Nick: return the only list of urls --- apps/api/src/lib/entities.ts | 4 ++++ apps/api/src/main/runWebScraper.ts | 23 +++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index e261dd4f..ac2d731b 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -28,6 +28,10 @@ export type WebScraperOptions = { concurrentRequests?: number; }; +export interface DocumentUrl { + url: string; +} + export class Document { id?: string; content: string; diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 1cc5ab06..23dd55bf 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -1,8 +1,9 @@ import { Job } from "bull"; import { CrawlResult, WebScraperOptions } from "../types"; import { WebScraperDataProvider } from "../scraper/WebScraper"; -import { Progress } from "../lib/entities"; +import { DocumentUrl, Progress } from "../lib/entities"; import { billTeam } from 
"../services/billing/credit_billing"; +import { Document } from "../lib/entities"; export async function startWebScraperPipeline({ job, @@ -44,7 +45,11 @@ export async function runWebScraper({ onSuccess: (result: any) => void; onError: (error: any) => void; team_id: string; -}): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> { +}): Promise<{ + success: boolean; + message: string; + docs: Document[] | DocumentUrl[]; +}> { try { const provider = new WebScraperDataProvider(); if (mode === "crawl") { @@ -64,8 +69,7 @@ export async function runWebScraper({ } const docs = (await provider.getDocuments(false, (progress: Progress) => { inProgress(progress); - })) as CrawlResult[]; - + })) as Document[]; if (docs.length === 0) { return { @@ -76,7 +80,14 @@ export async function runWebScraper({ } // remove docs with empty content - const filteredDocs = crawlerOptions.returnOnlyUrls ? docs : docs.filter((doc) => doc.content.trim().length > 0); + const filteredDocs = crawlerOptions.returnOnlyUrls + ? 
docs.map((doc) => { + if (doc.metadata.sourceURL) { + return { url: doc.metadata.sourceURL }; + } + }) + : docs.filter((doc) => doc.content.trim().length > 0); + onSuccess(filteredDocs); const { success, credit_usage } = await billTeam( @@ -92,7 +103,7 @@ export async function runWebScraper({ }; } - return { success: true, message: "", docs: filteredDocs as CrawlResult[] }; + return { success: true, message: "", docs: filteredDocs }; } catch (error) { console.error("Error running web scraper", error); onError(error); From 6aa3cc3ce85c0d71fe6e0ae0e6f92fb007f04431 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 13:53:11 -0700 Subject: [PATCH 10/96] Nick: --- apps/api/src/main/runWebScraper.ts | 12 ++++++--- apps/api/src/services/logging/log_job.ts | 33 ++++++++++++++++++++++++ apps/api/src/services/queue-worker.ts | 19 +++++++++++++- apps/api/src/types.ts | 14 ++++++++++ 4 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 apps/api/src/services/logging/log_job.ts diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index c43b1b38..0f562a05 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -3,7 +3,7 @@ import { CrawlResult, WebScraperOptions } from "../types"; import { WebScraperDataProvider } from "../scraper/WebScraper"; import { Progress } from "../lib/entities"; import { billTeam } from "../services/billing/credit_billing"; - +import { Document } from "../lib/entities"; export async function startWebScraperPipeline({ job, }: { @@ -24,7 +24,7 @@ export async function startWebScraperPipeline({ job.moveToFailed(error); }, team_id: job.data.team_id, - })) as { success: boolean; message: string; docs: CrawlResult[] }; + })) as { success: boolean; message: string; docs: Document[] }; } export async function runWebScraper({ url, @@ -76,12 +76,12 @@ export async function runWebScraper({ // remove docs with empty content const filteredDocs = docs.filter((doc) => 
doc.content.trim().length > 0); - onSuccess(filteredDocs); const { success, credit_usage } = await billTeam( team_id, filteredDocs.length ); + if (!success) { // throw new Error("Failed to bill team, no subscription was found"); return { @@ -91,7 +91,11 @@ export async function runWebScraper({ }; } - return { success: true, message: "", docs: filteredDocs as CrawlResult[] }; + // This is where the returnvalue from the job is set + onSuccess(filteredDocs); + + // this return doesn't matter too much for the job completion result + return { success: true, message: "", docs: filteredDocs }; } catch (error) { console.error("Error running web scraper", error); onError(error); diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts new file mode 100644 index 00000000..cb7e6487 --- /dev/null +++ b/apps/api/src/services/logging/log_job.ts @@ -0,0 +1,33 @@ +import { supabase_service } from "../supabase"; +import { FirecrawlJob } from "../../types"; +import "dotenv/config"; + +export async function logJob(job: FirecrawlJob) { + try { + // Only log jobs in production + if (process.env.ENV !== "production") { + return; + } + const { data, error } = await supabase_service + .from("firecrawl_jobs") + .insert([ + { + success: job.success, + message: job.message, + num_docs: job.num_docs, + docs: job.docs, + time_taken: job.time_taken, + team_id: job.team_id, + mode: job.mode, + url: job.url, + crawler_options: job.crawlerOptions, + page_options: job.pageOptions, + }, + ]); + if (error) { + console.error("Error logging job:\n", error); + } + } catch (error) { + console.error("Error logging job:\n", error); + } +} diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index c9c5f73e..d4364012 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -4,6 +4,7 @@ import "dotenv/config"; import { logtail } from "./logtail"; import { startWebScraperPipeline } from 
"../main/runWebScraper"; import { callWebhook } from "./webhook"; +import { logJob } from "./logging/log_job"; getWebScraperQueue().process( Math.floor(Number(process.env.NUM_WORKERS_PER_QUEUE ?? 8)), @@ -15,8 +16,11 @@ getWebScraperQueue().process( current_step: "SCRAPING", current_url: "", }); + const start = Date.now(); const { success, message, docs } = await startWebScraperPipeline({ job }); - + const end = Date.now(); + const timeTakenInSeconds = (end - start) / 1000; + const data = { success: success, result: { @@ -29,6 +33,19 @@ getWebScraperQueue().process( }; await callWebhook(job.data.team_id, data); + + await logJob({ + success: success, + message: message, + num_docs: docs.length, + docs: docs, + time_taken: timeTakenInSeconds, + team_id: job.data.team_id, + mode: "crawl", + url: job.data.url, + crawlerOptions: job.data.crawlerOptions, + pageOptions: job.data.pageOptions, + }); done(null, data); } catch (error) { if (error instanceof CustomError) { diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index 2123e0c2..7803d93a 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -25,4 +25,18 @@ export interface WebScraperOptions { } +export interface FirecrawlJob { + success: boolean; + message: string; + num_docs: number; + docs: any[]; + time_taken: number; + team_id: string; + mode: string; + url: string; + crawlerOptions?: any; + pageOptions?: any; +} + + From 408c7a479f62dd0a50c72481c524a6a18d95432f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 14:02:22 -0700 Subject: [PATCH 11/96] Nick: rate limit fixes --- apps/api/src/index.ts | 16 +++++++++------- apps/api/src/services/rate-limiter.ts | 19 +++++++++++++++++-- apps/api/src/types.ts | 8 ++++++++ 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 98be9458..fcd26b79 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -9,6 +9,7 @@ import { WebScraperDataProvider } from 
"./scraper/WebScraper"; import { billTeam, checkTeamCredits } from "./services/billing/credit_billing"; import { getRateLimiter, redisClient } from "./services/rate-limiter"; import { parseApi } from "./lib/parseApi"; +import { RateLimiterMode } from "./types"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); @@ -46,7 +47,7 @@ app.get("/test", async (req, res) => { res.send("Hello, world!"); }); -async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> { +async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> { const authHeader = req.headers.authorization; if (!authHeader) { return { success: false, error: "Unauthorized", status: 401 }; @@ -56,12 +57,13 @@ async function authenticateUser(req, res, mode?: string): Promise<{ success: boo return { success: false, error: "Unauthorized: Token missing", status: 401 }; } + + try { const incomingIP = (req.headers["x-forwarded-for"] || req.socket.remoteAddress) as string; const iptoken = incomingIP + token; - await getRateLimiter( - token === "this_is_just_a_preview_token" ? true : false + await getRateLimiter((token === "this_is_just_a_preview_token") ? 
RateLimiterMode.Preview : mode ).consume(iptoken); } catch (rateLimiterRes) { console.error(rateLimiterRes); @@ -88,7 +90,7 @@ async function authenticateUser(req, res, mode?: string): Promise<{ success: boo app.post("/v0/scrape", async (req, res) => { try { // make sure to authenticate user first, Bearer - const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Scrape); if (!success) { return res.status(status).json({ error }); } @@ -164,7 +166,7 @@ app.post("/v0/scrape", async (req, res) => { app.post("/v0/crawl", async (req, res) => { try { - const { success, team_id, error, status } = await authenticateUser(req, res, "crawl"); + const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Crawl); if (!success) { return res.status(status).json({ error }); } @@ -230,7 +232,7 @@ app.post("/v0/crawl", async (req, res) => { }); app.post("/v0/crawlWebsitePreview", async (req, res) => { try { - const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Crawl); if (!success) { return res.status(status).json({ error }); } @@ -259,7 +261,7 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => { app.get("/v0/crawl/status/:jobId", async (req, res) => { try { - const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.CrawlStatus); if (!success) { return res.status(status).json({ error }); } diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index 5812f5d3..dcd05da4 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -1,5 +1,6 @@ import { RateLimiterRedis } from 
"rate-limiter-flexible"; import * as redis from "redis"; +import { RateLimiterMode } from "../../src/types"; const MAX_REQUESTS_PER_MINUTE_PREVIEW = 5; const MAX_CRAWLS_PER_MINUTE_STARTER = 2; @@ -8,6 +9,9 @@ const MAX_CRAWLS_PER_MINUTE_SCALE = 20; const MAX_REQUESTS_PER_MINUTE_ACCOUNT = 20; +const MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS = 120; + + export const redisClient = redis.createClient({ @@ -29,6 +33,13 @@ export const serverRateLimiter = new RateLimiterRedis({ duration: 60, // Duration in seconds }); +export const crawlStatusRateLimiter = new RateLimiterRedis({ + storeClient: redisClient, + keyPrefix: "middleware", + points: MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS, + duration: 60, // Duration in seconds +}); + export function crawlRateLimit(plan: string){ if(plan === "standard"){ @@ -56,9 +67,13 @@ export function crawlRateLimit(plan: string){ } -export function getRateLimiter(preview: boolean){ - if(preview){ + + +export function getRateLimiter(mode: RateLimiterMode){ + if(mode === RateLimiterMode.Preview){ return previewRateLimiter; + }else if(mode === RateLimiterMode.CrawlStatus){ + return crawlStatusRateLimiter; }else{ return serverRateLimiter; } diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index 2123e0c2..9442176e 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -26,3 +26,11 @@ export interface WebScraperOptions { +export enum RateLimiterMode { + Crawl = "crawl", + CrawlStatus = "crawl-status", + Scrape = "scrape", + Preview = "preview", +} + + From 43c2e877e7a40add2a20bf86603bd7e27b668249 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 14:05:01 -0700 Subject: [PATCH 12/96] Update index.ts --- apps/api/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index fcd26b79..271d96d5 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -232,7 +232,7 @@ app.post("/v0/crawl", async (req, res) => { }); app.post("/v0/crawlWebsitePreview", 
async (req, res) => { try { - const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Crawl); + const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Preview); if (!success) { return res.status(status).json({ error }); } From 5b3c75b06e3756bfc09a469ee9f029582bbc16c7 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 14:10:29 -0700 Subject: [PATCH 13/96] Nick: --- apps/api/src/index.ts | 2 +- apps/api/src/services/rate-limiter.ts | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 271d96d5..0fbd91e4 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -70,7 +70,7 @@ async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<{ suc return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 }; } - if (token === "this_is_just_a_preview_token" && mode === "scrape") { + if (token === "this_is_just_a_preview_token" && (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview)) { return { success: true, team_id: "preview" }; } diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index dcd05da4..b1ee5625 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -70,11 +70,12 @@ export function crawlRateLimit(plan: string){ export function getRateLimiter(mode: RateLimiterMode){ - if(mode === RateLimiterMode.Preview){ - return previewRateLimiter; - }else if(mode === RateLimiterMode.CrawlStatus){ - return crawlStatusRateLimiter; - }else{ - return serverRateLimiter; + switch(mode) { + case RateLimiterMode.Preview: + return previewRateLimiter; + case RateLimiterMode.CrawlStatus: + return crawlStatusRateLimiter; + default: + return serverRateLimiter; } } From 23b2190e5df0b7559a634b412b97a6a23152eeaa Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 
20 Apr 2024 16:38:05 -0700 Subject: [PATCH 14/96] Nick: --- apps/api/jest.config.js | 3 + apps/api/src/controllers/auth.ts | 67 ++++++ apps/api/src/controllers/crawl-status.ts | 36 +++ apps/api/src/controllers/crawl.ts | 77 +++++++ apps/api/src/controllers/crawlPreview.ts | 37 ++++ apps/api/src/controllers/scrape.ts | 104 +++++++++ apps/api/src/controllers/status.ts | 25 +++ apps/api/src/index.ts | 270 +---------------------- apps/api/src/routes/v0.ts | 14 ++ 9 files changed, 369 insertions(+), 264 deletions(-) create mode 100644 apps/api/src/controllers/auth.ts create mode 100644 apps/api/src/controllers/crawl-status.ts create mode 100644 apps/api/src/controllers/crawl.ts create mode 100644 apps/api/src/controllers/crawlPreview.ts create mode 100644 apps/api/src/controllers/scrape.ts create mode 100644 apps/api/src/controllers/status.ts create mode 100644 apps/api/src/routes/v0.ts diff --git a/apps/api/jest.config.js b/apps/api/jest.config.js index c0992574..28544523 100644 --- a/apps/api/jest.config.js +++ b/apps/api/jest.config.js @@ -2,4 +2,7 @@ module.exports = { preset: "ts-jest", testEnvironment: "node", setupFiles: ["./jest.setup.js"], + // ignore dist folder root dir + modulePathIgnorePatterns: ["/dist/"], + }; diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts new file mode 100644 index 00000000..76bacbe0 --- /dev/null +++ b/apps/api/src/controllers/auth.ts @@ -0,0 +1,67 @@ +import { parseApi } from "../../src/lib/parseApi"; +import { getRateLimiter } from "../../src/services/rate-limiter"; +import { RateLimiterMode } from "../../src/types"; +import { supabase_service } from "../../src/services/supabase"; + +export async function authenticateUser( + req, + res, + mode?: RateLimiterMode +): Promise<{ + success: boolean; + team_id?: string; + error?: string; + status?: number; +}> { + const authHeader = req.headers.authorization; + if (!authHeader) { + return { success: false, error: "Unauthorized", status: 401 }; + } + const 
token = authHeader.split(" ")[1]; // Extract the token from "Bearer " + if (!token) { + return { + success: false, + error: "Unauthorized: Token missing", + status: 401, + }; + } + + try { + const incomingIP = (req.headers["x-forwarded-for"] || + req.socket.remoteAddress) as string; + const iptoken = incomingIP + token; + await getRateLimiter( + token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode + ).consume(iptoken); + } catch (rateLimiterRes) { + console.error(rateLimiterRes); + return { + success: false, + error: "Rate limit exceeded. Too many requests, try again in 1 minute.", + status: 429, + }; + } + + if ( + token === "this_is_just_a_preview_token" && + (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview) + ) { + return { success: true, team_id: "preview" }; + } + + const normalizedApi = parseApi(token); + // make sure api key is valid, based on the api_keys table in supabase + const { data, error } = await supabase_service + .from("api_keys") + .select("*") + .eq("key", normalizedApi); + if (error || !data || data.length === 0) { + return { + success: false, + error: "Unauthorized: Invalid token", + status: 401, + }; + } + + return { success: true, team_id: data[0].team_id }; +} diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts new file mode 100644 index 00000000..3534cd16 --- /dev/null +++ b/apps/api/src/controllers/crawl-status.ts @@ -0,0 +1,36 @@ +import { Request, Response } from "express"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../src/types"; +import { addWebScraperJob } from "../../src/services/queue-jobs"; +import { getWebScraperQueue } from "../../src/services/queue-service"; + +export async function crawlStatusController(req: Request, res: Response) { + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.CrawlStatus + ); + if (!success) { + return 
res.status(status).json({ error }); + } + const job = await getWebScraperQueue().getJob(req.params.jobId); + if (!job) { + return res.status(404).json({ error: "Job not found" }); + } + + const { current, current_url, total, current_step } = await job.progress(); + res.json({ + status: await job.getState(), + // progress: job.progress(), + current: current, + current_url: current_url, + current_step: current_step, + total: total, + data: job.returnvalue, + }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts new file mode 100644 index 00000000..2f7f8426 --- /dev/null +++ b/apps/api/src/controllers/crawl.ts @@ -0,0 +1,77 @@ +import { Request, Response } from "express"; +import { WebScraperDataProvider } from "../../src/scraper/WebScraper"; +import { billTeam } from "../../src/services/billing/credit_billing"; +import { checkTeamCredits } from "../../src/services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../src/types"; +import { addWebScraperJob } from "../../src/services/queue-jobs"; + +export async function crawlController(req: Request, res: Response) { + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Crawl + ); + if (!success) { + return res.status(status).json({ error }); + } + + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } + + // authenticate on supabase + const url = req.body.url; + if (!url) { + return res.status(400).json({ error: "Url is required" }); + } + const mode = req.body.mode ?? "crawl"; + const crawlerOptions = req.body.crawlerOptions ?? {}; + const pageOptions = req.body.pageOptions ?? 
{ onlyMainContent: false }; + + if (mode === "single_urls" && !url.includes(",")) { + try { + const a = new WebScraperDataProvider(); + await a.setOptions({ + mode: "single_urls", + urls: [url], + crawlerOptions: { + returnOnlyUrls: true, + }, + pageOptions: pageOptions, + }); + + const docs = await a.getDocuments(false, (progress) => { + job.progress({ + current: progress.current, + total: progress.total, + current_step: "SCRAPING", + current_url: progress.currentDocumentUrl, + }); + }); + return res.json({ + success: true, + documents: docs, + }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } + } + const job = await addWebScraperJob({ + url: url, + mode: mode ?? "crawl", // fix for single urls not working + crawlerOptions: { ...crawlerOptions }, + team_id: team_id, + pageOptions: pageOptions, + }); + + res.json({ jobId: job.id }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts new file mode 100644 index 00000000..641468c4 --- /dev/null +++ b/apps/api/src/controllers/crawlPreview.ts @@ -0,0 +1,37 @@ +import { Request, Response } from "express"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../src/types"; +import { addWebScraperJob } from "../../src/services/queue-jobs"; + +export async function crawlPreviewController(req: Request, res: Response) { + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Preview + ); + if (!success) { + return res.status(status).json({ error }); + } + // authenticate on supabase + const url = req.body.url; + if (!url) { + return res.status(400).json({ error: "Url is required" }); + } + const mode = req.body.mode ?? "crawl"; + const crawlerOptions = req.body.crawlerOptions ?? {}; + const pageOptions = req.body.pageOptions ?? 
{ onlyMainContent: false }; + const job = await addWebScraperJob({ + url: url, + mode: mode ?? "crawl", // fix for single urls not working + crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 }, + team_id: "preview", + pageOptions: pageOptions, + }); + + res.json({ jobId: job.id }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts new file mode 100644 index 00000000..91735338 --- /dev/null +++ b/apps/api/src/controllers/scrape.ts @@ -0,0 +1,104 @@ +import { Request, Response } from "express"; +import { WebScraperDataProvider } from "../../src/scraper/WebScraper"; +import { billTeam } from "../../src/services/billing/credit_billing"; +import { checkTeamCredits } from "../../src/services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../src/types"; +import { logJob } from "../../src/services/logging/log_job"; +import { Document } from "../../src/lib/entities"; + +export async function scrapeHelper( + req: Request, + team_id: string, + crawlerOptions: any, + pageOptions: any +) : Promise<{ success: boolean; error?: string; data?: Document }> { + const url = req.body.url; + if (!url) { + throw new Error("Url is required"); + } + + const a = new WebScraperDataProvider(); + await a.setOptions({ + mode: "single_urls", + urls: [url], + crawlerOptions: { + ...crawlerOptions, + }, + pageOptions: pageOptions, + }); + + const docs = await a.getDocuments(false); + // make sure doc.content is not empty + const filteredDocs = docs.filter( + (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 + ); + if (filteredDocs.length === 0) { + return { success: true, error: "No pages found" }; + } + const { success, credit_usage } = await billTeam( + team_id, + filteredDocs.length + ); + if (!success) { + return { + success: false, + error: "Failed to 
bill team. Insufficient credits or subscription not found.", + }; + } + return { + success: true, + data: filteredDocs[0], + }; +} + +export async function scrapeController(req: Request, res: Response) { + try { + // make sure to authenticate user first, Bearer + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Scrape + ); + if (!success) { + return res.status(status).json({ error }); + } + const crawlerOptions = req.body.crawlerOptions ?? {}; + const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + + try { + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } + } catch (error) { + console.error(error); + return res.status(500).json({ error: "Internal server error" }); + } + + const result = await scrapeHelper( + req, + team_id, + crawlerOptions, + pageOptions + ); + logJob({ + success: result.success, + message: result.error, + num_docs: result.data.length, + docs: result.data, + time_taken: 0, + team_id: team_id, + mode: "scrape", + url: req.body.url, + crawlerOptions: crawlerOptions, + pageOptions: pageOptions, + }); + return res.json(result); + + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/status.ts new file mode 100644 index 00000000..bd1d2ead --- /dev/null +++ b/apps/api/src/controllers/status.ts @@ -0,0 +1,25 @@ +import { Request, Response } from "express"; +import { getWebScraperQueue } from "../../src/services/queue-service"; + +export async function crawlJobStatusPreviewController(req: Request, res: Response) { + try { + const job = await getWebScraperQueue().getJob(req.params.jobId); + if (!job) { + return res.status(404).json({ error: "Job not found" }); + } + + const { current, current_url, 
total, current_step } = await job.progress(); + res.json({ + status: await job.getState(), + // progress: job.progress(), + current: current, + current_url: current_url, + current_step: current_step, + total: total, + data: job.returnvalue, + }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 0fbd91e4..57a05f21 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -10,6 +10,7 @@ import { billTeam, checkTeamCredits } from "./services/billing/credit_billing"; import { getRateLimiter, redisClient } from "./services/rate-limiter"; import { parseApi } from "./lib/parseApi"; import { RateLimiterMode } from "./types"; +import { v0Router } from "./routes/v0"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); @@ -17,7 +18,6 @@ const { ExpressAdapter } = require("@bull-board/express"); export const app = express(); - global.isProduction = process.env.IS_PRODUCTION === "true"; app.use(bodyParser.urlencoded({ extended: true })); @@ -47,267 +47,8 @@ app.get("/test", async (req, res) => { res.send("Hello, world!"); }); -async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> { - const authHeader = req.headers.authorization; - if (!authHeader) { - return { success: false, error: "Unauthorized", status: 401 }; - } - const token = authHeader.split(" ")[1]; // Extract the token from "Bearer " - if (!token) { - return { success: false, error: "Unauthorized: Token missing", status: 401 }; - } - - - - try { - const incomingIP = (req.headers["x-forwarded-for"] || - req.socket.remoteAddress) as string; - const iptoken = incomingIP + token; - await getRateLimiter((token === "this_is_just_a_preview_token") ? 
RateLimiterMode.Preview : mode - ).consume(iptoken); - } catch (rateLimiterRes) { - console.error(rateLimiterRes); - return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 }; - } - - if (token === "this_is_just_a_preview_token" && (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview)) { - return { success: true, team_id: "preview" }; - } - - const normalizedApi = parseApi(token); - // make sure api key is valid, based on the api_keys table in supabase - const { data, error } = await supabase_service - .from("api_keys") - .select("*") - .eq("key", normalizedApi); - if (error || !data || data.length === 0) { - return { success: false, error: "Unauthorized: Invalid token", status: 401 }; - } - - return { success: true, team_id: data[0].team_id }; -} - -app.post("/v0/scrape", async (req, res) => { - try { - // make sure to authenticate user first, Bearer - const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Scrape); - if (!success) { - return res.status(status).json({ error }); - } - const crawlerOptions = req.body.crawlerOptions ?? {}; - - try { - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); - } - } catch (error) { - console.error(error); - return res.status(500).json({ error: "Internal server error" }); - } - - // authenticate on supabase - const url = req.body.url; - if (!url) { - return res.status(400).json({ error: "Url is required" }); - } - - const pageOptions = req.body.pageOptions ?? 
{ onlyMainContent: false }; - - try { - const a = new WebScraperDataProvider(); - await a.setOptions({ - mode: "single_urls", - urls: [url], - crawlerOptions: { - ...crawlerOptions, - }, - pageOptions: pageOptions, - }); - - const docs = await a.getDocuments(false); - // make sure doc.content is not empty - const filteredDocs = docs.filter( - (doc: { content?: string }) => - doc.content && doc.content.trim().length > 0 - ); - if (filteredDocs.length === 0) { - return res.status(200).json({ success: true, data: [] }); - } - const { success, credit_usage } = await billTeam( - team_id, - filteredDocs.length - ); - if (!success) { - // throw new Error("Failed to bill team, no subscription was found"); - // return { - // success: false, - // message: "Failed to bill team, no subscription was found", - // docs: [], - // }; - return res - .status(402) - .json({ error: "Failed to bill, no subscription was found" }); - } - return res.json({ - success: true, - data: filteredDocs[0], - }); - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } -}); - -app.post("/v0/crawl", async (req, res) => { - try { - const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Crawl); - if (!success) { - return res.status(status).json({ error }); - } - - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); - } - - // authenticate on supabase - const url = req.body.url; - if (!url) { - return res.status(400).json({ error: "Url is required" }); - } - const mode = req.body.mode ?? "crawl"; - const crawlerOptions = req.body.crawlerOptions ?? {}; - const pageOptions = req.body.pageOptions ?? 
{ onlyMainContent: false }; - - if (mode === "single_urls" && !url.includes(",")) { - try { - const a = new WebScraperDataProvider(); - await a.setOptions({ - mode: "single_urls", - urls: [url], - crawlerOptions: { - returnOnlyUrls: true, - }, - pageOptions: pageOptions, - }); - - const docs = await a.getDocuments(false, (progress) => { - job.progress({ - current: progress.current, - total: progress.total, - current_step: "SCRAPING", - current_url: progress.currentDocumentUrl, - }); - }); - return res.json({ - success: true, - documents: docs, - }); - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } - } - const job = await addWebScraperJob({ - url: url, - mode: mode ?? "crawl", // fix for single urls not working - crawlerOptions: { ...crawlerOptions }, - team_id: team_id, - pageOptions: pageOptions, - - }); - - res.json({ jobId: job.id }); - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } -}); -app.post("/v0/crawlWebsitePreview", async (req, res) => { - try { - const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.Preview); - if (!success) { - return res.status(status).json({ error }); - } - // authenticate on supabase - const url = req.body.url; - if (!url) { - return res.status(400).json({ error: "Url is required" }); - } - const mode = req.body.mode ?? "crawl"; - const crawlerOptions = req.body.crawlerOptions ?? {}; - const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; - const job = await addWebScraperJob({ - url: url, - mode: mode ?? 
"crawl", // fix for single urls not working - crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 }, - team_id: "preview", - pageOptions: pageOptions, - }); - - res.json({ jobId: job.id }); - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } -}); - -app.get("/v0/crawl/status/:jobId", async (req, res) => { - try { - const { success, team_id, error, status } = await authenticateUser(req, res, RateLimiterMode.CrawlStatus); - if (!success) { - return res.status(status).json({ error }); - } - const job = await getWebScraperQueue().getJob(req.params.jobId); - if (!job) { - return res.status(404).json({ error: "Job not found" }); - } - - const { current, current_url, total, current_step } = await job.progress(); - res.json({ - status: await job.getState(), - // progress: job.progress(), - current: current, - current_url: current_url, - current_step: current_step, - total: total, - data: job.returnvalue, - }); - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } -}); - -app.get("/v0/checkJobStatus/:jobId", async (req, res) => { - try { - const job = await getWebScraperQueue().getJob(req.params.jobId); - if (!job) { - return res.status(404).json({ error: "Job not found" }); - } - - const { current, current_url, total, current_step } = await job.progress(); - res.json({ - status: await job.getState(), - // progress: job.progress(), - current: current, - current_url: current_url, - current_step: current_step, - total: total, - data: job.returnvalue, - }); - } catch (error) { - console.error(error); - return res.status(500).json({ error: error.message }); - } -}); +// register router +app.use(v0Router); const DEFAULT_PORT = process.env.PORT ?? 3002; const HOST = process.env.HOST ?? 
"localhost"; @@ -316,7 +57,9 @@ redisClient.connect(); export function startServer(port = DEFAULT_PORT) { const server = app.listen(Number(port), HOST, () => { console.log(`Server listening on port ${port}`); - console.log(`For the UI, open http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`); + console.log( + `For the UI, open http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues` + ); console.log(""); console.log("1. Make sure Redis is running on port 6379 by default"); console.log( @@ -353,4 +96,3 @@ app.get(`/admin/${process.env.BULL_AUTH_KEY}/queues`, async (req, res) => { app.get("/is-production", (req, res) => { res.send({ isProduction: global.isProduction }); }); - diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts new file mode 100644 index 00000000..023282a9 --- /dev/null +++ b/apps/api/src/routes/v0.ts @@ -0,0 +1,14 @@ +import express from "express"; +import { crawlController } from "../../src/controllers/crawl"; +import { crawlStatusController } from "../../src/controllers/crawl-status"; +import { scrapeController } from "../../src/controllers/scrape"; +import { crawlPreviewController } from "../../src/controllers/crawlPreview"; +import { crawlJobStatusPreviewController } from "../../src/controllers/status"; + +export const v0Router = express.Router(); + +v0Router.post("/v0/scrape", scrapeController); +v0Router.post("/v0/crawl", crawlController); +v0Router.post("/v0/crawlWebsitePreview", crawlPreviewController); +v0Router.get("/v0/crawl/status/:jobId", crawlStatusController); +v0Router.get("/v0/checkJobStatus/:jobId", crawlJobStatusPreviewController); From 5b8aed26dd85a9d5f23e0fd865882dfd5b14a865 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 18:55:39 -0700 Subject: [PATCH 15/96] Update scrape.ts --- apps/api/src/controllers/scrape.ts | 57 +++++++++++++++++------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/apps/api/src/controllers/scrape.ts 
b/apps/api/src/controllers/scrape.ts index 91735338..04fe525c 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -5,17 +5,22 @@ import { checkTeamCredits } from "../../src/services/billing/credit_billing"; import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../../src/types"; import { logJob } from "../../src/services/logging/log_job"; -import { Document } from "../../src/lib/entities"; +import { Document } from "../../src/lib/entities"; export async function scrapeHelper( req: Request, team_id: string, crawlerOptions: any, pageOptions: any -) : Promise<{ success: boolean; error?: string; data?: Document }> { +): Promise<{ + success: boolean; + error?: string; + data?: Document; + returnCode?: number; +}> { const url = req.body.url; if (!url) { - throw new Error("Url is required"); + return { success: false, error: "Url is required", returnCode: 400 }; } const a = new WebScraperDataProvider(); @@ -34,7 +39,7 @@ export async function scrapeHelper( (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 ); if (filteredDocs.length === 0) { - return { success: true, error: "No pages found" }; + return { success: true, error: "No page found", returnCode: 200 }; } const { success, credit_usage } = await billTeam( team_id, @@ -43,12 +48,15 @@ export async function scrapeHelper( if (!success) { return { success: false, - error: "Failed to bill team. Insufficient credits or subscription not found.", + error: + "Failed to bill team. 
Insufficient credits or subscription not found.", + returnCode: 402, }; } return { success: true, data: filteredDocs[0], + returnCode: 200, }; } @@ -77,26 +85,25 @@ export async function scrapeController(req: Request, res: Response) { return res.status(500).json({ error: "Internal server error" }); } - const result = await scrapeHelper( - req, - team_id, - crawlerOptions, - pageOptions - ); - logJob({ - success: result.success, - message: result.error, - num_docs: result.data.length, - docs: result.data, - time_taken: 0, - team_id: team_id, - mode: "scrape", - url: req.body.url, - crawlerOptions: crawlerOptions, - pageOptions: pageOptions, - }); - return res.json(result); - + const result = await scrapeHelper( + req, + team_id, + crawlerOptions, + pageOptions + ); + logJob({ + success: result.success, + message: result.error, + num_docs: 1, + docs: [result.data], + time_taken: 0, + team_id: team_id, + mode: "scrape", + url: req.body.url, + crawlerOptions: crawlerOptions, + pageOptions: pageOptions, + }); + return res.json(result); } catch (error) { console.error(error); return res.status(500).json({ error: error.message }); From 4543c57e4e70dfe072c86c01c77f90a4df535979 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 19:04:27 -0700 Subject: [PATCH 16/96] Nick: --- apps/api/.env.local | 1 + apps/api/src/controllers/scrape.ts | 15 +++++++-------- apps/api/src/index.ts | 8 +------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/apps/api/.env.local b/apps/api/.env.local index f5c625f2..6c58f190 100644 --- a/apps/api/.env.local +++ b/apps/api/.env.local @@ -1,3 +1,4 @@ +ENV= NUM_WORKERS_PER_QUEUE=8 PORT= HOST= diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 04fe525c..51d14f2d 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -1,11 +1,10 @@ import { Request, Response } from "express"; -import { WebScraperDataProvider } from "../../src/scraper/WebScraper"; 
-import { billTeam } from "../../src/services/billing/credit_billing"; -import { checkTeamCredits } from "../../src/services/billing/credit_billing"; +import { WebScraperDataProvider } from "../scraper/WebScraper"; +import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { authenticateUser } from "./auth"; -import { RateLimiterMode } from "../../src/types"; -import { logJob } from "../../src/services/logging/log_job"; -import { Document } from "../../src/lib/entities"; +import { RateLimiterMode } from "../types"; +import { logJob } from "../services/logging/log_job"; +import { Document } from "../lib/entities"; export async function scrapeHelper( req: Request, @@ -16,7 +15,7 @@ export async function scrapeHelper( success: boolean; error?: string; data?: Document; - returnCode?: number; + returnCode: number; }> { const url = req.body.url; if (!url) { @@ -103,7 +102,7 @@ export async function scrapeController(req: Request, res: Response) { crawlerOptions: crawlerOptions, pageOptions: pageOptions, }); - return res.json(result); + return res.status(result.returnCode).json(result); } catch (error) { console.error(error); return res.status(500).json({ error: error.message }); diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 57a05f21..1a42eb44 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -3,13 +3,7 @@ import bodyParser from "body-parser"; import cors from "cors"; import "dotenv/config"; import { getWebScraperQueue } from "./services/queue-service"; -import { addWebScraperJob } from "./services/queue-jobs"; -import { supabase_service } from "./services/supabase"; -import { WebScraperDataProvider } from "./scraper/WebScraper"; -import { billTeam, checkTeamCredits } from "./services/billing/credit_billing"; -import { getRateLimiter, redisClient } from "./services/rate-limiter"; -import { parseApi } from "./lib/parseApi"; -import { RateLimiterMode } from "./types"; +import { redisClient } from 
"./services/rate-limiter"; import { v0Router } from "./routes/v0"; const { createBullBoard } = require("@bull-board/api"); From 0db0874b00742e7e7a6439a975501a397da5d6b8 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 19:37:45 -0700 Subject: [PATCH 17/96] Nick: --- apps/api/src/controllers/crawl.ts | 2 ++ apps/api/src/controllers/crawlPreview.ts | 2 ++ apps/api/src/controllers/scrape.ts | 8 ++++++-- apps/api/src/main/runWebScraper.ts | 10 +++++++--- apps/api/src/services/logging/log_job.ts | 3 ++- apps/api/src/services/queue-worker.ts | 6 ++++-- apps/api/src/services/webhook.ts | 9 +++++++-- apps/api/src/types.ts | 2 ++ 8 files changed, 32 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 2f7f8426..17cfa625 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -42,6 +42,7 @@ export async function crawlController(req: Request, res: Response) { returnOnlyUrls: true, }, pageOptions: pageOptions, + }); const docs = await a.getDocuments(false, (progress) => { @@ -67,6 +68,7 @@ export async function crawlController(req: Request, res: Response) { crawlerOptions: { ...crawlerOptions }, team_id: team_id, pageOptions: pageOptions, + origin: req.body.origin ?? "api", }); res.json({ jobId: job.id }); diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index 641468c4..3f28ef60 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -21,12 +21,14 @@ export async function crawlPreviewController(req: Request, res: Response) { const mode = req.body.mode ?? "crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + const job = await addWebScraperJob({ url: url, mode: mode ?? 
"crawl", // fix for single urls not working crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 }, team_id: "preview", pageOptions: pageOptions, + origin: "website-preview", }); res.json({ jobId: job.id }); diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 51d14f2d..632fff59 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -72,6 +72,7 @@ export async function scrapeController(req: Request, res: Response) { } const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + const origin = req.body.origin ?? "api"; try { const { success: creditsCheckSuccess, message: creditsCheckMessage } = @@ -83,24 +84,27 @@ export async function scrapeController(req: Request, res: Response) { console.error(error); return res.status(500).json({ error: "Internal server error" }); } - + const startTime = new Date().getTime(); const result = await scrapeHelper( req, team_id, crawlerOptions, pageOptions ); + const endTime = new Date().getTime(); + const timeTakenInSeconds = (endTime - startTime) / 1000; logJob({ success: result.success, message: result.error, num_docs: 1, docs: [result.data], - time_taken: 0, + time_taken: timeTakenInSeconds, team_id: team_id, mode: "scrape", url: req.body.url, crawlerOptions: crawlerOptions, pageOptions: pageOptions, + origin: origin, }); return res.status(result.returnCode).json(result); } catch (error) { diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 0f562a05..d9434291 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -44,7 +44,11 @@ export async function runWebScraper({ onSuccess: (result: any) => void; onError: (error: any) => void; team_id: string; -}): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> { +}): Promise<{ + success: boolean; + message: string; + docs: CrawlResult[]; +}> { 
try { const provider = new WebScraperDataProvider(); if (mode === "crawl") { @@ -70,7 +74,7 @@ export async function runWebScraper({ return { success: true, message: "No pages found", - docs: [], + docs: [] }; } @@ -87,7 +91,7 @@ export async function runWebScraper({ return { success: false, message: "Failed to bill team, no subscription was found", - docs: [], + docs: [] }; } diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts index cb7e6487..639b3a8c 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -17,11 +17,12 @@ export async function logJob(job: FirecrawlJob) { num_docs: job.num_docs, docs: job.docs, time_taken: job.time_taken, - team_id: job.team_id, + team_id: job.team_id === "preview" ? null : job.team_id, mode: job.mode, url: job.url, crawler_options: job.crawlerOptions, page_options: job.pageOptions, + origin: job.origin, }, ]); if (error) { diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index d4364012..dda876a7 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -17,10 +17,11 @@ getWebScraperQueue().process( current_url: "", }); const start = Date.now(); + console.log("Processing job", job.data); const { success, message, docs } = await startWebScraperPipeline({ job }); const end = Date.now(); const timeTakenInSeconds = (end - start) / 1000; - + const data = { success: success, result: { @@ -33,7 +34,7 @@ getWebScraperQueue().process( }; await callWebhook(job.data.team_id, data); - + await logJob({ success: success, message: message, @@ -45,6 +46,7 @@ getWebScraperQueue().process( url: job.data.url, crawlerOptions: job.data.crawlerOptions, pageOptions: job.data.pageOptions, + origin: job.data.origin, }); done(null, data); } catch (error) { diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index a086425c..ab1f90ea 100644 --- 
a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -1,6 +1,7 @@ import { supabase_service } from "./supabase"; export const callWebhook = async (teamId: string, data: any) => { + try { const { data: webhooksData, error } = await supabase_service .from('webhooks') .select('url') @@ -37,5 +38,9 @@ export const callWebhook = async (teamId: string, data: any) => { data: dataToSend, error: data.error || undefined, }), - }); -} \ No newline at end of file + }); + } catch (error) { + console.error(`Error sending webhook for team ID: ${teamId}`, error.message); + } +}; + diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index e3fc5dc7..f9e5c739 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -22,6 +22,7 @@ export interface WebScraperOptions { crawlerOptions: any; pageOptions: any; team_id: string; + origin?: string; } @@ -36,6 +37,7 @@ export interface FirecrawlJob { url: string; crawlerOptions?: any; pageOptions?: any; + origin: string; } From 9b31e68a7ef64ededa0531bece1fb340e72a9e70 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 19:38:44 -0700 Subject: [PATCH 18/96] Update queue-worker.ts --- apps/api/src/services/queue-worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index dda876a7..8d7a7bd9 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -17,7 +17,7 @@ getWebScraperQueue().process( current_url: "", }); const start = Date.now(); - console.log("Processing job", job.data); + const { success, message, docs } = await startWebScraperPipeline({ job }); const end = Date.now(); const timeTakenInSeconds = (end - start) / 1000; From b361a76282e88b678d31306d1469f609f4d135a1 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sat, 20 Apr 2024 19:53:04 -0700 Subject: [PATCH 19/96] Caleb: added logging improvement --- 
.gitignore | 2 ++ apps/api/.env.local | 14 -------------- apps/api/src/__tests__/e2e/index.test.ts | 14 ++++++++++++-- apps/api/src/services/logtail.ts | 23 +++++++++++++++++++---- 4 files changed, 33 insertions(+), 20 deletions(-) delete mode 100644 apps/api/.env.local diff --git a/.gitignore b/.gitignore index cbfb076f..90290129 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ dump.rdb /mongo-data apps/js-sdk/node_modules/ + +apps/api/.env.local diff --git a/apps/api/.env.local b/apps/api/.env.local deleted file mode 100644 index f5c625f2..00000000 --- a/apps/api/.env.local +++ /dev/null @@ -1,14 +0,0 @@ -NUM_WORKERS_PER_QUEUE=8 -PORT= -HOST= -SUPABASE_ANON_TOKEN= -SUPABASE_URL= -SUPABASE_SERVICE_TOKEN= -REDIS_URL= -SCRAPING_BEE_API_KEY= -OPENAI_API_KEY= -BULL_AUTH_KEY= -LOGTAIL_KEY= -PLAYWRIGHT_MICROSERVICE_URL= -LLAMAPARSE_API_KEY= -TEST_API_KEY= \ No newline at end of file diff --git a/apps/api/src/__tests__/e2e/index.test.ts b/apps/api/src/__tests__/e2e/index.test.ts index 554453b2..ebf87c6e 100644 --- a/apps/api/src/__tests__/e2e/index.test.ts +++ b/apps/api/src/__tests__/e2e/index.test.ts @@ -3,12 +3,20 @@ import { app } from '../../index'; import dotenv from 'dotenv'; dotenv.config(); -const TEST_URL = 'http://localhost:3002' + +// const TEST_URL = 'http://localhost:3002' +const TEST_URL = 'http://127.0.0.1:3002' + + + + describe('E2E Tests for API Routes', () => { describe('GET /', () => { it('should return Hello, world! message', async () => { - const response = await request(TEST_URL).get('/'); + + const response = await request(TEST_URL).get('/'); + expect(response.statusCode).toBe(200); expect(response.text).toContain('SCRAPERS-JS: Hello, world! Fly.io'); }); @@ -16,6 +24,8 @@ describe('E2E Tests for API Routes', () => { describe('GET /test', () => { it('should return Hello, world! 
message', async () => { + + const response = await request(TEST_URL).get('/test'); expect(response.statusCode).toBe(200); expect(response.text).toContain('Hello, world!'); diff --git a/apps/api/src/services/logtail.ts b/apps/api/src/services/logtail.ts index 19ab7730..8b86a6b1 100644 --- a/apps/api/src/services/logtail.ts +++ b/apps/api/src/services/logtail.ts @@ -1,4 +1,19 @@ -const { Logtail } = require("@logtail/node"); -//dot env -require("dotenv").config(); -export const logtail = new Logtail(process.env.LOGTAIL_KEY); +import { Logtail } from "@logtail/node"; +import "dotenv/config"; + +// A mock Logtail class to handle cases where LOGTAIL_KEY is not provided +class MockLogtail { + info(message: string, context?: Record): void { + console.log(message, context); + } + error(message: string, context: Record = {}): void { + console.error(message, context); + } +} + +// Using the actual Logtail class if LOGTAIL_KEY exists, otherwise using the mock class +// Additionally, print a warning to the terminal if LOGTAIL_KEY is not provided +export const logtail = process.env.LOGTAIL_KEY ? new Logtail(process.env.LOGTAIL_KEY) : (() => { + console.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. 
see logtail.ts for more."); + return new MockLogtail(); +})(); From e6b46178ddbe9678036e2c11e51030007a2998ee Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sat, 20 Apr 2024 19:53:27 -0700 Subject: [PATCH 20/96] Caleb: added .env.example --- apps/api/.env.example | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 apps/api/.env.example diff --git a/apps/api/.env.example b/apps/api/.env.example new file mode 100644 index 00000000..392db9a2 --- /dev/null +++ b/apps/api/.env.example @@ -0,0 +1,18 @@ +# Required +NUM_WORKERS_PER_QUEUE=8 +PORT= +HOST= +SUPABASE_ANON_TOKEN= +SUPABASE_URL= +SUPABASE_SERVICE_TOKEN= +REDIS_URL= + +# Optional + +SCRAPING_BEE_API_KEY= +OPENAI_API_KEY= +BULL_AUTH_KEY= +LOGTAIL_KEY= +PLAYWRIGHT_MICROSERVICE_URL= +LLAMAPARSE_API_KEY= +TEST_API_KEY= \ No newline at end of file From d2f808a5fd272f7a9fd845980d2ac0e21147fb99 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 19:54:37 -0700 Subject: [PATCH 21/96] Update queue-worker.ts --- apps/api/src/services/queue-worker.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 8d7a7bd9..78ea0301 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -17,7 +17,7 @@ getWebScraperQueue().process( current_url: "", }); const start = Date.now(); - + const { success, message, docs } = await startWebScraperPipeline({ job }); const end = Date.now(); const timeTakenInSeconds = (end - start) / 1000; @@ -74,6 +74,19 @@ getWebScraperQueue().process( "Something went wrong... Contact help@mendable.ai or try again." /* etc... */, }; await callWebhook(job.data.team_id, data); + await logJob({ + success: false, + message: typeof error === 'string' ? error : (error.message ?? "Something went wrong... 
Contact help@mendable.ai"), + num_docs: 0, + docs: [], + time_taken: 0, + team_id: job.data.team_id, + mode: "crawl", + url: job.data.url, + crawlerOptions: job.data.crawlerOptions, + pageOptions: job.data.pageOptions, + origin: job.data.origin, + }); done(null, data); } } From be75aaa195ade4e41e8225cad7ba06e5df661385 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sun, 21 Apr 2024 09:31:22 -0700 Subject: [PATCH 22/96] Caleb: first version of supabase proxy to make db authentication optional --- apps/api/src/controllers/auth.ts | 11 +++++++ apps/api/src/controllers/crawl.ts | 15 +++++---- apps/api/src/controllers/scrape.ts | 26 ++++++++------- apps/api/src/services/supabase.ts | 53 +++++++++++++++++++++++++++--- 4 files changed, 83 insertions(+), 22 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 76bacbe0..6ae234dd 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -3,6 +3,7 @@ import { getRateLimiter } from "../../src/services/rate-limiter"; import { RateLimiterMode } from "../../src/types"; import { supabase_service } from "../../src/services/supabase"; + export async function authenticateUser( req, res, @@ -13,6 +14,16 @@ export async function authenticateUser( error?: string; status?: number; }> { + + console.log(process.env) + + if(process.env.USE_DB_AUTHENTICATION === "false"){ + console.log("WARNING - YOU'RE bypassing Authentication"); + return { success: true}; + } + + console.log("USING SUPABASE AUTH"); + const authHeader = req.headers.authorization; if (!authHeader) { return { success: false, error: "Unauthorized", status: 401 }; diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 17cfa625..36c013e3 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -8,6 +8,8 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; export async 
function crawlController(req: Request, res: Response) { try { + + console.log("hello") const { success, team_id, error, status } = await authenticateUser( req, res, @@ -16,14 +18,15 @@ export async function crawlController(req: Request, res: Response) { if (!success) { return res.status(status).json({ error }); } - - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); + + if (process.env.USE_DB_AUTHENTICATION === "true") { + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } } - // authenticate on supabase const url = req.body.url; if (!url) { return res.status(400).json({ error: "Url is required" }); diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 632fff59..47b00f04 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -40,18 +40,22 @@ export async function scrapeHelper( if (filteredDocs.length === 0) { return { success: true, error: "No page found", returnCode: 200 }; } - const { success, credit_usage } = await billTeam( - team_id, - filteredDocs.length - ); - if (!success) { - return { - success: false, - error: - "Failed to bill team. Insufficient credits or subscription not found.", - returnCode: 402, - }; + + if (process.env.USE_DB_AUTHENTICATION === "true") { + const { success, credit_usage } = await billTeam( + team_id, + filteredDocs.length + ); + if (!success) { + return { + success: false, + error: + "Failed to bill team. 
Insufficient credits or subscription not found.", + returnCode: 402, + }; + } } + return { success: true, data: filteredDocs[0], diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts index 49121faf..9a2366d9 100644 --- a/apps/api/src/services/supabase.ts +++ b/apps/api/src/services/supabase.ts @@ -1,6 +1,49 @@ -import { createClient } from "@supabase/supabase-js"; +import { createClient, SupabaseClient } from '@supabase/supabase-js'; -export const supabase_service = createClient( - process.env.SUPABASE_URL, - process.env.SUPABASE_SERVICE_TOKEN, -); +// SupabaseService class initializes the Supabase client conditionally based on environment variables. +class SupabaseService { + private client: SupabaseClient | null = null; + + constructor() { + const supabaseUrl = process.env.SUPABASE_URL; + const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN; + + // Only initialize the Supabase client if both URL and Service Token are provided. + if (process.env.USE_DB_AUTHENTICATION === "false") { + + // Warn the user that Authentication is disabled by setting the client to null + console.warn("\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"); + this.client = null; + } else if (!supabaseUrl || !supabaseServiceToken) { + console.error("\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"); + } else { + this.client = createClient(supabaseUrl, supabaseServiceToken); + } + } + + // Provides access to the initialized Supabase client, if available. + getClient(): SupabaseClient | null { + return this.client; + } +} + +// Using a Proxy to handle dynamic access to the Supabase client or service methods. +// This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error. 
+export const supabase_service: SupabaseClient = new Proxy(new SupabaseService(), { + get: function (target, prop, receiver) { + const client = target.getClient(); + // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. + if (client === null) { + console.error("Attempted to access Supabase client when it's not configured."); + return () => { + throw new Error("Supabase client is not configured."); + }; + } + // Direct access to SupabaseService properties takes precedence. + if (prop in target) { + return Reflect.get(target, prop, receiver); + } + // Otherwise, delegate access to the Supabase client. + return Reflect.get(client, prop, receiver); + } +}) as unknown as SupabaseClient; \ No newline at end of file From 5cdbf3a0ac1838219813e064b1bf8d35fc2d538f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 10:36:48 -0700 Subject: [PATCH 23/96] Nick: cleaner functions to handle authenticated requests that dont require ifs everywhere --- apps/api/src/controllers/auth.ts | 18 +++---- apps/api/src/controllers/crawl.ts | 16 +++--- apps/api/src/controllers/scrape.ts | 2 - apps/api/src/lib/withAuth.ts | 19 +++++++ .../src/services/billing/credit_billing.ts | 10 +++- apps/api/src/services/supabase.ts | 51 +++++++++++-------- apps/api/src/types.ts | 10 ++-- 7 files changed, 76 insertions(+), 50 deletions(-) create mode 100644 apps/api/src/lib/withAuth.ts diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 6ae234dd..49b2146a 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -1,10 +1,15 @@ import { parseApi } from "../../src/lib/parseApi"; import { getRateLimiter } from "../../src/services/rate-limiter"; -import { RateLimiterMode } from "../../src/types"; +import { AuthResponse, RateLimiterMode } from "../../src/types"; import { supabase_service } from "../../src/services/supabase"; +import { withAuth } from "../../src/lib/withAuth"; 
-export async function authenticateUser( +export async function authenticateUser(req, res, mode?: RateLimiterMode) : Promise { + return withAuth(supaAuthenticateUser)(req, res, mode); +} + +export async function supaAuthenticateUser( req, res, mode?: RateLimiterMode @@ -15,15 +20,6 @@ export async function authenticateUser( status?: number; }> { - console.log(process.env) - - if(process.env.USE_DB_AUTHENTICATION === "false"){ - console.log("WARNING - YOU'RE bypassing Authentication"); - return { success: true}; - } - - console.log("USING SUPABASE AUTH"); - const authHeader = req.headers.authorization; if (!authHeader) { return { success: false, error: "Unauthorized", status: 401 }; diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 36c013e3..1fb26988 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -8,8 +8,7 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; export async function crawlController(req: Request, res: Response) { try { - - console.log("hello") + console.log("hello"); const { success, team_id, error, status } = await authenticateUser( req, res, @@ -18,13 +17,11 @@ export async function crawlController(req: Request, res: Response) { if (!success) { return res.status(status).json({ error }); } - - if (process.env.USE_DB_AUTHENTICATION === "true") { - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); - } + + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); } const url = req.body.url; @@ -45,7 +42,6 @@ export async function crawlController(req: Request, res: Response) { returnOnlyUrls: true, }, pageOptions: pageOptions, - }); const docs = await 
a.getDocuments(false, (progress) => { diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 47b00f04..be708008 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -41,7 +41,6 @@ export async function scrapeHelper( return { success: true, error: "No page found", returnCode: 200 }; } - if (process.env.USE_DB_AUTHENTICATION === "true") { const { success, credit_usage } = await billTeam( team_id, filteredDocs.length @@ -54,7 +53,6 @@ export async function scrapeHelper( returnCode: 402, }; } - } return { success: true, diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts new file mode 100644 index 00000000..3ed8906b --- /dev/null +++ b/apps/api/src/lib/withAuth.ts @@ -0,0 +1,19 @@ +import { AuthResponse } from "../../src/types"; + +export function withAuth( + originalFunction: (...args: U) => Promise +) { + return async function (...args: U): Promise { + if (process.env.USE_DB_AUTHENTICATION === "false") { + console.warn("WARNING - You're bypassing authentication"); + return { success: true } as T; + } else { + try { + return await originalFunction(...args); + } catch (error) { + console.error("Error in withAuth function: ", error); + return { success: false, error: error.message } as T; + } + } + }; +} diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 6ac08436..bf5be60e 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -1,7 +1,12 @@ +import { withAuth } from "../../lib/withAuth"; import { supabase_service } from "../supabase"; const FREE_CREDITS = 100; + export async function billTeam(team_id: string, credits: number) { + return withAuth(supaBillTeam)(team_id, credits); +} +export async function supaBillTeam(team_id: string, credits: number) { if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } 
@@ -52,8 +57,11 @@ export async function billTeam(team_id: string, credits: number) { return { success: true, credit_usage }; } -// if team has enough credits for the operation, return true, else return false export async function checkTeamCredits(team_id: string, credits: number) { + return withAuth(supaCheckTeamCredits)(team_id, credits); +} +// if team has enough credits for the operation, return true, else return false +export async function supaCheckTeamCredits(team_id: string, credits: number) { if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts index 9a2366d9..fa6404d7 100644 --- a/apps/api/src/services/supabase.ts +++ b/apps/api/src/services/supabase.ts @@ -1,4 +1,4 @@ -import { createClient, SupabaseClient } from '@supabase/supabase-js'; +import { createClient, SupabaseClient } from "@supabase/supabase-js"; // SupabaseService class initializes the Supabase client conditionally based on environment variables. class SupabaseService { @@ -7,15 +7,17 @@ class SupabaseService { constructor() { const supabaseUrl = process.env.SUPABASE_URL; const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN; - // Only initialize the Supabase client if both URL and Service Token are provided. if (process.env.USE_DB_AUTHENTICATION === "false") { - // Warn the user that Authentication is disabled by setting the client to null - console.warn("\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"); + console.warn( + "\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m" + ); this.client = null; } else if (!supabaseUrl || !supabaseServiceToken) { - console.error("\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. 
Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"); + console.error( + "\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m" + ); } else { this.client = createClient(supabaseUrl, supabaseServiceToken); } @@ -29,21 +31,26 @@ class SupabaseService { // Using a Proxy to handle dynamic access to the Supabase client or service methods. // This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error. -export const supabase_service: SupabaseClient = new Proxy(new SupabaseService(), { - get: function (target, prop, receiver) { - const client = target.getClient(); - // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. - if (client === null) { - console.error("Attempted to access Supabase client when it's not configured."); - return () => { - throw new Error("Supabase client is not configured."); - }; - } - // Direct access to SupabaseService properties takes precedence. - if (prop in target) { - return Reflect.get(target, prop, receiver); - } - // Otherwise, delegate access to the Supabase client. - return Reflect.get(client, prop, receiver); +export const supabase_service: SupabaseClient = new Proxy( + new SupabaseService(), + { + get: function (target, prop, receiver) { + const client = target.getClient(); + // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. + if (client === null) { + console.error( + "Attempted to access Supabase client when it's not configured." + ); + return () => { + throw new Error("Supabase client is not configured."); + }; + } + // Direct access to SupabaseService properties takes precedence. 
+ if (prop in target) { + return Reflect.get(target, prop, receiver); + } + // Otherwise, delegate access to the Supabase client. + return Reflect.get(client, prop, receiver); + }, } -}) as unknown as SupabaseClient; \ No newline at end of file +) as unknown as SupabaseClient; diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index f9e5c739..7f527fba 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -25,7 +25,6 @@ export interface WebScraperOptions { origin?: string; } - export interface FirecrawlJob { success: boolean; message: string; @@ -40,8 +39,6 @@ export interface FirecrawlJob { origin: string; } - - export enum RateLimiterMode { Crawl = "crawl", CrawlStatus = "crawl-status", @@ -49,4 +46,9 @@ export enum RateLimiterMode { Preview = "preview", } - +export interface AuthResponse { + success: boolean; + team_id?: string; + error?: string; + status?: number; +} From ef4ffd3a18e3b1c31a51d7fb3a53544f574a6c27 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sun, 21 Apr 2024 10:56:30 -0700 Subject: [PATCH 24/96] Adding contributors guide --- apps/api/.env.example | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/apps/api/.env.example b/apps/api/.env.example index 9a4541c1..34e24b1f 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -1,18 +1,24 @@ -ENV= -NUM_WORKERS_PER_QUEUE=8 -PORT= -HOST= -SUPABASE_ANON_TOKEN= -SUPABASE_URL= +# ===== Required ENVS ====== +NUM_WORKERS_PER_QUEUE=8 +PORT=3002 +HOST=0.0.0.0 +REDIS_URL=redis://localhost:6379 + +## To turn on DB authentication, you need to set up supabase. +USE_DB_AUTHENTICATION=true + +# ===== Optional ENVS ====== + +# Supabase Setup (used to support DB authentication, advanced logging, etc.) 
+SUPABASE_ANON_TOKEN= +SUPABASE_URL= SUPABASE_SERVICE_TOKEN= -REDIS_URL= -# Optional - -SCRAPING_BEE_API_KEY= -OPENAI_API_KEY= -BULL_AUTH_KEY= -LOGTAIL_KEY= -PLAYWRIGHT_MICROSERVICE_URL= -LLAMAPARSE_API_KEY= -TEST_API_KEY= \ No newline at end of file +# Other Optionals +TEST_API_KEY= # use if you've set up authentication and want to test with a real API key +SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking +OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) +BULL_AUTH_KEY= # +LOGTAIL_KEY= # Use if you're configuring basic logging with logtail +PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs \ No newline at end of file From 401f992c562fd94cb6034bab882c7a70839d4468 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sun, 21 Apr 2024 11:19:40 -0700 Subject: [PATCH 25/96] Caleb: added contributors guide --- CONTRIBUTING.md | 94 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 224eb57b..5d4b69e7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,8 +1,96 @@ -# Contributing -We love contributions! Our contribution guide will be coming soon! +# Contributors guide: - +Welcome to firecrawl 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) + +If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help gettin on board, reach out to hello@mendable.ai for more or submit an issue! + + +## Hosting locally + +First, start by installing dependencies +1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) +2. 
pnpm [instructions](https://pnpm.io/installation) +3. redis - [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) + + +Set environment variables in a .env in the /apps/api/ directoryyou can copy over the template in .env.example. + +To start, we wont set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features ) + +```.env +# ===== Required ENVS ====== +NUM_WORKERS_PER_QUEUE=8 +PORT=3002 +HOST=0.0.0.0 +REDIS_URL=redis://localhost:6379 + +## To turn on DB authentication, you need to set up supabase. +USE_DB_AUTHENTICATION=false + +# ===== Optional ENVS ====== + +# Supabase Setup (used to support DB authentication, advanced logging, etc.) +SUPABASE_ANON_TOKEN= +SUPABASE_URL= +SUPABASE_SERVICE_TOKEN= + +# Other Optionals +TEST_API_KEY= # use if you've set up authentication and want to test with a real API key +SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking +OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) +BULL_AUTH_KEY= # +LOGTAIL_KEY= # Use if you're configuring basic logging with logtail +PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs + +``` + +You're going to need to open 3 terminals. + +### Terminal 1 - setting up redis + +Run the command anywhere within your project + +`redis-server` + + +### Terminal 2 - setting up workers + +Now, navigate to the apps/api/ directory and run: +`pnpm run workers` + +### Terminal 3 - setting up the main server + + +To do this, navigate to the apps/api/ directory and run if you don’t have this already, install pnpm here: https://pnpm.io/installation +Next, run your server with`pnpm run start` + + + +### Terminal 3 - sending our first request. + +Alright: now let’s send our first request. 
+ +```curl +curl -X GET http://localhost:3002/test +``` +This should return the response Hello, world! + + +If you’d like to test the crawl endpoint, you can run this + +```curl +curl -X POST http://localhost:3002/v0/crawl \ + -H 'Content-Type: application/json' \ + -d '{ + "url": "https://mendable.ai" + }' +``` + +## Tests: + +The best way to do this is run the test with npx:Once again, navigate to the `apps/api` directory`npx jest` From 898d729a8455785082e2015e695604d1c3c3ff0c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:27:31 -0700 Subject: [PATCH 26/96] Nick: tests --- apps/api/src/__tests__/e2e/index.test.ts | 346 +++++++++--------- .../src/__tests__/e2e_noAuth/index.test.ts | 156 ++++++++ apps/api/src/controllers/crawl.ts | 1 - apps/api/src/index.ts | 2 +- 4 files changed, 334 insertions(+), 171 deletions(-) create mode 100644 apps/api/src/__tests__/e2e_noAuth/index.test.ts diff --git a/apps/api/src/__tests__/e2e/index.test.ts b/apps/api/src/__tests__/e2e/index.test.ts index ebf87c6e..ba01a7ca 100644 --- a/apps/api/src/__tests__/e2e/index.test.ts +++ b/apps/api/src/__tests__/e2e/index.test.ts @@ -1,189 +1,197 @@ -import request from 'supertest'; -import { app } from '../../index'; -import dotenv from 'dotenv'; +import request from "supertest"; +import { app } from "../../index"; +import dotenv from "dotenv"; dotenv.config(); // const TEST_URL = 'http://localhost:3002' -const TEST_URL = 'http://127.0.0.1:3002' +const TEST_URL = "http://127.0.0.1:3002"; - - - -describe('E2E Tests for API Routes', () => { - describe('GET /', () => { - it('should return Hello, world! message', async () => { - - const response = await request(TEST_URL).get('/'); - - expect(response.statusCode).toBe(200); - expect(response.text).toContain('SCRAPERS-JS: Hello, world! Fly.io'); - }); - }); - - describe('GET /test', () => { - it('should return Hello, world! 
message', async () => { - - - const response = await request(TEST_URL).get('/test'); - expect(response.statusCode).toBe(200); - expect(response.text).toContain('Hello, world!'); - }); - }); - - describe('POST /v0/scrape', () => { - it('should require authorization', async () => { - const response = await request(app).post('/v0/scrape'); - expect(response.statusCode).toBe(401); + describe("E2E Tests for API Routes", () => { + beforeAll(() => { + process.env.USE_DB_AUTHENTICATION = "true"; }); - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/scrape') - .set('Authorization', `Bearer invalid-api-key`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(401); + afterAll(() => { + delete process.env.USE_DB_AUTHENTICATION; }); - it('should return a successful response with a valid preview token', async () => { - const response = await request(TEST_URL) - .post('/v0/scrape') - .set('Authorization', `Bearer this_is_just_a_preview_token`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - }, 10000); // 10 seconds timeout + describe("GET /", () => { + it("should return Hello, world! 
message", async () => { + const response = await request(TEST_URL).get("/"); - it('should return a successful response with a valid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/scrape') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('data'); - expect(response.body.data).toHaveProperty('content'); - expect(response.body.data).toHaveProperty('markdown'); - expect(response.body.data).toHaveProperty('metadata'); - expect(response.body.data.content).toContain('🔥 FireCrawl'); - }, 30000); // 30 seconds timeout - }); - - describe('POST /v0/crawl', () => { - it('should require authorization', async () => { - const response = await request(TEST_URL).post('/v0/crawl'); - expect(response.statusCode).toBe(401); - }); - - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawl') - .set('Authorization', `Bearer invalid-api-key`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(401); - }); - - it('should return a successful response with a valid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawl') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('jobId'); - expect(response.body.jobId).toMatch(/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/); - }); - - // Additional tests for insufficient credits? 
- }); - - describe('POST /v0/crawlWebsitePreview', () => { - it('should require authorization', async () => { - const response = await request(TEST_URL).post('/v0/crawlWebsitePreview'); - expect(response.statusCode).toBe(401); - }); - - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawlWebsitePreview') - .set('Authorization', `Bearer invalid-api-key`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(401); - }); - - it('should return a successful response with a valid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawlWebsitePreview') - .set('Authorization', `Bearer this_is_just_a_preview_token`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('jobId'); - expect(response.body.jobId).toMatch(/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/); - }); - }); - - describe('GET /v0/crawl/status/:jobId', () => { - it('should require authorization', async () => { - const response = await request(TEST_URL).get('/v0/crawl/status/123'); - expect(response.statusCode).toBe(401); - }); - - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .get('/v0/crawl/status/123') - .set('Authorization', `Bearer invalid-api-key`); - expect(response.statusCode).toBe(401); - }); - - it('should return Job not found for invalid job ID', async () => { - const response = await request(TEST_URL) - .get('/v0/crawl/status/invalidJobId') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`); - expect(response.statusCode).toBe(404); - }); - - it('should return a successful response for a valid crawl job', async () => { - const crawlResponse = await request(TEST_URL) - 
.post('/v0/crawl') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(crawlResponse.statusCode).toBe(200); - - - const response = await request(TEST_URL) - .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`); expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('status'); - expect(response.body.status).toBe('active'); + expect(response.text).toContain("SCRAPERS-JS: Hello, world! Fly.io"); + }); + }); + + describe("GET /test", () => { + it("should return Hello, world! message", async () => { + const response = await request(TEST_URL).get("/test"); + expect(response.statusCode).toBe(200); + expect(response.text).toContain("Hello, world!"); + }); + }); + + describe("POST /v0/scrape", () => { + it("should require authorization", async () => { + const response = await request(app).post("/v0/scrape"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(401); + }); + it("should return a successful response with a valid preview token", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer this_is_just_a_preview_token`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + }, 10000); // 10 seconds timeout + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", 
"application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("content"); + expect(response.body.data).toHaveProperty("markdown"); + expect(response.body.data).toHaveProperty("metadata"); + expect(response.body.data.content).toContain("🔥 FireCrawl"); + }, 30000); // 30 seconds timeout + }); + + describe("POST /v0/crawl", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).post("/v0/crawl"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(401); + }); + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + + // Additional tests for insufficient credits? 
+ }); + + describe("POST /v0/crawlWebsitePreview", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).post( + "/v0/crawlWebsitePreview" + ); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(401); + }); + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Authorization", `Bearer this_is_just_a_preview_token`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + }); + + describe("GET /v0/crawl/status/:jobId", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).get("/v0/crawl/status/123"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .get("/v0/crawl/status/123") + .set("Authorization", `Bearer invalid-api-key`); + expect(response.statusCode).toBe(401); + }); + + it("should return Job not found for invalid job ID", async () => { + const response = await request(TEST_URL) + .get("/v0/crawl/status/invalidJobId") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(response.statusCode).toBe(404); + }); + + it("should return a successful response for a valid crawl job", async () => { + const crawlResponse = await request(TEST_URL) + 
.post("/v0/crawl") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(crawlResponse.statusCode).toBe(200); + + const response = await request(TEST_URL) + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("status"); + expect(response.body.status).toBe("active"); // wait for 30 seconds await new Promise((r) => setTimeout(r, 30000)); const completedResponse = await request(TEST_URL) - .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`); + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); expect(completedResponse.statusCode).toBe(200); - expect(completedResponse.body).toHaveProperty('status'); - expect(completedResponse.body.status).toBe('completed'); - expect(completedResponse.body).toHaveProperty('data'); - expect(completedResponse.body.data[0]).toHaveProperty('content'); - expect(completedResponse.body.data[0]).toHaveProperty('markdown'); - expect(completedResponse.body.data[0]).toHaveProperty('metadata'); - expect(completedResponse.body.data[0].content).toContain('🔥 FireCrawl'); - }, 60000); // 60 seconds - }); + expect(completedResponse.body).toHaveProperty("status"); + expect(completedResponse.body.status).toBe("completed"); + expect(completedResponse.body).toHaveProperty("data"); + expect(completedResponse.body.data[0]).toHaveProperty("content"); + expect(completedResponse.body.data[0]).toHaveProperty("markdown"); + expect(completedResponse.body.data[0]).toHaveProperty("metadata"); + expect(completedResponse.body.data[0].content).toContain( + "🔥 FireCrawl" + ); + }, 60000); // 60 seconds + }); - describe('GET /is-production', () => { - it('should return the production status', async () => 
{ - const response = await request(TEST_URL).get('/is-production'); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('isProduction'); + describe("GET /is-production", () => { + it("should return the production status", async () => { + const response = await request(TEST_URL).get("/is-production"); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("isProduction"); + }); }); }); -}); \ No newline at end of file diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts new file mode 100644 index 00000000..e0aca36f --- /dev/null +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts @@ -0,0 +1,156 @@ +import request from "supertest"; +import { app } from "../../index"; +import dotenv from "dotenv"; +const fs = require("fs"); +const path = require("path"); + +dotenv.config(); + +const TEST_URL = "http://127.0.0.1:3002"; + +describe("E2E Tests for API Routes with No Authentication", () => { + let originalEnv: NodeJS.ProcessEnv; + + // save original process.env + beforeAll(() => { + originalEnv = { ...process.env }; + process.env.USE_DB_AUTHENTICATION = "false"; + process.env.SUPABASE_ANON_TOKEN = ""; + process.env.SUPABASE_URL = ""; + process.env.SUPABASE_SERVICE_TOKEN = ""; + process.env.SCRAPING_BEE_API_KEY = ""; + process.env.OPENAI_API_KEY = ""; + process.env.BULL_AUTH_KEY = ""; + process.env.LOGTAIL_KEY = ""; + process.env.PLAYWRIGHT_MICROSERVICE_URL = ""; + process.env.LLAMAPARSE_API_KEY = ""; + process.env.TEST_API_KEY = ""; + }); + + // restore original process.env + afterAll(() => { + process.env = originalEnv; + }); + + + describe("GET /", () => { + it("should return Hello, world! message", async () => { + const response = await request(TEST_URL).get("/"); + expect(response.statusCode).toBe(200); + expect(response.text).toContain("SCRAPERS-JS: Hello, world! Fly.io"); + }); + }); + + describe("GET /test", () => { + it("should return Hello, world! 
message", async () => { + const response = await request(TEST_URL).get("/test"); + expect(response.statusCode).toBe(200); + expect(response.text).toContain("Hello, world!"); + }); + }); + + describe("POST /v0/scrape", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).post("/v0/scrape"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + }, 10000); // 10 seconds timeout + }); + + describe("POST /v0/crawl", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).post("/v0/crawl"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response", async () => { + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + }); + + describe("POST /v0/crawlWebsitePreview", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).post("/v0/crawlWebsitePreview"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response", async () => { + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + 
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + }); + + describe("GET /v0/crawl/status/:jobId", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).get("/v0/crawl/status/123"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return Job not found for invalid job ID", async () => { + const response = await request(TEST_URL).get( + "/v0/crawl/status/invalidJobId" + ); + expect(response.statusCode).toBe(404); + }); + + it("should return a successful response for a valid crawl job", async () => { + const crawlResponse = await request(TEST_URL) + .post("/v0/crawl") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(crawlResponse.statusCode).toBe(200); + + const response = await request(TEST_URL).get( + `/v0/crawl/status/${crawlResponse.body.jobId}` + ); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("status"); + expect(response.body.status).toBe("active"); + + // wait for 30 seconds + await new Promise((r) => setTimeout(r, 30000)); + + const completedResponse = await request(TEST_URL).get( + `/v0/crawl/status/${crawlResponse.body.jobId}` + ); + expect(completedResponse.statusCode).toBe(200); + expect(completedResponse.body).toHaveProperty("status"); + expect(completedResponse.body.status).toBe("completed"); + expect(completedResponse.body).toHaveProperty("data"); + expect(completedResponse.body.data[0]).toHaveProperty("content"); + expect(completedResponse.body.data[0]).toHaveProperty("markdown"); + expect(completedResponse.body.data[0]).toHaveProperty("metadata"); + expect(completedResponse.body.data[0].content).toContain("🔥 FireCrawl"); + }, 60000); // 60 seconds + }); + + describe("GET /is-production", () => { + it("should return the production status", async () => { + const response = await request(TEST_URL).get("/is-production"); + 
expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("isProduction"); + }); + }); +}); diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 1fb26988..bd3fecaf 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -8,7 +8,6 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; export async function crawlController(req: Request, res: Response) { try { - console.log("hello"); const { success, team_id, error, status } = await authenticateUser( req, res, diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 1a42eb44..a2e5c517 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -5,7 +5,6 @@ import "dotenv/config"; import { getWebScraperQueue } from "./services/queue-service"; import { redisClient } from "./services/rate-limiter"; import { v0Router } from "./routes/v0"; - const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { ExpressAdapter } = require("@bull-board/express"); @@ -48,6 +47,7 @@ const DEFAULT_PORT = process.env.PORT ?? 3002; const HOST = process.env.HOST ?? 
"localhost"; redisClient.connect(); + export function startServer(port = DEFAULT_PORT) { const server = app.listen(Number(port), HOST, () => { console.log(`Server listening on port ${port}`); From 52620bab16e087bfa4c9d1f11ca91af8f1f79632 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:39:36 -0700 Subject: [PATCH 27/96] Nick: prod and local-no-auth tests --- .github/workflows/ci.yml | 2 +- apps/api/package.json | 2 ++ apps/api/src/__tests__/{e2e => e2e_withAuth}/index.test.ts | 0 apps/api/src/lib/withAuth.ts | 7 ++++++- 4 files changed, 9 insertions(+), 2 deletions(-) rename apps/api/src/__tests__/{e2e => e2e_withAuth}/index.test.ts (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9a5b794..69a8a243 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,5 +54,5 @@ jobs: id: start_workers - name: Run E2E tests run: | - npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false + npm run test:prod working-directory: ./apps/api \ No newline at end of file diff --git a/apps/api/package.json b/apps/api/package.json index cbce4bed..0b533f99 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -11,6 +11,8 @@ "start:dev": "nodemon --exec ts-node src/index.ts", "build": "tsc", "test": "jest --verbose", + "test:local-no-auth":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", + "test:prod":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "workers": "nodemon --exec ts-node src/services/queue-worker.ts", "worker:production": "node dist/src/services/queue-worker.js", "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest", diff --git a/apps/api/src/__tests__/e2e/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts similarity index 
100% rename from apps/api/src/__tests__/e2e/index.test.ts rename to apps/api/src/__tests__/e2e_withAuth/index.test.ts diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts index 3ed8906b..ea5aa4d8 100644 --- a/apps/api/src/lib/withAuth.ts +++ b/apps/api/src/lib/withAuth.ts @@ -1,11 +1,16 @@ import { AuthResponse } from "../../src/types"; +let warningCount = 0; + export function withAuth( originalFunction: (...args: U) => Promise ) { return async function (...args: U): Promise { if (process.env.USE_DB_AUTHENTICATION === "false") { - console.warn("WARNING - You're bypassing authentication"); + if (warningCount < 5) { + console.warn("WARNING - You're bypassing authentication"); + warningCount++; + } return { success: true } as T; } else { try { From 30a8482a68e42084a36a892921854fa49144b524 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:41:34 -0700 Subject: [PATCH 28/96] Nick: --- README.md | 2 +- SELF_HOST.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 56f8c5cd..f6b67b71 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ We provide an easy to use API with our hosted version. You can find the playgrou - [ ] LangchainJS - Coming Soon -Self-host. To self-host refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/SELF_HOST.md). +To run locally, refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md). ### API Key diff --git a/SELF_HOST.md b/SELF_HOST.md index ba0ae234..8d1d490f 100644 --- a/SELF_HOST.md +++ b/SELF_HOST.md @@ -1,6 +1,6 @@ # Self-hosting Firecrawl -Guide coming soon. +Refer to [CONTRIBUTING.md](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md) for instructions on how to run it locally. *This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. 
The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. It is not ready for full self-host yet - we're working on it* From 2f29a4da8eb3b10b9c9782e17e46d662ec3010c9 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:45:15 -0700 Subject: [PATCH 29/96] Update CONTRIBUTING.md --- CONTRIBUTING.md | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d4b69e7..abd3027d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,12 @@ # Contributors guide: -Welcome to firecrawl 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) +Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help gettin on board, reach out to hello@mendable.ai for more or submit an issue! -## Hosting locally +## Running the project locally First, start by installing dependencies 1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) @@ -46,6 +46,16 @@ LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse ``` +### Installing dependencies + +First, install the dependencies using pnpm. + +```bash +pnpm install +``` + +### Running the project + You're going to need to open 3 terminals. 
### Terminal 1 - setting up redis @@ -64,7 +74,7 @@ Now, navigate to the apps/api/ directory and run: To do this, navigate to the apps/api/ directory and run if you don’t have this already, install pnpm here: https://pnpm.io/installation -Next, run your server with`pnpm run start` +Next, run your server with `pnpm run start` @@ -90,7 +100,8 @@ curl -X POST http://localhost:3002/v0/crawl \ ## Tests: -The best way to do this is run the test with npx:Once again, navigate to the `apps/api` directory`npx jest` - +The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication. + +If you'd like to run the tests with authentication, run `npm run test:prod` From 84be3d2bcaa6af7263b8aaff5b71bdb805eb28e0 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:51:39 -0700 Subject: [PATCH 30/96] Update CONTRIBUTING.md --- CONTRIBUTING.md | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index abd3027d..733c7877 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,3 @@ - # Contributors guide: Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) @@ -11,14 +10,15 @@ If you're contributing, note that the process is similar to other open source re First, start by installing dependencies 1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) 2. pnpm [instructions](https://pnpm.io/installation) -3. redis - [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) +3. redis [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) Set environment variables in a .env in the /apps/api/ directoryyou can copy over the template in .env.example. 
To start, we wont set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features ) -```.env +.env: +``` # ===== Required ENVS ====== NUM_WORKERS_PER_QUEUE=8 PORT=3002 @@ -62,21 +62,28 @@ You're going to need to open 3 terminals. Run the command anywhere within your project -`redis-server` +```bash +redis-server +``` - ### Terminal 2 - setting up workers Now, navigate to the apps/api/ directory and run: -`pnpm run workers` - +```bash +pnpm run workers +``` + +This will start the workers who are responsible for processing crawl jobs. + ### Terminal 3 - setting up the main server To do this, navigate to the apps/api/ directory and run if you don’t have this already, install pnpm here: https://pnpm.io/installation -Next, run your server with `pnpm run start` - +Next, run your server with: +```bash +pnpm run start +``` ### Terminal 3 - sending our first request. From 6560c968e1ba182dfff2765bf7751e623e2d175f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 12:02:11 -0700 Subject: [PATCH 31/96] Update types.ts --- apps/api/src/types.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index 7f527fba..5d778a22 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -52,3 +52,5 @@ export interface AuthResponse { error?: string; status?: number; } + + From 001bf0c504df8cb9ff08673fab69cdbeb3413dd7 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 12:05:12 -0700 Subject: [PATCH 32/96] Update package.json --- apps/api/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/package.json b/apps/api/package.json index 0b533f99..8ae16099 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -10,7 +10,7 @@ "flyio": "node dist/src/index.js", "start:dev": "nodemon --exec ts-node src/index.ts", "build": "tsc", - "test": "jest --verbose", + "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 
--watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "test:local-no-auth":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", "test:prod":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "workers": "nodemon --exec ts-node src/services/queue-worker.ts", From 3ead2efdcaab6fd407eddb01e3654c88ac6bfba9 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 12:05:30 -0700 Subject: [PATCH 33/96] Update fly.yml --- .github/workflows/fly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index fe042c6e..ddeee55d 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -54,7 +54,7 @@ jobs: id: start_workers - name: Run E2E tests run: | - npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false + npm run test:prod working-directory: ./apps/api deploy: name: Deploy app From 572b7e8dc57a7768321d15ef34dc688ad6337a94 Mon Sep 17 00:00:00 2001 From: Matt <77928207+mattzcarey@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:38:05 +0100 Subject: [PATCH 34/96] chore: add context.close --- apps/playwright-service/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/playwright-service/main.py b/apps/playwright-service/main.py index 5d6f331c..b4b83de9 100644 --- a/apps/playwright-service/main.py +++ b/apps/playwright-service/main.py @@ -21,6 +21,7 @@ async def root(body: UrlModel): # Using Pydantic model for request body await page.goto(body.url) # Adjusted to use the url from the request body model page_content = await page.content() # Get the HTML content of the page + await context.close() await browser.close() json_compatible_item_data = {"content": page_content} From de7e1f501bc9708a7d018dce81abd40944eadd6a Mon Sep 17 00:00:00 2001 From: 
Nicolas Date: Mon, 22 Apr 2024 08:41:54 -0700 Subject: [PATCH 35/96] Update openapi.json --- apps/api/openapi.json | 566 ++++++++++++++++++++++-------------------- 1 file changed, 290 insertions(+), 276 deletions(-) diff --git a/apps/api/openapi.json b/apps/api/openapi.json index bb58ae38..3916738b 100644 --- a/apps/api/openapi.json +++ b/apps/api/openapi.json @@ -1,295 +1,309 @@ { - "openapi": "3.0.0", - "info": { - "title": "Firecrawl API", - "version": "1.0.0", - "description": "API for interacting with Firecrawl services to convert websites to LLM-ready data.", - "contact": { - "name": "Firecrawl Support", - "url": "https://firecrawl.dev/support", - "email": "help@mendable.ai" - } - }, - "servers": [ - { - "url": "https://api.firecrawl.dev/v0" - } - ], - "paths": { - "/scrape": { - "post": { - "summary": "Scrape a single URL", - "operationId": "scrapeSingleUrl", - "tags": ["Scraping"], - "security": [ - { - "bearerAuth": [] - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "The URL to scrape" - } - }, - "required": ["url"] - } - } - } - }, - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScrapeResponse" - } - } - } - }, - "402": { - "description": "Payment required" - }, - "429": { - "description": "Too many requests" - }, - "500": { - "description": "Server error" - } + "openapi": "3.0.0", + "info": { + "title": "Firecrawl API", + "version": "1.0.0", + "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.", + "contact": { + "name": "Firecrawl Support", + "url": "https://firecrawl.dev/support", + "email": "support@firecrawl.dev" + } + }, + "servers": [ + { + "url": "https://api.firecrawl.dev/v0" + } + ], + "paths": { + "/scrape": { + "post": { + "summary": 
"Scrape a single URL", + "operationId": "scrapeSingleUrl", + "tags": ["Scraping"], + "security": [ + { + "bearerAuth": [] } - } - }, - "/crawl": { - "post": { - "summary": "Crawl multiple URLs based on options", - "operationId": "crawlUrls", - "tags": ["Crawling"], - "security": [ - { - "bearerAuth": [] - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "The base URL to start crawling from" - }, - "crawlerOptions": { - "type": "object", - "properties": { - "includes": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL patterns to include" - }, - "excludes": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL patterns to exclude" - }, - "generateImgAltText": { - "type": "boolean", - "description": "Generate alt text for images using LLMs (must have a paid plan)", - "default": false - }, - "limit": { - "type": "integer", - "description": "Maximum number of pages to crawl" - } - } - }, - "pageOptions": { - "type": "object", - "properties": { - "onlyMainContent": { - "type": "boolean", - "description": "Only return the main content of the page excluding headers, navs, footers, etc.", - "default": false - } - } - } - }, - "required": ["url"] - } - } - } - }, - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CrawlResponse" - } - } - } - }, - "402": { - "description": "Payment required" - }, - "429": { - "description": "Too many requests" - }, - "500": { - "description": "Server error" - } - } - } - }, - "/crawl/status/{jobId}": { - "get": { - "tags": ["Crawl"], - "summary": "Get the status of a crawl job", - "operationId": "getCrawlStatus", - "security": [ - { - "bearerAuth": [] - } - ], - "parameters": [ - { - "name": "jobId", - "in": "path", - "description": 
"ID of the crawl job", - "required": true, + ], + "requestBody": { + "required": true, + "content": { + "application/json": { "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL to scrape" + }, + "pageOptions": { "type": "object", "properties": { - "status": { - "type": "string", - "description": "Status of the job (completed, active, failed, paused)" - }, - "current": { - "type": "integer", - "description": "Current page number" - }, - "current_url": { - "type": "string", - "description": "Current URL being scraped" - }, - "current_step": { - "type": "string", - "description": "Current step in the process" - }, - "total": { - "type": "integer", - "description": "Total number of pages" - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScrapeResponse" - }, - "description": "Data returned from the job (null when it is in progress)" + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false } } } - } - } - }, - "402": { - "description": "Payment required" - }, - "429": { - "description": "Too many requests" - }, - "500": { - "description": "Server error" - } - } - } - } - }, - "components": { - "securitySchemes": { - "bearerAuth": { - "type": "http", - "scheme": "bearer" - } - }, - "schemas": { - "ScrapeResponse": { - "type": "object", - "properties": { - "success": { - "type": "boolean" - }, - "data": { - "type": "object", - "properties": { - "content": { - "type": "string" }, - "markdown": { - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "language": { - "type": "string", - "nullable": true - }, - 
"sourceURL": { - "type": "string", - "format": "uri" - } - } - } + "required": ["url"] } } } }, - "CrawlResponse": { - "type": "object", - "properties": { - "jobId": { - "type": "string" + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ScrapeResponse" + } + } } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" } } } }, - "security": [ - { - "bearerAuth": [] + "/crawl": { + "post": { + "summary": "Crawl multiple URLs based on options", + "operationId": "crawlUrls", + "tags": ["Crawling"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The base URL to start crawling from" + }, + "crawlerOptions": { + "type": "object", + "properties": { + "includes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to include" + }, + "excludes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to exclude" + }, + "generateImgAltText": { + "type": "boolean", + "description": "Generate alt text for images using LLMs (must have a paid plan)", + "default": false + }, + "returnOnlyUrls": { + "type": "boolean", + "description": "If true, returns only the URLs as a list on the crawl status. 
Attention: the return response will be a list of URLs inside the data, not a list of documents.", + "default": false + }, + "limit": { + "type": "integer", + "description": "Maximum number of pages to crawl" + } + } + }, + "pageOptions": { + "type": "object", + "properties": { + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false + } + } + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CrawlResponse" + } + } + } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" + } + } } - ] - } - \ No newline at end of file + }, + "/crawl/status/{jobId}": { + "get": { + "tags": ["Crawl"], + "summary": "Get the status of a crawl job", + "operationId": "getCrawlStatus", + "security": [ + { + "bearerAuth": [] + } + ], + "parameters": [ + { + "name": "jobId", + "in": "path", + "description": "ID of the crawl job", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "Status of the job (completed, active, failed, paused)" + }, + "current": { + "type": "integer", + "description": "Current page number" + }, + "current_url": { + "type": "string", + "description": "Current URL being scraped" + }, + "current_step": { + "type": "string", + "description": "Current step in the process" + }, + "total": { + "type": "integer", + "description": "Total number of pages" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScrapeResponse" + }, + "description": "Data returned from the job (null 
when it is in progress)" + } + } + } + } + } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" + } + } + } + } + }, + "components": { + "securitySchemes": { + "bearerAuth": { + "type": "http", + "scheme": "bearer" + } + }, + "schemas": { + "ScrapeResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "content": { + "type": "string" + }, + "markdown": { + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + }, + "CrawlResponse": { + "type": "object", + "properties": { + "jobId": { + "type": "string" + } + } + } + } + }, + "security": [ + { + "bearerAuth": [] + } + ] +} From 18450b5f9a51c20fa7464930c2065c0de478ae37 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 22 Apr 2024 12:42:46 -0700 Subject: [PATCH 36/96] Nick: tutorials --- tutorials/data-extraction-using-llms.mdx | 95 ++++++++++++++++++++++++ tutorials/rag-llama3.mdx | 91 +++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 tutorials/data-extraction-using-llms.mdx create mode 100644 tutorials/rag-llama3.mdx diff --git a/tutorials/data-extraction-using-llms.mdx b/tutorials/data-extraction-using-llms.mdx new file mode 100644 index 00000000..554e7877 --- /dev/null +++ b/tutorials/data-extraction-using-llms.mdx @@ -0,0 +1,95 @@ +--- +title: "Extract website data using LLMs" +description: "Learn how to use Firecrawl and Groq to extract structured data from a web page in a few lines of code." +'og:image': "/images/og.png" +'twitter:image': "/images/og.png" +--- + +## Setup + +Install our python dependencies, including groq and firecrawl-py. 
+ +```bash +pip install groq firecrawl-py +``` + +## Getting your Groq and Firecrawl API Keys + +To use Groq and Firecrawl, you will need to get your API keys. You can get your Groq API key from [here](https://groq.com) and your Firecrawl API key from [here](https://firecrawl.dev). + +## Load website with Firecrawl + +To be able to get all the data from a website page and make sure it is in the cleanest format, we will use [FireCrawl](https://firecrawl.dev). It handles by-passing JS-blocked websites, extracting the main content, and outputting in a LLM-readable format for increased accuracy. + +Here is how we will scrape a website url using Firecrawl. We will also set a `pageOptions` for only extracting the main content (`onlyMainContent: True`) of the website page - excluding the navs, footers, etc. + +```python +from firecrawl import FirecrawlApp # Importing the FireCrawlLoader + +url = "https://about.fb.com/news/2024/04/introducing-our-open-mixed-reality-ecosystem/" + +firecrawl = FirecrawlApp( + api_key="fc-YOUR_FIRECRAWL_API_KEY", +) +page_content = firecrawl.scrape_url(url=url, # Target URL to crawl + params={ + "pageOptions":{ + "onlyMainContent": True # Ignore navs, footers, etc. + } + }) +print(page_content) +``` + +Perfect, now we have clean data from the website - ready to be fed to the LLM for data extraction. + +## Extraction and Generation + +Now that we have the website data, let's use Groq to pull out the information we need. We'll use Groq Llama 3 model in JSON mode and pick out certain fields from the page content. + +We are using LLama 3 8b model for this example. Feel free to use bigger models for improved results. 
+ +```python +import json +from groq import Groq + +client = Groq( + api_key="gsk_YOUR_GROQ_API_KEY", # Note: Replace 'API_KEY' with your actual Groq API key +) + +# Here we define the fields we want to extract from the page content +extract = ["summary","date","companies_building_with_quest","title_of_the_article","people_testimonials"] + +completion = client.chat.completions.create( + model="llama3-8b-8192", + messages=[ + { + "role": "system", + "content": "You are a legal advisor who extracts information from documents in JSON." + }, + { + "role": "user", + # Here we pass the page content and the fields we want to extract + "content": f"Extract the following information from the provided documentation:\Page content:\n\n{page_content}\n\nInformation to extract: {extract}" + } + ], + temperature=0, + max_tokens=1024, + top_p=1, + stream=False, + stop=None, + # We set the response format to JSON object + response_format={"type": "json_object"} +) + + +# Pretty print the JSON response +dataExtracted = json.dumps(str(completion.choices[0].message.content), indent=4) + +print(dataExtracted) +``` + +## And Voila! + +You have now built a data extraction bot using Groq and Firecrawl. You can now use this bot to extract structured data from any website. + +If you have any questions or need help, feel free to reach out to us at [Firecrawl](https://firecrawl.dev). \ No newline at end of file diff --git a/tutorials/rag-llama3.mdx b/tutorials/rag-llama3.mdx new file mode 100644 index 00000000..ae9c48f3 --- /dev/null +++ b/tutorials/rag-llama3.mdx @@ -0,0 +1,91 @@ +--- +title: "Build a 'Chat with website' using Groq Llama 3" +description: "Learn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot." +--- + +## Setup + +Install our python dependencies, including langchain, groq, faiss, ollama, and firecrawl-py. 
+ +```bash +pip install --upgrade --quiet langchain langchain-community groq faiss-cpu ollama firecrawl-py +``` + +We will be using Ollama for the embeddings, you can download Ollama [here](https://ollama.com/). But feel free to use any other embeddings you prefer. + +## Load website with Firecrawl + +To be able to get all the data from a website and make sure it is in the cleanest format, we will use FireCrawl. Firecrawl integrates very easily with Langchain as a document loader. + +Here is how you can load a website with FireCrawl: + +```python +from langchain_community.document_loaders import FireCrawlLoader # Importing the FireCrawlLoader + +url = "https://firecrawl.dev" +loader = FireCrawlLoader( + api_key="fc-YOUR_API_KEY", # Note: Replace 'YOUR_API_KEY' with your actual FireCrawl API key + url=url, # Target URL to crawl + mode="crawl" # Mode set to 'crawl' to crawl all accessible subpages +) +docs = loader.load() +``` + +## Setup the Vectorstore + +Next, we will setup the vectorstore. The vectorstore is a data structure that allows us to store and query embeddings. We will use the Ollama embeddings and the FAISS vectorstore. +We split the documents into chunks of 1000 characters each, with a 200 character overlap. This is to ensure that the chunks are not too small and not too big - and that it can fit into the LLM model when we query it. + +```python +from langchain_community.embeddings import OllamaEmbeddings +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_community.vectorstores import FAISS + +text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) +splits = text_splitter.split_documents(docs) +vectorstore = FAISS.from_documents(documents=splits, embedding=OllamaEmbeddings()) +``` + +## Retrieval and Generation + +Now that our documents are loaded and the vectorstore is setup, we can, based on user's question, do a similarity search to retrieve the most relevant documents. 
That way we can use these documents to be fed to the LLM model. + + +```python +question = "What is firecrawl?" +docs = vectorstore.similarity_search(query=question) +``` + +## Generation +Last but not least, you can use the Groq to generate a response to a question based on the documents we have loaded. + +```python +from groq import Groq + +client = Groq( + api_key="YOUR_GROQ_API_KEY", +) + +completion = client.chat.completions.create( + model="llama3-8b-8192", + messages=[ + { + "role": "user", + "content": f"You are a friendly assistant. Your job is to answer the users question based on the documentation provided below:\nDocs:\n\n{docs}\n\nQuestion: {question}" + } + ], + temperature=1, + max_tokens=1024, + top_p=1, + stream=False, + stop=None, +) + +print(completion.choices[0].message) +``` + +## And Voila! + +You have now built a 'Chat with your website' bot using Llama 3, Groq Llama 3, Langchain, and Firecrawl. You can now use this bot to answer questions based on the documentation of your website. + +If you have any questions or need help, feel free to reach out to us at [Firecrawl](https://firecrawl.dev). \ No newline at end of file From b33133f80bf8a25decf21832b4cde5d26166abd5 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 22 Apr 2024 12:45:44 -0700 Subject: [PATCH 37/96] Update data-extraction-using-llms.mdx --- tutorials/data-extraction-using-llms.mdx | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tutorials/data-extraction-using-llms.mdx b/tutorials/data-extraction-using-llms.mdx index 554e7877..879c1e79 100644 --- a/tutorials/data-extraction-using-llms.mdx +++ b/tutorials/data-extraction-using-llms.mdx @@ -1,9 +1,6 @@ ---- -title: "Extract website data using LLMs" -description: "Learn how to use Firecrawl and Groq to extract structured data from a web page in a few lines of code." 
-'og:image': "/images/og.png" -'twitter:image': "/images/og.png" ---- +# Extract website data using LLMs + +Learn how to use Firecrawl and Groq to extract structured data from a web page in a few lines of code. With Groq fast inference speeds and firecrawl parellization, you can extract data from web pages *super* fast. ## Setup @@ -92,4 +89,4 @@ print(dataExtracted) You have now built a data extraction bot using Groq and Firecrawl. You can now use this bot to extract structured data from any website. -If you have any questions or need help, feel free to reach out to us at [Firecrawl](https://firecrawl.dev). \ No newline at end of file +If you have any questions or need help, feel free to reach out to us at [Firecrawl](https://firecrawl.dev). From bf2df7a8535c02bc457bd0d4cbdcde6ea3a2d8be Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 10:55:40 -0700 Subject: [PATCH 38/96] Nick: fix js-sdk --- apps/js-sdk/firecrawl/build/index.js | 2 +- apps/js-sdk/firecrawl/package.json | 4 +++- apps/js-sdk/firecrawl/src/index.ts | 4 ++-- apps/js-sdk/firecrawl/types/index.d.ts | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index 25ae9991..1b23bb54 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -67,7 +67,7 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the crawl request. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {number} timeout - Timeout in seconds for job status checks. - * @returns {Promise} The response from the crawl operation. + * @returns {Promise} The response from the crawl operation. 
*/ crawlUrl(url_1) { return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) { diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 811f87fe..566fdde9 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,11 +1,13 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.11", + "version": "0.0.13", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", "type": "module", "scripts": { + "build": "tsc", + "publish":"npm run build && npm publish --access public", "test": "echo \"Error: no test specified\" && exit 1" }, "repository": { diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index be550668..65456001 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -102,9 +102,9 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the crawl request. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {number} timeout - Timeout in seconds for job status checks. - * @returns {Promise} The response from the crawl operation. + * @returns {Promise} The response from the crawl operation. 
*/ - async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise { + async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise { const headers = this.prepareHeaders(); let jsonData: Params = { url }; if (params) { diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index a9d04ba9..be960f7c 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -61,9 +61,9 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the crawl request. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {number} timeout - Timeout in seconds for job status checks. - * @returns {Promise} The response from the crawl operation. + * @returns {Promise} The response from the crawl operation. */ - crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise; + crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise; /** * Checks the status of a crawl job using the Firecrawl API. * @param {string} jobId - The job ID of the crawl operation. 
From 306cfe4ce1d6b13b574be02315a0b2b80cdc4344 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 11:15:11 -0700 Subject: [PATCH 39/96] Nick: --- apps/api/package.json | 5 +++-- apps/api/pnpm-lock.yaml | 7 +++++++ apps/api/src/lib/html-to-markdown.ts | 4 +++- apps/api/src/scraper/WebScraper/single_url.ts | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/apps/api/package.json b/apps/api/package.json index 8ae16099..07e3b7a5 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -11,8 +11,8 @@ "start:dev": "nodemon --exec ts-node src/index.ts", "build": "tsc", "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", - "test:local-no-auth":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", - "test:prod":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", + "test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", + "test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "workers": "nodemon --exec ts-node src/services/queue-worker.ts", "worker:production": "node dist/src/services/queue-worker.js", "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest", @@ -66,6 +66,7 @@ "glob": "^10.3.12", "gpt3-tokenizer": "^1.1.5", "ioredis": "^5.3.2", + "joplin-turndown-plugin-gfm": "^1.0.12", "keyword-extractor": "^0.0.25", "langchain": "^0.1.25", "languagedetect": "^2.0.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index df669d55..5298d2b7 100644 --- a/apps/api/pnpm-lock.yaml +++ 
b/apps/api/pnpm-lock.yaml @@ -80,6 +80,9 @@ dependencies: ioredis: specifier: ^5.3.2 version: 5.3.2 + joplin-turndown-plugin-gfm: + specifier: ^1.0.12 + version: 1.0.12 keyword-extractor: specifier: ^0.0.25 version: 0.0.25 @@ -3923,6 +3926,10 @@ packages: - ts-node dev: true + /joplin-turndown-plugin-gfm@1.0.12: + resolution: {integrity: sha512-qL4+1iycQjZ1fs8zk3jSRk7cg3ROBUHk7GKtiLAQLFzLPKErnILUvz5DLszSQvz3s1sTjPbywLDISVUtBY6HaA==} + dev: false + /js-tiktoken@1.0.10: resolution: {integrity: sha512-ZoSxbGjvGyMT13x6ACo9ebhDha/0FHdKA+OsQcMOWcm1Zs7r90Rhk5lhERLzji+3rA7EKpXCgwXcM5fF3DMpdA==} dependencies: diff --git a/apps/api/src/lib/html-to-markdown.ts b/apps/api/src/lib/html-to-markdown.ts index 0fd8c938..e084f5ef 100644 --- a/apps/api/src/lib/html-to-markdown.ts +++ b/apps/api/src/lib/html-to-markdown.ts @@ -1,6 +1,8 @@ + export function parseMarkdown(html: string) { var TurndownService = require("turndown"); - var turndownPluginGfm = require("turndown-plugin-gfm"); + var turndownPluginGfm = require('joplin-turndown-plugin-gfm') + const turndownService = new TurndownService(); turndownService.addRule("inlineLink", { diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index fbcd9238..0f3cc380 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -142,6 +142,7 @@ export async function scrapSingleUrl( break; } let cleanedHtml = removeUnwantedElements(text, pageOptions); + return [await parseMarkdown(cleanedHtml), text]; }; From a680c7ce84985863607d1c10eacae481c28bd29a Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:46:29 -0300 Subject: [PATCH 40/96] [Feat] Server health check + slack message --- apps/api/.env.example | 3 +- apps/api/requests.http | 11 ++++++- apps/api/src/index.ts | 70 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) 
diff --git a/apps/api/.env.example b/apps/api/.env.example index 34e24b1f..3cd40c1e 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -21,4 +21,5 @@ OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) BULL_AUTH_KEY= # LOGTAIL_KEY= # Use if you're configuring basic logging with logtail PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback -LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs \ No newline at end of file +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs +SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages \ No newline at end of file diff --git a/apps/api/requests.http b/apps/api/requests.http index 23501369..751ba5ee 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -49,4 +49,13 @@ content-type: application/json ### Check Job Status GET https://api.firecrawl.dev/v0/crawl/status/cfcb71ac-23a3-4da5-bd85-d4e58b871d66 -Authorization: Bearer \ No newline at end of file +Authorization: Bearer + +### Get Active Jobs Count +GET http://localhost:3002/serverHealthCheck +content-type: application/json + +### Notify Server Health Check +GET http://localhost:3002/serverHealthCheck/notify +content-type: application/json + diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index a2e5c517..6417f360 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -87,6 +87,76 @@ app.get(`/admin/${process.env.BULL_AUTH_KEY}/queues`, async (req, res) => { } }); +app.get(`/serverHealthCheck`, async (req, res) => { + try { + const webScraperQueue = getWebScraperQueue(); + const [activeJobs] = await Promise.all([ + webScraperQueue.getActiveCount(), + ]); + + const noActiveJobs = activeJobs === 0; + // 200 if no active jobs, 503 if there are active jobs + return res.status(noActiveJobs ? 
200 : 500).json({ + activeJobs, + }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +}); + +app.get('/serverHealthCheck/notify', async (req, res) => { + if (process.env.SLACK_WEBHOOK_URL) { + const treshold = 5; // The treshold value for the active jobs + const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds + + const getActiveJobs = async () => { + const webScraperQueue = getWebScraperQueue(); + const [activeJobs] = await Promise.all([ + webScraperQueue.getActiveCount(), + ]); + + return activeJobs; + }; + + res.status(200).json({ message: "Check initiated" }); + + const checkActiveJobs = async () => { + try { + let activeJobs = await getActiveJobs(); + if (activeJobs >= treshold) { + setTimeout(async () => { + activeJobs = await getActiveJobs(); // Re-check the active jobs count + if (activeJobs >= treshold) { + const slackWebhookUrl = process.env.SLACK_WEBHOOK_URL; + const message = { + text: `⚠️ Warning: The number of active jobs (${activeJobs}) has exceeded the threshold (${treshold}) for more than ${timeout/60000} minute(s).`, + }; + + const response = await fetch(slackWebhookUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(message), + }) + + if (!response.ok) { + console.error('Failed to send Slack notification') + } + } + }, timeout); + } + } catch (error) { + console.error(error); + } + }; + + checkActiveJobs(); + } +}); + + app.get("/is-production", (req, res) => { res.send({ isProduction: global.isProduction }); }); From 9b01dc62817dca9488d890f0f58a5c4e654e7fa1 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:07:22 -0300 Subject: [PATCH 41/96] Changed from active to waiting jobs --- apps/api/src/index.ts | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/apps/api/src/index.ts 
b/apps/api/src/index.ts index 6417f360..27e87139 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -90,14 +90,14 @@ app.get(`/admin/${process.env.BULL_AUTH_KEY}/queues`, async (req, res) => { app.get(`/serverHealthCheck`, async (req, res) => { try { const webScraperQueue = getWebScraperQueue(); - const [activeJobs] = await Promise.all([ - webScraperQueue.getActiveCount(), + const [waitingJobs] = await Promise.all([ + webScraperQueue.getWaitingCount(), ]); - const noActiveJobs = activeJobs === 0; + const noWaitingJobs = waitingJobs === 0; // 200 if no active jobs, 503 if there are active jobs - return res.status(noActiveJobs ? 200 : 500).json({ - activeJobs, + return res.status(noWaitingJobs ? 200 : 500).json({ + waitingJobs, }); } catch (error) { console.error(error); @@ -107,30 +107,31 @@ app.get(`/serverHealthCheck`, async (req, res) => { app.get('/serverHealthCheck/notify', async (req, res) => { if (process.env.SLACK_WEBHOOK_URL) { - const treshold = 5; // The treshold value for the active jobs + const treshold = 1; // The treshold value for the active jobs const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds - const getActiveJobs = async () => { + const getWaitingJobsCount = async () => { const webScraperQueue = getWebScraperQueue(); - const [activeJobs] = await Promise.all([ - webScraperQueue.getActiveCount(), + const [waitingJobsCount] = await Promise.all([ + webScraperQueue.getWaitingCount(), ]); - return activeJobs; + return waitingJobsCount; }; res.status(200).json({ message: "Check initiated" }); - const checkActiveJobs = async () => { + const checkWaitingJobs = async () => { try { - let activeJobs = await getActiveJobs(); - if (activeJobs >= treshold) { + let waitingJobsCount = await getWaitingJobsCount(); + if (waitingJobsCount >= treshold) { setTimeout(async () => { - activeJobs = await getActiveJobs(); // Re-check the active jobs count - if (activeJobs >= treshold) { + // Re-check the waiting jobs count 
after the timeout + waitingJobsCount = await getWaitingJobsCount(); + if (waitingJobsCount >= treshold) { const slackWebhookUrl = process.env.SLACK_WEBHOOK_URL; const message = { - text: `⚠️ Warning: The number of active jobs (${activeJobs}) has exceeded the threshold (${treshold}) for more than ${timeout/60000} minute(s).`, + text: `⚠️ Warning: The number of active jobs (${waitingJobsCount}) has exceeded the threshold (${treshold}) for more than ${timeout/60000} minute(s).`, }; const response = await fetch(slackWebhookUrl, { @@ -152,7 +153,7 @@ app.get('/serverHealthCheck/notify', async (req, res) => { } }; - checkActiveJobs(); + checkWaitingJobs(); } }); From 849c0b6ebfcc0c7d0e202330c7df0d6260c4b1a0 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 23 Apr 2024 18:50:35 -0300 Subject: [PATCH 42/96] [Feat] Added blocklist for social media urls --- .../src/__tests__/e2e_noAuth/index.test.ts | 30 ++++++++++++++++ .../src/__tests__/e2e_withAuth/index.test.ts | 35 +++++++++++++++++++ apps/api/src/controllers/crawl.ts | 6 ++++ apps/api/src/controllers/crawlPreview.ts | 6 ++++ apps/api/src/controllers/scrape.ts | 5 +++ .../src/scraper/WebScraper/utils/blocklist.ts | 19 ++++++++++ 6 files changed, 101 insertions(+) create mode 100644 apps/api/src/scraper/WebScraper/utils/blocklist.ts diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts index e0aca36f..f76a8dc1 100644 --- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts @@ -55,6 +55,16 @@ describe("E2E Tests for API Routes with No Authentication", () => { expect(response.statusCode).not.toBe(401); }); + it("should return an error for a blocklisted URL without requiring authorization", async () => { + const blocklistedUrl = "https://facebook.com/fake-test"; + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Content-Type", 
"application/json") + .send({ url: blocklistedUrl }); + expect(response.statusCode).toBe(403); + expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + }); + it("should return a successful response", async () => { const response = await request(TEST_URL) .post("/v0/scrape") @@ -70,6 +80,16 @@ describe("E2E Tests for API Routes with No Authentication", () => { expect(response.statusCode).not.toBe(401); }); + it("should return an error for a blocklisted URL", async () => { + const blocklistedUrl = "https://twitter.com/fake-test"; + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Content-Type", "application/json") + .send({ url: blocklistedUrl }); + expect(response.statusCode).toBe(403); + expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + }); + it("should return a successful response", async () => { const response = await request(TEST_URL) .post("/v0/crawl") @@ -89,6 +109,16 @@ describe("E2E Tests for API Routes with No Authentication", () => { expect(response.statusCode).not.toBe(401); }); + it("should return an error for a blocklisted URL", async () => { + const blocklistedUrl = "https://instagram.com/fake-test"; + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Content-Type", "application/json") + .send({ url: blocklistedUrl }); + expect(response.statusCode).toBe(403); + expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + }); + it("should return a successful response", async () => { const response = await request(TEST_URL) .post("/v0/crawlWebsitePreview") diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index ba01a7ca..578a0335 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -47,6 +47,18 @@ const TEST_URL = "http://127.0.0.1:3002"; .send({ url: "https://firecrawl.dev" }); 
expect(response.statusCode).toBe(401); }); + + it("should return an error for a blocklisted URL", async () => { + const blocklistedUrl = "https://facebook.com/fake-test"; + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: blocklistedUrl }); + expect(response.statusCode).toBe(403); + expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + }); + it("should return a successful response with a valid preview token", async () => { const response = await request(TEST_URL) .post("/v0/scrape") @@ -86,6 +98,17 @@ const TEST_URL = "http://127.0.0.1:3002"; expect(response.statusCode).toBe(401); }); + it("should return an error for a blocklisted URL", async () => { + const blocklistedUrl = "https://twitter.com/fake-test"; + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: blocklistedUrl }); + expect(response.statusCode).toBe(403); + expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + }); + it("should return a successful response with a valid API key", async () => { const response = await request(TEST_URL) .post("/v0/crawl") @@ -99,6 +122,7 @@ const TEST_URL = "http://127.0.0.1:3002"; ); }); + // Additional tests for insufficient credits? 
}); @@ -119,6 +143,17 @@ const TEST_URL = "http://127.0.0.1:3002"; expect(response.statusCode).toBe(401); }); + it("should return an error for a blocklisted URL", async () => { + const blocklistedUrl = "https://instagram.com/fake-test"; + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: blocklistedUrl }); + expect(response.statusCode).toBe(403); + expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + }); + it("should return a successful response with a valid API key", async () => { const response = await request(TEST_URL) .post("/v0/crawlWebsitePreview") diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index bd3fecaf..9301c4d9 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -5,6 +5,7 @@ import { checkTeamCredits } from "../../src/services/billing/credit_billing"; import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../../src/types"; import { addWebScraperJob } from "../../src/services/queue-jobs"; +import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; export async function crawlController(req: Request, res: Response) { try { @@ -27,6 +28,11 @@ export async function crawlController(req: Request, res: Response) { if (!url) { return res.status(400).json({ error: "Url is required" }); } + + if (isUrlBlocked(url)) { + return res.status(403).json({ error: "URL is blocked due to policy restrictions" }); + } + const mode = req.body.mode ?? "crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? 
{ onlyMainContent: false }; diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index 3f28ef60..4c401974 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -2,6 +2,7 @@ import { Request, Response } from "express"; import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../../src/types"; import { addWebScraperJob } from "../../src/services/queue-jobs"; +import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; export async function crawlPreviewController(req: Request, res: Response) { try { @@ -18,6 +19,11 @@ export async function crawlPreviewController(req: Request, res: Response) { if (!url) { return res.status(400).json({ error: "Url is required" }); } + + if (isUrlBlocked(url)) { + return res.status(403).json({ error: "URL is blocked due to policy restrictions" }); + } + const mode = req.body.mode ?? "crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? 
{ onlyMainContent: false }; diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index be708008..d24c882f 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -5,6 +5,7 @@ import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../types"; import { logJob } from "../services/logging/log_job"; import { Document } from "../lib/entities"; +import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function export async function scrapeHelper( req: Request, @@ -22,6 +23,10 @@ export async function scrapeHelper( return { success: false, error: "Url is required", returnCode: 400 }; } + if (isUrlBlocked(url)) { + return { success: false, error: "URL is blocked due to policy restrictions", returnCode: 403 }; + } + const a = new WebScraperDataProvider(); await a.setOptions({ mode: "single_urls", diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts new file mode 100644 index 00000000..0eef3320 --- /dev/null +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -0,0 +1,19 @@ +const socialMediaBlocklist = [ + 'facebook.com', + 'twitter.com', + 'instagram.com', + 'linkedin.com', + 'pinterest.com', + 'snapchat.com', + 'tiktok.com', + 'reddit.com', + 'tumblr.com', + 'flickr.com', + 'whatsapp.com', + 'wechat.com', + 'telegram.org', +]; + +export function isUrlBlocked(url: string): boolean { + return socialMediaBlocklist.some(domain => url.includes(domain)); +} From 0146157876b0f59690bde22df8b38a8730ce2742 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 15:28:32 -0700 Subject: [PATCH 43/96] Nick: mvp --- apps/api/src/controllers/search.ts | 136 ++++++++++++++++++ apps/api/src/lib/entities.ts | 2 + apps/api/src/routes/v0.ts | 5 + apps/api/src/scraper/WebScraper/single_url.ts | 11 +- .../src/scraper/WebScraper/utils/metadata.ts | 37 ++++- apps/api/src/search/googlesearch.ts | 
134 +++++++++++++++++ apps/api/src/types.ts | 2 + 7 files changed, 320 insertions(+), 7 deletions(-) create mode 100644 apps/api/src/controllers/search.ts create mode 100644 apps/api/src/search/googlesearch.ts diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts new file mode 100644 index 00000000..7cd52095 --- /dev/null +++ b/apps/api/src/controllers/search.ts @@ -0,0 +1,136 @@ +import { Request, Response } from "express"; +import { WebScraperDataProvider } from "../scraper/WebScraper"; +import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../types"; +import { logJob } from "../services/logging/log_job"; +import { PageOptions } from "../lib/entities"; +import { search } from "../search/googlesearch"; + +export async function searchHelper( + req: Request, + team_id: string, + crawlerOptions: any, + pageOptions: PageOptions +): Promise<{ + success: boolean; + error?: string; + data?: any; + returnCode: number; +}> { + const query = req.body.query; + if (!query) { + return { success: false, error: "Query is required", returnCode: 400 }; + } + + const res = await search(query, true, 7); + + let justSearch = pageOptions.fetchPageContent === false; + + if(justSearch){ + return { success: true, data: res, returnCode: 200 }; + } + + if (res.results.length === 0) { + return { success: true, error: "No search results found", returnCode: 200 }; + } + + const a = new WebScraperDataProvider(); + await a.setOptions({ + mode: "single_urls", + urls: res.results.map((r) => r.url), + crawlerOptions: { + ...crawlerOptions, + }, + pageOptions: {...pageOptions, onlyMainContent: pageOptions?.onlyMainContent ?? true, fetchPageContent: pageOptions?.fetchPageContent ?? 
true, fallback:false}, + }); + + const docs = await a.getDocuments(true); + if (docs.length === 0) + { + return { success: true, error: "No search results found", returnCode: 200 }; + } + + + // make sure doc.content is not empty + const filteredDocs = docs.filter( + (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 + ); + + if (filteredDocs.length === 0) { + return { success: true, error: "No page found", returnCode: 200 }; + } + + const { success, credit_usage } = await billTeam( + team_id, + filteredDocs.length + ); + if (!success) { + return { + success: false, + error: + "Failed to bill team. Insufficient credits or subscription not found.", + returnCode: 402, + }; + } + + return { + success: true, + data: filteredDocs, + returnCode: 200, + }; +} + +export async function searchController(req: Request, res: Response) { + try { + // make sure to authenticate user first, Bearer + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Search + ); + if (!success) { + return res.status(status).json({ error }); + } + const crawlerOptions = req.body.crawlerOptions ?? {}; + const pageOptions = req.body.pageOptions ?? { onlyMainContent: true, fetchPageContent: true, fallback: false}; + const origin = req.body.origin ?? 
"api"; + + try { + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } + } catch (error) { + console.error(error); + return res.status(500).json({ error: "Internal server error" }); + } + const startTime = new Date().getTime(); + const result = await searchHelper( + req, + team_id, + crawlerOptions, + pageOptions + ); + const endTime = new Date().getTime(); + const timeTakenInSeconds = (endTime - startTime) / 1000; + logJob({ + success: result.success, + message: result.error, + num_docs: 1, + docs: [result.data], + time_taken: timeTakenInSeconds, + team_id: team_id, + mode: "search", + url: req.body.url, + crawlerOptions: crawlerOptions, + pageOptions: pageOptions, + origin: origin, + }); + return res.status(result.returnCode).json(result); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index e261dd4f..07f07e43 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -11,6 +11,8 @@ export interface Progress { export type PageOptions = { onlyMainContent?: boolean; + fallback?: boolean; + fetchPageContent?: boolean; }; export type WebScraperOptions = { urls: string[]; diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts index 023282a9..f84b974b 100644 --- a/apps/api/src/routes/v0.ts +++ b/apps/api/src/routes/v0.ts @@ -4,6 +4,7 @@ import { crawlStatusController } from "../../src/controllers/crawl-status"; import { scrapeController } from "../../src/controllers/scrape"; import { crawlPreviewController } from "../../src/controllers/crawlPreview"; import { crawlJobStatusPreviewController } from "../../src/controllers/status"; +import { searchController } from "../../src/controllers/search"; export const v0Router = express.Router(); @@ -12,3 +13,7 
@@ v0Router.post("/v0/crawl", crawlController); v0Router.post("/v0/crawlWebsitePreview", crawlPreviewController); v0Router.get("/v0/crawl/status/:jobId", crawlStatusController); v0Router.get("/v0/checkJobStatus/:jobId", crawlJobStatusPreviewController); + +// Search routes +v0Router.post("/v0/search", searchController); + diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 0f3cc380..fcbb688b 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -4,9 +4,7 @@ import { extractMetadata } from "./utils/metadata"; import dotenv from "dotenv"; import { Document, PageOptions } from "../../lib/entities"; import { parseMarkdown } from "../../lib/html-to-markdown"; -import { parseTablesToMarkdown } from "./utils/parseTable"; import { excludeNonMainTags } from "./utils/excludeTags"; -// import puppeteer from "puppeteer"; dotenv.config(); @@ -155,6 +153,15 @@ export async function scrapSingleUrl( // } let [text, html] = await attemptScraping(urlToScrap, "scrapingBee"); + if(pageOptions.fallback === false){ + const soup = cheerio.load(html); + const metadata = extractMetadata(soup, urlToScrap); + return { + content: text, + markdown: text, + metadata: { ...metadata, sourceURL: urlToScrap }, + } as Document; + } if (!text || text.length < 100) { console.log("Falling back to playwright"); [text, html] = await attemptScraping(urlToScrap, "playwright"); diff --git a/apps/api/src/scraper/WebScraper/utils/metadata.ts b/apps/api/src/scraper/WebScraper/utils/metadata.ts index ef883c38..ddaf1e8d 100644 --- a/apps/api/src/scraper/WebScraper/utils/metadata.ts +++ b/apps/api/src/scraper/WebScraper/utils/metadata.ts @@ -1,4 +1,3 @@ -// import * as cheerio from 'cheerio'; import { CheerioAPI } from "cheerio"; interface Metadata { title?: string; @@ -8,6 +7,14 @@ interface Metadata { robots?: string; ogTitle?: string; ogDescription?: string; + ogUrl?: string; + ogImage?: 
string; + ogAudio?: string; + ogDeterminer?: string; + ogLocale?: string; + ogLocaleAlternate?: string[]; + ogSiteName?: string; + ogVideo?: string; dctermsCreated?: string; dcDateCreated?: string; dcDate?: string; @@ -17,7 +24,6 @@ interface Metadata { dctermsSubject?: string; dcSubject?: string; dcDescription?: string; - ogImage?: string; dctermsKeywords?: string; modifiedTime?: string; publishedTime?: string; @@ -33,6 +39,14 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata { let robots: string | null = null; let ogTitle: string | null = null; let ogDescription: string | null = null; + let ogUrl: string | null = null; + let ogImage: string | null = null; + let ogAudio: string | null = null; + let ogDeterminer: string | null = null; + let ogLocale: string | null = null; + let ogLocaleAlternate: string[] | null = null; + let ogSiteName: string | null = null; + let ogVideo: string | null = null; let dctermsCreated: string | null = null; let dcDateCreated: string | null = null; let dcDate: string | null = null; @@ -42,7 +56,6 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata { let dctermsSubject: string | null = null; let dcSubject: string | null = null; let dcDescription: string | null = null; - let ogImage: string | null = null; let dctermsKeywords: string | null = null; let modifiedTime: string | null = null; let publishedTime: string | null = null; @@ -62,11 +75,18 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata { robots = soup('meta[name="robots"]').attr("content") || null; ogTitle = soup('meta[property="og:title"]').attr("content") || null; ogDescription = soup('meta[property="og:description"]').attr("content") || null; + ogUrl = soup('meta[property="og:url"]').attr("content") || null; + ogImage = soup('meta[property="og:image"]').attr("content") || null; + ogAudio = soup('meta[property="og:audio"]').attr("content") || null; + ogDeterminer = 
soup('meta[property="og:determiner"]').attr("content") || null; + ogLocale = soup('meta[property="og:locale"]').attr("content") || null; + ogLocaleAlternate = soup('meta[property="og:locale:alternate"]').map((i, el) => soup(el).attr("content")).get() || null; + ogSiteName = soup('meta[property="og:site_name"]').attr("content") || null; + ogVideo = soup('meta[property="og:video"]').attr("content") || null; articleSection = soup('meta[name="article:section"]').attr("content") || null; articleTag = soup('meta[name="article:tag"]').attr("content") || null; publishedTime = soup('meta[property="article:published_time"]').attr("content") || null; modifiedTime = soup('meta[property="article:modified_time"]').attr("content") || null; - ogImage = soup('meta[property="og:image"]').attr("content") || null; dctermsKeywords = soup('meta[name="dcterms.keywords"]').attr("content") || null; dcDescription = soup('meta[name="dc.description"]').attr("content") || null; dcSubject = soup('meta[name="dc.subject"]').attr("content") || null; @@ -90,6 +110,14 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata { ...(robots ? { robots } : {}), ...(ogTitle ? { ogTitle } : {}), ...(ogDescription ? { ogDescription } : {}), + ...(ogUrl ? { ogUrl } : {}), + ...(ogImage ? { ogImage } : {}), + ...(ogAudio ? { ogAudio } : {}), + ...(ogDeterminer ? { ogDeterminer } : {}), + ...(ogLocale ? { ogLocale } : {}), + ...(ogLocaleAlternate ? { ogLocaleAlternate } : {}), + ...(ogSiteName ? { ogSiteName } : {}), + ...(ogVideo ? { ogVideo } : {}), ...(dctermsCreated ? { dctermsCreated } : {}), ...(dcDateCreated ? { dcDateCreated } : {}), ...(dcDate ? { dcDate } : {}), @@ -99,7 +127,6 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata { ...(dctermsSubject ? { dctermsSubject } : {}), ...(dcSubject ? { dcSubject } : {}), ...(dcDescription ? { dcDescription } : {}), - ...(ogImage ? { ogImage } : {}), ...(dctermsKeywords ? { dctermsKeywords } : {}), ...(modifiedTime ? 
{ modifiedTime } : {}), ...(publishedTime ? { publishedTime } : {}), diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts new file mode 100644 index 00000000..fd3b6455 --- /dev/null +++ b/apps/api/src/search/googlesearch.ts @@ -0,0 +1,134 @@ +import axios from 'axios'; +import * as cheerio from 'cheerio'; +import * as querystring from 'querystring'; +import { ScrapingBeeClient } from 'scrapingbee'; + +const _useragent_list = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0' +]; + +function get_useragent(): string { + return _useragent_list[Math.floor(Math.random() * _useragent_list.length)]; +} + +async function _req(term: string, results: number, lang: string, start: number, proxies: any, timeout: number) { + const resp = await axios.get("https://www.google.com/search", { + headers: { + "User-Agent": get_useragent() + }, + params: { + "q": term, + "num": results + 2, // Prevents multiple requests + "hl": lang, + }, + proxy: proxies, + timeout: timeout, + }); + return resp; +} + +class SearchResult { + url: string; + title: string; + description: string; + + constructor(url: string, title: string, description: string) { + this.url = url; + this.title = title; + this.description = description; + } + + toString(): string { + 
return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`; + } +} + +export async function search(term: string, advanced = false, num_results = 7, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000) { + const escaped_term = querystring.escape(term); + + let proxies = null; + if (proxy) { + if (proxy.startsWith("https")) { + proxies = {"https": proxy}; + } else { + proxies = {"http": proxy}; + } + } + + // const response = await _req_scraping_bee(escaped_term, num_results, lang); + // const $ = cheerio.load(response); + + // const knowledgeGraphElement = $("div.kno-rdesc"); + // console.log(knowledgeGraphElement); + // console.log(knowledgeGraphElement.html()); + + // let knowledgeGraph = null; + // if (knowledgeGraphElement.length > 0) { + // console.log("Knowledge Graph found"); + // const title = knowledgeGraphElement.find("h2").text(); + // const type = knowledgeGraphElement.find("div[data-attrid='subtitle']").text(); + // const website = knowledgeGraphElement.find("a[data-ved]").attr("href"); + // const imageUrl = knowledgeGraphElement.find("g-img img").attr("src"); + // const description = knowledgeGraphElement.find("div[data-attrid='description'] span").text(); + // const descriptionSource = knowledgeGraphElement.find("div[data-attrid='description'] a").text(); + // const descriptionLink = knowledgeGraphElement.find("div[data-attrid='description'] a").attr("href"); + // const attributes = {}; + // knowledgeGraphElement.find("div[data-attrid='kc:/common:sideways']").each((index, element) => { + // const attributeKey = $(element).find("span[data-attrid]").text(); + // const attributeValue = $(element).find("span[data-log-string]").text(); + // attributes[attributeKey] = attributeValue; + // }); + // knowledgeGraph = { + // "title": title, + // "type": type, + // "website": website, + // "imageUrl": imageUrl, + // "description": description, + // "descriptionSource": descriptionSource, + // "descriptionLink": 
descriptionLink, + // "attributes": attributes + // }; + // } + + let start = 0; + let results = []; + while (start < num_results) { + const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout); + const $ = cheerio.load(resp.data); + const result_block = $("div.g"); + if (result_block.length === 0) { + start += 1; + } + result_block.each((index, element) => { + const linkElement = $(element).find("a"); + const link = linkElement && linkElement.attr("href") ? linkElement.attr("href") : null; + const title = $(element).find("h3"); + const ogImage = $(element).find("img").eq(1).attr("src"); + const description_box = $(element).find("div[style='-webkit-line-clamp:2']"); + const answerBox = $(element).find(".mod").text(); + if (description_box) { + const description = description_box.text(); + if (link && title && description) { + start += 1; + if (advanced) { + results.push(new SearchResult(link, title.text(), description)); + } else { + results.push(link); + } + } + } + }); + await new Promise(resolve => setTimeout(resolve, sleep_interval * 1000)); + + if (start === 0) { + return {results: []}; + } + } + return {results: results}; +} diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index 5d778a22..c65140cb 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -44,6 +44,8 @@ export enum RateLimiterMode { CrawlStatus = "crawl-status", Scrape = "scrape", Preview = "preview", + Search = "search", + } export interface AuthResponse { From 5e3e2ec966e4c28120f52c037a9df8e93c58ff9b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 15:44:11 -0700 Subject: [PATCH 44/96] Nick: --- apps/api/src/controllers/search.ts | 59 ++++++++++++++++++------------ apps/api/src/lib/entities.ts | 5 +++ 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 7cd52095..bc6659b3 100644 --- a/apps/api/src/controllers/search.ts +++ 
b/apps/api/src/controllers/search.ts @@ -4,14 +4,15 @@ import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../types"; import { logJob } from "../services/logging/log_job"; -import { PageOptions } from "../lib/entities"; +import { PageOptions, SearchOptions } from "../lib/entities"; import { search } from "../search/googlesearch"; export async function searchHelper( req: Request, team_id: string, crawlerOptions: any, - pageOptions: PageOptions + pageOptions: PageOptions, + searchOptions: SearchOptions ): Promise<{ success: boolean; error?: string; @@ -19,39 +20,44 @@ export async function searchHelper( returnCode: number; }> { const query = req.body.query; + const advanced = false; if (!query) { return { success: false, error: "Query is required", returnCode: 400 }; } - const res = await search(query, true, 7); + const res = await search(query, advanced, searchOptions.limit ?? 7); let justSearch = pageOptions.fetchPageContent === false; - if(justSearch){ + if (justSearch) { return { success: true, data: res, returnCode: 200 }; } if (res.results.length === 0) { return { success: true, error: "No search results found", returnCode: 200 }; } + console.log(res.results); const a = new WebScraperDataProvider(); await a.setOptions({ mode: "single_urls", - urls: res.results.map((r) => r.url), + urls: res.results.map((r) => (!advanced ? r : r.url)), crawlerOptions: { ...crawlerOptions, }, - pageOptions: {...pageOptions, onlyMainContent: pageOptions?.onlyMainContent ?? true, fetchPageContent: pageOptions?.fetchPageContent ?? true, fallback:false}, + pageOptions: { + ...pageOptions, + onlyMainContent: pageOptions?.onlyMainContent ?? true, + fetchPageContent: pageOptions?.fetchPageContent ?? 
true, + fallback: false, + }, }); const docs = await a.getDocuments(true); - if (docs.length === 0) - { + if (docs.length === 0) { return { success: true, error: "No search results found", returnCode: 200 }; } - // make sure doc.content is not empty const filteredDocs = docs.filter( (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 @@ -61,18 +67,18 @@ export async function searchHelper( return { success: true, error: "No page found", returnCode: 200 }; } - const { success, credit_usage } = await billTeam( - team_id, - filteredDocs.length - ); - if (!success) { - return { - success: false, - error: - "Failed to bill team. Insufficient credits or subscription not found.", - returnCode: 402, - }; - } + const { success, credit_usage } = await billTeam( + team_id, + filteredDocs.length + ); + if (!success) { + return { + success: false, + error: + "Failed to bill team. Insufficient credits or subscription not found.", + returnCode: 402, + }; + } return { success: true, @@ -93,9 +99,15 @@ export async function searchController(req: Request, res: Response) { return res.status(status).json({ error }); } const crawlerOptions = req.body.crawlerOptions ?? {}; - const pageOptions = req.body.pageOptions ?? { onlyMainContent: true, fetchPageContent: true, fallback: false}; + const pageOptions = req.body.pageOptions ?? { + onlyMainContent: true, + fetchPageContent: true, + fallback: false, + }; const origin = req.body.origin ?? "api"; + const searchOptions = req.body.searchOptions ?? 
{ limit: 7 }; + try { const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); @@ -111,7 +123,8 @@ export async function searchController(req: Request, res: Response) { req, team_id, crawlerOptions, - pageOptions + pageOptions, + searchOptions ); const endTime = new Date().getTime(); const timeTakenInSeconds = (endTime - startTime) / 1000; diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 07f07e43..b4b5193e 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -14,6 +14,11 @@ export type PageOptions = { fallback?: boolean; fetchPageContent?: boolean; }; + +export type SearchOptions = { + limit?: number; +}; + export type WebScraperOptions = { urls: string[]; mode: "single_urls" | "sitemap" | "crawl"; From 495adc9a3f3b056b84abe101bb5633bb783d410d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 15:48:37 -0700 Subject: [PATCH 45/96] Update googlesearch.ts --- apps/api/src/search/googlesearch.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts index fd3b6455..c63c907b 100644 --- a/apps/api/src/search/googlesearch.ts +++ b/apps/api/src/search/googlesearch.ts @@ -24,7 +24,7 @@ async function _req(term: string, results: number, lang: string, start: number, }, params: { "q": term, - "num": results + 2, // Prevents multiple requests + "num": results, // Number of results to return "hl": lang, }, proxy: proxies, From 841279c74d96b87aac989b795d062eb83e9cdda9 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:49:00 -0700 Subject: [PATCH 46/96] Update README.md Added a reminder to star the repo with a graphic. 
--- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f6b67b71..290ed9b0 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,17 @@ Crawl and convert any website into LLM-ready markdown. Build by [Mendable.ai](https://mendable.ai?ref=gfirecrawl) - *This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. It is not ready for full self-host yet - we're working on it* ## What is Firecrawl? [Firecrawl](https://firecrawl.dev?ref=github) is an API service that takes a URL, crawls it, and converts it into clean markdown. We crawl all accessible subpages and give you clean markdown for each. No sitemap required. +_Pst. hey, you, join our stargazers :)_ + + + + ## How to use it? We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like. 
From 8cb5d7955a36aec3f87ea91791cbfac51f4b6070 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 15:49:05 -0700 Subject: [PATCH 47/96] Update googlesearch.ts --- apps/api/src/search/googlesearch.ts | 71 +++++++++++++++-------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts index c63c907b..c835d084 100644 --- a/apps/api/src/search/googlesearch.ts +++ b/apps/api/src/search/googlesearch.ts @@ -61,40 +61,7 @@ export async function search(term: string, advanced = false, num_results = 7, la } } - // const response = await _req_scraping_bee(escaped_term, num_results, lang); - // const $ = cheerio.load(response); - - // const knowledgeGraphElement = $("div.kno-rdesc"); - // console.log(knowledgeGraphElement); - // console.log(knowledgeGraphElement.html()); - - // let knowledgeGraph = null; - // if (knowledgeGraphElement.length > 0) { - // console.log("Knowledge Graph found"); - // const title = knowledgeGraphElement.find("h2").text(); - // const type = knowledgeGraphElement.find("div[data-attrid='subtitle']").text(); - // const website = knowledgeGraphElement.find("a[data-ved]").attr("href"); - // const imageUrl = knowledgeGraphElement.find("g-img img").attr("src"); - // const description = knowledgeGraphElement.find("div[data-attrid='description'] span").text(); - // const descriptionSource = knowledgeGraphElement.find("div[data-attrid='description'] a").text(); - // const descriptionLink = knowledgeGraphElement.find("div[data-attrid='description'] a").attr("href"); - // const attributes = {}; - // knowledgeGraphElement.find("div[data-attrid='kc:/common:sideways']").each((index, element) => { - // const attributeKey = $(element).find("span[data-attrid]").text(); - // const attributeValue = $(element).find("span[data-log-string]").text(); - // attributes[attributeKey] = attributeValue; - // }); - // knowledgeGraph = { - // "title": title, - // "type": type, - // 
"website": website, - // "imageUrl": imageUrl, - // "description": description, - // "descriptionSource": descriptionSource, - // "descriptionLink": descriptionLink, - // "attributes": attributes - // }; - // } + // TODO: knowledge graph, answer box, etc. let start = 0; let results = []; @@ -132,3 +99,39 @@ export async function search(term: string, advanced = false, num_results = 7, la } return {results: results}; } + + +// const response = await _req_scraping_bee(escaped_term, num_results, lang); + // const $ = cheerio.load(response); + + // const knowledgeGraphElement = $("div.kno-rdesc"); + // console.log(knowledgeGraphElement); + // console.log(knowledgeGraphElement.html()); + + // let knowledgeGraph = null; + // if (knowledgeGraphElement.length > 0) { + // console.log("Knowledge Graph found"); + // const title = knowledgeGraphElement.find("h2").text(); + // const type = knowledgeGraphElement.find("div[data-attrid='subtitle']").text(); + // const website = knowledgeGraphElement.find("a[data-ved]").attr("href"); + // const imageUrl = knowledgeGraphElement.find("g-img img").attr("src"); + // const description = knowledgeGraphElement.find("div[data-attrid='description'] span").text(); + // const descriptionSource = knowledgeGraphElement.find("div[data-attrid='description'] a").text(); + // const descriptionLink = knowledgeGraphElement.find("div[data-attrid='description'] a").attr("href"); + // const attributes = {}; + // knowledgeGraphElement.find("div[data-attrid='kc:/common:sideways']").each((index, element) => { + // const attributeKey = $(element).find("span[data-attrid]").text(); + // const attributeValue = $(element).find("span[data-log-string]").text(); + // attributes[attributeKey] = attributeValue; + // }); + // knowledgeGraph = { + // "title": title, + // "type": type, + // "website": website, + // "imageUrl": imageUrl, + // "description": description, + // "descriptionSource": descriptionSource, + // "descriptionLink": descriptionLink, + // 
"attributes": attributes + // }; + // } \ No newline at end of file From 41263bb4b6deb17042d64ea34cab72159e1340dc Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 16:45:06 -0700 Subject: [PATCH 48/96] Nick: serper support --- apps/api/.env.example | 3 +- apps/api/src/controllers/search.ts | 12 ++- apps/api/src/lib/entities.ts | 3 + apps/api/src/search/googlesearch.ts | 152 +++++++++++++--------------- apps/api/src/search/index.ts | 45 ++++++++ apps/api/src/search/serper.ts | 27 +++++ 6 files changed, 157 insertions(+), 85 deletions(-) create mode 100644 apps/api/src/search/index.ts create mode 100644 apps/api/src/search/serper.ts diff --git a/apps/api/.env.example b/apps/api/.env.example index 34e24b1f..3bd06cd0 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -21,4 +21,5 @@ OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) BULL_AUTH_KEY= # LOGTAIL_KEY= # Use if you're configuring basic logging with logtail PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback -LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs \ No newline at end of file +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs +SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api \ No newline at end of file diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index bc6659b3..6a1c7b4a 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -5,7 +5,7 @@ import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../types"; import { logJob } from "../services/logging/log_job"; import { PageOptions, SearchOptions } from "../lib/entities"; -import { search } from "../search/googlesearch"; +import { search } from "../search"; export async function searchHelper( req: Request, @@ -25,7 +25,10 @@ export async function searchHelper( return { success: 
false, error: "Query is required", returnCode: 400 }; } - const res = await search(query, advanced, searchOptions.limit ?? 7); + const tbs = searchOptions.tbs ?? null; + const filter = searchOptions.filter ?? null; + + const res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 7, tbs: tbs, filter: filter}); let justSearch = pageOptions.fetchPageContent === false; @@ -33,15 +36,14 @@ export async function searchHelper( return { success: true, data: res, returnCode: 200 }; } - if (res.results.length === 0) { + if (res.length === 0) { return { success: true, error: "No search results found", returnCode: 200 }; } - console.log(res.results); const a = new WebScraperDataProvider(); await a.setOptions({ mode: "single_urls", - urls: res.results.map((r) => (!advanced ? r : r.url)), + urls: res.map((r) => r), crawlerOptions: { ...crawlerOptions, }, diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index b4b5193e..062212b2 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -13,10 +13,13 @@ export type PageOptions = { onlyMainContent?: boolean; fallback?: boolean; fetchPageContent?: boolean; + }; export type SearchOptions = { limit?: number; + tbs?: string; + filter?: string; }; export type WebScraperOptions = { diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts index c835d084..53227e69 100644 --- a/apps/api/src/search/googlesearch.ts +++ b/apps/api/src/search/googlesearch.ts @@ -1,7 +1,6 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; import * as querystring from 'querystring'; -import { ScrapingBeeClient } from 'scrapingbee'; const _useragent_list = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', @@ -17,20 +16,35 @@ function get_useragent(): string { return _useragent_list[Math.floor(Math.random() * _useragent_list.length)]; } -async function _req(term: string, results: number, lang: string, start: 
number, proxies: any, timeout: number) { - const resp = await axios.get("https://www.google.com/search", { - headers: { - "User-Agent": get_useragent() - }, - params: { - "q": term, - "num": results, // Number of results to return - "hl": lang, - }, - proxy: proxies, - timeout: timeout, - }); - return resp; +async function _req(term: string, results: number, lang: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) { + const params = { + "q": term, + "num": results, // Number of results to return + "hl": lang, + "start": start, + }; + if (tbs) { + params["tbs"] = tbs; + } + if (filter) { + params["filter"] = filter; + } + try { + const resp = await axios.get("https://www.google.com/search", { + headers: { + "User-Agent": get_useragent() + }, + params: params, + proxy: proxies, + timeout: timeout, + }); + return resp; + } catch (error) { + if (error.response && error.response.status === 429) { + throw new Error('Google Search: Too many requests, try again later.'); + } + throw error; + } } class SearchResult { @@ -49,7 +63,7 @@ class SearchResult { } } -export async function search(term: string, advanced = false, num_results = 7, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000) { +export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise { const escaped_term = querystring.escape(term); let proxies = null; @@ -64,74 +78,54 @@ export async function search(term: string, advanced = false, num_results = 7, la // TODO: knowledge graph, answer box, etc. 
let start = 0; - let results = []; - while (start < num_results) { - const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout); - const $ = cheerio.load(resp.data); - const result_block = $("div.g"); - if (result_block.length === 0) { - start += 1; - } - result_block.each((index, element) => { - const linkElement = $(element).find("a"); - const link = linkElement && linkElement.attr("href") ? linkElement.attr("href") : null; - const title = $(element).find("h3"); - const ogImage = $(element).find("img").eq(1).attr("src"); - const description_box = $(element).find("div[style='-webkit-line-clamp:2']"); - const answerBox = $(element).find(".mod").text(); - if (description_box) { - const description = description_box.text(); - if (link && title && description) { - start += 1; - if (advanced) { - results.push(new SearchResult(link, title.text(), description)); - } else { - results.push(link); + let results : string[] = []; + let attempts = 0; + const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop + while (start < num_results && attempts < maxAttempts) { + try { + const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout, tbs, filter); + const $ = cheerio.load(resp.data); + const result_block = $("div.g"); + if (result_block.length === 0) { + start += 1; + attempts += 1; + } else { + attempts = 0; // Reset attempts if we have results + } + result_block.each((index, element) => { + const linkElement = $(element).find("a"); + const link = linkElement && linkElement.attr("href") ? 
linkElement.attr("href") : null; + const title = $(element).find("h3"); + const ogImage = $(element).find("img").eq(1).attr("src"); + const description_box = $(element).find("div[style='-webkit-line-clamp:2']"); + const answerBox = $(element).find(".mod").text(); + if (description_box) { + const description = description_box.text(); + if (link && title && description) { + start += 1; + if (advanced) { + // results.push(new SearchResult(link, title.text(), description)); + } else { + results.push(link); + } } } + }); + await new Promise(resolve => setTimeout(resolve, sleep_interval * 1000)); + } catch (error) { + if (error.message === 'Too many requests') { + console.warn('Too many requests, breaking the loop'); + break; } - }); - await new Promise(resolve => setTimeout(resolve, sleep_interval * 1000)); + throw error; + } if (start === 0) { - return {results: []}; + return results; } } - return {results: results}; + if (attempts >= maxAttempts) { + console.warn('Max attempts reached, breaking the loop'); + } + return results } - - -// const response = await _req_scraping_bee(escaped_term, num_results, lang); - // const $ = cheerio.load(response); - - // const knowledgeGraphElement = $("div.kno-rdesc"); - // console.log(knowledgeGraphElement); - // console.log(knowledgeGraphElement.html()); - - // let knowledgeGraph = null; - // if (knowledgeGraphElement.length > 0) { - // console.log("Knowledge Graph found"); - // const title = knowledgeGraphElement.find("h2").text(); - // const type = knowledgeGraphElement.find("div[data-attrid='subtitle']").text(); - // const website = knowledgeGraphElement.find("a[data-ved]").attr("href"); - // const imageUrl = knowledgeGraphElement.find("g-img img").attr("src"); - // const description = knowledgeGraphElement.find("div[data-attrid='description'] span").text(); - // const descriptionSource = knowledgeGraphElement.find("div[data-attrid='description'] a").text(); - // const descriptionLink = 
knowledgeGraphElement.find("div[data-attrid='description'] a").attr("href"); - // const attributes = {}; - // knowledgeGraphElement.find("div[data-attrid='kc:/common:sideways']").each((index, element) => { - // const attributeKey = $(element).find("span[data-attrid]").text(); - // const attributeValue = $(element).find("span[data-log-string]").text(); - // attributes[attributeKey] = attributeValue; - // }); - // knowledgeGraph = { - // "title": title, - // "type": type, - // "website": website, - // "imageUrl": imageUrl, - // "description": description, - // "descriptionSource": descriptionSource, - // "descriptionLink": descriptionLink, - // "attributes": attributes - // }; - // } \ No newline at end of file diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts new file mode 100644 index 00000000..0f3a5967 --- /dev/null +++ b/apps/api/src/search/index.ts @@ -0,0 +1,45 @@ +import { google_search } from "./googlesearch"; +import { serper_search } from "./serper"; + +export async function search({ + query, + advanced = false, + num_results = 7, + tbs = null, + filter = null, + lang = "en", + proxy = null, + sleep_interval = 0, + timeout = 5000, +}: { + query: string; + advanced?: boolean; + num_results?: number; + tbs?: string; + filter?: string; + lang?: string; + proxy?: string; + sleep_interval?: number; + timeout?: number; +}) { + try { + if (process.env.SERPER_API_KEY) { + return await serper_search(query, num_results); + } + return await google_search( + query, + advanced, + num_results, + tbs, + filter, + lang, + proxy, + sleep_interval, + timeout + ); + } catch (error) { + console.error("Error in search function: ", error); + return [] + } + // if process.env.SERPER_API_KEY is set, use serper +} diff --git a/apps/api/src/search/serper.ts b/apps/api/src/search/serper.ts new file mode 100644 index 00000000..f92f2fc5 --- /dev/null +++ b/apps/api/src/search/serper.ts @@ -0,0 +1,27 @@ +import axios from "axios"; +import dotenv from "dotenv"; + 
+dotenv.config(); + +export async function serper_search(q, num_results) : Promise { + let data = JSON.stringify({ + q: q, + "num": num_results + }); + + let config = { + method: "POST", + url: "https://google.serper.dev/search", + headers: { + "X-API-KEY": process.env.SERPER_API_KEY, + "Content-Type": "application/json", + }, + data: data, + }; + const response = await axios(config); + if (response && response.data && Array.isArray(response.data.organic)) { + return response.data.organic.map((a) => a.link); + } else { + return []; + } +} From f3c190c21ced7b87989abbbb4e7180653c820aad Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 16:47:24 -0700 Subject: [PATCH 49/96] Nick: --- apps/api/src/__tests__/e2e_noAuth/index.test.ts | 6 +++--- apps/api/src/__tests__/e2e_withAuth/index.test.ts | 6 +++--- apps/api/src/controllers/crawl.ts | 2 +- apps/api/src/controllers/crawlPreview.ts | 2 +- apps/api/src/controllers/scrape.ts | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts index f76a8dc1..b2b29383 100644 --- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts @@ -62,7 +62,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. 
We're actively working on building support for it."); }); it("should return a successful response", async () => { @@ -87,7 +87,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."); }); it("should return a successful response", async () => { @@ -116,7 +116,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."); }); it("should return a successful response", async () => { diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index 578a0335..a165ae22 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -56,7 +56,7 @@ const TEST_URL = "http://127.0.0.1:3002"; .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. 
We're actively working on building support for it."); }); it("should return a successful response with a valid preview token", async () => { @@ -106,7 +106,7 @@ const TEST_URL = "http://127.0.0.1:3002"; .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."); }); it("should return a successful response with a valid API key", async () => { @@ -151,7 +151,7 @@ const TEST_URL = "http://127.0.0.1:3002"; .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain("URL is blocked due to policy restrictions"); + expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."); }); it("should return a successful response with a valid API key", async () => { diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 9301c4d9..3d64f7f4 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -30,7 +30,7 @@ export async function crawlController(req: Request, res: Response) { } if (isUrlBlocked(url)) { - return res.status(403).json({ error: "URL is blocked due to policy restrictions" }); + return res.status(403).json({ error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." }); } const mode = req.body.mode ?? 
"crawl"; diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index 4c401974..569be333 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -21,7 +21,7 @@ export async function crawlPreviewController(req: Request, res: Response) { } if (isUrlBlocked(url)) { - return res.status(403).json({ error: "URL is blocked due to policy restrictions" }); + return res.status(403).json({ error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." }); } const mode = req.body.mode ?? "crawl"; diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index d24c882f..cfe35b5b 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -24,7 +24,7 @@ export async function scrapeHelper( } if (isUrlBlocked(url)) { - return { success: false, error: "URL is blocked due to policy restrictions", returnCode: 403 }; + return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. 
We're actively working on building support for it.", returnCode: 403 }; } const a = new WebScraperDataProvider(); From e6779aff6824282c2cfdeaaa016a0f3512202216 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 16:56:09 -0700 Subject: [PATCH 50/96] Nick: tests --- .../src/__tests__/e2e_noAuth/index.test.ts | 27 ++++++++++++++++++ .../src/__tests__/e2e_withAuth/index.test.ts | 28 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts index e0aca36f..dfe6aeba 100644 --- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts @@ -102,6 +102,33 @@ describe("E2E Tests for API Routes with No Authentication", () => { }); }); + describe("POST /v0/search", () => { + it("should require not authorization", async () => { + const response = await request(TEST_URL).post("/v0/search"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return no error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/search") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ query: "test" }); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/search") + .set("Content-Type", "application/json") + .send({ query: "test" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("success"); + expect(response.body.success).toBe(true); + expect(response.body).toHaveProperty("data"); + }); + }); + describe("GET /v0/crawl/status/:jobId", () => { it("should not require authorization", async () => { const response = await request(TEST_URL).get("/v0/crawl/status/123"); diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts 
b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index ba01a7ca..f0887eba 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -133,6 +133,34 @@ const TEST_URL = "http://127.0.0.1:3002"; }); }); + describe("POST /v0/search", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).post("/v0/search"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/search") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ query: "test" }); + expect(response.statusCode).toBe(401); + }); + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/search") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ query: "test" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("success"); + expect(response.body.success).toBe(true); + expect(response.body).toHaveProperty("data"); + }, 20000); + }); + describe("GET /v0/crawl/status/:jobId", () => { it("should require authorization", async () => { const response = await request(TEST_URL).get("/v0/crawl/status/123"); From 4328a68ec19049caba40ffdb3d442ba915483454 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 16:57:53 -0700 Subject: [PATCH 51/96] Nick: --- apps/api/src/__tests__/e2e_noAuth/index.test.ts | 4 ++-- apps/api/src/__tests__/e2e_withAuth/index.test.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts index dfe6aeba..37eeb0e6 100644 --- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts 
@@ -117,7 +117,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { expect(response.statusCode).not.toBe(401); }); - it("should return a successful response with a valid API key", async () => { + it("should return a successful response without a valid API key", async () => { const response = await request(TEST_URL) .post("/v0/search") .set("Content-Type", "application/json") @@ -126,7 +126,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { expect(response.body).toHaveProperty("success"); expect(response.body.success).toBe(true); expect(response.body).toHaveProperty("data"); - }); + }, 20000); }); describe("GET /v0/crawl/status/:jobId", () => { diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index f0887eba..59dfde24 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -158,7 +158,7 @@ const TEST_URL = "http://127.0.0.1:3002"; expect(response.body).toHaveProperty("success"); expect(response.body.success).toBe(true); expect(response.body).toHaveProperty("data"); - }, 20000); + }, 20000); }); describe("GET /v0/crawl/status/:jobId", () => { From f0695c712307b06bde55e251f799373882b6a7ad Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 17:04:10 -0700 Subject: [PATCH 52/96] Update single_url.ts --- apps/api/src/scraper/WebScraper/single_url.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index fcbb688b..e110b0eb 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -23,13 +23,14 @@ export async function scrapWithCustomFirecrawl( export async function scrapWithScrapingBee( url: string, - wait_browser: string = "domcontentloaded" + wait_browser: string = "domcontentloaded", + timeout: number = 15000 ): 
Promise { try { const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY); const response = await client.get({ url: url, - params: { timeout: 15000, wait_browser: wait_browser }, + params: { timeout: timeout, wait_browser: wait_browser }, headers: { "ScrapingService-Request": "TRUE" }, }); @@ -106,11 +107,11 @@ export async function scrapSingleUrl( let text = ""; switch (method) { case "firecrawl-scraper": - text = await scrapWithCustomFirecrawl(url); + text = await scrapWithCustomFirecrawl(url,); break; case "scrapingBee": if (process.env.SCRAPING_BEE_API_KEY) { - text = await scrapWithScrapingBee(url); + text = await scrapWithScrapingBee(url,"domcontentloaded", pageOptions.fallback === false? 7000 : 15000); } break; case "playwright": From 53cc4c396fea229ac87004e822f2228a090feb5c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 17:05:58 -0700 Subject: [PATCH 53/96] Update search.ts --- apps/api/src/controllers/search.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 6a1c7b4a..4c03644a 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -6,6 +6,7 @@ import { RateLimiterMode } from "../types"; import { logJob } from "../services/logging/log_job"; import { PageOptions, SearchOptions } from "../lib/entities"; import { search } from "../search"; +import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; export async function searchHelper( req: Request, @@ -28,7 +29,7 @@ export async function searchHelper( const tbs = searchOptions.tbs ?? null; const filter = searchOptions.filter ?? null; - const res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 7, tbs: tbs, filter: filter}); + let res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 
7, tbs: tbs, filter: filter}); let justSearch = pageOptions.fetchPageContent === false; @@ -40,6 +41,9 @@ export async function searchHelper( return { success: true, error: "No search results found", returnCode: 200 }; } + // filter out social media links + res = res.filter((r) => !isUrlBlocked(r)); + const a = new WebScraperDataProvider(); await a.setOptions({ mode: "single_urls", From 3abfd6b4c19d9ce14c6a5b8dea47dda16f6383d0 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 17:06:48 -0700 Subject: [PATCH 54/96] Update search.ts --- apps/api/src/controllers/search.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 4c03644a..f18f1c5e 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -37,12 +37,13 @@ export async function searchHelper( return { success: true, data: res, returnCode: 200 }; } + res = res.filter((r) => !isUrlBlocked(r)); + if (res.length === 0) { return { success: true, error: "No search results found", returnCode: 200 }; } // filter out social media links - res = res.filter((r) => !isUrlBlocked(r)); const a = new WebScraperDataProvider(); await a.setOptions({ From fdb2789eaa302b2f90bed7f1dad6dcc95613cb1f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 17:14:34 -0700 Subject: [PATCH 55/96] Nick: added url as return param --- apps/api/src/lib/entities.ts | 1 + apps/api/src/scraper/WebScraper/single_url.ts | 2 ++ 2 files changed, 3 insertions(+) diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 062212b2..fdc1c613 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -40,6 +40,7 @@ export type WebScraperOptions = { export class Document { id?: string; + url?: string; // Used only in /search for now content: string; markdown?: string; createdAt?: Date; diff --git a/apps/api/src/scraper/WebScraper/single_url.ts 
b/apps/api/src/scraper/WebScraper/single_url.ts index e110b0eb..6ab30036 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -154,10 +154,12 @@ export async function scrapSingleUrl( // } let [text, html] = await attemptScraping(urlToScrap, "scrapingBee"); + // Basically means that it is using /search endpoint if(pageOptions.fallback === false){ const soup = cheerio.load(html); const metadata = extractMetadata(soup, urlToScrap); return { + url: urlToScrap, content: text, markdown: text, metadata: { ...metadata, sourceURL: urlToScrap }, From 479fa2f7f8862e6e69b8a2f47a928ddc1cf0808c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 17:46:32 -0700 Subject: [PATCH 56/96] Nick: --- apps/api/src/search/index.ts | 2 +- apps/api/src/search/serper.ts | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts index 0f3a5967..ae624516 100644 --- a/apps/api/src/search/index.ts +++ b/apps/api/src/search/index.ts @@ -23,7 +23,7 @@ export async function search({ timeout?: number; }) { try { - if (process.env.SERPER_API_KEY) { + if (process.env.SERPER_API_KEY && !tbs) { return await serper_search(query, num_results); } return await google_search( diff --git a/apps/api/src/search/serper.ts b/apps/api/src/search/serper.ts index f92f2fc5..2b4ba02e 100644 --- a/apps/api/src/search/serper.ts +++ b/apps/api/src/search/serper.ts @@ -6,7 +6,8 @@ dotenv.config(); export async function serper_search(q, num_results) : Promise { let data = JSON.stringify({ q: q, - "num": num_results + "num": num_results, + }); let config = { From 3b5b868d0da4a55afa9e50f3b34dc7d02d4f3a16 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 18:13:58 -0700 Subject: [PATCH 57/96] Update requests.http --- apps/api/requests.http | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index 
9a972de5..1dbeaebe 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -14,7 +14,7 @@ GET http://localhost:3002/v0/jobs/active HTTP/1.1 ### Scrape Website POST http://localhost:3002/v0/crawl HTTP/1.1 -Authorization: Bearer +Authorization: Bearer fc-879f515fdd5b418b8d55ec6ccb1acd46 content-type: application/json { @@ -25,6 +25,10 @@ content-type: application/json } + + + + ### Scrape Website POST http://localhost:3002/v0/scrape HTTP/1.1 Authorization: Bearer @@ -37,8 +41,8 @@ content-type: application/json ### Check Job Status -GET http://localhost:3002/v0/crawl/status/4dbf2b62-487d-45d7-a4f7-8f5e883dfecd HTTP/1.1 -Authorization: Bearer +GET http://localhost:3002/v0/crawl/status/a6053912-d602-4709-841f-3d2cb46fea0a HTTP/1.1 +Authorization: Bearer fc-879f515fdd5b418b8d55ec6ccb1acd46 ### Get Job Result From 07e93ee5fd5bee4cb7d54f825bccd5cd1574a7ae Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:32:35 -0300 Subject: [PATCH 58/96] Update requests.http --- apps/api/requests.http | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index 1dbeaebe..495df975 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -14,7 +14,7 @@ GET http://localhost:3002/v0/jobs/active HTTP/1.1 ### Scrape Website POST http://localhost:3002/v0/crawl HTTP/1.1 -Authorization: Bearer fc-879f515fdd5b418b8d55ec6ccb1acd46 +Authorization: Bearer content-type: application/json { @@ -29,6 +29,8 @@ content-type: application/json + + ### Scrape Website POST http://localhost:3002/v0/scrape HTTP/1.1 Authorization: Bearer @@ -42,7 +44,7 @@ content-type: application/json ### Check Job Status GET http://localhost:3002/v0/crawl/status/a6053912-d602-4709-841f-3d2cb46fea0a HTTP/1.1 -Authorization: Bearer fc-879f515fdd5b418b8d55ec6ccb1acd46 +Authorization: Bearer ### Get Job Result From 307ea6f5ec48760715f75939b269a1d5a1078eaa Mon Sep 17 
00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 10:11:01 -0700 Subject: [PATCH 59/96] Nick: improvements to search --- apps/api/src/controllers/search.ts | 4 ++-- apps/api/src/lib/entities.ts | 17 +++++++++++++++++ apps/api/src/search/googlesearch.ts | 25 ++++--------------------- apps/api/src/search/index.ts | 3 ++- apps/api/src/search/serper.ts | 14 +++++++++----- 5 files changed, 34 insertions(+), 29 deletions(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index f18f1c5e..28169c05 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -37,7 +37,7 @@ export async function searchHelper( return { success: true, data: res, returnCode: 200 }; } - res = res.filter((r) => !isUrlBlocked(r)); + res = res.filter((r) => !isUrlBlocked(r.url)); if (res.length === 0) { return { success: true, error: "No search results found", returnCode: 200 }; @@ -48,7 +48,7 @@ export async function searchHelper( const a = new WebScraperDataProvider(); await a.setOptions({ mode: "single_urls", - urls: res.map((r) => r), + urls: res.map((r) => r.url), crawlerOptions: { ...crawlerOptions, }, diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 1144c63d..bda74486 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -71,3 +71,20 @@ export class Document { this.provider = data.provider || undefined; } } + + +export class SearchResult { + url: string; + title: string; + description: string; + + constructor(url: string, title: string, description: string) { + this.url = url; + this.title = title; + this.description = description; + } + + toString(): string { + return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`; + } +} \ No newline at end of file diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts index 53227e69..0f7c72f4 100644 --- a/apps/api/src/search/googlesearch.ts +++ 
b/apps/api/src/search/googlesearch.ts @@ -1,6 +1,7 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; import * as querystring from 'querystring'; +import { SearchResult } from '../../src/lib/entities'; const _useragent_list = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', @@ -47,23 +48,9 @@ async function _req(term: string, results: number, lang: string, start: number, } } -class SearchResult { - url: string; - title: string; - description: string; - constructor(url: string, title: string, description: string) { - this.url = url; - this.title = title; - this.description = description; - } - toString(): string { - return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`; - } -} - -export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise { +export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise { const escaped_term = querystring.escape(term); let proxies = null; @@ -78,7 +65,7 @@ export async function google_search(term: string, advanced = false, num_results // TODO: knowledge graph, answer box, etc. 
let start = 0; - let results : string[] = []; + let results : SearchResult[] = []; let attempts = 0; const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop while (start < num_results && attempts < maxAttempts) { @@ -103,11 +90,7 @@ export async function google_search(term: string, advanced = false, num_results const description = description_box.text(); if (link && title && description) { start += 1; - if (advanced) { - // results.push(new SearchResult(link, title.text(), description)); - } else { - results.push(link); - } + results.push(new SearchResult(link, title.text(), description)); } } }); diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts index ae624516..5a6a3d89 100644 --- a/apps/api/src/search/index.ts +++ b/apps/api/src/search/index.ts @@ -1,3 +1,4 @@ +import { SearchResult } from "../../src/lib/entities"; import { google_search } from "./googlesearch"; import { serper_search } from "./serper"; @@ -21,7 +22,7 @@ export async function search({ proxy?: string; sleep_interval?: number; timeout?: number; -}) { +}) : Promise { try { if (process.env.SERPER_API_KEY && !tbs) { return await serper_search(query, num_results); diff --git a/apps/api/src/search/serper.ts b/apps/api/src/search/serper.ts index 2b4ba02e..f8806b7a 100644 --- a/apps/api/src/search/serper.ts +++ b/apps/api/src/search/serper.ts @@ -1,13 +1,13 @@ import axios from "axios"; import dotenv from "dotenv"; +import { SearchResult } from "../../src/lib/entities"; dotenv.config(); -export async function serper_search(q, num_results) : Promise { +export async function serper_search(q, num_results): Promise { let data = JSON.stringify({ q: q, - "num": num_results, - + num: num_results, }); let config = { @@ -21,8 +21,12 @@ export async function serper_search(q, num_results) : Promise { }; const response = await axios(config); if (response && response.data && Array.isArray(response.data.organic)) { - return response.data.organic.map((a) => a.link); 
- } else { + return response.data.organic.map((a) => ({ + url: a.link, + title: a.title, + description: a.snippet, + })); + }else{ return []; } } From 877af4231bdb0e1d773cfb870b72a5b9dc3502e4 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 10:11:44 -0700 Subject: [PATCH 60/96] Update openapi.json --- apps/api/openapi.json | 116 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/apps/api/openapi.json b/apps/api/openapi.json index 3916738b..dd325fa2 100644 --- a/apps/api/openapi.json +++ b/apps/api/openapi.json @@ -171,6 +171,81 @@ } } }, + "/search": { + "post": { + "summary": "Search for a keyword in Google, returns top page results with markdown content for each page", + "operationId": "searchGoogle", + "tags": ["Search"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "format": "uri", + "description": "The URL to scrape" + }, + "pageOptions": { + "type": "object", + "properties": { + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false + }, + "fetchPageContent": { + "type": "boolean", + "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.", + "default": true + } + } + }, + "searchOptions": { + "type": "object", + "properties": { + "limit": { + "type": "integer", + "description": "Maximum number of results. Max is 20 during beta." 
+ } + } + } + }, + "required": ["query"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SearchResponse" + } + } + } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" + } + } + } + }, "/crawl/status/{jobId}": { "get": { "tags": ["Crawl"], @@ -262,12 +337,53 @@ "data": { "type": "object", "properties": { + "markdown": { + "type": "string" + }, "content": { "type": "string" }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + }, + "SearchResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, "markdown": { "type": "string" }, + "content": { + "type": "string" + }, "metadata": { "type": "object", "properties": { From 3d18f2f7a0bb178a0103ccf0e7e4eddb570f4e66 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 10:16:23 -0700 Subject: [PATCH 61/96] Update README.md --- README.md | 109 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 290ed9b0..2b274133 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Crawl and convert any website into LLM-ready markdown. Build by [Mendable.ai](https://mendable.ai?ref=gfirecrawl) -*This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. 
It is not ready for full self-host yet - we're working on it* +_This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. It is not ready for full self-host yet - we're working on it_ ## What is Firecrawl? @@ -12,25 +12,23 @@ _Pst. hey, you, join our stargazers :)_ - ## How to use it? -We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like. +We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like. - [x] [API](https://firecrawl.dev/playground) - [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk) -- [X] [Node SDK](https://github.com/mendableai/firecrawl/tree/main/apps/js-sdk) +- [x] [Node SDK](https://github.com/mendableai/firecrawl/tree/main/apps/js-sdk) - [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/) - [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader) - [ ] LangchainJS - Coming Soon - To run locally, refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md). ### API Key To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key. - + ### Crawling Used to crawl a URL and all accessible subpages. This submits a crawl job and returns a job ID to check the status of the crawl. 
@@ -62,22 +60,89 @@ curl -X GET https://api.firecrawl.dev/v0/crawl/status/1234-5678-9101 \ ```json { - "status": "completed", - "current": 22, - "total": 22, - "data": [ - { - "content": "Raw Content ", - "markdown": "# Markdown Content", - "provider": "web-scraper", - "metadata": { - "title": "Mendable | AI for CX and Sales", - "description": "AI for CX and Sales", - "language": null, - "sourceURL": "https://www.mendable.ai/", - } - } - ] + "status": "completed", + "current": 22, + "total": 22, + "data": [ + { + "content": "Raw Content ", + "markdown": "# Markdown Content", + "provider": "web-scraper", + "metadata": { + "title": "Mendable | AI for CX and Sales", + "description": "AI for CX and Sales", + "language": null, + "sourceURL": "https://www.mendable.ai/" + } + } + ] +} +``` + +### Scraping + +Used to scrape a URL and get its content. + +```bash +curl -X POST https://api.firecrawl.dev/v0/scrape \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "url": "https://mendable.ai" + }' +``` + +Response: + +```json +{ + "success": true, + "data": { + "content": "Raw Content ", + "markdown": "# Markdown Content", + "provider": "web-scraper", + "metadata": { + "title": "Mendable | AI for CX and Sales", + "description": "AI for CX and Sales", + "language": null, + "sourceURL": "https://www.mendable.ai/" + } + } +} +``` + +### Search (Preview) + +Used to search the web, get the most relevant results, scrap each page and return the markdown. 
+ +```bash +curl -X POST https://api.firecrawl.dev/v0/search \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "query": "firecrawl", + "pageOptions": { + "fetchPageContent": true // false for a fast serp api + } + }' +``` + +```json +{ + "success": true, + "data": [ + { + "url": "https://mendable.ai", + "markdown": "# Markdown Content", + "provider": "web-scraper", + "metadata": { + "title": "Mendable | AI for CX and Sales", + "description": "AI for CX and Sales", + "language": null, + "sourceURL": "https://www.mendable.ai/" + } + } + ] } ``` From e7d385ad323eaac609f947ba44965658c359b7c2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 10:23:26 -0700 Subject: [PATCH 62/96] Update search.ts --- apps/api/src/controllers/search.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 28169c05..6839d8a0 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -138,12 +138,12 @@ export async function searchController(req: Request, res: Response) { logJob({ success: result.success, message: result.error, - num_docs: 1, - docs: [result.data], + num_docs: result.data.length, + docs: result.data, time_taken: timeTakenInSeconds, team_id: team_id, mode: "search", - url: req.body.url, + url: req.body.query, crawlerOptions: crawlerOptions, pageOptions: pageOptions, origin: origin, From 427f658c4457e98c003717b597f77cd260a3ec68 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 10:40:07 -0700 Subject: [PATCH 63/96] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b274133..a6cd240c 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ Response: } ``` -### Search (Preview) +### Search (Beta) Used to search the web, get the most relevant results, scrap each page and return the markdown. 
From d0a70de0620b6d6b99b6bd5e37ac6ab9e132106b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 11:46:25 -0700 Subject: [PATCH 64/96] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a6cd240c..5d695a2e 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,8 @@ curl -X POST https://api.firecrawl.dev/v0/search \ } ``` +Coming soon to the SDKs and Integrations. + ## Using Python SDK ### Installing Python SDK From 26c861db5aabc197ba7556c3ea70860d549bc4b1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 16:13:29 -0700 Subject: [PATCH 65/96] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d695a2e..c48ef10e 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@ We provide an easy to use API with our hosted version. You can find the playgrou - [x] [Node SDK](https://github.com/mendableai/firecrawl/tree/main/apps/js-sdk) - [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/) - [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader) -- [ ] LangchainJS - Coming Soon +- [X] [Langchain JS Integration 🦜🔗](https://js.langchain.com/docs/integrations/document_loaders/web_loaders/firecrawl) +- [ ] Want an SDK or Integration? Let us know by opening an issue. To run locally, refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md). From f2690f69094e3edff1f3b5d7c6ed146329d5b270 Mon Sep 17 00:00:00 2001 From: Roger M Date: Thu, 25 Apr 2024 01:35:17 +0100 Subject: [PATCH 66/96] Support for tbs, filter, lang, country and location with Serper search. 
--- apps/api/src/controllers/search.ts | 11 ++++++++++- apps/api/src/lib/entities.ts | 3 +++ apps/api/src/search/googlesearch.ts | 7 ++++--- apps/api/src/search/index.ts | 7 ++++++- apps/api/src/search/serper.ts | 17 +++++++++++++++-- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 6839d8a0..bc81f69e 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -29,7 +29,16 @@ export async function searchHelper( const tbs = searchOptions.tbs ?? null; const filter = searchOptions.filter ?? null; - let res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 7, tbs: tbs, filter: filter}); + let res = await search({ + query: query, + advanced: advanced, + num_results: searchOptions.limit ?? 7, + tbs: tbs, + filter: filter, + lang: searchOptions.lang ?? "en", + country: searchOptions.country ?? "us", + location: searchOptions.location, + }); let justSearch = pageOptions.fetchPageContent === false; diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index bda74486..7b46305c 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -20,6 +20,9 @@ export type SearchOptions = { limit?: number; tbs?: string; filter?: string; + lang?: string; + country?: string; + location?: string; }; export type WebScraperOptions = { diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts index 0f7c72f4..a6d09ed6 100644 --- a/apps/api/src/search/googlesearch.ts +++ b/apps/api/src/search/googlesearch.ts @@ -17,11 +17,12 @@ function get_useragent(): string { return _useragent_list[Math.floor(Math.random() * _useragent_list.length)]; } -async function _req(term: string, results: number, lang: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) { +async function _req(term: string, results: number, lang: string, country: 
string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) { const params = { "q": term, "num": results, // Number of results to return "hl": lang, + "gl": country, "start": start, }; if (tbs) { @@ -50,7 +51,7 @@ async function _req(term: string, results: number, lang: string, start: number, -export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise { +export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise { const escaped_term = querystring.escape(term); let proxies = null; @@ -70,7 +71,7 @@ export async function google_search(term: string, advanced = false, num_results const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop while (start < num_results && attempts < maxAttempts) { try { - const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout, tbs, filter); + const resp = await _req(escaped_term, num_results - start, lang, country, start, proxies, timeout, tbs, filter); const $ = cheerio.load(resp.data); const result_block = $("div.g"); if (result_block.length === 0) { diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts index 5a6a3d89..f365811c 100644 --- a/apps/api/src/search/index.ts +++ b/apps/api/src/search/index.ts @@ -9,6 +9,8 @@ export async function search({ tbs = null, filter = null, lang = "en", + country = "us", + location = undefined, proxy = null, sleep_interval = 0, timeout = 5000, @@ -19,13 +21,15 @@ export async function search({ tbs?: string; filter?: string; lang?: string; + country?: string; + location?: string; proxy?: string; sleep_interval?: number; timeout?: number; }) : Promise { try { if (process.env.SERPER_API_KEY && !tbs) { - return await 
serper_search(query, num_results); + return await serper_search(query, {num_results, tbs, filter, lang, country, location}); } return await google_search( query, @@ -34,6 +38,7 @@ export async function search({ tbs, filter, lang, + country, proxy, sleep_interval, timeout diff --git a/apps/api/src/search/serper.ts b/apps/api/src/search/serper.ts index f8806b7a..be716367 100644 --- a/apps/api/src/search/serper.ts +++ b/apps/api/src/search/serper.ts @@ -4,10 +4,23 @@ import { SearchResult } from "../../src/lib/entities"; dotenv.config(); -export async function serper_search(q, num_results): Promise { +export async function serper_search(q, options: { + tbs?: string; + filter?: string; + lang?: string; + country?: string; + location?: string; + num_results: number; + page?: number; +}): Promise { let data = JSON.stringify({ q: q, - num: num_results, + hl: options.lang, + gl: options.country, + location: options.location, + tbs: options.tbs, + num: options.num_results, + page: options.page ?? 
1, }); let config = { From a59ddf1855a8bf6fea69d0619ec35fabe2636692 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 24 Apr 2024 18:00:25 -0700 Subject: [PATCH 67/96] Nick: default to serper --- apps/api/src/search/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts index f365811c..d3b66aad 100644 --- a/apps/api/src/search/index.ts +++ b/apps/api/src/search/index.ts @@ -28,7 +28,7 @@ export async function search({ timeout?: number; }) : Promise { try { - if (process.env.SERPER_API_KEY && !tbs) { + if (process.env.SERPER_API_KEY ) { return await serper_search(query, {num_results, tbs, filter, lang, country, location}); } return await google_search( From 75597f72a197b692b600d1c1f006bc2f3dc37dae Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 25 Apr 2024 08:39:45 -0300 Subject: [PATCH 68/96] [Feat] Added allowed urls FireCrawl should be able to scrape LinkedIn Articles (/pulse/*) --- apps/api/src/scraper/WebScraper/utils/blocklist.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 0eef3320..a50e42ef 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -14,6 +14,14 @@ const socialMediaBlocklist = [ 'telegram.org', ]; +const allowedUrls = [ + 'linkedin.com/pulse' +]; + export function isUrlBlocked(url: string): boolean { + if (allowedUrls.some(allowedUrl => url.includes(allowedUrl))) { + return false; + } + return socialMediaBlocklist.some(domain => url.includes(domain)); } From 9c481e5e83aa95f10295de4d4246950faa212f25 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:05:53 -0300 Subject: [PATCH 69/96] [Feat] Coupon system WIP. 
Idea for solving #57 --- .../src/services/billing/credit_billing.ts | 208 ++++++++++-------- 1 file changed, 113 insertions(+), 95 deletions(-) diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index bf5be60e..7f6f9b83 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -41,14 +41,30 @@ export async function supaBillTeam(team_id: string, credits: number) { return { success: true, credit_usage }; } - // 2. add the credits to the credits_usage + // 2. Check for available coupons + const { data: coupons } = await supabase_service + .from("coupons") + .select("credits") + .eq("team_id", team_id) + .eq("status", "active"); + + let couponValue = 0; + if (coupons && coupons.length > 0) { + couponValue = coupons[0].credits; // Assuming only one active coupon can be used at a time + console.log(`Applying coupon of ${couponValue} credits`); + } + + // Calculate final credits used after applying coupon + const finalCreditsUsed = Math.max(0, credits - couponValue); + + // 3. Log the credit usage const { data: credit_usage } = await supabase_service .from("credit_usage") .insert([ { team_id, - subscription_id: subscription.id, - credits_used: credits, + subscription_id: subscription ? subscription.id : null, + credits_used: finalCreditsUsed, created_at: new Date(), }, ]) @@ -65,61 +81,32 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } - // 1. Retrieve the team's active subscription based on the team_id. 
- const { data: subscription, error: subscriptionError } = - await supabase_service - .from("subscriptions") - .select("id, price_id, current_period_start, current_period_end") - .eq("team_id", team_id) - .eq("status", "active") - .single(); - if (subscriptionError || !subscription) { - const { data: creditUsages, error: creditUsageError } = - await supabase_service - .from("credit_usage") - .select("credits_used") - .is("subscription_id", null) - .eq("team_id", team_id); - // .gte("created_at", subscription.current_period_start) - // .lte("created_at", subscription.current_period_end); - - if (creditUsageError) { - throw new Error( - `Failed to retrieve credit usage for subscription_id: ${subscription.id}` - ); - } - - const totalCreditsUsed = creditUsages.reduce( - (acc, usage) => acc + usage.credits_used, - 0 - ); - - console.log("totalCreditsUsed", totalCreditsUsed); - // 5. Compare the total credits used with the credits allowed by the plan. - if (totalCreditsUsed + credits > FREE_CREDITS) { - return { - success: false, - message: "Insufficient credits, please upgrade!", - }; - } - return { success: true, message: "Sufficient credits available" }; - } - - // 2. Get the price_id from the subscription. - const { data: price, error: priceError } = await supabase_service - .from("prices") - .select("credits") - .eq("id", subscription.price_id) + // Retrieve the team's active subscription + const { data: subscription, error: subscriptionError } = await supabase_service + .from("subscriptions") + .select("id, price_id, current_period_start, current_period_end") + .eq("team_id", team_id) + .eq("status", "active") .single(); - if (priceError) { - throw new Error( - `Failed to retrieve price for price_id: ${subscription.price_id}` - ); + if (subscriptionError || !subscription) { + return { success: false, message: "No active subscription found" }; } - // 4. Calculate the total credits used by the team within the current billing period. 
+ // Check for available coupons + const { data: coupons } = await supabase_service + .from("coupons") + .select("credits") + .eq("team_id", team_id) + .eq("status", "active"); + + let couponValue = 0; + if (coupons && coupons.length > 0) { + couponValue = coupons[0].credits; + } + + // Calculate the total credits used by the team within the current billing period const { data: creditUsages, error: creditUsageError } = await supabase_service .from("credit_usage") .select("credits_used") @@ -128,18 +115,27 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { .lte("created_at", subscription.current_period_end); if (creditUsageError) { - throw new Error( - `Failed to retrieve credit usage for subscription_id: ${subscription.id}` - ); + throw new Error(`Failed to retrieve credit usage for subscription_id: ${subscription.id}`); } - const totalCreditsUsed = creditUsages.reduce( - (acc, usage) => acc + usage.credits_used, - 0 - ); + const totalCreditsUsed = creditUsages.reduce((acc, usage) => acc + usage.credits_used, 0); - // 5. Compare the total credits used with the credits allowed by the plan. - if (totalCreditsUsed + credits > price.credits) { + // Adjust total credits used by subtracting coupon value + const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponValue); + + // Get the price details + const { data: price, error: priceError } = await supabase_service + .from("prices") + .select("credits") + .eq("id", subscription.price_id) + .single(); + + if (priceError) { + throw new Error(`Failed to retrieve price for price_id: ${subscription.price_id}`); + } + + // Compare the adjusted total credits used with the credits allowed by the plan + if (adjustedCreditsUsed + credits > price.credits) { return { success: false, message: "Insufficient credits, please upgrade!" 
}; } @@ -159,7 +155,17 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( .single(); if (subscriptionError || !subscription) { - // throw new Error(`Failed to retrieve subscription for team_id: ${team_id}`); + // Check for available coupons even if there's no subscription + const { data: coupons } = await supabase_service + .from("coupons") + .select("value") + .eq("team_id", team_id) + .eq("status", "active"); + + let couponValue = 0; + if (coupons && coupons.length > 0) { + couponValue = coupons[0].value; + } // Free const { data: creditUsages, error: creditUsageError } = @@ -168,13 +174,9 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( .select("credits_used") .is("subscription_id", null) .eq("team_id", team_id); - // .gte("created_at", subscription.current_period_start) - // .lte("created_at", subscription.current_period_end); if (creditUsageError || !creditUsages) { - throw new Error( - `Failed to retrieve credit usage for subscription_id: ${subscription.id}` - ); + throw new Error(`Failed to retrieve credit usage for team_id: ${team_id}`); } const totalCreditsUsed = creditUsages.reduce( @@ -182,46 +184,62 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( 0 ); + // Adjust total credits used by subtracting coupon value + const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponValue); + // 4. Calculate remaining credits. - const remainingCredits = FREE_CREDITS - totalCreditsUsed; + const remainingCredits = FREE_CREDITS - adjustedCreditsUsed; - return { totalCreditsUsed, remainingCredits, totalCredits: FREE_CREDITS }; + return { totalCreditsUsed: adjustedCreditsUsed, remainingCredits, totalCredits: FREE_CREDITS }; } - // 2. Get the price_id from the subscription to retrieve the total credits available. 
- const { data: price, error: priceError } = await supabase_service - .from("prices") - .select("credits") - .eq("id", subscription.price_id) - .single(); + // If there is an active subscription + const { data: coupons } = await supabase_service + .from("coupons") + .select("credits") + .eq("team_id", team_id) + .eq("status", "active"); - if (priceError || !price) { - throw new Error( - `Failed to retrieve price for price_id: ${subscription.price_id}` - ); + let couponValue = 0; + if (coupons && coupons.length > 0) { + couponValue = coupons[0].credits; } - // 3. Calculate the total credits used by the team within the current billing period. const { data: creditUsages, error: creditUsageError } = await supabase_service - .from("credit_usage") - .select("credits_used") - .eq("subscription_id", subscription.id) - .gte("created_at", subscription.current_period_start) - .lte("created_at", subscription.current_period_end); + .from("credit_usage") + .select("credits_used") + .eq("subscription_id", subscription.id) + .gte("created_at", subscription.current_period_start) + .lte("created_at", subscription.current_period_end); if (creditUsageError || !creditUsages) { - throw new Error( - `Failed to retrieve credit usage for subscription_id: ${subscription.id}` - ); + throw new Error(`Failed to retrieve credit usage for subscription_id: ${subscription.id}`); } const totalCreditsUsed = creditUsages.reduce( - (acc, usage) => acc + usage.credits_used, - 0 + (acc, usage) => acc + usage.credits_used, + 0 ); - // 4. Calculate remaining credits. 
- const remainingCredits = price.credits - totalCreditsUsed; + // Adjust total credits used by subtracting coupon value + const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponValue); - return { totalCreditsUsed, remainingCredits, totalCredits: price.credits }; -} + const { data: price, error: priceError } = await supabase_service + .from("prices") + .select("credits") + .eq("id", subscription.price_id) + .single(); + + if (priceError || !price) { + throw new Error(`Failed to retrieve price for price_id: ${subscription.price_id}`); + } + + // Calculate remaining credits. + const remainingCredits = price.credits - adjustedCreditsUsed; + + return { + totalCreditsUsed: adjustedCreditsUsed, + remainingCredits, + totalCredits: price.credits + }; + } From d3ab2ea9260017322c4527545abadfb041f8420c Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:51:01 -0300 Subject: [PATCH 70/96] [Feat] Implemented retry attempts to handle 502 errors --- apps/python-sdk/firecrawl/firecrawl.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index f1f5e6e4..4fc78cf8 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1,5 +1,6 @@ import os import requests +import time class FirecrawlApp: def __init__(self, api_key=None): @@ -62,11 +63,23 @@ class FirecrawlApp: 'Authorization': f'Bearer {self.api_key}' } - def _post_request(self, url, data, headers): - return requests.post(url, headers=headers, json=data) + def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.post(url, headers=headers, json=data) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response - def _get_request(self, url, headers): - 
return requests.get(url, headers=headers) + def _get_request(self, url, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.get(url, headers=headers) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response def _monitor_job_status(self, job_id, headers, timeout): import time From a7be09e479d9a6615e074753b455c6d7c14643da Mon Sep 17 00:00:00 2001 From: Mark Percival Date: Thu, 25 Apr 2024 14:16:14 +0000 Subject: [PATCH 71/96] Fix: Remove dotenv from npm module --- apps/js-sdk/firecrawl/package-lock.json | 29 +++---------------------- apps/js-sdk/firecrawl/package.json | 4 +--- apps/js-sdk/firecrawl/src/index.ts | 2 -- 3 files changed, 4 insertions(+), 31 deletions(-) diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 0497c6e1..ae39b204 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,20 +1,18 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.9", + "version": "0.0.13", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.9", + "version": "0.0.13", "license": "MIT", "dependencies": { - "axios": "^1.6.8", - "dotenv": "^16.4.5" + "axios": "^1.6.8" }, "devDependencies": { "@types/axios": "^0.14.0", - "@types/dotenv": "^8.2.0", "@types/node": "^20.12.7", "typescript": "^5.4.5" } @@ -29,16 +27,6 @@ "axios": "*" } }, - "node_modules/@types/dotenv": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz", - "integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==", - "deprecated": "This is a stub types definition. 
dotenv provides its own type definitions, so you do not need this installed.", - "dev": true, - "dependencies": { - "dotenv": "*" - } - }, "node_modules/@types/node": { "version": "20.12.7", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", @@ -82,17 +70,6 @@ "node": ">=0.4.0" } }, - "node_modules/dotenv": { - "version": "16.4.5", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", - "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://dotenvx.com" - } - }, "node_modules/follow-redirects": { "version": "1.15.6", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 566fdde9..5a311d3b 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -17,8 +17,7 @@ "author": "Mendable.ai", "license": "MIT", "dependencies": { - "axios": "^1.6.8", - "dotenv": "^16.4.5" + "axios": "^1.6.8" }, "bugs": { "url": "https://github.com/mendableai/firecrawl/issues" @@ -26,7 +25,6 @@ "homepage": "https://github.com/mendableai/firecrawl#readme", "devDependencies": { "@types/axios": "^0.14.0", - "@types/dotenv": "^8.2.0", "@types/node": "^20.12.7", "typescript": "^5.4.5" }, diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 65456001..76747d9c 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,6 +1,4 @@ import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios'; -import dotenv from 'dotenv'; -dotenv.config(); /** * Configuration interface for FirecrawlApp. 
From e8b8150b56002eec71b75768ee968151f09af451 Mon Sep 17 00:00:00 2001 From: Mark Percival Date: Thu, 25 Apr 2024 14:21:30 +0000 Subject: [PATCH 72/96] Chore: Add some basic jest tests --- apps/js-sdk/firecrawl/jest.config.cjs | 5 + apps/js-sdk/firecrawl/package-lock.json | 3622 +++++++++++++++++ apps/js-sdk/firecrawl/package.json | 7 +- .../src/__tests__/fixtures/scrape.json | 22 + .../firecrawl/src/__tests__/index.test.ts | 48 + 5 files changed, 3702 insertions(+), 2 deletions(-) create mode 100644 apps/js-sdk/firecrawl/jest.config.cjs create mode 100644 apps/js-sdk/firecrawl/src/__tests__/fixtures/scrape.json create mode 100644 apps/js-sdk/firecrawl/src/__tests__/index.test.ts diff --git a/apps/js-sdk/firecrawl/jest.config.cjs b/apps/js-sdk/firecrawl/jest.config.cjs new file mode 100644 index 00000000..b413e106 --- /dev/null +++ b/apps/js-sdk/firecrawl/jest.config.cjs @@ -0,0 +1,5 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', +}; \ No newline at end of file diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index ae39b204..98115979 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -12,11 +12,954 @@ "axios": "^1.6.8" }, "devDependencies": { + "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", "@types/node": "^20.12.7", + "jest": "^29.7.0", + "ts-jest": "^29.1.2", "typescript": "^5.4.5" } }, + "node_modules/@ampproject/remapping": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", + "dev": true, + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.24.2", + "resolved": 
"https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.24.2.tgz", + "integrity": "sha512-y5+tLQyV8pg3fsiln67BVLD1P13Eg4lh5RW9mF0zUuvLrv9uIQ4MCL+CRT+FTsBlBjcIan6PGsLcBN0m3ClUyQ==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.24.2", + "picocolors": "^1.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.24.4.tgz", + "integrity": "sha512-vg8Gih2MLK+kOkHJp4gBEIkyaIi00jgWot2D9QOmmfLC8jINSOzmCLta6Bvz/JSBCqnegV0L80jhxkol5GWNfQ==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.24.4.tgz", + "integrity": "sha512-MBVlMXP+kkl5394RBLSxxk/iLTeVGuXTV3cIDXavPpMMqnSnt6apKgan/U8O3USWZCWZT/TbgfEpKa4uMgN4Dg==", + "dev": true, + "dependencies": { + "@ampproject/remapping": "^2.2.0", + "@babel/code-frame": "^7.24.2", + "@babel/generator": "^7.24.4", + "@babel/helper-compilation-targets": "^7.23.6", + "@babel/helper-module-transforms": "^7.23.3", + "@babel/helpers": "^7.24.4", + "@babel/parser": "^7.24.4", + "@babel/template": "^7.24.0", + "@babel/traverse": "^7.24.1", + "@babel/types": "^7.24.0", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.24.4.tgz", + "integrity": "sha512-Xd6+v6SnjWVx/nus+y0l1sxMOTOMBkyL4+BIdbALyatQnAe/SRVjANeDPSCYaX+i1iJmuGSKf3Z+E+V/va1Hvw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.24.0", + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25", + "jsesc": "^2.5.1" + }, + "engines": { + 
"node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.23.6", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.23.6.tgz", + "integrity": "sha512-9JB548GZoQVmzrFgp8o7KxdgkTGm6xs9DW0o/Pim72UDjzr5ObUQ6ZzYPqA+g9OTS2bBQoctLJrky0RDCAWRgQ==", + "dev": true, + "dependencies": { + "@babel/compat-data": "^7.23.5", + "@babel/helper-validator-option": "^7.23.5", + "browserslist": "^4.22.2", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-environment-visitor": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", + "integrity": "sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-function-name": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz", + "integrity": "sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==", + "dev": true, + "dependencies": { + "@babel/template": "^7.22.15", + "@babel/types": "^7.23.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-hoist-variables": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz", + "integrity": "sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.24.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.24.3.tgz", + "integrity": 
"sha512-viKb0F9f2s0BCS22QSF308z/+1YWKV/76mwt61NBzS5izMzDPwdq1pTrzf+Li3npBWX9KdQbkeCt1jSAM7lZqg==", + "dev": true, + "dependencies": { + "@babel/types": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.23.3.tgz", + "integrity": "sha512-7bBs4ED9OmswdfDzpz4MpWgSrV7FXlc3zIagvLFjS5H+Mk7Snr21vQ6QwrsoCGMfNC4e4LQPdoULEt4ykz0SRQ==", + "dev": true, + "dependencies": { + "@babel/helper-environment-visitor": "^7.22.20", + "@babel/helper-module-imports": "^7.22.15", + "@babel/helper-simple-access": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/helper-validator-identifier": "^7.22.20" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.24.0.tgz", + "integrity": "sha512-9cUznXMG0+FxRuJfvL82QlTqIzhVW9sL0KjMPHhAOOvpQGL8QtdxnBKILjBqxlHyliz0yCa1G903ZXI/FuHy2w==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-simple-access": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.22.5.tgz", + "integrity": "sha512-n0H99E/K+Bika3++WNL17POvo4rKWZ7lZEp1Q+fStVbUi8nxPQEBOlTmCOxW/0JsS56SKKQ+ojAe2pHKJHN35w==", + "dev": true, + "dependencies": { + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-split-export-declaration": { + "version": "7.22.6", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", + "integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", + "dev": true, + 
"dependencies": { + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz", + "integrity": "sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", + "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.23.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.23.5.tgz", + "integrity": "sha512-85ttAOMLsr53VgXkTbkx8oA6YTfT4q7/HzXSLEYmjcSTJPMPQtvq1BD79Byep5xMUYbGRzEpDsjUf3dyp54IKw==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.24.4.tgz", + "integrity": "sha512-FewdlZbSiwaVGlgT1DPANDuCHaDMiOo+D/IDYRFYjHOuv66xMSJ7fQwwODwRNAPkADIO/z1EoF/l2BCWlWABDw==", + "dev": true, + "dependencies": { + "@babel/template": "^7.24.0", + "@babel/traverse": "^7.24.1", + "@babel/types": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/highlight": { + "version": "7.24.2", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.24.2.tgz", + "integrity": "sha512-Yac1ao4flkTxTteCDZLEvdxg2fZfz1v8M4QpaGypq/WPDqg3ijHYbDfs+LG5hvzSoqaSZ9/Z9lKSP3CjZjv+pA==", + "dev": true, + "dependencies": { + "@babel/helper-validator-identifier": "^7.22.20", + "chalk": "^2.4.2", + "js-tokens": "^4.0.0", + 
"picocolors": "^1.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/highlight/node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/highlight/node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/highlight/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/@babel/highlight/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "dev": true + }, + "node_modules/@babel/highlight/node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "dev": true, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@babel/highlight/node_modules/has-flag": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/highlight/node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/parser": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.4.tgz", + "integrity": "sha512-zTvEBcghmeBma9QIGunWevvBAp4/Qu9Bdq+2k0Ot4fVMD6v3dsC9WOcRSKk7tRRyBM/53yKMJko9xOatGQAwSg==", + "dev": true, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-syntax-async-generators": { + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz", + "integrity": "sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-bigint": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz", + "integrity": "sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-class-properties": { + "version": "7.12.13", + "resolved": 
"https://registry.npmjs.org/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz", + "integrity": "sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.12.13" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-import-meta": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz", + "integrity": "sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-json-strings": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz", + "integrity": "sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-jsx": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.24.1.tgz", + "integrity": "sha512-2eCtxZXf+kbkMIsXS4poTvT4Yu5rXiRa+9xGVT56raghjmBTKMpFNc9R4IDiB4emao9eO22Ox7CxuJG7BgExqA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-logical-assignment-operators": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz", + "integrity": 
"sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-nullish-coalescing-operator": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz", + "integrity": "sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-numeric-separator": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz", + "integrity": "sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-object-rest-spread": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz", + "integrity": "sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-optional-catch-binding": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz", + "integrity": 
"sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-optional-chaining": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz", + "integrity": "sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-top-level-await": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.14.5.tgz", + "integrity": "sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.14.5" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-typescript": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.24.1.tgz", + "integrity": "sha512-Yhnmvy5HZEnHUty6i++gcfH1/l68AHnItFHnaCv6hn9dNh0hQvvQJsxpi4BMBFN5DLeHBuucT/0DgzXif/OyRw==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/template": { + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.24.0.tgz", + "integrity": "sha512-Bkf2q8lMB0AFpX0NFEqSbx1OkTHf0f+0j82mkw+ZpzBnkk7e9Ql0891vlfgi+kHwOk8tQjiQHpqh4LaSa0fKEA==", + "dev": true, + "dependencies": { + 
"@babel/code-frame": "^7.23.5", + "@babel/parser": "^7.24.0", + "@babel/types": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.24.1.tgz", + "integrity": "sha512-xuU6o9m68KeqZbQuDt2TcKSxUw/mrsvavlEqQ1leZ/B+C9tk6E4sRWy97WaXgvq5E+nU3cXMxv3WKOCanVMCmQ==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.24.1", + "@babel/generator": "^7.24.1", + "@babel/helper-environment-visitor": "^7.22.20", + "@babel/helper-function-name": "^7.23.0", + "@babel/helper-hoist-variables": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/parser": "^7.24.1", + "@babel/types": "^7.24.0", + "debug": "^4.3.1", + "globals": "^11.1.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.24.0.tgz", + "integrity": "sha512-+j7a5c253RfKh8iABBhywc8NSfP5LURe7Uh4qpsh6jc+aLJguvmIUBdjSdEMQv2bENrCR5MfRdjGo7vzS/ob7w==", + "dev": true, + "dependencies": { + "@babel/helper-string-parser": "^7.23.4", + "@babel/helper-validator-identifier": "^7.22.20", + "to-fast-properties": "^2.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@bcoe/v8-coverage": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz", + "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", + "dev": true + }, + "node_modules/@istanbuljs/load-nyc-config": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", + "integrity": "sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==", + "dev": true, + "dependencies": { + "camelcase": "^5.3.1", + "find-up": "^4.1.0", + "get-package-type": "^0.1.0", + "js-yaml": 
"^3.13.1", + "resolve-from": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@istanbuljs/schema": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@istanbuljs/schema/-/schema-0.1.3.tgz", + "integrity": "sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/@jest/console": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/console/-/console-29.7.0.tgz", + "integrity": "sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/core": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/core/-/core-29.7.0.tgz", + "integrity": "sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==", + "dev": true, + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/reporters": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-changed-files": "^29.7.0", + "jest-config": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-resolve-dependencies": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "jest-watcher": "^29.7.0", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + 
"engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/@jest/environment": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/environment/-/environment-29.7.0.tgz", + "integrity": "sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==", + "dev": true, + "dependencies": { + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect/-/expect-29.7.0.tgz", + "integrity": "sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==", + "dev": true, + "dependencies": { + "expect": "^29.7.0", + "jest-snapshot": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/expect-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-29.7.0.tgz", + "integrity": "sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==", + "dev": true, + "dependencies": { + "jest-get-type": "^29.6.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/fake-timers": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/fake-timers/-/fake-timers-29.7.0.tgz", + "integrity": "sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@sinonjs/fake-timers": "^10.0.2", + "@types/node": "*", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": 
{ + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/globals": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/globals/-/globals-29.7.0.tgz", + "integrity": "sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/types": "^29.6.3", + "jest-mock": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/reporters": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/reporters/-/reporters-29.7.0.tgz", + "integrity": "sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==", + "dev": true, + "dependencies": { + "@bcoe/v8-coverage": "^0.2.3", + "@jest/console": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "@types/node": "*", + "chalk": "^4.0.0", + "collect-v8-coverage": "^1.0.0", + "exit": "^0.1.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "istanbul-lib-coverage": "^3.0.0", + "istanbul-lib-instrument": "^6.0.0", + "istanbul-lib-report": "^3.0.0", + "istanbul-lib-source-maps": "^4.0.0", + "istanbul-reports": "^3.1.3", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "slash": "^3.0.0", + "string-length": "^4.0.1", + "strip-ansi": "^6.0.0", + "v8-to-istanbul": "^9.0.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": 
"sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/source-map": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/source-map/-/source-map-29.6.3.tgz", + "integrity": "sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==", + "dev": true, + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.18", + "callsites": "^3.0.0", + "graceful-fs": "^4.2.9" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-result": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-result/-/test-result-29.7.0.tgz", + "integrity": "sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==", + "dev": true, + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "collect-v8-coverage": "^1.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-sequencer": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-sequencer/-/test-sequencer-29.7.0.tgz", + "integrity": "sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==", + "dev": true, + "dependencies": { + "@jest/test-result": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/transform": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/transform/-/transform-29.7.0.tgz", + "integrity": "sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==", + "dev": true, + "dependencies": { + 
"@babel/core": "^7.11.6", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "babel-plugin-istanbul": "^6.1.1", + "chalk": "^4.0.0", + "convert-source-map": "^2.0.0", + "fast-json-stable-stringify": "^2.1.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "micromatch": "^4.0.4", + "pirates": "^4.0.4", + "slash": "^3.0.0", + "write-file-atomic": "^4.0.2" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", + "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", + "dev": true, + "dependencies": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.2.1", + "resolved": 
"https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==", + "dev": true + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dev": true, + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true + }, + "node_modules/@sinonjs/commons": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.1.tgz", + "integrity": "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==", + "dev": true, + "dependencies": { + "type-detect": "4.0.8" + } + }, + "node_modules/@sinonjs/fake-timers": { + "version": "10.3.0", + "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-10.3.0.tgz", + "integrity": "sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==", + "dev": true, + "dependencies": { + "@sinonjs/commons": "^3.0.0" + } + }, "node_modules/@types/axios": { "version": "0.14.0", "resolved": 
"https://registry.npmjs.org/@types/axios/-/axios-0.14.0.tgz", @@ -27,6 +970,80 @@ "axios": "*" } }, + "node_modules/@types/babel__core": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", + "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", + "dev": true, + "dependencies": { + "@babel/parser": "^7.20.7", + "@babel/types": "^7.20.7", + "@types/babel__generator": "*", + "@types/babel__template": "*", + "@types/babel__traverse": "*" + } + }, + "node_modules/@types/babel__generator": { + "version": "7.6.8", + "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.6.8.tgz", + "integrity": "sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__template": { + "version": "7.4.4", + "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", + "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", + "dev": true, + "dependencies": { + "@babel/parser": "^7.1.0", + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__traverse": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.20.5.tgz", + "integrity": "sha512-WXCyOcRtH37HAUkpXhUduaxdm82b4GSlyTqajXviN4EfiuPgNYR109xMCKvpl6zPIpua0DGlMEDCq+g8EdoheQ==", + "dev": true, + "dependencies": { + "@babel/types": "^7.20.7" + } + }, + "node_modules/@types/graceful-fs": { + "version": "4.1.9", + "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", + "integrity": "sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==", + "dev": true, + "dependencies": { + "@types/node": "*" + } + }, + 
"node_modules/@types/istanbul-lib-coverage": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", + "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", + "dev": true + }, + "node_modules/@types/istanbul-lib-report": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", + "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", + "dev": true, + "dependencies": { + "@types/istanbul-lib-coverage": "*" + } + }, + "node_modules/@types/istanbul-reports": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", + "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", + "dev": true, + "dependencies": { + "@types/istanbul-lib-report": "*" + } + }, "node_modules/@types/node": { "version": "20.12.7", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", @@ -36,6 +1053,88 @@ "undici-types": "~5.26.4" } }, + "node_modules/@types/stack-utils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", + "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", + "dev": true + }, + "node_modules/@types/yargs": { + "version": "17.0.32", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", + "integrity": "sha512-xQ67Yc/laOG5uMfX/093MRlGGCIBzZMarVa+gfNKJxWAIgykYpVGkBdbqEzGDDfCrVUj6Hiff4mTZ5BA6TmAog==", + "dev": true, + "dependencies": { + "@types/yargs-parser": "*" + } + }, + "node_modules/@types/yargs-parser": { + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": 
"sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", + "dev": true + }, + "node_modules/ansi-escapes": { + "version": "4.3.2", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", + "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", + "dev": true, + "dependencies": { + "type-fest": "^0.21.3" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, "node_modules/asynckit": { 
"version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -51,6 +1150,332 @@ "proxy-from-env": "^1.1.0" } }, + "node_modules/babel-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", + "integrity": "sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==", + "dev": true, + "dependencies": { + "@jest/transform": "^29.7.0", + "@types/babel__core": "^7.1.14", + "babel-plugin-istanbul": "^6.1.1", + "babel-preset-jest": "^29.6.3", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@babel/core": "^7.8.0" + } + }, + "node_modules/babel-plugin-istanbul": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-6.1.1.tgz", + "integrity": "sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.0.0", + "@istanbuljs/load-nyc-config": "^1.0.0", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-instrument": "^5.0.4", + "test-exclude": "^6.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/babel-plugin-istanbul/node_modules/istanbul-lib-instrument": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-5.2.1.tgz", + "integrity": "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==", + "dev": true, + "dependencies": { + "@babel/core": "^7.12.3", + "@babel/parser": "^7.14.7", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^6.3.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/babel-plugin-jest-hoist": { + "version": "29.6.3", + "resolved": 
"https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-29.6.3.tgz", + "integrity": "sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==", + "dev": true, + "dependencies": { + "@babel/template": "^7.3.3", + "@babel/types": "^7.3.3", + "@types/babel__core": "^7.1.14", + "@types/babel__traverse": "^7.0.6" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/babel-preset-current-node-syntax": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.0.1.tgz", + "integrity": "sha512-M7LQ0bxarkxQoN+vz5aJPsLBn77n8QgTFmo8WK0/44auK2xlCXrYcUxHFxgU7qW5Yzw/CjmLRK2uJzaCd7LvqQ==", + "dev": true, + "dependencies": { + "@babel/plugin-syntax-async-generators": "^7.8.4", + "@babel/plugin-syntax-bigint": "^7.8.3", + "@babel/plugin-syntax-class-properties": "^7.8.3", + "@babel/plugin-syntax-import-meta": "^7.8.3", + "@babel/plugin-syntax-json-strings": "^7.8.3", + "@babel/plugin-syntax-logical-assignment-operators": "^7.8.3", + "@babel/plugin-syntax-nullish-coalescing-operator": "^7.8.3", + "@babel/plugin-syntax-numeric-separator": "^7.8.3", + "@babel/plugin-syntax-object-rest-spread": "^7.8.3", + "@babel/plugin-syntax-optional-catch-binding": "^7.8.3", + "@babel/plugin-syntax-optional-chaining": "^7.8.3", + "@babel/plugin-syntax-top-level-await": "^7.8.3" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/babel-preset-jest": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-29.6.3.tgz", + "integrity": "sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==", + "dev": true, + "dependencies": { + "babel-plugin-jest-hoist": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + 
"@babel/core": "^7.0.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dev": true, + "dependencies": { + "fill-range": "^7.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.0.tgz", + "integrity": "sha512-QW8HiM1shhT2GuzkvklfjcKDiWFXHOeFCIA/huJPwHsslwcydgk7X+z2zXpEijP98UCY7HbubZt5J2Zgvf0CaQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "caniuse-lite": "^1.0.30001587", + "electron-to-chromium": "^1.4.668", + "node-releases": "^2.0.14", + "update-browserslist-db": "^1.0.13" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/bs-logger": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/bs-logger/-/bs-logger-0.2.6.tgz", + "integrity": 
"sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==", + "dev": true, + "dependencies": { + "fast-json-stable-stringify": "2.x" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/bser": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz", + "integrity": "sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==", + "dev": true, + "dependencies": { + "node-int64": "^0.4.0" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001612", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001612.tgz", + "integrity": "sha512-lFgnZ07UhaCcsSZgWW0K5j4e69dK1u/ltrL9lTUiFOwNHs12S3UMIEYgBV0Z6C6hRDev7iRnMzzYmKabYdXF9g==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ] + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": 
"https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/char-regex": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz", + "integrity": "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/ci-info": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", + "integrity": "sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/sibiraj-s" + } + ], + "engines": { + "node": ">=8" + } + }, + "node_modules/cjs-module-lexer": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.2.3.tgz", + "integrity": "sha512-0TNiGstbQmCFwt4akjjBg5pLRTSyj/PkWQ1ZoO2zntmg9yLqSRxwEa4iCfQLGjqhiqBfOJa7W/E8wfGrTDmlZQ==", + "dev": true + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dev": true, + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/co": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", + "integrity": "sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==", + "dev": true, + "engines": { + "iojs": 
">= 1.0.0", + "node": ">= 0.12.0" + } + }, + "node_modules/collect-v8-coverage": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.2.tgz", + "integrity": "sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==", + "dev": true + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, "node_modules/combined-stream": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", @@ -62,6 +1487,93 @@ "node": ">= 0.8" } }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true + }, + "node_modules/create-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", + "integrity": "sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==", + "dev": true, + "dependencies": { + "@jest/types": 
"^29.6.3", + "chalk": "^4.0.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "prompts": "^2.0.1" + }, + "bin": { + "create-jest": "bin/create-jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "dev": true, + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/dedent": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", + "integrity": "sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==", + "dev": true, + "peerDependencies": { + "babel-plugin-macros": "^3.1.0" + }, + "peerDependenciesMeta": { + "babel-plugin-macros": { + "optional": true + } + } + }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -70,6 +1582,176 @@ "node": ">=0.4.0" } }, + 
"node_modules/detect-newline": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", + "integrity": "sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/diff-sequences": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", + "integrity": "sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.4.748", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz", + "integrity": "sha512-VWqjOlPZn70UZ8FTKUOkUvBLeTQ0xpty66qV0yJcAGY2/CthI4xyW9aEozRVtuwv3Kpf5xTesmJUcPwuJmgP4A==", + "dev": true + }, + "node_modules/emittery": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/emittery/-/emittery-0.13.1.tgz", + "integrity": "sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sindresorhus/emittery?sponsor=1" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/error-ex": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", + "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/escalade": { + "version": "3.1.2", + "resolved": 
"https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", + "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/execa": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "dev": true, + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } + }, + "node_modules/exit": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz", + "integrity": "sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/expect/-/expect-29.7.0.tgz", + "integrity": 
"sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==", + "dev": true, + "dependencies": { + "@jest/expect-utils": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "node_modules/fb-watchman": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/fb-watchman/-/fb-watchman-2.0.2.tgz", + "integrity": "sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==", + "dev": true, + "dependencies": { + "bser": "2.1.1" + } + }, + "node_modules/fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dev": true, + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", + "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", + "dev": true, + "dependencies": { + "locate-path": "^5.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/follow-redirects": { "version": "1.15.6", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", @@ -102,6 +1784,1121 @@ "node": ">= 6" } }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-package-type": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", + "integrity": "sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==", + "dev": true, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/get-stream": { + "version": "6.0.1", + "resolved": 
"https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/globals": { + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", + "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + 
"node": ">= 0.4" + } + }, + "node_modules/html-escaper": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", + "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", + "dev": true + }, + "node_modules/human-signals": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", + "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", + "dev": true, + "engines": { + "node": ">=10.17.0" + } + }, + "node_modules/import-local": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz", + "integrity": "sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==", + "dev": true, + "dependencies": { + "pkg-dir": "^4.2.0", + "resolve-cwd": "^3.0.0" + }, + "bin": { + "import-local-fixture": "fixtures/cli.js" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "dev": true, + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": 
true + }, + "node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true + }, + "node_modules/is-core-module": { + "version": "2.13.1", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.1.tgz", + "integrity": "sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==", + "dev": true, + "dependencies": { + "hasown": "^2.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-generator-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-generator-fn/-/is-generator-fn-2.1.0.tgz", + "integrity": "sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + 
"node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, + "node_modules/istanbul-lib-coverage": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", + "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/istanbul-lib-instrument": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-6.0.2.tgz", + "integrity": "sha512-1WUsZ9R1lA0HtBSohTkm39WTPlNKSJ5iFk7UwqXkBLoHQT+hfqPsfsTDVuZdKGaBwn7din9bS7SsnoAr943hvw==", + "dev": true, + "dependencies": { + "@babel/core": "^7.23.9", + "@babel/parser": "^7.23.9", + "@istanbuljs/schema": "^0.1.3", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-instrument/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-instrument/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-instrument/node_modules/yallist": { + "version": "4.0.0", + "resolved": 
"https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/istanbul-lib-report": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz", + "integrity": "sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==", + "dev": true, + "dependencies": { + "istanbul-lib-coverage": "^3.0.0", + "make-dir": "^4.0.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-source-maps": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.1.tgz", + "integrity": "sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==", + "dev": true, + "dependencies": { + "debug": "^4.1.1", + "istanbul-lib-coverage": "^3.0.0", + "source-map": "^0.6.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-reports": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-3.1.7.tgz", + "integrity": "sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==", + "dev": true, + "dependencies": { + "html-escaper": "^2.0.0", + "istanbul-lib-report": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", + "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", + "dev": true, + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/types": "^29.6.3", + "import-local": "^3.0.2", + "jest-cli": "^29.7.0" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + 
"node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/jest-changed-files": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-changed-files/-/jest-changed-files-29.7.0.tgz", + "integrity": "sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==", + "dev": true, + "dependencies": { + "execa": "^5.0.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-circus": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-circus/-/jest-circus-29.7.0.tgz", + "integrity": "sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "co": "^4.6.0", + "dedent": "^1.0.0", + "is-generator-fn": "^2.0.0", + "jest-each": "^29.7.0", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0", + "pretty-format": "^29.7.0", + "pure-rand": "^6.0.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-cli": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-29.7.0.tgz", + "integrity": "sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==", + "dev": true, + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "create-jest": "^29.7.0", + "exit": "^0.1.2", + "import-local": "^3.0.2", + "jest-config": "^29.7.0", + "jest-util": 
"^29.7.0", + "jest-validate": "^29.7.0", + "yargs": "^17.3.1" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/jest-config": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-29.7.0.tgz", + "integrity": "sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==", + "dev": true, + "dependencies": { + "@babel/core": "^7.11.6", + "@jest/test-sequencer": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-jest": "^29.7.0", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "deepmerge": "^4.2.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-circus": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "micromatch": "^4.0.4", + "parse-json": "^5.2.0", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@types/node": "*", + "ts-node": ">=9.0.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "ts-node": { + "optional": true + } + } + }, + "node_modules/jest-diff": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.7.0.tgz", + "integrity": "sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "diff-sequences": "^29.6.3", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-docblock": { + 
"version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-29.7.0.tgz", + "integrity": "sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==", + "dev": true, + "dependencies": { + "detect-newline": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-each": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-29.7.0.tgz", + "integrity": "sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "jest-util": "^29.7.0", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-environment-node": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-29.7.0.tgz", + "integrity": "sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-get-type": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.6.3.tgz", + "integrity": "sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-haste-map": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-haste-map/-/jest-haste-map-29.7.0.tgz", + "integrity": 
"sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/graceful-fs": "^4.1.3", + "@types/node": "*", + "anymatch": "^3.0.3", + "fb-watchman": "^2.0.0", + "graceful-fs": "^4.2.9", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "micromatch": "^4.0.4", + "walker": "^1.0.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "optionalDependencies": { + "fsevents": "^2.3.2" + } + }, + "node_modules/jest-leak-detector": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-leak-detector/-/jest-leak-detector-29.7.0.tgz", + "integrity": "sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==", + "dev": true, + "dependencies": { + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-matcher-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-29.7.0.tgz", + "integrity": "sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.7.0.tgz", + "integrity": "sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.6.3", + "@types/stack-utils": "^2.0.0", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": 
"^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-mock": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-29.7.0.tgz", + "integrity": "sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-pnp-resolver": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/jest-pnp-resolver/-/jest-pnp-resolver-1.2.3.tgz", + "integrity": "sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==", + "dev": true, + "engines": { + "node": ">=6" + }, + "peerDependencies": { + "jest-resolve": "*" + }, + "peerDependenciesMeta": { + "jest-resolve": { + "optional": true + } + } + }, + "node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-resolve": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve/-/jest-resolve-29.7.0.tgz", + "integrity": "sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-pnp-resolver": "^1.2.2", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "resolve": "^1.20.0", + "resolve.exports": "^2.0.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-resolve-dependencies": { + "version": 
"29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve-dependencies/-/jest-resolve-dependencies-29.7.0.tgz", + "integrity": "sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==", + "dev": true, + "dependencies": { + "jest-regex-util": "^29.6.3", + "jest-snapshot": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runner": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runner/-/jest-runner-29.7.0.tgz", + "integrity": "sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==", + "dev": true, + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/environment": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "graceful-fs": "^4.2.9", + "jest-docblock": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-leak-detector": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-resolve": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-util": "^29.7.0", + "jest-watcher": "^29.7.0", + "jest-worker": "^29.7.0", + "p-limit": "^3.1.0", + "source-map-support": "0.5.13" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runtime": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runtime/-/jest-runtime-29.7.0.tgz", + "integrity": "sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/globals": "^29.7.0", + "@jest/source-map": "^29.6.3", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "cjs-module-lexer": "^1.0.0", + "collect-v8-coverage": 
"^1.0.0", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0", + "strip-bom": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-29.7.0.tgz", + "integrity": "sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==", + "dev": true, + "dependencies": { + "@babel/core": "^7.11.6", + "@babel/generator": "^7.7.2", + "@babel/plugin-syntax-jsx": "^7.7.2", + "@babel/plugin-syntax-typescript": "^7.7.2", + "@babel/types": "^7.3.3", + "@jest/expect-utils": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0", + "chalk": "^4.0.0", + "expect": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "natural-compare": "^1.4.0", + "pretty-format": "^29.7.0", + "semver": "^7.5.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/jest-snapshot/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": 
"sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/jest-snapshot/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/jest-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-29.7.0.tgz", + "integrity": "sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-validate": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-29.7.0.tgz", + "integrity": "sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "camelcase": "^6.2.0", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "leven": "^3.1.0", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-validate/node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/jest-watcher": { + "version": "29.7.0", 
+ "resolved": "https://registry.npmjs.org/jest-watcher/-/jest-watcher-29.7.0.tgz", + "integrity": "sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==", + "dev": true, + "dependencies": { + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "jest-util": "^29.7.0", + "string-length": "^4.0.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-worker": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-29.7.0.tgz", + "integrity": "sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==", + "dev": true, + "dependencies": { + "@types/node": "*", + "jest-util": "^29.7.0", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-worker/node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "dependencies": { + "argparse": 
"^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsesc": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz", + "integrity": "sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==", + "dev": true, + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/kleur": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", + "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true + }, + "node_modules/locate-path": { + "version": "5.0.0", + "resolved": 
"https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", + "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", + "dev": true, + "dependencies": { + "p-locate": "^4.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/lodash.memoize": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/lodash.memoize/-/lodash.memoize-4.1.2.tgz", + "integrity": "sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==", + "dev": true + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/make-dir": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-4.0.0.tgz", + "integrity": "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==", + "dev": true, + "dependencies": { + "semver": "^7.5.3" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": 
"bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-dir/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true + }, + "node_modules/makeerror": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz", + "integrity": "sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==", + "dev": true, + "dependencies": { + "tmpl": "1.0.5" + } + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true + }, + "node_modules/micromatch": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", + "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "dev": true, + "dependencies": { + "braces": "^3.0.2", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -121,11 +2918,678 @@ "node": ">= 0.6" } }, + "node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true, + "engines": { + "node": 
">=6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true + }, + "node_modules/node-int64": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", + "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==", + "dev": true + }, + "node_modules/node-releases": { + "version": "2.0.14", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", + "integrity": "sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==", + "dev": true + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/npm-run-path": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "dev": 
true, + "dependencies": { + "path-key": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "dev": true, + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", + "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", + "dev": true, + "dependencies": { + "p-limit": "^2.2.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/p-locate/node_modules/p-limit": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "dependencies": { + "p-try": "^2.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + 
"node_modules/p-try": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "dev": true + }, + 
"node_modules/picocolors": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", + "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==", + "dev": true + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pirates": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.6.tgz", + "integrity": "sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, + "node_modules/pkg-dir": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", + "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==", + "dev": true, + "dependencies": { + "find-up": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pretty-format": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz", + "integrity": "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==", + "dev": true, + "dependencies": { + "@jest/schemas": "^29.6.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/pretty-format/node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, 
+ "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/prompts": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", + "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", + "dev": true, + "dependencies": { + "kleur": "^3.0.3", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" }, + "node_modules/pure-rand": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", + "integrity": "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==", + "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/dubzzz" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fast-check" + } + ] + }, + "node_modules/react-is": { + "version": "18.2.0", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", + "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==", + "dev": true + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve": { + "version": "1.22.8", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz", + "integrity": 
"sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==", + "dev": true, + "dependencies": { + "is-core-module": "^2.13.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/resolve-cwd": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz", + "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==", + "dev": true, + "dependencies": { + "resolve-from": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve.exports": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/resolve.exports/-/resolve.exports-2.0.2.tgz", + "integrity": "sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + 
"node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "dev": true + }, + "node_modules/sisteransi": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", + "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", + "dev": true + }, + "node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", + "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", + "dev": true, + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": 
"sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "dev": true + }, + "node_modules/stack-utils": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", + "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", + "dev": true, + "dependencies": { + "escape-string-regexp": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/string-length": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz", + "integrity": "sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==", + "dev": true, + "dependencies": { + "char-regex": "^1.0.2", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-bom": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", + "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-final-newline": { + "version": "2.0.0", 
+ "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/test-exclude": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", + "integrity": "sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==", + "dev": true, + "dependencies": { + "@istanbuljs/schema": "^0.1.2", + "glob": "^7.1.4", + "minimatch": "^3.0.4" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/tmpl": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", + "integrity": 
"sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", + "dev": true + }, + "node_modules/to-fast-properties": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", + "integrity": "sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/ts-jest": { + "version": "29.1.2", + "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.1.2.tgz", + "integrity": "sha512-br6GJoH/WUX4pu7FbZXuWGKGNDuU7b8Uj77g/Sp7puZV6EXzuByl6JrECvm0MzVzSTkSHWTihsXt+5XYER5b+g==", + "dev": true, + "dependencies": { + "bs-logger": "0.x", + "fast-json-stable-stringify": "2.x", + "jest-util": "^29.0.0", + "json5": "^2.2.3", + "lodash.memoize": "4.x", + "make-error": "1.x", + "semver": "^7.5.3", + "yargs-parser": "^21.0.1" + }, + "bin": { + "ts-jest": "cli.js" + }, + "engines": { + "node": "^16.10.0 || ^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "@babel/core": ">=7.0.0-beta.0 <8", + "@jest/types": "^29.0.0", + "babel-jest": "^29.0.0", + "jest": "^29.0.0", + "typescript": ">=4.3 <6" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + }, + "@jest/types": { + "optional": true + }, + "babel-jest": { + "optional": true + }, + "esbuild": { + "optional": true + } + } + }, + "node_modules/ts-jest/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": 
"sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/ts-jest/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/ts-jest/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/type-detect": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", + "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/type-fest": { + "version": "0.21.3", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", + "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/typescript": { "version": "5.4.5", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", @@ -144,6 +3608,164 @@ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", "dev": true + }, + "node_modules/update-browserslist-db": { + "version": "1.0.13", + "resolved": 
"https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", + "integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "escalade": "^3.1.1", + "picocolors": "^1.0.0" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/v8-to-istanbul": { + "version": "9.2.0", + "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", + "integrity": "sha512-/EH/sDgxU2eGxajKdwLCDmQ4FWq+kpi3uCmBGpw1xJtnAxEjlD8j8PEiGWpCIMIs3ciNAgH0d3TTJiUkYzyZjA==", + "dev": true, + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.12", + "@types/istanbul-lib-coverage": "^2.0.1", + "convert-source-map": "^2.0.0" + }, + "engines": { + "node": ">=10.12.0" + } + }, + "node_modules/walker": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.8.tgz", + "integrity": "sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==", + "dev": true, + "dependencies": { + "makeerror": "1.0.12" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": 
"sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true + }, + "node_modules/write-file-atomic": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-4.0.2.tgz", + "integrity": "sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==", + "dev": true, + "dependencies": { + "imurmurhash": "^0.1.4", + "signal-exit": "^3.0.7" + }, + "engines": { + "node": "^12.13.0 || ^14.15.0 || >=16.0.0" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dev": true, + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": 
"^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "dev": true, + "engines": { + "node": ">=12" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } } } } diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 5a311d3b..f969cbb8 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -7,8 +7,8 @@ "type": "module", "scripts": { "build": "tsc", - "publish":"npm run build && npm publish --access public", - "test": "echo \"Error: no test specified\" && exit 1" + "publish": "npm run build && npm publish --access public", + "test": "jest src/**/*.test.ts" }, "repository": { "type": "git", @@ -24,8 +24,11 @@ }, "homepage": "https://github.com/mendableai/firecrawl#readme", "devDependencies": { + "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", "@types/node": "^20.12.7", + "jest": "^29.7.0", + "ts-jest": "^29.1.2", "typescript": "^5.4.5" }, "keywords": [ diff --git a/apps/js-sdk/firecrawl/src/__tests__/fixtures/scrape.json b/apps/js-sdk/firecrawl/src/__tests__/fixtures/scrape.json new file mode 100644 index 00000000..efa03a8f --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/fixtures/scrape.json @@ -0,0 +1,22 @@ +{ + "success": true, + "data": { + "content": "\n\n[![Mendable logo](https://mendable.ai/Frame%20566%20(2)Mendable](/)\n\n* Getting started\n* Use Cases\n* [Docs](https://docs.mendable.ai)\n \n* 
[Pricing](/pricing)\n \n* [Blog](/blog)\n \n\nOpen main menu\n\n[Sign In](/signin)\n[Get Started](/signup)\n\n![](https://mendable.ai/fullbgdsm.png)\n\n[$ npm i @mendable/search](https://docs.mendable.ai)\n\nJust in time answers \nfor Sales and Support\n============================================\n\nTrain a secure AI on your technical resources that answers customer and employee questions so your team doesn't have to\n\nGet Started\n\nTalk to Us\n\nBacked BY\n\n![Y Combinator Logo](https://mendable.ai/yc.svg)Combinator\n\ninvisible\n\nAssistant\n\nHi, how can I help you?\n\nGenerating\n\nLoading...\n\n![Mendable loading placeholder image](https://mendable.ai/heroloading.png)\n\nFrom small startups to Fortune 500\n\nTrusted by top companies\n------------------------\n\n![Snapchat](https://mendable.ai/customers/snapchat2.svg)\n\n![MongoDB](https://mendable.ai/customers/mongo.svg)\n\n![Langchain](https://mendable.ai/customers/coinbase.svg)\n\n![Worldline](https://mendable.ai/customers/world.svg)\n\n![Nylas](https://mendable.ai/customers/nylass.svg)\n\n![Spectrocloud](https://mendable.ai/customers/spectro.svg)\n\n![Merge](https://mendable.ai/customers/merge.svg)\n\n![0x](https://mendable.ai/customers/zeroxx.svg)\n\n![Tecton.ai](https://mendable.ai/customers/tecton.svg)\n\n![Llama Index](https://mendable.ai/customers/llamaindex.png)\n\nDeploy a knowledgable technical AI anywhere\n\nUse Mendable for\n----------------\n\n[Docs & Knowledge Base](/usecases/documentation)\n\n-------------------------------------------------\n\nDecrease tickets & activation times with an AI assistant\n\n[Customer Success Enablement](/usecases/cs-enablement)\n\n-------------------------------------------------------\n\nUse a technical AI copilot to increase retention\n\n[Sales Enablement](/usecases/sales-enablement)\n\n-----------------------------------------------\n\nUse a technical AI copilot to build trust with prospects\n\n[Product 
Copilot](/usecases/productcopilot)\n\n--------------------------------------------\n\nSpeed up adoption with a technical assistant in your app\n\nSee how companies implement Mendable\n------------------------------------\n\n![](https://mendable.ai/langchain.png)\n\n[Langchain Docs](https://python.langchain.com)\n\n-----------------------------------------------\n\nOne of the most popular frameworks for developing AI applications\n\nhttps://python.langchain.com\n\n![](https://mendable.ai/0xlogo.png)\n\n[0x Docs](https://0x.org/docs)\n\n-------------------------------\n\n0x offers the core building blocks to create the most powerful Web3 apps\n\nhttps://0x.org/docs\n\n![](https://mendable.ai/zenlytics.png)\n\n[Zenlytics](https://docs.zenlytic.com)\n\n---------------------------------------\n\nSelf-serve analytics tool that helps you answer the deeper questions you have about your data\n\nhttps://docs.zenlytic.com\n\n![](https://mendable.ai/LlamaIndex.png)\n\n[Llama Index](http://gpt-index.readthedocs.io)\n\n-----------------------------------------------\n\nA central interface to connect your LLM’s with external data.\n\nhttp://gpt-index.readthedocs.io\n\n![](https://mendable.ai/spectrocloud-white.png)\n\n[Spectrocloud](https://docs.spectrocloud.com/)\n\n-----------------------------------------------\n\nK8s management uniquely built for scale. Manage the lifecycle of any type of cluster.\n\nhttps://docs.spectrocloud.com/\n\n![](https://mendable.ai/codegpt.png)\n\n[Code GPT](https://www.codegpt.co/)\n\n------------------------------------\n\nWith over 450,000 installs, CodeGPT brings AI inside your code editor.\n\nhttps://www.codegpt.co/\n\nAnd many more...\n\nFrom SSO to BYOK\n\nEnterprise-grade security\n-------------------------\n\n#### SOC 2 Type II\n\nMendable is SOC 2 Type II certified. 
Check out our [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-website)\n for additional information.\n\n#### SSO (SAML, OIDC, OAuth)\n\nSupports SAML 2.0, OpenID Connect, and OAuth 2.0 for single sign-on (SSO) and identity federation.\n\n#### RBAC (Project and chunk level)\n\nRole-based access control to ensure that only the right people have access to the right data.\n\n#### Secure Data Connectors\n\nIntegrate securely to Google Drive, Salesforce, Zendesk and more using OAuth 2.0.\n\n#### BYOK / BYOM\n\nBring your own key or custom model to Mendable to ensure compliance.\n\n#### Rate Limiting\n\nProject and user rate limit protection to prevent abuse and ensure availability.\n\nOver 20+ data connectors\n\nStart by connecting your data\n-----------------------------\n\nMendable offers managed ingestion through a simple online GUI and through our API. You can easily add, modify, or delete different types of sources.\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\nEasily Teach Your Model\n\nCustomize your model\n--------------------\n\nCustomize base model properties\n\nGPT-3.5-Turbo and GPT-4 are supported with a variety of base models coming soon\n\nTraining through answer correction\n\nCorrect the answers generated by the model and it will instantly learn from your feedback\n\nCustom prompt edits\n\nEdit the prompt to prevent hallucinations, maintain voice and format requirements\n\nKeep your data always updated\n\nMendable reingestion process offers CRON jobs and webhooks to keep your data synced and always up to date\n\nSupport Link\n\nHave customers redirected to your customer support link when the bot can't answer their questions\n\nPrivacy-first features\n\nMendable provides custom private, open source LLMs depending on your needs\n\n### Make it perfect for your use case\n\nWe know every uses case is slightly different so the Mendable platform allows 
you to customize your model to fit your company's needs through multiple features.\n\n* Support for multiple base LLM models (including privacy first models)\n* Training through answer correction\n* Custom prompt edits\n* Model creativity control\n\nTeach Model\n\nContinuous Training\n-------------------\n\nCoach the model by correcting the wrong responses, keeping your chat applications always up to date\n\nMore than just a chatbot\n\nTools and Actions\n-----------------\n\nGive your AI access to tools for augmentation and actions for automation. Integrate with any API\n\n![](https://mendable.ai/tools-purple.svg)![](https://mendable.ai/actions-yellow-svg.svg)\n\n![](https://mendable.ai/tools-pic.png)\n\nReact, Vanilla JS, API\n\nChoose your component\n---------------------\n\nMendable provides a variety of components ranging from search bars, to chat bubbles, to full CLIs built on our API. Customize them or easily build your own\n\n![Mendable component](https://mendable.ai/Frame%20597%20(3)\n\n \n import { MendableSearchBar } from '@mendable/search'\n \n \n\nFrom zero to production in minutes\n\nDeploy anywhere\n---------------\n\nDeploy Mendable internally, externally, or both. Our API allows you to send and query data from anywhere.\n\nView Documentation\n\n![]()\n\nMendables integration on Nylas is a goldmine of data. Now, the product team has a direct source of user feedback, questions, and problems. It's amazing!\n\nSaif Khan \\- Product @ NylasKarl Cardenas \\- Director @ SpectroCloudGuillermo Rauch \\- CEO @ Vercel\n\nAI Chat Infrastructure built for production\n\nEnterprise ready out of the box\n-------------------------------\n\nVerified Sources\n----------------\n\nReduce hallucinations by grounding answers with sources from your documentation\n\nEnterprise Grade Security\n-------------------------\n\nOur platform is built for enterprises in mind. 
We provide RBAC, bring your own model, and SLAs\n\nReady for the whole team\n------------------------\n\nMendable supports single-sign-on so your entire team can train, manage your custom AI\n\nExplore your dashboard\n\nGet insights from usage\n-----------------------\n\nUsage\n\nNumber of chat messages per month\n\n### Understand all interactions\n\nUnravel your users' queries, track their interactions, customize responses, and monitor your product usage effortlessly.\n\n* \\-Gain key insights into user queries\n* \\-Monitor real-time product-user interactions\n* \\-Fine-tune your model for optimized responses\n* \\-Track and evaluate Mendable usage\n\n### Insights beyond conversations\n\nLearn what your users are asking, how they are asking, and their satisfaction level with the answers. Teach the model based on the answers rating and improve the model's performance.\n\nOur wall of love\n\nDon't take our word for it\n--------------------------\n\nEmpower your users with AI powered search\n\nBuild an AI technical assistant in minutes\n------------------------------------------\n\nTry it out\n\nFrequently asked questions\n--------------------------\n\nIf you have anything else you want to ask,[reach out to us](mailto:hello@mendable.ai)\n.\n\n* * ### Is it free?\n \n We have a free plan that gives you 500 message credits. It is also free for certain Open source projects. Contact us to see if your project is eligible.\n \n * ### Do you train your AI model with my code?\n \n Currently, Mendable does not look at any of your repository's code. However, in the future we may add it. We will always give you the option to opt out of sharing your data.\n \n* * ### How do I remove the Powered by Mendable?\n \n To remove the Powered by Mendable, you need to upgrade to an enterprise or custom plan. 
Contact us at [garrett@mendable.ai](mailto:garrett@mendable.ai)\n and we can help you out.\n \n * ### How do I get an anon key?\n \n To get your anon key you need to sign up at [mendable.ai](https://mendable.ai)\n and create a project. Then you can find your anon key in the API Keys section of the dashboard. Anon keys are used for client-side while API keys are used for server-side.\n \n* * ### Which model does Mendable use?\n \n Mendable offers gpt-3.5-turbo, gpt-4, claude-2 and more. If you'd like a custom model, contact us and we can help you out.\n \n * ### Is GPT-4 pricing different?\n \n Yes, right now GPT-4 will cost 3 requests per message instead of 1 (gpt-3.5-turbo). That means that instead of 500 messages, you will get around 166 messages if you only use GPT-4.\n \n* * ### Can you correct the AI response?\n \n Yes, Mendable offers a 'teach the model' functionality where you can correct the AI response and it will learn from it.\n \n * ### How can I integrate Mendable with my application?\n \n Probably! Check out the Mendable documentation here [https://docs.mendable.ai](https://docs.mendable.ai)\n to better understand how you can start integrating.\n \n* * ### Is it 100% accurate?\n \n Like Humans, AI will never be 100% accurate. So we can't assure you that every solution will be correct.\n \n * ### How do I cancel my subscription?\n \n Simply log into our platform, go to your account and click on \"Open customer portal\" button. There you will be able to cancel/modify it through Stripe.\n \n* * ### How does Mendable work?\n \n Our application syncs with your documentation and support channels, then uses your docs and previously answered questions to suggest possible answers.\n \n * ### Are you open-source?\n \n Currently not - although we have some open source components and integrations. If you have input here, please message us at.[hello@mendable.ai](mailto:hello@mendable.ai)\n .\n \n* * ### How does Mendable price custom plans?\n \n #### 1\\. 
Use case\n \n * Mendable differentiates between internal and external use cases.\n * With Mendable, we give you the ability to use our chat bots for a variety of use cases, both for internal efficiency and external communication to your customers.\n \n #### 2\\. Total usage\n \n * For specifically external use cases, you will only pay for the value you're receiving.\n * Mendable will look at the total number of messages sent during a month.\n \n #### 3\\. Custom work\n \n * If there are any special feature requests (custom data connectors, etc.), we are happy to discuss these requirements!\n \n\nWe use tracking cookies to understand how you use the product and help us improve it! \nPlease accept cookies to help us improve.\n\nAccept CookiesDecline Cookies\n\n![Mendable logo](https://mendable.ai/Frame%20566%20(2)[Mendable](#_)\n\n[Instagram](https://instagram.com/sideguide.dev)\n[Twitter](https://twitter.com/mendableai)\n[GitHub](https://github.com/sideguide)\n[Discord](https://discord.com/invite/kJufGDb7AA)\n\n![SOC 2 Type II](https://mendable.ai/soc2type2badge.png)\n\nDocumentation\n\n* [Getting Started](/signup)\n \n\n* [API Docs](https://docs.mendable.ai)\n \n\n* [Integrations](https://docs.mendable.ai/integrations/slack)\n \n\n* [Examples](https://docs.mendable.ai/examples)\n \n\n* [Tools & Actions](https://docs.mendable.ai/tools)\n \n\nUse Cases\n\n* [Sales Enablement](/usecases/sales-enablement)\n \n\n* [Knowledge Base](/usecases/documentation)\n \n\n* [CS Enablement](/usecases/cs-enablement)\n \n\n* [Product Copilot](/usecases/productcopilot)\n \n\nResources\n\n* [Pricing](/pricing)\n \n\n* [Changelog](https://docs.mendable.ai/changelog)\n \n\n* [Security](/security)\n \n\n* [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-footer)\n \n\nCompany\n\n* [Blog](/blog)\n \n\n* [Contact](mailto:garrett@mendable.ai)\n \n\n© 2024 SideGuide - SideGuide Technologies Inc.\n\n[System 
Status](https://mendable.betteruptime.com)\n\n[Status](https://mendable.betteruptime.com)\n[Privacy Policy](/privacy-policy)\n[Privacy](/privacy-policy)\n[Terms](/terms-of-conditions)", + "markdown": "\n\n[![Mendable logo](/Frame 566 (2).png)Mendable](/)\n\n* Getting started\n* Use Cases\n* [Docs](https://docs.mendable.ai)\n \n* [Pricing](/pricing)\n \n* [Blog](/blog)\n \n\nOpen main menu\n\n[Sign In](/signin)\n[Get Started](/signup)\n\n![](/fullbgdsm.png)\n\n[$ npm i @mendable/search](https://docs.mendable.ai)\n\nJust in time answers \nfor Sales and Support\n============================================\n\nTrain a secure AI on your technical resources that answers customer and employee questions so your team doesn't have to\n\nGet Started\n\nTalk to Us\n\nBacked BY\n\n![Y Combinator Logo](/yc.svg)Combinator\n\ninvisible\n\nAssistant\n\nHi, how can I help you?\n\nGenerating\n\nLoading...\n\n![Mendable loading placeholder image](/heroloading.png)\n\nFrom small startups to Fortune 500\n\nTrusted by top companies\n------------------------\n\n![Snapchat](/customers/snapchat2.svg)\n\n![MongoDB](/customers/mongo.svg)\n\n![Langchain](/customers/coinbase.svg)\n\n![Worldline](/customers/world.svg)\n\n![Nylas](/customers/nylass.svg)\n\n![Spectrocloud](/customers/spectro.svg)\n\n![Merge](/customers/merge.svg)\n\n![0x](/customers/zeroxx.svg)\n\n![Tecton.ai](/customers/tecton.svg)\n\n![Llama Index](/customers/llamaindex.png)\n\nDeploy a knowledgable technical AI anywhere\n\nUse Mendable for\n----------------\n\n[Docs & Knowledge Base](/usecases/documentation)\n\n-------------------------------------------------\n\nDecrease tickets & activation times with an AI assistant\n\n[Customer Success Enablement](/usecases/cs-enablement)\n\n-------------------------------------------------------\n\nUse a technical AI copilot to increase retention\n\n[Sales Enablement](/usecases/sales-enablement)\n\n-----------------------------------------------\n\nUse a technical AI copilot to build trust 
with prospects\n\n[Product Copilot](/usecases/productcopilot)\n\n--------------------------------------------\n\nSpeed up adoption with a technical assistant in your app\n\nSee how companies implement Mendable\n------------------------------------\n\n![](/langchain.png)\n\n[Langchain Docs](https://python.langchain.com)\n\n-----------------------------------------------\n\nOne of the most popular frameworks for developing AI applications\n\nhttps://python.langchain.com\n\n![](/0xlogo.png)\n\n[0x Docs](https://0x.org/docs)\n\n-------------------------------\n\n0x offers the core building blocks to create the most powerful Web3 apps\n\nhttps://0x.org/docs\n\n![](/zenlytics.png)\n\n[Zenlytics](https://docs.zenlytic.com)\n\n---------------------------------------\n\nSelf-serve analytics tool that helps you answer the deeper questions you have about your data\n\nhttps://docs.zenlytic.com\n\n![](/LlamaIndex.png)\n\n[Llama Index](http://gpt-index.readthedocs.io)\n\n-----------------------------------------------\n\nA central interface to connect your LLM’s with external data.\n\nhttp://gpt-index.readthedocs.io\n\n![](/spectrocloud-white.png)\n\n[Spectrocloud](https://docs.spectrocloud.com/)\n\n-----------------------------------------------\n\nK8s management uniquely built for scale. Manage the lifecycle of any type of cluster.\n\nhttps://docs.spectrocloud.com/\n\n![](/codegpt.png)\n\n[Code GPT](https://www.codegpt.co/)\n\n------------------------------------\n\nWith over 450,000 installs, CodeGPT brings AI inside your code editor.\n\nhttps://www.codegpt.co/\n\nAnd many more...\n\nFrom SSO to BYOK\n\nEnterprise-grade security\n-------------------------\n\n#### SOC 2 Type II\n\nMendable is SOC 2 Type II certified. 
Check out our [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-website)\n for additional information.\n\n#### SSO (SAML, OIDC, OAuth)\n\nSupports SAML 2.0, OpenID Connect, and OAuth 2.0 for single sign-on (SSO) and identity federation.\n\n#### RBAC (Project and chunk level)\n\nRole-based access control to ensure that only the right people have access to the right data.\n\n#### Secure Data Connectors\n\nIntegrate securely to Google Drive, Salesforce, Zendesk and more using OAuth 2.0.\n\n#### BYOK / BYOM\n\nBring your own key or custom model to Mendable to ensure compliance.\n\n#### Rate Limiting\n\nProject and user rate limit protection to prevent abuse and ensure availability.\n\nOver 20+ data connectors\n\nStart by connecting your data\n-----------------------------\n\nMendable offers managed ingestion through a simple online GUI and through our API. You can easily add, modify, or delete different types of sources.\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\n![]()\n\nEasily Teach Your Model\n\nCustomize your model\n--------------------\n\nCustomize base model properties\n\nGPT-3.5-Turbo and GPT-4 are supported with a variety of base models coming soon\n\nTraining through answer correction\n\nCorrect the answers generated by the model and it will instantly learn from your feedback\n\nCustom prompt edits\n\nEdit the prompt to prevent hallucinations, maintain voice and format requirements\n\nKeep your data always updated\n\nMendable reingestion process offers CRON jobs and webhooks to keep your data synced and always up to date\n\nSupport Link\n\nHave customers redirected to your customer support link when the bot can't answer their questions\n\nPrivacy-first features\n\nMendable provides custom private, open source LLMs depending on your needs\n\n### Make it perfect for your use case\n\nWe know every uses case is slightly different so the Mendable platform allows 
you to customize your model to fit your company's needs through multiple features.\n\n* Support for multiple base LLM models (including privacy first models)\n* Training through answer correction\n* Custom prompt edits\n* Model creativity control\n\nTeach Model\n\nContinuous Training\n-------------------\n\nCoach the model by correcting the wrong responses, keeping your chat applications always up to date\n\nMore than just a chatbot\n\nTools and Actions\n-----------------\n\nGive your AI access to tools for augmentation and actions for automation. Integrate with any API\n\n![](/tools-purple.svg)![](/actions-yellow-svg.svg)\n\n![](/tools-pic.png)\n\nReact, Vanilla JS, API\n\nChoose your component\n---------------------\n\nMendable provides a variety of components ranging from search bars, to chat bubbles, to full CLIs built on our API. Customize them or easily build your own\n\n![Mendable component](/Frame 597 (3).png)\n\n \n import { MendableSearchBar } from '@mendable/search'\n \n \n\nFrom zero to production in minutes\n\nDeploy anywhere\n---------------\n\nDeploy Mendable internally, externally, or both. Our API allows you to send and query data from anywhere.\n\nView Documentation\n\n![]()\n\nMendables integration on Nylas is a goldmine of data. Now, the product team has a direct source of user feedback, questions, and problems. It's amazing!\n\nSaif Khan \\- Product @ NylasKarl Cardenas \\- Director @ SpectroCloudGuillermo Rauch \\- CEO @ Vercel\n\nAI Chat Infrastructure built for production\n\nEnterprise ready out of the box\n-------------------------------\n\nVerified Sources\n----------------\n\nReduce hallucinations by grounding answers with sources from your documentation\n\nEnterprise Grade Security\n-------------------------\n\nOur platform is built for enterprises in mind. 
We provide RBAC, bring your own model, and SLAs\n\nReady for the whole team\n------------------------\n\nMendable supports single-sign-on so your entire team can train, manage your custom AI\n\nExplore your dashboard\n\nGet insights from usage\n-----------------------\n\nUsage\n\nNumber of chat messages per month\n\n### Understand all interactions\n\nUnravel your users' queries, track their interactions, customize responses, and monitor your product usage effortlessly.\n\n* \\-Gain key insights into user queries\n* \\-Monitor real-time product-user interactions\n* \\-Fine-tune your model for optimized responses\n* \\-Track and evaluate Mendable usage\n\n### Insights beyond conversations\n\nLearn what your users are asking, how they are asking, and their satisfaction level with the answers. Teach the model based on the answers rating and improve the model's performance.\n\nOur wall of love\n\nDon't take our word for it\n--------------------------\n\nEmpower your users with AI powered search\n\nBuild an AI technical assistant in minutes\n------------------------------------------\n\nTry it out\n\nFrequently asked questions\n--------------------------\n\nIf you have anything else you want to ask,[reach out to us](mailto:hello@mendable.ai)\n.\n\n* * ### Is it free?\n \n We have a free plan that gives you 500 message credits. It is also free for certain Open source projects. Contact us to see if your project is eligible.\n \n * ### Do you train your AI model with my code?\n \n Currently, Mendable does not look at any of your repository's code. However, in the future we may add it. We will always give you the option to opt out of sharing your data.\n \n* * ### How do I remove the Powered by Mendable?\n \n To remove the Powered by Mendable, you need to upgrade to an enterprise or custom plan. 
Contact us at [garrett@mendable.ai](mailto:garrett@mendable.ai)\n and we can help you out.\n \n * ### How do I get an anon key?\n \n To get your anon key you need to sign up at [mendable.ai](https://mendable.ai)\n and create a project. Then you can find your anon key in the API Keys section of the dashboard. Anon keys are used for client-side while API keys are used for server-side.\n \n* * ### Which model does Mendable use?\n \n Mendable offers gpt-3.5-turbo, gpt-4, claude-2 and more. If you'd like a custom model, contact us and we can help you out.\n \n * ### Is GPT-4 pricing different?\n \n Yes, right now GPT-4 will cost 3 requests per message instead of 1 (gpt-3.5-turbo). That means that instead of 500 messages, you will get around 166 messages if you only use GPT-4.\n \n* * ### Can you correct the AI response?\n \n Yes, Mendable offers a 'teach the model' functionality where you can correct the AI response and it will learn from it.\n \n * ### How can I integrate Mendable with my application?\n \n Probably! Check out the Mendable documentation here [https://docs.mendable.ai](https://docs.mendable.ai)\n to better understand how you can start integrating.\n \n* * ### Is it 100% accurate?\n \n Like Humans, AI will never be 100% accurate. So we can't assure you that every solution will be correct.\n \n * ### How do I cancel my subscription?\n \n Simply log into our platform, go to your account and click on \"Open customer portal\" button. There you will be able to cancel/modify it through Stripe.\n \n* * ### How does Mendable work?\n \n Our application syncs with your documentation and support channels, then uses your docs and previously answered questions to suggest possible answers.\n \n * ### Are you open-source?\n \n Currently not - although we have some open source components and integrations. If you have input here, please message us at.[hello@mendable.ai](mailto:hello@mendable.ai)\n .\n \n* * ### How does Mendable price custom plans?\n \n #### 1\\. 
Use case\n \n * Mendable differentiates between internal and external use cases.\n * With Mendable, we give you the ability to use our chat bots for a variety of use cases, both for internal efficiency and external communication to your customers.\n \n #### 2\\. Total usage\n \n * For specifically external use cases, you will only pay for the value you're receiving.\n * Mendable will look at the total number of messages sent during a month.\n \n #### 3\\. Custom work\n \n * If there are any special feature requests (custom data connectors, etc.), we are happy to discuss these requirements!\n \n\nWe use tracking cookies to understand how you use the product and help us improve it! \nPlease accept cookies to help us improve.\n\nAccept CookiesDecline Cookies\n\n![Mendable logo](/Frame 566 (2).png)[Mendable](#_)\n\n[Instagram](https://instagram.com/sideguide.dev)\n[Twitter](https://twitter.com/mendableai)\n[GitHub](https://github.com/sideguide)\n[Discord](https://discord.com/invite/kJufGDb7AA)\n\n![SOC 2 Type II](/soc2type2badge.png)\n\nDocumentation\n\n* [Getting Started](/signup)\n \n\n* [API Docs](https://docs.mendable.ai)\n \n\n* [Integrations](https://docs.mendable.ai/integrations/slack)\n \n\n* [Examples](https://docs.mendable.ai/examples)\n \n\n* [Tools & Actions](https://docs.mendable.ai/tools)\n \n\nUse Cases\n\n* [Sales Enablement](/usecases/sales-enablement)\n \n\n* [Knowledge Base](/usecases/documentation)\n \n\n* [CS Enablement](/usecases/cs-enablement)\n \n\n* [Product Copilot](/usecases/productcopilot)\n \n\nResources\n\n* [Pricing](/pricing)\n \n\n* [Changelog](https://docs.mendable.ai/changelog)\n \n\n* [Security](/security)\n \n\n* [AI Trust Center](https://mendable.wolfia.com/?ref=mendable-footer)\n \n\nCompany\n\n* [Blog](/blog)\n \n\n* [Contact](mailto:garrett@mendable.ai)\n \n\n© 2024 SideGuide - SideGuide Technologies Inc.\n\n[System Status](https://mendable.betteruptime.com)\n\n[Status](https://mendable.betteruptime.com)\n[Privacy 
Policy](/privacy-policy)\n[Privacy](/privacy-policy)\n[Terms](/terms-of-conditions)", + "metadata": { + "title": "Mendable", + "description": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide", + "robots": "follow, index", + "ogTitle": "Mendable", + "ogDescription": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide", + "ogUrl": "https://mendable.ai/", + "ogImage": "https://mendable.ai/mendable_new_og1.png", + "ogLocaleAlternate": [], + "ogSiteName": "Mendable", + "sourceURL": "https://mendable.ai", + "sitemap": { + "changefreq": "hourly" + } + } + } +} diff --git a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts new file mode 100644 index 00000000..8c5ed5a1 --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts @@ -0,0 +1,48 @@ +import { describe, test, expect, jest } from '@jest/globals'; +import axios from 'axios'; +import FirecrawlApp from '../index'; + +import { readFile } from 'fs/promises'; +import { join } from 'path'; + +// Mock axios and set the type +jest.mock('axios'); +const mockedAxios = axios as jest.Mocked<typeof axios>; + +// Get the fixture data from the JSON file in ./fixtures +async function loadFixture(name: string): Promise<string> { + return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8') +} + +describe('the firecrawl JS SDK', () => { + + test('Should require an API key to instantiate FirecrawlApp', async () => { + const fn = () => { + new FirecrawlApp({ apiKey: undefined }); + }; + expect(fn).toThrow('No API key provided'); + }); + + test('Should return scraped data from a /scrape API call', async () => { + const mockData = await loadFixture('scrape'); + mockedAxios.post.mockResolvedValue({ + status: 200, + data: JSON.parse(mockData), + }); + + const apiKey =
'YOUR_API_KEY' + const app = new FirecrawlApp({ apiKey }); + // Scrape a single URL + const url = 'https://mendable.ai'; + const scrapedData = await app.scrapeUrl(url); + + expect(mockedAxios.post).toHaveBeenCalledTimes(1); + expect(mockedAxios.post).toHaveBeenCalledWith( + expect.stringMatching(/^https:\/\/api.firecrawl.dev/), + expect.objectContaining({ url }), + expect.objectContaining({ headers: expect.objectContaining({'Authorization': `Bearer ${apiKey}`}) }), + ) + expect(scrapedData.success).toBe(true); + expect(scrapedData.data.metadata.title).toEqual('Mendable'); + }); +}) \ No newline at end of file From 03d1c64ac808412a7b622da1d68de23e362d9886 Mon Sep 17 00:00:00 2001 From: Rafael Miller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 25 Apr 2024 13:33:06 -0300 Subject: [PATCH 73/96] Removed process.env call for API_KEY --- apps/js-sdk/firecrawl/src/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 76747d9c..12bb49f7 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -55,7 +55,7 @@ export default class FirecrawlApp { * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || ''; + this.apiKey = apiKey || ''; if (!this.apiKey) { throw new Error('No API key provided'); } @@ -224,4 +224,4 @@ export default class FirecrawlApp { throw new Error(`Unexpected error occurred while trying to ${action}. 
Status code: ${response.status}`); } } -} \ No newline at end of file +} From a3911bfc67a1e56b53b416b0ae479e1c77f72991 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Apr 2024 10:00:35 -0700 Subject: [PATCH 74/96] Update index.ts --- apps/api/src/search/index.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts index d3b66aad..88cbf812 100644 --- a/apps/api/src/search/index.ts +++ b/apps/api/src/search/index.ts @@ -2,6 +2,9 @@ import { SearchResult } from "../../src/lib/entities"; import { google_search } from "./googlesearch"; import { serper_search } from "./serper"; + + + export async function search({ query, advanced = false, From f2af7408e802cf892b624e3474538baba514330c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Apr 2024 10:31:28 -0700 Subject: [PATCH 75/96] Update main.py --- apps/playwright-service/main.py | 41 +++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/apps/playwright-service/main.py b/apps/playwright-service/main.py index b4b83de9..7a6e620c 100644 --- a/apps/playwright-service/main.py +++ b/apps/playwright-service/main.py @@ -1,29 +1,36 @@ -from fastapi import FastAPI, Response -from playwright.async_api import async_playwright -import os +from fastapi import FastAPI +from playwright.async_api import async_playwright, Browser from fastapi.responses import JSONResponse from pydantic import BaseModel + app = FastAPI() -from pydantic import BaseModel class UrlModel(BaseModel): url: str -@app.post("/html") # Kept as POST to accept body parameters -async def root(body: UrlModel): # Using Pydantic model for request body - async with async_playwright() as p: - browser = await p.chromium.launch() - context = await browser.new_context() - page = await context.new_page() +browser: Browser = None - await page.goto(body.url) # Adjusted to use the url from the request body model - page_content = await page.content() # Get the HTML content 
of the page - await context.close() - await browser.close() +@app.on_event("startup") +async def startup_event(): + global browser + playwright = await async_playwright().start() + browser = await playwright.chromium.launch() - json_compatible_item_data = {"content": page_content} - return JSONResponse(content=json_compatible_item_data) - + +@app.on_event("shutdown") +async def shutdown_event(): + await browser.close() + + +@app.post("/html") +async def root(body: UrlModel): + context = await browser.new_context() + page = await context.new_page() + await page.goto(body.url) + page_content = await page.content() + await context.close() + json_compatible_item_data = {"content": page_content} + return JSONResponse(content=json_compatible_item_data) From 6ea818fac8dace52e60c8bef6f3bcc218618a39d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 25 Apr 2024 14:49:12 -0300 Subject: [PATCH 76/96] Update version --- apps/js-sdk/firecrawl/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index f969cbb8..a493dabb 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.13", + "version": "0.0.14", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", From a32e16a9bebab9c89aacdb1aa4bce5dfa0976f12 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Apr 2024 11:20:35 -0700 Subject: [PATCH 77/96] Nick: added /search to the python sdk --- apps/python-sdk/README.md | 9 ++++++ .../build/lib/firecrawl/firecrawl.py | 26 ++++++++++++++++++ .../python-sdk/dist/firecrawl-py-0.0.5.tar.gz | Bin 3400 -> 0 bytes .../python-sdk/dist/firecrawl-py-0.0.6.tar.gz | Bin 0 -> 3476 bytes .../dist/firecrawl_py-0.0.5-py3-none-any.whl | Bin 2523 -> 0 bytes 
.../dist/firecrawl_py-0.0.6-py3-none-any.whl | Bin 0 -> 2573 bytes .../__pycache__/__init__.cpython-311.pyc | Bin 241 -> 254 bytes .../__pycache__/firecrawl.cpython-311.pyc | Bin 5892 -> 6997 bytes apps/python-sdk/firecrawl/firecrawl.py | 26 ++++++++++++++++++ .../python-sdk/firecrawl_py.egg-info/PKG-INFO | 4 +-- apps/python-sdk/setup.py | 4 +-- 11 files changed, 65 insertions(+), 4 deletions(-) delete mode 100644 apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz create mode 100644 apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz delete mode 100644 apps/python-sdk/dist/firecrawl_py-0.0.5-py3-none-any.whl create mode 100644 apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md index 0a802028..02ad3079 100644 --- a/apps/python-sdk/README.md +++ b/apps/python-sdk/README.md @@ -47,6 +47,15 @@ url = 'https://example.com' scraped_data = app.scrape_url(url) ``` +### Search for a query + +Used to search the web, get the most relevant results, scrap each page and return the markdown. + +```python +query = 'what is mendable?' +search_result = app.search(query) +``` + ### Crawling a Website To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format. diff --git a/apps/python-sdk/build/lib/firecrawl/firecrawl.py b/apps/python-sdk/build/lib/firecrawl/firecrawl.py index f1f5e6e4..ef3eb532 100644 --- a/apps/python-sdk/build/lib/firecrawl/firecrawl.py +++ b/apps/python-sdk/build/lib/firecrawl/firecrawl.py @@ -32,6 +32,32 @@ class FirecrawlApp: raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}') else: raise Exception(f'Failed to scrape URL. 
Status code: {response.status_code}') + + def search(self, query, params=None): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}' + } + json_data = {'query': query} + if params: + json_data.update(params) + response = requests.post( + 'https://api.firecrawl.dev/v0/search', + headers=headers, + json=json_data + ) + if response.status_code == 200: + response = response.json() + if response['success'] == True: + return response['data'] + else: + raise Exception(f'Failed to search. Error: {response["error"]}') + + elif response.status_code in [402, 409, 500]: + error_message = response.json().get('error', 'Unknown error occurred') + raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}') + else: + raise Exception(f'Failed to search. Status code: {response.status_code}') def crawl_url(self, url, params=None, wait_until_done=True, timeout=2): headers = self._prepare_headers() diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz deleted file mode 100644 index fab06b75d357414ee3585b4217a022726facd340..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3400 zcmV-O4Y%?iiwFpWmKbIN|7K}&Wn*$-cWfYK4Z)SJ-l4K=LT=g370Ewx6 z&U=QO73FlC&X4ctr&o*yEZzH4z*TVDU)Sv)R@d0xb$i|39{IGl1IR^2Q`mm*VM6aQ zS!8^{M%|-s@A2cK!`@@3*LU5=y@9o7;Jb$@Pnn<6tMDLM9=ML{3_5>`{T~em>hDnf zb$d;l?;Z}_VYl1wxzG+D`hz_(*rokP+>b*le7c}1-7pS@O{d?V{GI;`|KDHt|6Sw% zcjEt#Hu(SElP5<<#{EA~|NrjipAJsmzWn80?SFW9c&q>KyGJYje{j@?{LkH`{XbCu z|Au8Wpcy^*m8F8m(J|>cUF$7fuwzo!4Xf;zN@AVn3t-gpn7mtN^Ee`>&wnPJ@uyl@u5 zj^h7t&>iUb55tDw3%bK@|H#DuUBIJS#I8cQWqp{ljXi`5LP^LcDMWvg&1ptx7{*tE zEaRMHF=15ja7hGAOco;QGAtxD0()f}G46zb?k%Veb9%VVt_$9+DmQJb9 z)(w|Wja zWs0T6bSiK$7G3f*jv!si$R$lV=1e4l3@;$#mnZLEJbVB2x7Qxb@qT{spD_V0B~%E) zGdUkVq%aoPhSJ5=$0hwwlVnVMuo?kevtF^3*=om6R7xVAl6icE^N=Yx7dWLDlSs~4 zlE9J>0s@TFjMO&(27xp{mNevluqOYZTS*akpB7YG6k11(K`oka54uJ+4)pXVdmq<6 
z)rtz*6U_kUkMCbwc^bkBcERXcqG?-7u>()4?TEl;j%xB?Rtx^gRi8<6Xrc9%#KhaS zt{NekEnv{KMF4Es%Fvq}4KKWq|nh&jK_9TpFoqyWr?W&(gRsVULJR*>3 zgF&!Gk}2~-eZ=HkRC7YU;2BKlf?rlk-B>pox0H!I%rwmZI%!O0GGL zhDRln(W=Bc^V_Aaj4Qs5325Lv4y46r^a9KsN5Jq^9Yy0$S(c}f8luIeyo?+ha37fo z?#rob;)-mv3gdms7s14b=4{-q z?{US^NLb2a&>Ns@N&GqUFAB+Ixj+Uqcb2rE|Abgspb!v^pyE46=ZkqaaAZtc5dH-7 z<0ugOI0+&k>=HCkdr3pF2{%O(cuw%=oMsF#ECU-NbMjoVzX(qY8s%R~hbUMzmV-T| zs%m(tt#u>X=;j)x^qso7Hd)c)5vmeMONn?i&>jK=x~}ga@eK;N?id;ZuSBI0jw7Fy zQm<>CRHLF}N;%|DmAS=X@J) z1t_@GD;9#Hv40@0=whZE$fD5_^h*GKsA+RFN0HCK0;~7}nI0tWOa?5lD6m;jlQ>}! z(j({MRFO{>n1_-imzpn022GbZ{lv3Z`2<>%7(!|+<2g|Cn)@sgZ2K(??sOm^8T6S} zEQvt?zB|xC=IqVMhhniR>Bfm_o)f3DPN@1t2L|su=K0@O*8j};kH75s-{Xz-zpg0& zKG^4fWmD%K?f-7)KZZkm{%_8InEH?L|2wV!^oM;@|1thw`+v{l5!|bKPO|(H>p!jw zeY*bB9}c0t=N|R@rv9@N`Qrn4XZsU)VF+_;#Q$A@j`y*!^p$?dDlAJD_8XbNu`HDYY)XpEuq9YHZIhzD z{x*)-{&87n!A?i_$*1e;3ZG^RXT~xXUAFACBMA0>xd5Lw*WiNS7S#?2@>w_em4-Q! z59V9;+qgUep^t|IdeM@ATK!g$P_OK)kTI9Bu_{-UWh8u5&Z6w#!!lv^F|lcqgxr^h zMmoQXII`Q#@#+vO|3i*|2~+#ujHWCl*L5my>~hvkd4Qq2xpX^sLHga_Djk`7Rb80I`Z)G{cA_nk&elAxy28%I`hN%jon1u zs`k5$F+3t?hi#80g_vjq+`Y zdMT;>FEM4c_O(XtSd(zGMJ*K`sut`?b#4{pf@R6$JXM*xp?K?+XQhF<{`@s+wI+Sw zbSeDl7f8ROFzH-t!tHNVffcX1>ZtF^PIdf1u~Thb=ja+^aIqR63+e7Wtk*ECTVu`P zG_7q7>VA<^0HC2xz!_OtRZS#=O@l4m4})rrh2S{;+I*Lfk+oz2|9e8)?QRcJLKkH~NJV~N$NzhX6g zB+)MpR?!*dk@e;5UQd21UQbmT1bZWY^)as^4}uh^&e0##hxMFu^VS=@@rqu5-YGX{ zOaD+!rt)-d-7ow-65eZt_{}cU-H#Fb1d4@5)x=vn&MaNZ6P$O%4zMr}#?15o-97&| z=fD2C=l`3}e?9JY&H1l~{`}udmJXZE4tO+;JKt#kcRK&o>$+X@{vT8SHU59+^eC>SUj? 
zdyoIa-fH|ma!vf-36w8;vINWW#DVa;@%+EF`Bb2_Jz2Cqt4p_x2pE=eSmZCgkVh9{ z%TTQMLC3ZsS)6zwyJX?lXs?O?CjOiFZ{UH)|4knGp5njjc30y6z#W?Jf9(W}shjw3 z;=hUiCjOiFzcv2XZ?o?C{@?Ac#{d4IJ23Ho7f^oP%b0?R|0e#M_;2FBiU0Tf4E6Kz zfA#r)e{krU_`g&68=E)r-<SI&La;xb-KYI;fB)NX_5GiR zgQ1E4yMQxD@H|ZL&aI5jyh+YO{C>XtfUgsPYu7TKz{G!Z|Nk)V|GNWo|8L@d@{O;2 zy;J-j^as`Z--bsR|M5tmiT}HhKfeD}N<53>P!wYwA ez<>b*1`HT5V8DO@0|pH29R3H^#4)4*pa1}zUh|~@ diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c1b4206e6db72385a8c4bb6b0d84642b749911bb GIT binary patch literal 3476 zcmV;F4QuiriwFq8oGNAl|7K}&Wn*$-cWf-HXy4-XDda*@##mVa ze`TrQadb+0PS<)#SL~G3b;By#rIJ{0@)ZbbeM(-fvt=BSH$VPN=5b1%7GubxS7+9< zc*Twrx`4@+S(b>?PG`ZhWj=HKc-2|4D4?^DQQj$+vL59yUwTSj=thTfU3bh@l!vDT zcIy2Pt6H~obT{~afby`0*cFsp*4ri9*+RG=l!Sbi0{W9|Ni#yjFuoFG9p@yA z38R9CYa(D^!U&B5qNccFGr=<^+OjUrvn0=8JR=x=5FWv-5I?M7z7(KnErW?%FmeU) z0tsof$RUg%t2khxAg$-mUx*`zoMj|Q<4Ybeq)PBr5~?+Ml;u3*mkd9#bWVLHXO!Ql zpYdp+X3iRXLd@b=*29P7EQ0+GLz+Rz1Hy=5psd*FU&*B{`S)ogiB5QOYFX3ibVkL} zf^X&cMoa}ADlWik6%Cf?YReIyq}-^XG%ay2^E6yOpb>y zDf9)g;qKz>(~|#3NiroqSd9RwSKFV@FSavyX7@H(F6a zd!hx<{Olc(qk7Ll~D1*kgY0hN(wRNpmN}0&ROw(5`-YP$} z)wYU>`g2sMqLv4m2bH2g-51*;YL`1Yt+<~O(352xNW0AFdoW7u0o^w>2~9L*S)N9! zix#T#1+s6zedHaSj8oT_4GC&R!>fr~?M%s~+#ki|Z+eT6Q@^=wH7-lZ-M}QGi1aFw zTl8>p?`T+YEhJ~@B1cOU4nY??Rq~&f^&8$)B+Tb?u;d`%dOkI=knRjuRy6+}&)P)f z{t_3#>J|OKSNV!W`Dz9d2UA3i5;&4Uu;QuQg!R%K{5Drs>DLE0bjPo!cIDVUENluJ=m@zVA(9X1Cpe(Vg(qxeK zoCtJpF`1&iPuul1t~j{Xgrz(Ny#c<~#9uQ1eX%oHE>Hl?l_f9eqfNG88u94(;3MTEZymk*att%0f3?Q+@6K;qVrGtqFuG~ zsA|7xlZON@_yL#MD#;YeBP>eYtj2_V#nH1tNa(7muG`I-U*f~z-DWt1b_p?0+HYAVNgv(7a=q*C)wA%UN#DZ0TMw7sFn@ZIA9 z>}Gv)&uyqa3+xL^PoXz^i_Df9UV;L7lBOWRXFw4mgK~Qq@h-6jm|)m0G_~LvX6xIQ#PgX}EJ;BCzCO^PbpGP(Z82FDO5;E^&WY1SCsggC1D%f? 
zbN}zFpa1o@@Bekp{lA0U{~PZ*{~M3TCjH~S-v2ABI(KOQpO^nJ8g<7e|HGXB82^9J z^PgUKa55a2^B?2?mH+oV9>Gd$M1DXHfj{Du-$OT`}b4~uoq4pndz&rcz zz#A(u*MA28|Ni)o*MI%-_XhtB{ww^iPME(a0N9WJ-JUxtum6T4ycg8#c84ba;~+)j zeJm{fN#BkYmL(7N8CdK*s%Q#|3r{$pxHafaZ-iKG8@V>Wj z7A#}YWy{`5L9ma?3HZD@2NwjFs8+xt?~0ROX_zy43%_N*jLQ@VogNbCNlON5C&P+` z`svRG8%v3e)p1yP=A_S>j^C~mW}gz9CP~PBnKaV*UBr>yZuVDsto#qz0|w0Pe>0l0 zlzgmnd2N@YuFFq2U2(Gg(FS5+IQ)NI7nP#EDV%bglPB)0Jn3Az9VN^SZne}V`fq!StVu~*+0@wif-*;)*LZ#$0F@KcbKMOE zsI#EQ;M}dFMz`9O=eEdwp9x`K;EZ@a*PK#KE05%=F-WR?(dfrQv1_E%$S1}5O>1sH zr95PTJkeLl6z^U?chrf~Dfw6~@tr)!`R?M{Zjx?Q>)j+69+C5b+iR17`+b`XUH76% zmz>$tCq&zlLUU}{vXk8=_PgkP6kjz?snXJjK9@~(u1Ju~8nbokuPa$J=&Z|OThw>B zer=t~MSyMkOYI)OGSc;5$3aMPVFLO0|HMg1aT+IaEA5rC`g=PF&Ehw44!2($lLQNg zP$i7)4jDd`F4EiSGNfgDNy=a*=IQ#HRvrTRH4R|Vp0@ASeq)wjKo81A+66~{&N|Cf zj-6VkGCH;lbwk=W8mHoJ6-Ss);Nen@43Luaa;J`NW4>zckD9ns&BFB_l~i=7bP`Xh za;sn$EJ|h&R+%mh#oJC^Q5vXISpJM!ZS6ity4?KvEq1@7DCvCEjN4zR0vn!m(>Z^x zI8_Fd5~o_aF3~N);AAy97Q1_JIImGyx5k>oXm8Ys-Cl zAvDol>(nRTVSIHB-eH2(diuf?8_K$?CHV3kkgS#LAj(}_pf;EMq@LHv)Lwmah8A@;A!8o8c&FOWuUW_Tw^twaUGn~l8t4uq4TNX0R*)0u>b8LLX)?QRcJLK56N%!Oz0}nxR_06LHk8+CS6b-S+^JWcHmRVdagnsSR3W5 zGewIy2tuH`M88vCwqwoROYiW;8+!d|r`#MZ{X;dF$_E1Ke&MeX=zdg$Umr5meHXD0 zpjc>BO}e$?%+j??8vjh{fC_WR+?@a2`T3uD|I4>K|J(ikm%+(N&)E7sKmYTRwZj&R zV;;@p&KKJMXXStP&;gkIPm}*){QsfzKhXc1{14;*xBUMXycO*8-v8AF|KICk{Acn% zzJ4+9|1#Hq2LBEI8~iu;zZd`C{POPg<0o&N>_c|v_&@4x;{Vtk8vH**DPQ(x36|xF z14zH~`5$X{Rfg7@pR!$X|OQkKT(tUGZ2C8uoR`;=~KtB@6$I_8R;* z_;2vvlzWcyEGg-R>s- z_Xl0q;Qt{?`M9Ss1%v+v{|){d{5SZ2$M;Y_jsKhX|NFy%dH>U4_utsO!GDwgegE+P zhTnfa>6-lSdy4<+AlRQT?$ZAEzyEQx`TpO*$mIVWq?|*5=V5?%VP$mT&2k>%WBBq5 zzBK?oyO!|;2LH|V|NXfB@0sg=ga0xS>`v`}AN~*f)%Aa`D}Vp9KQj0K53~RH{%5)4 zSsaI=c-e-mY0KaJ@e=C4mp{*?QUujIzwq}XCw5(X^vh2!)pG(9TPl;T%+qjU-|`6n zJ%q}f|A}3D4xrKE$)^D(_U6+7x>Nb%NJz@QXB2e*g2_pP<%AVu;h36MkqkrEXM1|kqZI;ezBP-zN+Za@sE z@X(;5L=X`%p*JH#?@04P)85|P?QD$6n;W(!gOG1h8h;yeT^B-6p6xKUUG zND`}m%OD%q{Iw|MsXmlN{%16ktfuIB=wE|A(YN~vHgmei?P+AOpc=3d3Uatfu4|T! 
zU$+w7_k}*fQFwVGtG&p}CV;%RK|pLmtwiyP+myds`R}9t(X6*K*z04>)xUajjb_hhjJZ&buu2dcVHNUn8$Y^}$qXNo zwJf&2OKiyHbUtKEO8n|_on#cwh9&g7I3FIZs=A*>*nOn#-A7WdxH~HYwvj^$)xHLP z59DacHMgq5LgN<_kO7Z78B__P;A~sZNd&#ymL8Rw#!ei0QgpqH`(}PBZ_V@?r0!8b z*Wk`DT%-WAr=a;TG)9h$2O(DTITW<_wy=moh)?k#Yxa;pL3tF&~8ywQ;IgDY0<7E7(LD> z;2NnAU>@CY_k&7Bs+rx8?|gjziu)+;$-c6Kp7kFC#p@Rb4j!#INPp0rDI(9cj^3Ak zamYH@d3wpiVQ_U+Fr?;E(-)9?%x?WX88WnEo%Xp9SA~;J9uN1hmZyb-fy?o4Clx8N zz?FhAtvmI7w$$P0*o5cronbl5J(^A%Hw9z79*k9B5&j+Sl)lsppf?&V`!k~mPKzDY z3&JNnv(V4qyACUJXT7{U<_$g1GJmr$()eT9mhCOcbqLpS`WQ!9Iti4@8;5uoSiG@b zogpTFjT?`VRF9P<3Y}lHZm9SyjC(g7FD0Piu=jEey~OI(0>wJ)v8$G3=Ro=wane1N z!y_>1$DmOqr7&%7Bc%`a+>3UGMbCck-Zy2JSv=x;S=e1{cPw2*_zP7`Wkz*Z&}a#` zc8zbOvGk~4cXF%%I)pgojTV+q@Z8PStGKjg?IfxFiqLdOd4$7yNfkud-w3cyQ2l*w zNF7mp{iA3a`qF2u$Qt{(2(d>y8m2Tnn`VDe6cF598hdtWkXS$OBi$0Qwo^Ypv9ypB zBA5X(|77vSzmwf9@a9qZhbu*)uj^)|4){ctVVdLF8%A?Mu(=tJ3-s6g4V1f{=rhFB z7W_hPE-SD{E#vGDwIzyITc&IDCvNa+$~aG2JZ)AKY)G^L6at0(3h~8R!6K{vf4zOpmH{g%NeTz_P59P=!?PSZpn$w1`m7eBIH~Z>U~<9saIFLzr8R= zJ>r+_F^14gWn$I9U9p{*?m*|Nm#-P+h4ImQjPLoJG2ZdDl5&+Z4(@*eikV{14O)6% z8|VQ|$c`!wN^|7$@*9Q5QukgH&Qald9*D znd)=asTGZe+1BFZdcN-!^jnlAH;raJdC`57`gT#e*}>sRu`WBujvfhwfnH~OR}BW* zZ!#Y0+<`D~05c6~7&`|h7?N%%bPL@L?KXMXZPLRPZS6AbSpc&nbPOCim`EB%M_12r zQq&$szaUAMFnX{|4zL!DpiAnpX|SSud4j(vDK+_CG2}BWqT{zeAuVDhiRu$`qXox9WfI}jSsvvY909?W4ZoS z)!5Xcm#<)rABEuKmlOEkJ)if-|9r~Me;YO}_2UiTN5tRt_`d)EK-u}jKOr_@ypI1P z)Fkbl}}D{pJEf9DzP*oOC0>Tl(4P5A$E@AD;Z Z$Nek)Q3xP!Apo$0_xABz^GWF2>_1rH5a0j+ diff --git a/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl b/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..5aba56187b34e37ad84764a061ff2b3c769c0bb0 GIT binary patch literal 2573 zcmai$3pmsJAIIk&<&s+@Nm0UVtP9I+SjJq3h_u|A9hqU0OSw(%3@5oI|D{|~hs7k3 z6JeNb{%DDmTj^-(2%+&`=RAMuJpKQEzvun@p6~N~f3NTN^ZkBq4oCq(SpWdA4WLw! 
z<(5Sb9^1zosqrMo(|K%&pHGNa_yrIejm2RJXtWMK0;$!_aA6{Qw5@G*thE`AUA@3A zkftL(K~!H}7S9K8*g(=q1#bD8?!spT0RS*>75N{K8*m=;gR5=O#4)MnRadE*II+^< zlQl*IZYurz_~Lcs;g4p#cPj87$neJbW~*2FI<>7gn`h@7KHb+zMKr;~YWz|l^V6T` z_+D3(hBaCrIaC9yizEk}3oJ({8`N%JCl57Izs=VQGttDKKHMQSNWWFu_yCuu;# z-mx@kiBZj{7uS8(Gkc3%EHu)Ox9|x}d0H^~Kz8;znIajxs}(2 zY^j&q7E$Q|`<~R2jr_wHH zZ-}|aUE3(ksJbGgmt49{p8hFR1~tVDr(r*ITx$mW`Y5eaa)T~~@k0bJqbm+B_D+5GDxDSCZWRR9X_3Uelxn$%6?GwOIJA34 zaYD$qeH+LyB;pg!=9k94u1Dg*fJNHx-sVkA%a@l{li#Q}Mna5IX+<+xy31ZrPeSlu zNv_;aa{Sif`5?}-yzbR4`osm1{)0%1* zec3W=S14B?j47N_l-_b5YLfN`5wq%?6vS8^Hk&h|F3Feo;MDQn@CCMmgxw^W%6xeP z8f~`>9Se}eO*^p-_gf|T(7LN*%kn}7%F^T#^YLHrr_{GVpL&=@7U?fXyt9kbnRcc9 zf?3L*xi~+MnO$pFEvUXc{uyLl`rK-jzI>Z7R2%{Id`Ds_dsjb*?K~fj=ITpIT^&?7 zQjDwrV9F_B)Wg?FVIp@IWb~vtF;bivQ>sPMII&M873BQUR1`O{Y;tE2zB+p)r8)XF zVsU?zTyRv}l9_SkJmAjU`^%C7K32!`>YK=C2N&>WPPI8gYqvV=>U`d1jQV@OjcuiW ze1ER!gsRBZqUqh7oO=&mVQuUSjV;|LCm-pR$=0{lHUwmy!&Ko}ixjW7d%96=bFC7Z zIWD_B*(Wq0XJ4^BWX%#KcSSY+si_u3W381H!XMHS0%lihbpXBt6@{d^E=xlyBGW1N zG(SrkRT6sQ`lX=fv2(2p(L^L$d}0c~xBBjtay0eKTb$rib{;yA_$%pXs8pt@>MapY zC#TwAPh_dm5`jx_;1nYoxj_LSfLkU1E?tBo)7VZqQfH}i# zGkPTA!Lk-h+|*TpKvrVfeKpmSK41&!CD(_h2Lfxe6xPnlBhK^{7Z>F0rz&KdbcuYZ zj&EkyD(yxwvGy6AjIQ#6Ktq`V{TirZ!j)4F^kBUF`*E=w^Y|45lIyOE7q5OHa`le2 z>kFy-{Wd`=AqSt_O#vEPngJWM^2+@fS{kkz4!I+`cmLYDU75`P z#6nCLyeRaemH)b}%Q?fd?wj^+Wu+?u4!7CPK-kh*;z*!oUoYbU3OoRL9b!*M0}AFZf;byP!k3MA)b~Z#FZ(-Kt5{ zlDj4!0Kn$yz*bBY+}s{zahcu}84m`Sz8S@cBBbIalr^sRpK7GtwpkWTAcjE7N{ufc z3Ov*R;g*-B%g%z2Bh#^#1|8#gA)hdk%jfZC>zik<-hF(K^}N%0qFu7?#n|5Jqq840 zJqFdV>}LU9fK&dC#?_YGwufmCt{Eu`_w+`pB(cSdog4&1Ij&~jS?=x%qR$=IApCOV z1(l?vyZcgdlG_g`H<~t4(41<-@dd<_KkR1xt#mP6aDv%m`)k=@$`{76I=Y(IM)KlX z)r+P+iFWBlXO?IB10AP|5H~!Xcj|LhuEz_igyXDfg_)Y-`-7P-MT1R+LoaV0jqRU@`756F z=dby~b1go8S%LrECGgJsucs{T+h?P!Z|)g3BYxHu{0#sA%Ho`UKy1KxJ;G+j&m#Fd zV}jRjZ2bBE;`*P+e}>I>Bu!`wbIliuYl$IWI340}!-Lm7U0)%osW`Utd5!v7n$>zo4=tBR@~KI3;`H!Tqx&IWI340}yO!mzc<%%;+&OU!PgGpmO56004{Z2|54( diff --git 
a/apps/python-sdk/firecrawl/__pycache__/firecrawl.cpython-311.pyc b/apps/python-sdk/firecrawl/__pycache__/firecrawl.cpython-311.pyc index 694553e2f74228f43cf414e4633ea7297260fe73..7c98fa33cdb39232d3c4b0ad07ee506b91bb20d4 100644 GIT binary patch delta 1088 zcmaKqO-vI}5XbvUyQQ>U+e%CME`sUY&O4{eeXZBGxOf6@7(~u zVKUXJ;8|byUOisF#xGu&g|9~{NfjZYDxwy7QN2ncHDGJNYQSp1YQ-a>4lF0uuBswB zQ4g#Y>wtRE0IU;@K!a!k8bvav@|f4&?2-lciulcbA)A%`+2ve3lkStn>xD05rG`8& zq_sPwkW%jv2Q@|XPW|X!dT2a~lr*bYQAbbAoJ z2oFHk0i4Q1jo?=}rGos?@dTA(RJfHK%VvGbVi!iZSP$psS|3Zr`YtxZmN*R#>=C##g-(#mlqYMWKET=siHCU!P;DGnctVpgIltO z%hqtw7%rUCZIgB<7G#QMGjc9UC77n1qn)ru8xU?bsSlEFwxaLkl&;u@evFJ3KI(sv zU_Zthqt~Rrg$7~MqfrKQ5_45Y)61HTivU?q#22!%aMNPX!bhZ_}`=vTvd z_!UlwL1FAnd+*({ni%BUe;jLU^&+6AWA7(VMA)D9Z1()_A1`($a zIDU#lQd$b27i14?ZR9Q6unpTB+6D4eb%<=+#thXVV?}g^-L*T&EPH5oPu?WtHU#|J Od{g#z|0AJ1Wc>-C92l_x delta 623 zcmaJ-O-mbL5Z+04H(;{c7&Twee7R}dm3+_yYq32Dp`u`0@D`<^bl0Hx5#NoVl=fnN zfMLM?fkN*twAc2~YiSQ6#7pU=1urdlmd+?ekhbsPnVDzid6;>RCb}j4jiwDq^jZh; zqdPf@mYTcxPjfv(hhomF5#GH>)gpTR7CgtEWb z-t}bPOFx+2p7IU6Zm@Cg)A4CVq;M7voD@2fF999&j2 zJeI{&WDH6;6G_7~wj$3jX#;;maxjU(p%m0TYv?_|1fC8Hu<*%9xqgSFW}q*3R@NOc z$v#(?j8fTG+%?B(i;L9)*H^pEf;xT~sX`f*XdY&~@#tgUy(`NU*?&faPOiTJ#Da^J diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index f1f5e6e4..ef3eb532 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -32,6 +32,32 @@ class FirecrawlApp: raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}') else: raise Exception(f'Failed to scrape URL. 
Status code: {response.status_code}') + + def search(self, query, params=None): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}' + } + json_data = {'query': query} + if params: + json_data.update(params) + response = requests.post( + 'https://api.firecrawl.dev/v0/search', + headers=headers, + json=json_data + ) + if response.status_code == 200: + response = response.json() + if response['success'] == True: + return response['data'] + else: + raise Exception(f'Failed to search. Error: {response["error"]}') + + elif response.status_code in [402, 409, 500]: + error_message = response.json().get('error', 'Unknown error occurred') + raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}') + else: + raise Exception(f'Failed to search. Status code: {response.status_code}') def crawl_url(self, url, params=None, wait_until_done=True, timeout=2): headers = self._prepare_headers() diff --git a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO index ad0bd09c..61589c22 100644 --- a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO +++ b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO @@ -1,7 +1,7 @@ Metadata-Version: 2.1 Name: firecrawl-py -Version: 0.0.5 +Version: 0.0.6 Summary: Python SDK for Firecrawl API -Home-page: https://github.com/mendableai/firecrawl-py +Home-page: https://github.com/mendableai/firecrawl Author: Mendable.ai Author-email: nick@mendable.ai diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index d2fc6b81..a3589e34 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -2,8 +2,8 @@ from setuptools import setup, find_packages setup( name='firecrawl-py', - version='0.0.5', - url='https://github.com/mendableai/firecrawl-py', + version='0.0.6', + url='https://github.com/mendableai/firecrawl', author='Mendable.ai', author_email='nick@mendable.ai', description='Python SDK for Firecrawl API', From 
b7c7291b0e889731159cafdabda3dc727b2508c1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Apr 2024 12:49:10 -0700 Subject: [PATCH 78/96] Nick: v15 --- apps/js-sdk/firecrawl/build/index.js | 37 +++++++++++++++++++++++ apps/js-sdk/firecrawl/package.json | 2 +- apps/js-sdk/firecrawl/src/index.ts | 41 ++++++++++++++++++++++++++ apps/js-sdk/firecrawl/types/index.d.ts | 15 ++++++++++ apps/js-sdk/package-lock.json | 8 ++--- apps/js-sdk/package.json | 2 +- 6 files changed, 99 insertions(+), 6 deletions(-) diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index 1b23bb54..9d8237b4 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -61,6 +61,43 @@ export default class FirecrawlApp { return { success: false, error: 'Internal server error.' }; }); } + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + search(query_1) { + return __awaiter(this, arguments, void 0, function* (query, params = null) { + const headers = { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.apiKey}`, + }; + let jsonData = { query }; + if (params) { + jsonData = Object.assign(Object.assign({}, jsonData), params); + } + try { + const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers }); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } + else { + throw new Error(`Failed to search. Error: ${responseData.error}`); + } + } + else { + this.handleError(response, 'search'); + } + } + catch (error) { + throw new Error(error.message); + } + return { success: false, error: 'Internal server error.' }; + }); + } /** * Initiates a crawl job for a URL using the Firecrawl API. 
* @param {string} url - The URL to crawl. diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 566fdde9..c35a93b6 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.13", + "version": "0.0.15", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 65456001..54e4e237 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -25,6 +25,14 @@ export interface ScrapeResponse { error?: string; } +/** + * Response interface for searching operations. + */ +export interface SearchResponse { + success: boolean; + data?: any; + error?: string; +} /** * Response interface for crawling operations. */ @@ -96,6 +104,39 @@ export default class FirecrawlApp { return { success: false, error: 'Internal server error.' }; } + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + async search(query: string, params: Params | null = null): Promise { + const headers: AxiosRequestHeaders = { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.apiKey}`, + } as AxiosRequestHeaders; + let jsonData: Params = { query }; + if (params) { + jsonData = { ...jsonData, ...params }; + } + try { + const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers }); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } else { + throw new Error(`Failed to search. 
Error: ${responseData.error}`); + } + } else { + this.handleError(response, 'search'); + } + } catch (error: any) { + throw new Error(error.message); + } + return { success: false, error: 'Internal server error.' }; + } + /** * Initiates a crawl job for a URL using the Firecrawl API. * @param {string} url - The URL to crawl. diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index be960f7c..7f79d644 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -19,6 +19,14 @@ export interface ScrapeResponse { data?: any; error?: string; } +/** + * Response interface for searching operations. + */ +export interface SearchResponse { + success: boolean; + data?: any; + error?: string; +} /** * Response interface for crawling operations. */ @@ -55,6 +63,13 @@ export default class FirecrawlApp { * @returns {Promise} The response from the scrape operation. */ scrapeUrl(url: string, params?: Params | null): Promise; + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + search(query: string, params?: Params | null): Promise; /** * Initiates a crawl job for a URL using the Firecrawl API. * @param {string} url - The URL to crawl. 
diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index a73272f4..363f3013 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -9,14 +9,14 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.8", + "@mendable/firecrawl-js": "^0.0.15", "axios": "^1.6.8" } }, "node_modules/@mendable/firecrawl-js": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.8.tgz", - "integrity": "sha512-dD7eA5X6UT8CM3z7qCqHgA4YbCsdwmmlaT/L0/ozM6gGvb0PnJMoB+e51+n4lAW8mxXOvHGbq9nrgBT1wEhhhw==", + "version": "0.0.15", + "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz", + "integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==", "dependencies": { "axios": "^1.6.8", "dotenv": "^16.4.5" diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index 9bb5c4f2..563e1e39 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,7 +11,7 @@ "author": "", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.8", + "@mendable/firecrawl-js": "^0.0.15", "axios": "^1.6.8" } } From 3ac87243292d224cb3485254025de78ee7547808 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Apr 2024 13:28:07 -0700 Subject: [PATCH 79/96] Update openapi.json --- apps/api/openapi.json | 57 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/apps/api/openapi.json b/apps/api/openapi.json index dd325fa2..7861f32a 100644 --- a/apps/api/openapi.json +++ b/apps/api/openapi.json @@ -373,33 +373,36 @@ "type": "boolean" }, "data": { - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "markdown": { - "type": "string" - }, - "content": { - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "description": { - "type": 
"string" - }, - "language": { - "type": "string", - "nullable": true - }, - "sourceURL": { - "type": "string", - "format": "uri" + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "markdown": { + "type": "string" + }, + "content": { + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + } } } } From 4fce848ebbca4c5fee7c356fba5e5c16b0058db0 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Apr 2024 13:29:37 -0700 Subject: [PATCH 80/96] Update README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index c48ef10e..256e2bd8 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,15 @@ url = 'https://example.com' scraped_data = app.scrape_url(url) ``` +### Search for a query + +Performs a web search, retrieve the top results, extract data from each page, and returns their markdown. + +```python +query = 'what is mendable?' +search_result = app.search(query) +``` + ## Contributing We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. 
From 06675d1fe329a546ec4d8e395e8f39e60cd60b28 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 26 Apr 2024 11:42:49 -0300 Subject: [PATCH 81/96] almost finished --- apps/api/src/controllers/scrape.ts | 24 +- apps/api/src/controllers/search.ts | 4 +- apps/api/src/main/runWebScraper.ts | 4 +- .../src/services/billing/credit_billing.ts | 217 ++++++++++-------- 4 files changed, 139 insertions(+), 110 deletions(-) diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index cfe35b5b..eebdcb42 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -46,18 +46,18 @@ export async function scrapeHelper( return { success: true, error: "No page found", returnCode: 200 }; } - const { success, credit_usage } = await billTeam( - team_id, - filteredDocs.length - ); - if (!success) { - return { - success: false, - error: - "Failed to bill team. Insufficient credits or subscription not found.", - returnCode: 402, - }; - } + const billingResult = await billTeam( + team_id, + filteredDocs.length + ); + if (!billingResult.success) { + return { + success: false, + error: + "Failed to bill team. 
Insufficient credits or subscription not found.", + returnCode: 402, + }; + } return { success: true, diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index bc81f69e..5c2cf808 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -83,11 +83,11 @@ export async function searchHelper( return { success: true, error: "No page found", returnCode: 200 }; } - const { success, credit_usage } = await billTeam( + const billingResult = await billTeam( team_id, filteredDocs.length ); - if (!success) { + if (!billingResult.success) { return { success: false, error: diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 0e44310b..892a2a34 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -89,12 +89,12 @@ export async function runWebScraper({ : docs.filter((doc) => doc.content.trim().length > 0); - const { success, credit_usage } = await billTeam( + const billingResult = await billTeam( team_id, filteredDocs.length ); - if (!success) { + if (!billingResult.success) { // throw new Error("Failed to bill team, no subscription was found"); return { success: false, diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 7f6f9b83..e6a05d7f 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -18,7 +18,6 @@ export async function supaBillTeam(team_id: string, credits: number) { // created_at: The timestamp of the API usage. // 1. 
get the subscription - const { data: subscription } = await supabase_service .from("subscriptions") .select("*") @@ -26,51 +25,81 @@ export async function supaBillTeam(team_id: string, credits: number) { .eq("status", "active") .single(); - if (!subscription) { - const { data: credit_usage } = await supabase_service - .from("credit_usage") - .insert([ - { - team_id, - credits_used: credits, - created_at: new Date(), - }, - ]) - .select(); - - return { success: true, credit_usage }; - } - // 2. Check for available coupons const { data: coupons } = await supabase_service .from("coupons") - .select("credits") + .select("id, credits") .eq("team_id", team_id) .eq("status", "active"); - let couponValue = 0; + let couponCredits = 0; if (coupons && coupons.length > 0) { - couponValue = coupons[0].credits; // Assuming only one active coupon can be used at a time - console.log(`Applying coupon of ${couponValue} credits`); + couponCredits = coupons.reduce((total, coupon) => total + coupon.credits, 0); } - // Calculate final credits used after applying coupon - const finalCreditsUsed = Math.max(0, credits - couponValue); + let sortedCoupons = coupons.sort((a, b) => b.credits - a.credits); - // 3. Log the credit usage - const { data: credit_usage } = await supabase_service - .from("credit_usage") - .insert([ - { - team_id, - subscription_id: subscription ? 
subscription.id : null, - credits_used: finalCreditsUsed, - created_at: new Date(), - }, - ]) - .select(); + // using coupon credits: + if (couponCredits > 0) { + // using only coupon credits: + if (couponCredits > credits && !subscription) { + // remove credits from coupon credits + let usedCredits = credits; + while (usedCredits > 0) { + // update coupons + if (sortedCoupons[0].credits < usedCredits) { + usedCredits = usedCredits - sortedCoupons[0].credits; + // update coupon credits + await supabase_service + .from("coupons") + .update({ + credits: 0 + }) + .eq("id", sortedCoupons[0].id); + sortedCoupons.shift(); - return { success: true, credit_usage }; + } else { + // update coupon credits + await supabase_service + .from("coupons") + .update({ + credits: sortedCoupons[0].credits - usedCredits + }) + .eq("id", sortedCoupons[0].id); + usedCredits = 0; + } + } + + return await createCreditUsage({ team_id, credits: 0 }); + + // @nick ??? HOW TO HANDLE THIS CASE? + // not enough coupon credits but no subscription + } else if (!subscription) { + return await createCreditUsage({ team_id, credits }); + } + + // using coupon + subscription credits: + if (credits > couponCredits) { + // update coupon credits + for (let i = 0; i < sortedCoupons.length; i++) { + await supabase_service + .from("coupons") + .update({ + credits: 0 + }) + .eq("id", sortedCoupons[i].id); + } + const usedCredits = credits - couponCredits; + return await createCreditUsage({ team_id, subscription_id: subscription.id, credits: usedCredits }); + } + } + + // not using coupon credits + if (!subscription) { + return await createCreditUsage({ team_id, credits }); + } + + return await createCreditUsage({ team_id, subscription_id: subscription.id, credits }); } export async function checkTeamCredits(team_id: string, credits: number) { @@ -90,10 +119,6 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { .eq("status", "active") .single(); - if (subscriptionError || 
!subscription) { - return { success: false, message: "No active subscription found" }; - } - // Check for available coupons const { data: coupons } = await supabase_service .from("coupons") @@ -101,9 +126,18 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { .eq("team_id", team_id) .eq("status", "active"); - let couponValue = 0; + let couponCredits = 0; if (coupons && coupons.length > 0) { - couponValue = coupons[0].credits; + couponCredits = coupons.reduce((total, coupon) => total + coupon.credits, 0); + } + + if (subscriptionError || (!subscription && couponCredits <= 0)) { + return { success: false, message: "No active subscription or coupons found" }; + } + + // If there is no active subscription but there are available coupons + if (couponCredits >= credits) { + return { success: true, message: "Sufficient credits available" }; } // Calculate the total credits used by the team within the current billing period @@ -121,7 +155,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { const totalCreditsUsed = creditUsages.reduce((acc, usage) => acc + usage.credits_used, 0); // Adjust total credits used by subtracting coupon value - const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponValue); + const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponCredits); // Get the price details const { data: price, error: priceError } = await supabase_service @@ -154,19 +188,18 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( .eq("team_id", team_id) .single(); + const { data: coupons } = await supabase_service + .from("coupons") + .select("credits") + .eq("team_id", team_id) + .eq("status", "active"); + + let couponCredits = 0; + if (coupons && coupons.length > 0) { + couponCredits = coupons.reduce((total, coupon) => total + coupon.credits, 0); + } + if (subscriptionError || !subscription) { - // Check for available coupons even if there's no subscription - const { data: 
coupons } = await supabase_service - .from("coupons") - .select("value") - .eq("team_id", team_id) - .eq("status", "active"); - - let couponValue = 0; - if (coupons && coupons.length > 0) { - couponValue = coupons[0].value; - } - // Free const { data: creditUsages, error: creditUsageError } = await supabase_service @@ -184,62 +217,58 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( 0 ); - // Adjust total credits used by subtracting coupon value - const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponValue); - - // 4. Calculate remaining credits. - const remainingCredits = FREE_CREDITS - adjustedCreditsUsed; - - return { totalCreditsUsed: adjustedCreditsUsed, remainingCredits, totalCredits: FREE_CREDITS }; - } - - // If there is an active subscription - const { data: coupons } = await supabase_service - .from("coupons") - .select("credits") - .eq("team_id", team_id) - .eq("status", "active"); - - let couponValue = 0; - if (coupons && coupons.length > 0) { - couponValue = coupons[0].credits; + const remainingCredits = FREE_CREDITS + couponCredits - totalCreditsUsed; + return { totalCreditsUsed: totalCreditsUsed, remainingCredits, totalCredits: FREE_CREDITS + couponCredits }; } const { data: creditUsages, error: creditUsageError } = await supabase_service - .from("credit_usage") - .select("credits_used") - .eq("subscription_id", subscription.id) - .gte("created_at", subscription.current_period_start) - .lte("created_at", subscription.current_period_end); + .from("credit_usage") + .select("credits_used") + .eq("subscription_id", subscription.id) + .gte("created_at", subscription.current_period_start) + .lte("created_at", subscription.current_period_end); if (creditUsageError || !creditUsages) { - throw new Error(`Failed to retrieve credit usage for subscription_id: ${subscription.id}`); + throw new Error(`Failed to retrieve credit usage for subscription_id: ${subscription.id}`); } - const totalCreditsUsed = creditUsages.reduce( - 
(acc, usage) => acc + usage.credits_used, - 0 - ); + const totalCreditsUsed = creditUsages.reduce((acc, usage) => acc + usage.credits_used, 0); // Adjust total credits used by subtracting coupon value - const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponValue); + // const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponCredits); const { data: price, error: priceError } = await supabase_service - .from("prices") - .select("credits") - .eq("id", subscription.price_id) - .single(); + .from("prices") + .select("credits") + .eq("id", subscription.price_id) + .single(); if (priceError || !price) { - throw new Error(`Failed to retrieve price for price_id: ${subscription.price_id}`); + throw new Error(`Failed to retrieve price for price_id: ${subscription.price_id}`); } // Calculate remaining credits. - const remainingCredits = price.credits - adjustedCreditsUsed; + const remainingCredits = price.credits + couponCredits - totalCreditsUsed; return { - totalCreditsUsed: adjustedCreditsUsed, - remainingCredits, - totalCredits: price.credits + totalCreditsUsed, + remainingCredits, + totalCredits: price.credits }; - } +} + +async function createCreditUsage({ team_id, subscription_id, credits }: { team_id: string, subscription_id?: string, credits: number }) { + const { data: credit_usage } = await supabase_service + .from("credit_usage") + .insert([ + { + team_id, + credits_used: credits, + subscription_id: subscription_id || null, + created_at: new Date(), + }, + ]) + .select(); + + return { success: true, credit_usage }; +} \ No newline at end of file From 24e1bdec1bdd5e7e88805b6d513351de8de7437c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 10:14:29 -0700 Subject: [PATCH 82/96] Update credit_billing.ts --- apps/api/src/services/billing/credit_billing.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 
bf5be60e..165d4dde 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -1,7 +1,7 @@ import { withAuth } from "../../lib/withAuth"; import { supabase_service } from "../supabase"; -const FREE_CREDITS = 100; +const FREE_CREDITS = 500; export async function billTeam(team_id: string, credits: number) { return withAuth(supaBillTeam)(team_id, credits); From d210a57a9bfe25993bd63f2c253cdf646f1cf158 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 10:24:36 -0700 Subject: [PATCH 83/96] Update credit_billing.ts --- apps/api/src/services/billing/credit_billing.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 165d4dde..73c08903 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -1,7 +1,7 @@ import { withAuth } from "../../lib/withAuth"; import { supabase_service } from "../supabase"; -const FREE_CREDITS = 500; +const FREE_CREDITS = 300; export async function billTeam(team_id: string, credits: number) { return withAuth(supaBillTeam)(team_id, credits); From bb3da8df896bae6d860d3f07ba2c83f100827027 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 11:28:31 -0700 Subject: [PATCH 84/96] Update package.json --- apps/js-sdk/firecrawl/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 9a3e650b..a8275f7b 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.15", + "version": "0.0.16", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", From 1f48998970b4817a627a64df6ee8d49928ffdcbf Mon Sep 17 00:00:00 2001 From: rafaelsideguide 
<150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 26 Apr 2024 16:27:31 -0300 Subject: [PATCH 85/96] done --- .../src/services/billing/credit_billing.ts | 134 +++++++++++++----- 1 file changed, 100 insertions(+), 34 deletions(-) diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index e6a05d7f..50a77c43 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -38,12 +38,73 @@ export async function supaBillTeam(team_id: string, credits: number) { } let sortedCoupons = coupons.sort((a, b) => b.credits - a.credits); - // using coupon credits: if (couponCredits > 0) { - // using only coupon credits: - if (couponCredits > credits && !subscription) { - // remove credits from coupon credits + if (!subscription) { + // using only coupon credits: + if (couponCredits >= credits) { + // remove credits from coupon credits + let usedCredits = credits; + while (usedCredits > 0) { + // update coupons + if (sortedCoupons[0].credits < usedCredits) { + usedCredits = usedCredits - sortedCoupons[0].credits; + // update coupon credits + await supabase_service + .from("coupons") + .update({ + credits: 0 + }) + .eq("id", sortedCoupons[0].id); + sortedCoupons.shift(); + + } else { + // update coupon credits + await supabase_service + .from("coupons") + .update({ + credits: sortedCoupons[0].credits - usedCredits + }) + .eq("id", sortedCoupons[0].id); + usedCredits = 0; + } + } + + return await createCreditUsage({ team_id, credits: 0 }); + + // not enough coupon credits and no subscription + } else { + // update coupon credits + const usedCredits = credits - couponCredits; + for (let i = 0; i < sortedCoupons.length; i++) { + await supabase_service + .from("coupons") + .update({ + credits: 0 + }) + .eq("id", sortedCoupons[i].id); + } + + return await createCreditUsage({ team_id, credits: usedCredits }); + } + } + + // with subscription + // using coupon + 
subscription credits: + if (credits > couponCredits) { + // update coupon credits + for (let i = 0; i < sortedCoupons.length; i++) { + await supabase_service + .from("coupons") + .update({ + credits: 0 + }) + .eq("id", sortedCoupons[i].id); + } + const usedCredits = credits - couponCredits; + return await createCreditUsage({ team_id, subscription_id: subscription.id, credits: usedCredits }); + + } else { // using only coupon credits let usedCredits = credits; while (usedCredits > 0) { // update coupons @@ -70,27 +131,7 @@ export async function supaBillTeam(team_id: string, credits: number) { } } - return await createCreditUsage({ team_id, credits: 0 }); - - // @nick ??? HOW TO HANDLE THIS CASE? - // not enough coupon credits but no subscription - } else if (!subscription) { - return await createCreditUsage({ team_id, credits }); - } - - // using coupon + subscription credits: - if (credits > couponCredits) { - // update coupon credits - for (let i = 0; i < sortedCoupons.length; i++) { - await supabase_service - .from("coupons") - .update({ - credits: 0 - }) - .eq("id", sortedCoupons[i].id); - } - const usedCredits = credits - couponCredits; - return await createCreditUsage({ team_id, subscription_id: subscription.id, credits: usedCredits }); + return await createCreditUsage({ team_id, subscription_id: subscription.id, credits: 0 }); } } @@ -131,12 +172,41 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { couponCredits = coupons.reduce((total, coupon) => total + coupon.credits, 0); } - if (subscriptionError || (!subscription && couponCredits <= 0)) { - return { success: false, message: "No active subscription or coupons found" }; - } + // Free credits, no coupons + if (subscriptionError || !subscription) { + // If there is no active subscription but there are available coupons + if (couponCredits >= credits) { + return { success: true, message: "Sufficient credits available" }; + } + + const { data: creditUsages, error: 
creditUsageError } = + await supabase_service + .from("credit_usage") + .select("credits_used") + .is("subscription_id", null) + .eq("team_id", team_id); + // .gte("created_at", subscription.current_period_start) + // .lte("created_at", subscription.current_period_end); - // If there is no active subscription but there are available coupons - if (couponCredits >= credits) { + if (creditUsageError) { + throw new Error( + `Failed to retrieve credit usage for subscription_id: ${subscription.id}` + ); + } + + const totalCreditsUsed = creditUsages.reduce( + (acc, usage) => acc + usage.credits_used, + 0 + ); + + console.log("totalCreditsUsed", totalCreditsUsed); + // 5. Compare the total credits used with the credits allowed by the plan. + if (totalCreditsUsed + credits > FREE_CREDITS) { + return { + success: false, + message: "Insufficient credits, please upgrade!", + }; + } return { success: true, message: "Sufficient credits available" }; } @@ -234,9 +304,6 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( const totalCreditsUsed = creditUsages.reduce((acc, usage) => acc + usage.credits_used, 0); - // Adjust total credits used by subtracting coupon value - // const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponCredits); - const { data: price, error: priceError } = await supabase_service .from("prices") .select("credits") @@ -247,7 +314,6 @@ export async function countCreditsAndRemainingForCurrentBillingPeriod( throw new Error(`Failed to retrieve price for price_id: ${subscription.price_id}`); } - // Calculate remaining credits. 
const remainingCredits = price.credits + couponCredits - totalCreditsUsed; return { From 8e324534246eea0daf25c354032ab1c4facfe6af Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 12:57:49 -0700 Subject: [PATCH 86/96] Update auth.ts --- apps/api/src/controllers/auth.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 49b2146a..2aa2297a 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -51,9 +51,19 @@ export async function supaAuthenticateUser( if ( token === "this_is_just_a_preview_token" && - (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview) + (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search) ) { return { success: true, team_id: "preview" }; + // check the origin of the request and make sure its from firecrawl.dev + // const origin = req.headers.origin; + // if (origin && origin.includes("firecrawl.dev")){ + // return { success: true, team_id: "preview" }; + // } + // if(process.env.ENV !== "production") { + // return { success: true, team_id: "preview" }; + // } + + // return { success: false, error: "Unauthorized: Invalid token", status: 401 }; } const normalizedApi = parseApi(token); From fdf913e0f1958552db5e4cd8897d3a0c9e577259 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 13:06:48 -0700 Subject: [PATCH 87/96] Update index.test.ts --- apps/api/src/__tests__/e2e_withAuth/index.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index f490306e..2b4c7e9f 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -183,6 +183,8 @@ const TEST_URL = "http://127.0.0.1:3002"; expect(response.statusCode).toBe(401); }); + + it("should return a successful 
response with a valid API key", async () => { const response = await request(TEST_URL) .post("/v0/search") From fdd3b704f754904936975ed99fad9620dbc2efa1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 13:37:00 -0700 Subject: [PATCH 88/96] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 256e2bd8..36ef431c 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ curl -X POST https://api.firecrawl.dev/v0/search \ } ``` -Coming soon to the SDKs and Integrations. +Coming soon to the Langchain and LLama Index integrations. ## Using Python SDK From 6cf147f5ecbf3926cae19a155361341d26d1f005 Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Fri, 26 Apr 2024 16:56:22 -0400 Subject: [PATCH 89/96] Contradictions tutorial --- .../contradiction-testing-using-llms.mdx | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tutorials/contradiction-testing-using-llms.mdx diff --git a/tutorials/contradiction-testing-using-llms.mdx b/tutorials/contradiction-testing-using-llms.mdx new file mode 100644 index 00000000..e2a4d73a --- /dev/null +++ b/tutorials/contradiction-testing-using-llms.mdx @@ -0,0 +1,78 @@ +# Build an agent that checks your website for contradictions + +Learn how to use Firecrawl and Claude to scrape your website's data and look for contradictions and inconsistencies in a few lines of code. When you are shipping fast, data is bound to get stale; with FireCrawl and LLMs you can make sure your public web data is always consistent! We will be using Opus's huge 200k context window and Firecrawl's parallelization, making this process accurate and fast. + +## Setup + +Install our python dependencies, including anthropic and firecrawl-py. + +```bash +pip install firecrawl-py anthropic +``` + +## Getting your Claude and Firecrawl API Keys + +To use Claude Opus and Firecrawl, you will need to get your API keys. 
You can get your Anthropic API key from [here](https://www.anthropic.com/) and your Firecrawl API key from [here](https://firecrawl.dev). + +## Load website with Firecrawl + +To be able to get all the data from our website pages and put it into an easy-to-read format for the LLM, we will use [FireCrawl](https://firecrawl.dev). It handles by-passing JS-blocked websites, extracting the main content, and outputting in an LLM-readable format for increased accuracy. + +Here is how we will scrape a website URL using Firecrawl-py + +```python +from firecrawl import FirecrawlApp + +app = FirecrawlApp(api_key="YOUR-KEY") + +crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*','usecases/*']}}) + +print(crawl_result) +``` + +With all of the web data we want scraped and in a clean format, we can move on to the next step. + +## Combination and Generation + +Now that we have the website data, let's pair up every page and run every combination through Opus for analysis. + +```python +from itertools import combinations + +page_combinations = [] + +for first_page, second_page in combinations(crawl_result, 2): + combined_string = "First Page:\n" + first_page['markdown'] + "\n\nSecond Page:\n" + second_page['markdown'] + page_combinations.append(combined_string) + +import anthropic + +client = anthropic.Anthropic( + # defaults to os.environ.get("ANTHROPIC_API_KEY") + api_key="YOUR-KEY", +) + +final_output = [] + +for page_combination in page_combinations: + + prompt = "Here are two pages from a companies website, your job is to find any contradictions or differences in opinion between the two pages, this could be caused by outdated information or other. If you find any contradictions, list them out and provide a brief explanation of why they are contradictory or differing. Make sure the explanation is specific and concise. It is okay if you don't find any contradictions, just say 'No contradictions found' and nothing else. 
Here are the pages: " + "\n\n".join(page_combination) + + message = client.messages.create( + model="claude-3-opus-20240229", + max_tokens=1000, + temperature=0.0, + system="You are an assistant that helps find contradictions or differences in opinion between pages in a company website and knowledge base. This could be caused by outdated information in the knowledge base.", + messages=[ + {"role": "user", "content": prompt} + ] + ) + final_output.append(message.content) + +``` + +## That's about it! + +You have now built an agent that looks at your website and spots any inconsistencies it might have. + +If you have any questions or need help, feel free to reach out to us at [Firecrawl](https://firecrawl.dev). From 7689c31d3584b3ca3ce8e73104cb6d6292b2e49b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Apr 2024 14:36:19 -0700 Subject: [PATCH 90/96] Update credit_billing.ts --- apps/api/src/services/billing/credit_billing.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index a5060261..37db664e 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -40,8 +40,10 @@ export async function supaBillTeam(team_id: string, credits: number) { let sortedCoupons = coupons.sort((a, b) => b.credits - a.credits); // using coupon credits: if (couponCredits > 0) { + // if there is no subscription and they have enough coupon credits if (!subscription) { // using only coupon credits: + // if there are enough coupon credits if (couponCredits >= credits) { // remove credits from coupon credits let usedCredits = credits; From 8e44696c4d47edf282dff27f401f9b6ba5610897 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 28 Apr 2024 11:34:25 -0700 Subject: [PATCH 91/96] Nick: --- apps/api/src/scraper/WebScraper/single_url.ts | 43 +++++++++++++++---- .../WebScraper/utils/custom/website_params.ts | 24 +++++++++++ 2 files 
changed, 58 insertions(+), 9 deletions(-) create mode 100644 apps/api/src/scraper/WebScraper/utils/custom/website_params.ts diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 6ab30036..262a90c0 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -5,9 +5,28 @@ import dotenv from "dotenv"; import { Document, PageOptions } from "../../lib/entities"; import { parseMarkdown } from "../../lib/html-to-markdown"; import { excludeNonMainTags } from "./utils/excludeTags"; +import { urlSpecificParams } from "./utils/custom/website_params"; dotenv.config(); +export async function generateRequestParams( + url: string, + wait_browser: string = "domcontentloaded", + timeout: number = 15000 +): Promise { + const defaultParams = { + url: url, + params: { timeout: timeout, wait_browser: wait_browser }, + headers: { "ScrapingService-Request": "TRUE" }, + }; + + const urlKey = new URL(url).hostname; + if (urlSpecificParams.hasOwnProperty(urlKey)) { + return { ...defaultParams, ...urlSpecificParams[urlKey] }; + } else { + return defaultParams; + } +} export async function scrapWithCustomFirecrawl( url: string, options?: any @@ -28,11 +47,13 @@ export async function scrapWithScrapingBee( ): Promise { try { const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY); - const response = await client.get({ - url: url, - params: { timeout: timeout, wait_browser: wait_browser }, - headers: { "ScrapingService-Request": "TRUE" }, - }); + const clientParams = await generateRequestParams( + url, + wait_browser, + timeout + ); + + const response = await client.get(clientParams); if (response.status !== 200 && response.status !== 404) { console.error( @@ -107,11 +128,15 @@ export async function scrapSingleUrl( let text = ""; switch (method) { case "firecrawl-scraper": - text = await scrapWithCustomFirecrawl(url,); + text = await scrapWithCustomFirecrawl(url); 
break; case "scrapingBee": if (process.env.SCRAPING_BEE_API_KEY) { - text = await scrapWithScrapingBee(url,"domcontentloaded", pageOptions.fallback === false? 7000 : 15000); + text = await scrapWithScrapingBee( + url, + "domcontentloaded", + pageOptions.fallback === false ? 7000 : 15000 + ); } break; case "playwright": @@ -141,7 +166,7 @@ export async function scrapSingleUrl( break; } let cleanedHtml = removeUnwantedElements(text, pageOptions); - + return [await parseMarkdown(cleanedHtml), text]; }; @@ -155,7 +180,7 @@ export async function scrapSingleUrl( let [text, html] = await attemptScraping(urlToScrap, "scrapingBee"); // Basically means that it is using /search endpoint - if(pageOptions.fallback === false){ + if (pageOptions.fallback === false) { const soup = cheerio.load(html); const metadata = extractMetadata(soup, urlToScrap); return { diff --git a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts new file mode 100644 index 00000000..164b0741 --- /dev/null +++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts @@ -0,0 +1,24 @@ +export const urlSpecificParams = { + "platform.openai.com": { + params: { + wait_browser: "networkidle2", + block_resources: false, + }, + headers: { + "User-Agent": + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "sec-fetch-site": "same-origin", + "sec-fetch-mode": "cors", + "sec-fetch-dest": "empty", + referer: "https://www.google.com/", + "accept-language": "en-US,en;q=0.9", + "accept-encoding": "gzip, deflate, br", + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + }, + cookies: { + __cf_bm: + "mC1On8P2GWT3A5UeSYH6z_MP94xcTAdZ5jfNi9IT2U0-1714327136-1.0.1.1-ILAP5pSX_Oo9PPo2iHEYCYX.p9a0yRBNLr58GHyrzYNDJ537xYpG50MXxUYVdfrD.h3FV5O7oMlRKGA0scbxaQ", + }, + }, +}; From 
e6d7a4761d382bb2915e58ed4864f47a91b82302 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 28 Apr 2024 11:41:42 -0700 Subject: [PATCH 92/96] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 36ef431c..7749a640 100644 --- a/README.md +++ b/README.md @@ -192,3 +192,6 @@ search_result = app.search(query) ## Contributing We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. + + +*It is the sole responsibility of the end users to scrape, search and crawl websites. Users are advised to adhere to the applicable privacy policies and terms of use of the websites prior to initiating any scraping activities. By default, Firecrawl respects the directives specified in the websites' robots.txt files when crawling. By utilizing Firecrawl, you expressly agree to comply with these conditions.* From d8ee4e90d6e64eff8cb5bb0ab557360e08dcba75 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 28 Apr 2024 11:47:25 -0700 Subject: [PATCH 93/96] Update website_params.ts --- .../WebScraper/utils/custom/website_params.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts index 164b0741..dd9f20ec 100644 --- a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts +++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts @@ -21,4 +21,22 @@ export const urlSpecificParams = { "mC1On8P2GWT3A5UeSYH6z_MP94xcTAdZ5jfNi9IT2U0-1714327136-1.0.1.1-ILAP5pSX_Oo9PPo2iHEYCYX.p9a0yRBNLr58GHyrzYNDJ537xYpG50MXxUYVdfrD.h3FV5O7oMlRKGA0scbxaQ", }, }, + "support.greenpay.me":{ + params: { + wait_browser: "networkidle2", + block_resources: false, + }, + headers: { + "User-Agent": + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "sec-fetch-site": "same-origin", + 
"sec-fetch-mode": "cors", + "sec-fetch-dest": "empty", + referer: "https://www.google.com/", + "accept-language": "en-US,en;q=0.9", + "accept-encoding": "gzip, deflate, br", + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + }, + } }; From 68838c9e0da8c74f9921e4d89e459275f9d235ce Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 28 Apr 2024 12:44:00 -0700 Subject: [PATCH 94/96] Update single_url.ts --- apps/api/src/scraper/WebScraper/single_url.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 262a90c0..ff73e954 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -20,10 +20,15 @@ export async function generateRequestParams( headers: { "ScrapingService-Request": "TRUE" }, }; - const urlKey = new URL(url).hostname; - if (urlSpecificParams.hasOwnProperty(urlKey)) { - return { ...defaultParams, ...urlSpecificParams[urlKey] }; - } else { + try { + const urlKey = new URL(url).hostname; + if (urlSpecificParams.hasOwnProperty(urlKey)) { + return { ...defaultParams, ...urlSpecificParams[urlKey] }; + } else { + return defaultParams; + } + } catch (error) { + console.error(`Error generating URL key: ${error}`); return defaultParams; } } From a72d2cc68ec07d553552a16e7847ba1c3433c9b5 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 28 Apr 2024 13:06:46 -0700 Subject: [PATCH 95/96] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7749a640..5d0a4850 100644 --- a/README.md +++ b/README.md @@ -194,4 +194,4 @@ search_result = app.search(query) We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. 
-*It is the sole responsibility of the end users to scrape, search and crawl websites. Users are advised to adhere to the applicable privacy policies and terms of use of the websites prior to initiating any scraping activities. By default, Firecrawl respects the directives specified in the websites' robots.txt files when crawling. By utilizing Firecrawl, you expressly agree to comply with these conditions.* +*It is the sole responsibility of the end users to respect websites' policies when scraping, searching and crawling with Firecrawl. Users are advised to adhere to the applicable privacy policies and terms of use of the websites prior to initiating any scraping activities. By default, Firecrawl respects the directives specified in the websites' robots.txt files when crawling. By utilizing Firecrawl, you expressly agree to comply with these conditions.* From d3c36adaa7be8f736e27edca3d607311a8bab1ea Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 29 Apr 2024 17:58:47 -0300 Subject: [PATCH 96/96] Update index.ts --- apps/api/src/scraper/WebScraper/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 1904ef99..386dfb20 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -64,6 +64,7 @@ export class WebScraperDataProvider { useCaching: boolean = false, inProgress?: (progress: Progress) => void ): Promise { + if (this.urls[0].trim() === "") { throw new Error("Url is required"); }