mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-15 19:22:19 +08:00
moving js-sdk to monorepo
This commit is contained in:
parent
0113d66739
commit
3e4064bce2
130
apps/js-sdk/firecrawl/.gitignore
vendored
Normal file
130
apps/js-sdk/firecrawl/.gitignore
vendored
Normal file
|
@ -0,0 +1,130 @@
|
|||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
.cache
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
148
apps/js-sdk/firecrawl/README.md
Normal file
148
apps/js-sdk/firecrawl/README.md
Normal file
|
@ -0,0 +1,148 @@
|
|||
# Firecrawl JavaScript SDK
|
||||
|
||||
The Firecrawl JavaScript SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
|
||||
|
||||
## Installation
|
||||
|
||||
To install the Firecrawl JavaScript SDK, you can use npm:
|
||||
|
||||
```bash
|
||||
npm install @mendable/firecrawl-js
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
|
||||
2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
|
||||
|
||||
|
||||
Here's an example of how to use the SDK with error handling:
|
||||
|
||||
```js
|
||||
import FirecrawlApp from '@mendable/firecrawl-js';
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
// Initialize the FirecrawlApp with your API key
|
||||
const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY" });
|
||||
|
||||
// Scrape a single URL
|
||||
const url = 'https://mendable.ai';
|
||||
const scrapedData = await app.scrapeUrl(url);
|
||||
console.log(scrapedData);
|
||||
|
||||
// Crawl a website
|
||||
const crawlUrl = 'https://mendable.ai';
|
||||
const crawlParams = {
|
||||
crawlerOptions: {
|
||||
excludes: ['blog/'],
|
||||
includes: [], // leave empty for all pages
|
||||
limit: 1000,
|
||||
}
|
||||
};
|
||||
|
||||
const crawlResult = await app.crawlUrl(crawlUrl, crawlParams);
|
||||
console.log(crawlResult);
|
||||
|
||||
} catch (error) {
|
||||
console.error('An error occurred:', error.message);
|
||||
}
|
||||
}
|
||||
|
||||
main();
|
||||
```
|
||||
|
||||
### Scraping a URL
|
||||
|
||||
To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
|
||||
|
||||
```js
|
||||
async function scrapeExample() {
|
||||
try {
|
||||
const url = 'https://example.com';
|
||||
const scrapedData = await app.scrapeUrl(url);
|
||||
console.log(scrapedData);
|
||||
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Error occurred while scraping:',
|
||||
error.message
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
scrapeExample();
|
||||
```
|
||||
|
||||
|
||||
### Crawling a Website
|
||||
|
||||
To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
|
||||
|
||||
```js
|
||||
async function crawlExample() {
|
||||
try {
|
||||
const crawlUrl = 'https://example.com';
|
||||
const crawlParams = {
|
||||
crawlerOptions: {
|
||||
excludes: ['blog/'],
|
||||
includes: [], // leave empty for all pages
|
||||
limit: 1000,
|
||||
}
|
||||
};
|
||||
const waitUntilDone = true;
|
||||
const timeout = 5;
|
||||
const crawlResult = await app.crawlUrl(
|
||||
crawlUrl,
|
||||
crawlParams,
|
||||
waitUntilDone,
|
||||
timeout
|
||||
);
|
||||
|
||||
console.log(crawlResult);
|
||||
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Error occurred while crawling:',
|
||||
error.message
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
crawlExample();
|
||||
```
|
||||
|
||||
|
||||
### Checking Crawl Status
|
||||
|
||||
To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
|
||||
|
||||
```js
|
||||
async function checkStatusExample(jobId) {
|
||||
try {
|
||||
const status = await app.checkCrawlStatus(jobId);
|
||||
console.log(status);
|
||||
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Error occurred while checking crawl status:',
|
||||
error.message
|
||||
);
|
||||
}
|
||||
}
|
||||
// Example usage, assuming you have a jobId
|
||||
checkStatusExample('your_job_id_here');
|
||||
```
|
||||
|
||||
|
||||
## Error Handling
|
||||
|
||||
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message. The examples above demonstrate how to handle these errors using `try/catch` blocks.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions to the Firecrawl JavaScript SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
|
||||
|
||||
## License
|
||||
|
||||
The Firecrawl JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
|
146
apps/js-sdk/firecrawl/build/index.js
Normal file
146
apps/js-sdk/firecrawl/build/index.js
Normal file
|
@ -0,0 +1,146 @@
|
|||
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
||||
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
||||
return new (P || (P = Promise))(function (resolve, reject) {
|
||||
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
||||
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
||||
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
||||
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
||||
});
|
||||
};
|
||||
import axios from 'axios';
|
||||
import dotenv from 'dotenv';
|
||||
dotenv.config();
|
||||
export default class FirecrawlApp {
|
||||
constructor({ apiKey = null }) {
|
||||
this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
|
||||
if (!this.apiKey) {
|
||||
throw new Error('No API key provided');
|
||||
}
|
||||
}
|
||||
scrapeUrl(url_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
return responseData.data;
|
||||
}
|
||||
else {
|
||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'scrape URL');
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
crawlUrl(url_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
|
||||
const headers = this.prepareHeaders();
|
||||
let jsonData = { url };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
|
||||
if (response.status === 200) {
|
||||
const jobId = response.data.jobId;
|
||||
if (waitUntilDone) {
|
||||
return this.monitorJobStatus(jobId, headers, timeout);
|
||||
}
|
||||
else {
|
||||
return { jobId };
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'start crawl job');
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
console.log(error);
|
||||
throw new Error(error.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
checkCrawlStatus(jobId) {
|
||||
return __awaiter(this, void 0, void 0, function* () {
|
||||
const headers = this.prepareHeaders();
|
||||
try {
|
||||
const response = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
if (response.status === 200) {
|
||||
return response.data;
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'check crawl status');
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
prepareHeaders() {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
};
|
||||
}
|
||||
postRequest(url, data, headers) {
|
||||
return axios.post(url, data, { headers });
|
||||
}
|
||||
getRequest(url, headers) {
|
||||
return axios.get(url, { headers });
|
||||
}
|
||||
monitorJobStatus(jobId, headers, timeout) {
|
||||
return __awaiter(this, void 0, void 0, function* () {
|
||||
while (true) {
|
||||
const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === 'completed') {
|
||||
if ('data' in statusData) {
|
||||
return statusData.data;
|
||||
}
|
||||
else {
|
||||
throw new Error('Crawl job completed but no data was returned');
|
||||
}
|
||||
}
|
||||
else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
|
||||
if (timeout < 2) {
|
||||
timeout = 2;
|
||||
}
|
||||
yield new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
}
|
||||
else {
|
||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(statusResponse, 'check crawl status');
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
handleError(response, action) {
|
||||
if ([402, 409, 500].includes(response.status)) {
|
||||
const errorMessage = response.data.error || 'Unknown error occurred';
|
||||
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
||||
}
|
||||
else {
|
||||
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
||||
}
|
||||
}
|
||||
}
|
172
apps/js-sdk/firecrawl/package-lock.json
generated
Normal file
172
apps/js-sdk/firecrawl/package-lock.json
generated
Normal file
|
@ -0,0 +1,172 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.7",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.7",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/dotenv": "^8.2.0",
|
||||
"@types/node": "^20.12.7",
|
||||
"typescript": "^5.4.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/axios": {
|
||||
"version": "0.14.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/axios/-/axios-0.14.0.tgz",
|
||||
"integrity": "sha512-KqQnQbdYE54D7oa/UmYVMZKq7CO4l8DEENzOKc4aBRwxCXSlJXGz83flFx5L7AWrOQnmuN3kVsRdt+GZPPjiVQ==",
|
||||
"deprecated": "This is a stub types definition for axios (https://github.com/mzabriskie/axios). axios provides its own type definitions, so you don't need @types/axios installed!",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"axios": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/dotenv": {
|
||||
"version": "8.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz",
|
||||
"integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==",
|
||||
"deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"dotenv": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.12.7",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
|
||||
"integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.6.8",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz",
|
||||
"integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/combined-stream": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dependencies": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "16.4.5",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
|
||||
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://dotenvx.com"
|
||||
}
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.6",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
||||
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
"url": "https://github.com/sponsors/RubenVerborgh"
|
||||
}
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"debug": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/mime-db": {
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/mime-types": {
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"dependencies": {
|
||||
"mime-db": "1.52.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.4.5",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz",
|
||||
"integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==",
|
||||
"dev": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
39
apps/js-sdk/firecrawl/package.json
Normal file
39
apps/js-sdk/firecrawl/package.json
Normal file
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.9",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/mendableai/firecrawl.git"
|
||||
},
|
||||
"author": "Mendable.ai",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/mendableai/firecrawl/issues"
|
||||
},
|
||||
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
||||
"devDependencies": {
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/dotenv": "^8.2.0",
|
||||
"@types/node": "^20.12.7",
|
||||
"typescript": "^5.4.5"
|
||||
},
|
||||
"keywords": [
|
||||
"firecrawl",
|
||||
"mendable",
|
||||
"crawler",
|
||||
"web",
|
||||
"scraper",
|
||||
"api",
|
||||
"sdk"
|
||||
]
|
||||
}
|
135
apps/js-sdk/firecrawl/src/index.ts
Normal file
135
apps/js-sdk/firecrawl/src/index.ts
Normal file
|
@ -0,0 +1,135 @@
|
|||
import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
||||
import dotenv from 'dotenv';
|
||||
dotenv.config();
|
||||
|
||||
interface FirecrawlAppConfig {
|
||||
apiKey?: string | null;
|
||||
}
|
||||
|
||||
interface Params {
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
export default class FirecrawlApp {
|
||||
private apiKey: string;
|
||||
|
||||
constructor({ apiKey = null }: FirecrawlAppConfig) {
|
||||
this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
|
||||
if (!this.apiKey) {
|
||||
throw new Error('No API key provided');
|
||||
}
|
||||
}
|
||||
|
||||
async scrapeUrl(url: string, params: Params | null = null): Promise<any> {
|
||||
const headers: AxiosRequestHeaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
return responseData.data;
|
||||
} else {
|
||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'scrape URL');
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<any> {
|
||||
const headers = this.prepareHeaders();
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
|
||||
if (response.status === 200) {
|
||||
const jobId: string = response.data.jobId;
|
||||
if (waitUntilDone) {
|
||||
return this.monitorJobStatus(jobId, headers, timeout);
|
||||
} else {
|
||||
return { jobId };
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'start crawl job');
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.log(error)
|
||||
throw new Error(error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async checkCrawlStatus(jobId: string): Promise<any> {
|
||||
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
||||
try {
|
||||
const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
if (response.status === 200) {
|
||||
return response.data;
|
||||
} else {
|
||||
this.handleError(response, 'check crawl status');
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
}
|
||||
|
||||
prepareHeaders(): AxiosRequestHeaders {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
}
|
||||
|
||||
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||
return axios.post(url, data, { headers });
|
||||
}
|
||||
|
||||
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||
return axios.get(url, { headers });
|
||||
}
|
||||
|
||||
async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
|
||||
while (true) {
|
||||
const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === 'completed') {
|
||||
if ('data' in statusData) {
|
||||
return statusData.data;
|
||||
} else {
|
||||
throw new Error('Crawl job completed but no data was returned');
|
||||
}
|
||||
} else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
|
||||
if (timeout < 2) {
|
||||
timeout = 2;
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
} else {
|
||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(statusResponse, 'check crawl status');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handleError(response: AxiosResponse, action: string): void {
|
||||
if ([402, 409, 500].includes(response.status)) {
|
||||
const errorMessage: string = response.data.error || 'Unknown error occurred';
|
||||
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
||||
} else {
|
||||
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
||||
}
|
||||
}
|
||||
}
|
109
apps/js-sdk/firecrawl/tsconfig.json
Normal file
109
apps/js-sdk/firecrawl/tsconfig.json
Normal file
|
@ -0,0 +1,109 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
/* Visit https://aka.ms/tsconfig to read more about this file */
|
||||
|
||||
/* Projects */
|
||||
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
|
||||
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
|
||||
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
|
||||
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
|
||||
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
|
||||
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
||||
|
||||
/* Language and Environment */
|
||||
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
||||
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
||||
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
||||
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
||||
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
|
||||
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
|
||||
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
|
||||
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
|
||||
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
|
||||
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
|
||||
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
|
||||
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
||||
|
||||
/* Modules */
|
||||
"module": "NodeNext", /* Specify what module code is generated. */
|
||||
"rootDir": "./src", /* Specify the root folder within your source files. */
|
||||
"moduleResolution": "nodenext", /* Specify how TypeScript looks up a file from a given module specifier. */
|
||||
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
||||
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
||||
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|
||||
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
|
||||
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
|
||||
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
||||
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
|
||||
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
|
||||
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
|
||||
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
|
||||
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
|
||||
// "resolveJsonModule": true, /* Enable importing .json files. */
|
||||
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
|
||||
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
|
||||
|
||||
/* JavaScript Support */
|
||||
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
|
||||
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
|
||||
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
|
||||
|
||||
/* Emit */
|
||||
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
|
||||
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
|
||||
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
|
||||
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
|
||||
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
|
||||
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
|
||||
"outDir": "./build", /* Specify an output folder for all emitted files. */
|
||||
// "removeComments": true, /* Disable emitting comments. */
|
||||
// "noEmit": true, /* Disable emitting files from a compilation. */
|
||||
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
|
||||
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
|
||||
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
|
||||
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
|
||||
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
||||
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
|
||||
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
|
||||
// "newLine": "crlf", /* Set the newline character for emitting files. */
|
||||
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
|
||||
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
|
||||
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
|
||||
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
|
||||
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
|
||||
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
|
||||
|
||||
/* Interop Constraints */
|
||||
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
|
||||
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
|
||||
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
|
||||
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
|
||||
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
|
||||
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
|
||||
|
||||
/* Type Checking */
|
||||
"strict": true, /* Enable all strict type-checking options. */
|
||||
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
|
||||
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
|
||||
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
|
||||
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
|
||||
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
|
||||
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
|
||||
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
|
||||
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
|
||||
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
|
||||
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
|
||||
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
|
||||
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
|
||||
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
|
||||
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
|
||||
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
|
||||
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
|
||||
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
|
||||
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
|
||||
|
||||
/* Completeness */
|
||||
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
|
||||
"skipLibCheck": true /* Skip type checking all .d.ts files. */
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user