Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 03:32:22 +08:00)

Merge branch 'mendableai:main' into feat/add-go-sdk
Commit 1fda882983
@@ -84,6 +84,11 @@
  "description": "Include a screenshot of the top of the page that you are scraping.",
  "default": false
  },
+ "fullPageScreenshot": {
+   "type": "boolean",
+   "description": "Include a full page screenshot of the page that you are scraping.",
+   "default": false
+ },
  "waitFor": {
  "type": "integer",
  "description": "Wait x amount of milliseconds for the page to load to fetch content",
@@ -317,6 +322,11 @@
  "description": "Include a screenshot of the top of the page that you are scraping.",
  "default": false
  },
+ "fullPageScreenshot": {
+   "type": "boolean",
+   "description": "Include a full page screenshot of the page that you are scraping.",
+   "default": false
+ },
  "waitFor": {
  "type": "integer",
  "description": "Wait x amount of milliseconds for the page to load to fetch content",
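Taken together, the two spec hunks above add a fullPageScreenshot page option next to the existing screenshot flag. A minimal sketch of how a client could exercise it against the v0 scrape endpoint (the API key and target URL are placeholders; field names follow the schema above):

// sketch: request a full-page screenshot via POST /v0/scrape
async function scrapeWithFullPageScreenshot() {
  const res = await fetch("https://api.firecrawl.dev/v0/scrape", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
    },
    body: JSON.stringify({
      url: "https://example.com",
      pageOptions: {
        fullPageScreenshot: true, // full page, instead of `screenshot` (top of page only)
        waitFor: 1000,            // milliseconds to wait before fetching content
      },
    }),
  });
  return res.json();
}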
@@ -7,6 +7,7 @@ export const defaultPageOptions = {
  includeHtml: false,
  waitFor: 0,
  screenshot: false,
+ fullPageScreenshot: false,
  parsePDF: true
};
@@ -18,6 +18,7 @@ export type PageOptions = {
  fetchPageContent?: boolean;
  waitFor?: number;
  screenshot?: boolean;
+ fullPageScreenshot?: boolean;
  headers?: Record<string, string>;
  replaceAllPathsWithAbsolutePaths?: boolean;
  parsePDF?: boolean;
@@ -42,8 +43,8 @@ export type SearchOptions = {

export type CrawlerOptions = {
  returnOnlyUrls?: boolean;
- includes?: string[];
- excludes?: string[];
+ includes?: string | string[];
+ excludes?: string | string[];
  maxCrawledLinks?: number;
  maxDepth?: number;
  limit?: number;
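With the widened type, crawlerOptions can carry the patterns either as an array or as a single comma-separated string; the hunks further down normalize both forms to the same thing. A tiny sketch of the two equivalent caller shapes (the glob patterns are illustrative only):

// both forms should normalize to the same string[] downstream
const asArray  = { includes: ["blog/*", "docs/*"], excludes: ["admin/*"] };
const asString = { includes: "blog/*,docs/*", excludes: "admin/*" }; // comma-separated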
@@ -131,13 +131,13 @@ const saveJob = async (job: Job, result: any) => {

    if (error) throw new Error(error.message);
    try {
-     await job.moveToCompleted(null);
+     await job.moveToCompleted(null, false, false);
    } catch (error) {
      // I think the job won't exist here anymore
    }
  } else {
    try {
-     await job.moveToCompleted(result);
+     await job.moveToCompleted(result, false, false);
    } catch (error) {
      // I think the job won't exist here anymore
    }
@@ -27,8 +27,8 @@ export class WebScraperDataProvider {
  private bullJobId: string;
  private urls: string[] = [""];
  private mode: "single_urls" | "sitemap" | "crawl" = "single_urls";
- private includes: string[];
- private excludes: string[];
+ private includes: string | string[];
+ private excludes: string | string[];
  private maxCrawledLinks: number;
  private maxCrawledDepth: number = 10;
  private returnOnlyUrls: boolean;
@@ -171,8 +171,8 @@
    const crawler = new WebCrawler({
      jobId: this.jobId,
      initialUrl: this.urls[0],
-     includes: this.includes,
-     excludes: this.excludes,
+     includes: Array.isArray(this.includes) ? this.includes : this.includes.split(','),
+     excludes: Array.isArray(this.excludes) ? this.excludes : this.excludes.split(','),
      maxCrawledLinks: this.maxCrawledLinks,
      maxCrawledDepth: getAdjustedMaxDepth(this.urls[0], this.maxCrawledDepth),
      limit: this.limit,
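The same Array.isArray(...) ? ... : ....split(',') normalization recurs in the hunks below. A small helper (hypothetical, not part of this commit) would capture the pattern in one place:

// hypothetical helper capturing the normalization used throughout this commit
function toStringArray(value: string | string[] | undefined): string[] {
  if (value === undefined) return [];
  const arr = Array.isArray(value) ? value : value.split(",");
  return arr.map((s) => s.trim()).filter((s) => s !== "");
}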
@@ -445,6 +445,10 @@
    const url = new URL(document.metadata.sourceURL);
    const path = url.pathname;

+   if (!Array.isArray(this.excludes)) {
+     this.excludes = this.excludes.split(',');
+   }
+
    if (this.excludes.length > 0 && this.excludes[0] !== "") {
      // Check if the link should be excluded
      if (
@@ -456,6 +460,10 @@
      }
    }

+   if (!Array.isArray(this.includes)) {
+     this.includes = this.includes.split(',');
+   }
+
    if (this.includes.length > 0 && this.includes[0] !== "") {
      // Check if the link matches the include patterns, if any are specified
      if (this.includes.length > 0) {
@@ -567,8 +575,15 @@
      options.crawlerOptions?.replaceAllPathsWithAbsolutePaths ??
      options.pageOptions?.replaceAllPathsWithAbsolutePaths ??
      false;
    //! @nicolas, for some reason this was being injected and breaking everything. Don't have time to find source of the issue so adding this check
    this.excludes = this.excludes.filter((item) => item !== "");

+   if (typeof options.crawlerOptions?.excludes === 'string') {
+     this.excludes = options.crawlerOptions?.excludes.split(',').filter((item) => item.trim() !== "");
+   }

+   if (typeof options.crawlerOptions?.includes === 'string') {
+     this.includes = options.crawlerOptions?.includes.split(',').filter((item) => item.trim() !== "");
+   }

    this.crawlerMode = options.crawlerOptions?.mode ?? "default";
    this.ignoreSitemap = options.crawlerOptions?.ignoreSitemap ?? false;
    this.allowBackwardCrawling =
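One subtle point these guards and the trailing .filter calls address: an empty string is not a harmless input here, because splitting it still produces a one-element array. A quick illustration:

"".split(',');               // [""] -> one empty pattern, not zero patterns
"blog/*,,docs/*".split(','); // ["blog/*", "", "docs/*"] -> hence the .filter((item) => item.trim() !== "")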
@@ -11,6 +11,7 @@ import { Logger } from "../../../lib/logger";
 * @param url The URL to scrape
 * @param waitFor The time to wait for the page to load
 * @param screenshot Whether to take a screenshot
+ * @param fullPageScreenshot Whether to take a full page screenshot
 * @param pageOptions The options for the page
 * @param headers The headers to send with the request
 * @param options The options for the request
@@ -20,6 +21,7 @@ export async function scrapWithFireEngine({
  url,
  waitFor = 0,
  screenshot = false,
+ fullPageScreenshot = false,
  pageOptions = { parsePDF: true },
  fireEngineOptions = {},
  headers,
@@ -28,6 +30,7 @@
  url: string;
  waitFor?: number;
  screenshot?: boolean;
+ fullPageScreenshot?: boolean;
  pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
  fireEngineOptions?: FireEngineOptions;
  headers?: Record<string, string>;
@@ -49,6 +52,7 @@
  const waitParam = reqParams["params"]?.wait ?? waitFor;
  const engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
  const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
+ const fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
  const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
@@ -61,7 +65,7 @@
  let engine = engineParam; // do we want fireEngineOptions as first choice?

  Logger.info(
-   `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
+   `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
  );
@@ -71,6 +75,7 @@
      url: url,
      wait: waitParam,
      screenshot: screenshotParam,
+     fullPageScreenshot: fullPageScreenshotParam,
      headers: headers,
      pageOptions: pageOptions,
      ...fireEngineOptionsParam,
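Downstream of these parameter changes, the request body sent to the Fire-Engine service simply gains the new flag. A sketch of the resulting payload (field names as in the hunk above, values purely illustrative):

// illustrative payload assembled by scrapWithFireEngine after this change
const body = {
  url: "https://example.com",
  wait: 1000,               // waitParam
  screenshot: false,        // screenshotParam
  fullPageScreenshot: true, // fullPageScreenshotParam (new)
  headers: undefined,
  pageOptions: { parsePDF: true },
  // ...fireEngineOptionsParam (e.g. { engine: "playwright" })
};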
@@ -128,6 +128,7 @@ export async function scrapSingleUrl(
    includeRawHtml: false,
    waitFor: 0,
    screenshot: false,
+   fullPageScreenshot: false,
    headers: undefined,
  },
  extractorOptions: ExtractorOptions = {
@@ -171,6 +172,7 @@
      url,
      waitFor: pageOptions.waitFor,
      screenshot: pageOptions.screenshot,
+     fullPageScreenshot: pageOptions.fullPageScreenshot,
      pageOptions: pageOptions,
      headers: pageOptions.headers,
      fireEngineOptions: {
@@ -306,7 +308,7 @@
  const scrapersInOrder = getScrapingFallbackOrder(
    defaultScraper,
    pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0,
-   pageOptions && pageOptions.screenshot && pageOptions.screenshot === true,
+   pageOptions && (pageOptions.screenshot || pageOptions.fullPageScreenshot) && (pageOptions.screenshot === true || pageOptions.fullPageScreenshot === true),
    pageOptions && pageOptions.headers && pageOptions.headers !== undefined
  );
@@ -240,4 +240,12 @@ export const urlSpecificParams = {
      },
    },
  },
+ "digikey.com":{
+   defaultScraper: "fire-engine",
+   params:{
+     fireEngineOptions:{
+       engine: "tlsclient",
+     },
+   },
+ }
};
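The new entry follows the same shape as the existing per-site overrides in urlSpecificParams. A hypothetical additional entry (domain and engine chosen purely for illustration, not part of this commit) would look like:

"example-heavy-site.com": {
  defaultScraper: "fire-engine",
  params: {
    fireEngineOptions: {
      engine: "playwright", // e.g. a JS-rendering engine instead of tlsclient
    },
  },
},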
@@ -44,9 +44,9 @@ export async function logScrape(
    ]);

    if (error) {
-     Logger.error(`Error logging proxy:\n${error}`);
+     Logger.error(`Error logging proxy:\n${JSON.stringify(error)}`);
    }
  } catch (error) {
-   Logger.error(`Error logging proxy:\n${error}`);
+   Logger.error(`Error logging proxy:\n${JSON.stringify(error)}`);
  }
}
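Worth noting why the JSON.stringify change helps: the error here is a plain object, so interpolating it into a template literal prints "[object Object]", while serializing it exposes its fields. A quick illustration (the error shape is hypothetical):

const error = { message: "duplicate key value", code: "23505" };
console.log(`Error logging proxy:\n${error}`);                 // prints: [object Object]
console.log(`Error logging proxy:\n${JSON.stringify(error)}`); // prints: {"message":"duplicate key value","code":"23505"}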
@@ -14,7 +14,7 @@ export function getWebScraperQueue() {
      maxStalledCount: 10,
    },
    defaultJobOptions:{
-     attempts: 5
+     attempts: 2
    }
  });
  Logger.info("Web scraper queue created");
@@ -20,7 +20,7 @@ if (process.env.ENV === 'production') {
const wsq = getWebScraperQueue();

async function processJob(job: Job, done) {
- Logger.debug(`🐂 Worker taking job ${job.id}`);
+ Logger.info(`🐂 Worker taking job ${job.id}`);

  try {
    job.progress({
@@ -61,7 +61,7 @@ async function processJob(job: Job, done) {
      pageOptions: job.data.pageOptions,
      origin: job.data.origin,
    });
-   Logger.debug(`🐂 Job done ${job.id}`);
+   Logger.info(`🐂 Job done ${job.id}`);
    done(null, data);
  } catch (error) {
    Logger.error(`🐂 Job errored ${job.id} - ${error}`);
@@ -36,17 +36,9 @@ export const supabase_service: SupabaseClient = new Proxy(
  new SupabaseService(),
  {
    get: function (target, prop, receiver) {
      if (process.env.USE_DB_AUTHENTICATION === "false") {
        Logger.debug(
          "Attempted to access Supabase client when it's not configured."
        );
      }
      const client = target.getClient();
      // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
      if (client === null) {
        Logger.error(
          "Attempted to access Supabase client when it's not configured."
        );
        return () => {
          throw new Error("Supabase client is not configured.");
        };
apps/js-sdk/firecrawl/.gitignore (vendored, 2 lines changed)
@@ -128,3 +128,5 @@ dist
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
+
+ build
apps/js-sdk/firecrawl/build/cjs/index.js (new file, 271 lines added)
@@ -0,0 +1,271 @@
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const axios_1 = __importDefault(require("axios"));
const zod_1 = require("zod");
const zod_to_json_schema_1 = require("zod-to-json-schema");
/**
* Main class for interacting with the Firecrawl API.
*/
class FirecrawlApp {
/**
* Initializes a new instance of the FirecrawlApp class.
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null, apiUrl = null }) {
this.apiKey = apiKey || "";
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
if (!this.apiKey) {
throw new Error("No API key provided");
}
}
/**
* Scrapes a URL using the Firecrawl API.
* @param {string} url - The URL to scrape.
* @param {Params | null} params - Additional parameters for the scrape request.
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
*/
scrapeUrl(url, params = null) {
var _a;
return __awaiter(this, void 0, void 0, function* () {
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = Object.assign({ url }, params);
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
let schema = params.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) {
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
}
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
}
try {
const response = yield axios_1.default.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return responseData;
}
else {
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
}
}
else {
this.handleError(response, "scrape URL");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." };
});
}
/**
* Searches for a query using the Firecrawl API.
* @param {string} query - The query to search for.
* @param {Params | null} params - Additional parameters for the search request.
* @returns {Promise<SearchResponse>} The response from the search operation.
*/
search(query, params = null) {
return __awaiter(this, void 0, void 0, function* () {
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { query };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return responseData;
}
else {
throw new Error(`Failed to search. Error: ${responseData.error}`);
}
}
else {
this.handleError(response, "search");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." };
});
}
/**
* Initiates a crawl job for a URL using the Firecrawl API.
* @param {string} url - The URL to crawl.
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} pollInterval - Time in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
return __awaiter(this, void 0, void 0, function* () {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
if (response.status === 200) {
const jobId = response.data.jobId;
if (waitUntilDone) {
return this.monitorJobStatus(jobId, headers, pollInterval);
}
else {
return { success: true, jobId };
}
}
else {
this.handleError(response, "start crawl job");
}
}
catch (error) {
console.log(error);
throw new Error(error.message);
}
return { success: false, error: "Internal server error." };
});
}
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param {string} jobId - The job ID of the crawl operation.
* @returns {Promise<JobStatusResponse>} The response containing the job status.
*/
checkCrawlStatus(jobId) {
return __awaiter(this, void 0, void 0, function* () {
const headers = this.prepareHeaders();
try {
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
if (response.status === 200) {
return {
success: true,
status: response.data.status,
current: response.data.current,
current_url: response.data.current_url,
current_step: response.data.current_step,
total: response.data.total,
data: response.data.data,
partial_data: !response.data.data
? response.data.partial_data
: undefined,
};
}
else {
this.handleError(response, "check crawl status");
}
}
catch (error) {
throw new Error(error.message);
}
return {
success: false,
status: "unknown",
current: 0,
current_url: "",
current_step: "",
total: 0,
error: "Internal server error.",
};
});
}
/**
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders(idempotencyKey) {
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
}
/**
* Sends a POST request to the specified URL.
* @param {string} url - The URL to send the request to.
* @param {Params} data - The data to send in the request.
* @param {AxiosRequestHeaders} headers - The headers for the request.
* @returns {Promise<AxiosResponse>} The response from the POST request.
*/
postRequest(url, data, headers) {
return axios_1.default.post(url, data, { headers });
}
/**
* Sends a GET request to the specified URL.
* @param {string} url - The URL to send the request to.
* @param {AxiosRequestHeaders} headers - The headers for the request.
* @returns {Promise<AxiosResponse>} The response from the GET request.
*/
getRequest(url, headers) {
return axios_1.default.get(url, { headers });
}
/**
* Monitors the status of a crawl job until completion or failure.
* @param {string} jobId - The job ID of the crawl operation.
* @param {AxiosRequestHeaders} headers - The headers for the request.
* @param {number} timeout - Timeout in seconds for job status checks.
* @returns {Promise<any>} The final job status or data.
*/
monitorJobStatus(jobId, headers, checkInterval) {
return __awaiter(this, void 0, void 0, function* () {
while (true) {
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
if (statusResponse.status === 200) {
const statusData = statusResponse.data;
if (statusData.status === "completed") {
if ("data" in statusData) {
return statusData.data;
}
else {
throw new Error("Crawl job completed but no data was returned");
}
}
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
if (checkInterval < 2) {
checkInterval = 2;
}
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
}
else {
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
}
}
else {
this.handleError(statusResponse, "check crawl status");
}
}
});
}
/**
* Handles errors from API responses.
* @param {AxiosResponse} response - The response from the API.
* @param {string} action - The action being performed when the error occurred.
*/
handleError(response, action) {
if ([402, 408, 409, 500].includes(response.status)) {
const errorMessage = response.data.error || "Unknown error occurred";
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
}
else {
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
}
}
}
exports.default = FirecrawlApp;
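The compiled output above is the whole public surface of the JS SDK. A short usage sketch against it (API key and URLs are placeholders; the page options follow the fields added elsewhere in this commit):

import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });

// single-page scrape with the new page options
const scraped = await app.scrapeUrl("https://example.com", {
  pageOptions: { fullPageScreenshot: true, waitFor: 1000 },
});

// crawl and poll every 5 seconds until the job completes
const documents = await app.crawlUrl(
  "https://example.com",
  { crawlerOptions: { includes: ["blog/*"], limit: 10 } },
  true, // waitUntilDone
  5     // pollInterval in seconds
);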
apps/js-sdk/firecrawl/build/cjs/package.json (new file, 1 line added)
@@ -0,0 +1 @@
{"type": "commonjs"}
apps/js-sdk/firecrawl/build/esm/package.json (new file, 1 line added)
@@ -0,0 +1 @@
{"type": "module"}
apps/js-sdk/firecrawl/package-lock.json (generated, 4 lines changed)
@@ -1,12 +1,12 @@
{
  "name": "@mendable/firecrawl-js",
- "version": "0.0.29",
+ "version": "0.0.34",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@mendable/firecrawl-js",
-     "version": "0.0.29",
+     "version": "0.0.34",
      "license": "MIT",
      "dependencies": {
        "axios": "^1.6.8",
@@ -1,12 +1,16 @@
{
  "name": "@mendable/firecrawl-js",
- "version": "0.0.29",
+ "version": "0.0.35",
  "description": "JavaScript SDK for Firecrawl API",
- "main": "build/index.js",
+ "main": "build/cjs/index.js",
  "types": "types/index.d.ts",
  "type": "module",
+ "exports": {
+   "require": "./build/cjs/index.js",
+   "import": "./build/esm/index.js"
+ },
  "scripts": {
-   "build": "tsc",
+   "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
    "build-and-publish": "npm run build && npm publish --access public",
    "publish-beta": "npm run build && npm publish --access public --tag beta",
    "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/**/*.test.ts"
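The build script now emits two trees, and the nested package.json files added above ({"type": "commonjs"} under build/cjs, {"type": "module"} under build/esm) tell Node how to interpret the .js files in each tree, so the exports map can serve both module systems. A sketch of consumption from either side (each line would live in its own file):

// in an ESM file: resolves ./build/esm/index.js via the "import" condition
import FirecrawlApp from "@mendable/firecrawl-js";

// in a CommonJS file: resolves ./build/cjs/index.js via the "require" condition
const FirecrawlAppCjs = require("@mendable/firecrawl-js").default;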
apps/js-sdk/firecrawl/types/index.d.ts (vendored, 22 lines changed)
@@ -73,16 +73,16 @@ export interface ScrapeResponse {
  error?: string;
}
/**
- * Response interface for searching operations.
- */
+ * Response interface for searching operations.
+ */
export interface SearchResponse {
  success: boolean;
  data?: FirecrawlDocument[];
  error?: string;
}
/**
- * Response interface for crawling operations.
- */
+ * Response interface for crawling operations.
+ */
export interface CrawlResponse {
  success: boolean;
  jobId?: string;
@@ -90,24 +90,28 @@ export interface CrawlResponse {
  error?: string;
}
/**
- * Response interface for job status checks.
- */
+ * Response interface for job status checks.
+ */
export interface JobStatusResponse {
  success: boolean;
  status: string;
  current?: number;
  current_url?: string;
  current_step?: string;
  total?: number;
  jobId?: string;
  data?: FirecrawlDocument[];
  partial_data?: FirecrawlDocument[];
  error?: string;
}
/**
- * Generic parameter interface.
- */
+ * Generic parameter interface.
+ */
export interface Params {
  [key: string]: any;
  extractorOptions?: {
    extractionSchema: z.ZodSchema | any;
-   mode?: "llm-extraction" | "llm-extraction-from-raw-html";
+   mode?: "llm-extraction";
    extractionPrompt?: string;
  };
}
apps/js-sdk/package-lock.json (generated, 34 lines changed)
@@ -13,6 +13,7 @@
  "axios": "^1.6.8",
  "ts-node": "^10.9.2",
  "typescript": "^5.4.5",
+ "uuid": "^10.0.0",
  "zod": "^3.23.8"
},
"devDependencies": {
@@ -450,6 +451,15 @@
  "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz",
  "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA=="
},
+ "node_modules/@types/node": {
+   "version": "20.14.11",
+   "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.11.tgz",
+   "integrity": "sha512-kprQpL8MMeszbz6ojB5/tU8PLN4kesnN8Gjzw349rDlNgsSzg90lAVj3llK99Dh7JON+t9AuscPPFW6mPbTnSA==",
+   "peer": true,
+   "dependencies": {
+     "undici-types": "~5.26.4"
+   }
+ },
"node_modules/acorn": {
  "version": "8.11.3",
  "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz",
@@ -728,6 +738,24 @@
    "node": ">=14.17"
  }
},
+ "node_modules/undici-types": {
+   "version": "5.26.5",
+   "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+   "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+   "peer": true
+ },
+ "node_modules/uuid": {
+   "version": "10.0.0",
+   "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz",
+   "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==",
+   "funding": [
+     "https://github.com/sponsors/broofa",
+     "https://github.com/sponsors/ctavan"
+   ],
+   "bin": {
+     "uuid": "dist/bin/uuid"
+   }
+ },
"node_modules/v8-compile-cache-lib": {
  "version": "3.0.1",
  "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
@@ -750,9 +778,9 @@
  }
},
"node_modules/zod-to-json-schema": {
- "version": "3.23.0",
- "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz",
- "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==",
+ "version": "3.23.1",
+ "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.1.tgz",
+ "integrity": "sha512-oT9INvydob1XV0v1d2IadrR74rLtDInLvDFfAa1CG0Pmg/vxATk7I2gSelfj271mbzeM4Da0uuDQE/Nkj3DWNw==",
  "peerDependencies": {
    "zod": "^3.23.3"
  }
@@ -15,6 +15,7 @@
  "axios": "^1.6.8",
  "ts-node": "^10.9.2",
  "typescript": "^5.4.5",
+ "uuid": "^10.0.0",
  "zod": "^3.23.8"
},
"devDependencies": {
@@ -29,6 +29,7 @@ x-common-service: &common-service
      - SCRAPING_BEE_API_KEY=${SCRAPING_BEE_API_KEY}
      - HOST=${HOST:-0.0.0.0}
+     - SELF_HOSTED_WEBHOOK_URL=${SELF_HOSTED_WEBHOOK_URL}
      - LOGGING_LEVEL=${LOGGING_LEVEL}
    extra_hosts:
      - "host.docker.internal:host-gateway"
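The compose change simply forwards an existing host variable into the containers; setting it is a one-liner in the .env file read by docker-compose (the URL below is a placeholder):

# .env (forwarded to the containers by the compose change above)
SELF_HOSTED_WEBHOOK_URL=https://example.com/firecrawl-webhook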