Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 03:32:22 +08:00)

Commit d301c1bf0f: Merge remote-tracking branch 'origin/main' into pr/765
.github/workflows/check-queues.yml (vendored, 20 changed lines)

@@ -1,20 +0,0 @@
-name: Check Queues
-on:
-  schedule:
-    - cron: '*/5 * * * *'
-
-env:
-  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
-
-jobs:
-  clean-jobs:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Send GET request to check queues
-        run: |
-          response=$(curl --write-out '%{http_code}' --silent --output /dev/null --max-time 180 https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/check-queues)
-          if [ "$response" -ne 200 ]; then
-            echo "Failed to check queues. Response: $response"
-            exit 1
-          fi
-          echo "Successfully checked queues. Response: $response"
.gitignore (vendored, 2 changed lines)

@@ -28,3 +28,5 @@ apps/js-sdk/firecrawl/dist
 
 /examples/o1_web_crawler/firecrawl_env
 /examples/crm_lead_enrichment/crm_lead_enrichment_env
+/.venv
+/examples/claude_web_crawler/firecrawl_env
README.md (30 changed lines)

@@ -1,4 +1,5 @@
 <h3 align="center">
+  <a name="readme-top"></a>
   <img
     src="https://raw.githubusercontent.com/mendableai/firecrawl/main/img/firecrawl_logo.png"
    height="200"

@@ -79,6 +80,7 @@ To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and ge
 - **Media parsing**: pdfs, docx, images.
 - **Reliability first**: designed to get the data you need - no matter how hard it is.
 - **Actions**: click, scroll, input, wait and more before extracting data
+- **Batching (New)**: scrape thousands of URLs at the same time with a new async endpoint
 
 You can find all of Firecrawl's capabilities and how to use them in our [documentation](https://docs.firecrawl.dev)
 

@@ -349,6 +351,19 @@ curl -X POST https://api.firecrawl.dev/v1/scrape \
   }'
 ```
 
+### Batch Scraping Multiple URLs (New)
+
+You can now batch scrape multiple URLs at the same time. It is very similar to how the /crawl endpoint works. It submits a batch scrape job and returns a job ID to check the status of the batch scrape.
+
+```bash
+curl -X POST https://api.firecrawl.dev/v1/batch/scrape \
+    -H 'Content-Type: application/json' \
+    -H 'Authorization: Bearer YOUR_API_KEY' \
+    -d '{
+      "urls": ["https://docs.firecrawl.dev", "https://docs.firecrawl.dev/sdks/overview"],
+      "formats" : ["markdown", "html"]
+    }'
+```
 
 ### Search (v0) (Beta)
 

@@ -482,7 +497,7 @@ const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
   scrapeOptions: {
     formats: ['markdown', 'html'],
   }
-} as CrawlParams, true, 30) as CrawlStatusResponse;
+} satisfies CrawlParams, true, 30) satisfies CrawlStatusResponse;
 
 if (crawlResponse) {
   console.log(crawlResponse)

@@ -541,6 +556,12 @@ We love contributions! Please read our [contributing guide](CONTRIBUTING.md) bef
 
 _It is the sole responsibility of the end users to respect websites' policies when scraping, searching and crawling with Firecrawl. Users are advised to adhere to the applicable privacy policies and terms of use of the websites prior to initiating any scraping activities. By default, Firecrawl respects the directives specified in the websites' robots.txt files when crawling. By utilizing Firecrawl, you expressly agree to comply with these conditions._
 
+## Contributors
+
+<a href="https://github.com/mendableai/firecrawl/graphs/contributors">
+  <img alt="contributors" src="https://contrib.rocks/image?repo=mendableai/firecrawl"/>
+</a>
+
 ## License Disclaimer
 
 This project is primarily licensed under the GNU Affero General Public License v3.0 (AGPL-3.0), as specified in the LICENSE file in the root directory of this repository. However, certain components of this project are licensed under the MIT License. Refer to the LICENSE files in these specific directories for details.

@@ -552,3 +573,10 @@ Please note:
 - When using or contributing to this project, ensure you comply with the appropriate license terms for the specific component you are working with.
 
 For more details on the licensing of specific components, please refer to the LICENSE files in the respective directories or contact the project maintainers.
+
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+  <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+    ↑ Back to Top ↑
+  </a>
+</p>
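The batch scrape section added to the README above says the POST returns a job ID that can be used to check the status of the batch scrape. As a minimal sketch (not part of the diff), that status could presumably be polled through the `GET /v1/batch/scrape/{id}` route this same commit registers, reusing the bearer auth shown above; the job id placeholder and the exact response shape are assumptions here.

```bash
# Sketch: poll a batch scrape job using the id returned by the POST above.
# YOUR_JOB_ID and the response shape are assumptions for illustration.
curl -X GET https://api.firecrawl.dev/v1/batch/scrape/YOUR_JOB_ID \
    -H 'Authorization: Bearer YOUR_API_KEY'
```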
SELF_HOST.md (10 changed lines)

@@ -36,7 +36,7 @@ Self-hosting Firecrawl is ideal for those who need full control over their scrap
 
 Create an `.env` in the root directory you can copy over the template in `apps/api/.env.example`
 
-To start, we wont set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features)
+To start, we won't set up authentication or any optional subservices (pdf parsing, JS blocking support, AI features)
 
 `.env:`
 ```

@@ -47,7 +47,7 @@ HOST=0.0.0.0
 REDIS_URL=redis://redis:6379
 REDIS_RATE_LIMIT_URL=redis://redis:6379
 
-## To turn on DB authentication, you need to set up supabase.
+## To turn on DB authentication, you need to set up Supabase.
 USE_DB_AUTHENTICATION=false
 
 # ===== Optional ENVS ======

@@ -59,8 +59,8 @@ SUPABASE_SERVICE_TOKEN=
 
 # Other Optionals
 TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
-SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
-OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
+SCRAPING_BEE_API_KEY= # use if you'd like to use as a fallback scraper
+OPENAI_API_KEY= # add for LLM-dependent features (e.g., image alt generation)
 BULL_AUTH_KEY= @
 LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
 PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback

@@ -176,4 +176,4 @@ By addressing these common issues, you can ensure a smoother setup and operation
 
 ## Install Firecrawl on a Kubernetes Cluster (Simple Version)
 
 Read the [examples/kubernetes/cluster-install/README.md](https://github.com/mendableai/firecrawl/blob/main/examples/kubernetes/cluster-install/README.md) for instructions on how to install Firecrawl on a Kubernetes Cluster.
[file name not captured]

@@ -1,5 +1,5 @@
 # ===== Required ENVS ======
 NUM_WORKERS_PER_QUEUE=8
 PORT=3002
 HOST=0.0.0.0
 REDIS_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379

@@ -11,9 +11,14 @@ USE_DB_AUTHENTICATION=true
 
 # ===== Optional ENVS ======
 
+# SearchApi key. Head to https://searchapi.com/ to get your API key
+SEARCHAPI_API_KEY=
+# SearchApi engine, defaults to google. Available options: google, bing, baidu, google_news, etc. Head to https://searchapi.com/ to explore more engines
+SEARCHAPI_ENGINE=
+
 # Supabase Setup (used to support DB authentication, advanced logging, etc.)
 SUPABASE_ANON_TOKEN=
 SUPABASE_URL=
 SUPABASE_SERVICE_TOKEN=
 
 # Other Optionals
[file name not captured]

@@ -12,4 +12,4 @@ ANTHROPIC_API_KEY=
 BULL_AUTH_KEY=
 LOGTAIL_KEY=
 PLAYWRIGHT_MICROSERVICE_URL=
+SEARCHAPI_API_KEY=
[file name not captured]

@@ -121,6 +121,49 @@ describe("E2E Tests for v1 API Routes", () => {
     },
     30000
   ); // 30 seconds timeout
 
+  it.concurrent(
+    "should return a successful response with a valid API key",
+    async () => {
+      const scrapeRequest: ScrapeRequest = {
+        url: "https://arxiv.org/abs/2410.04840",
+      };
+
+      const response: ScrapeResponseRequestTest = await request(TEST_URL)
+        .post("/v1/scrape")
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .send(scrapeRequest);
+
+      expect(response.statusCode).toBe(200);
+
+      if (!("data" in response.body)) {
+        throw new Error("Expected response body to have 'data' property");
+      }
+      expect(response.body.data).not.toHaveProperty("content");
+      expect(response.body.data).toHaveProperty("markdown");
+      expect(response.body.data).toHaveProperty("metadata");
+      expect(response.body.data).not.toHaveProperty("html");
+      expect(response.body.data.markdown).toContain("Strong Model Collapse");
+      expect(response.body.data.metadata.error).toBeUndefined();
+      expect(response.body.data.metadata.description).toContain("Abstract page for arXiv paper 2410.04840: Strong Model Collapse");
+      expect(response.body.data.metadata.citation_title).toBe("Strong Model Collapse");
+      expect(response.body.data.metadata.citation_author).toEqual([
+        "Dohmatob, Elvis",
+        "Feng, Yunzhen",
+        "Subramonian, Arjun",
+        "Kempe, Julia"
+      ]);
+      expect(response.body.data.metadata.citation_date).toBe("2024/10/07");
+      expect(response.body.data.metadata.citation_online_date).toBe("2024/10/08");
+      expect(response.body.data.metadata.citation_pdf_url).toBe("http://arxiv.org/pdf/2410.04840");
+      expect(response.body.data.metadata.citation_arxiv_id).toBe("2410.04840");
+      expect(response.body.data.metadata.citation_abstract).toContain("Within the scaling laws paradigm");
+      expect(response.body.data.metadata.sourceURL).toBe("https://arxiv.org/abs/2410.04840");
+      expect(response.body.data.metadata.statusCode).toBe(200);
+    },
+    30000
+  );
 
   it.concurrent(
     "should return a successful response with a valid API key and includeHtml set to true",
     async () => {
[file name not captured]

@@ -13,7 +13,7 @@ import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
 import { sendNotification } from "../services/notification/email_notification";
 import { Logger } from "../lib/logger";
 import { redlock } from "../services/redlock";
-import { getValue } from "../services/redis";
+import { deleteKey, getValue } from "../services/redis";
 import { setValue } from "../services/redis";
 import { validate } from "uuid";
 import * as Sentry from "@sentry/node";

@@ -37,12 +37,17 @@ function normalizedApiIsUuid(potentialUuid: string): boolean {
   return validate(potentialUuid);
 }
 
-export async function setCachedACUC(api_key: string, acuc: AuthCreditUsageChunk | ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk)) {
+export async function setCachedACUC(
+  api_key: string,
+  acuc:
+    | AuthCreditUsageChunk
+    | ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk)
+) {
   const cacheKeyACUC = `acuc_${api_key}`;
   const redLockKey = `lock_${cacheKeyACUC}`;
 
   try {
-    await redlock.using([redLockKey], 10000, {}, async signal => {
+    await redlock.using([redLockKey], 10000, {}, async (signal) => {
       if (typeof acuc === "function") {
         acuc = acuc(JSON.parse(await getValue(cacheKeyACUC)));

@@ -68,31 +73,60 @@ export async function setCachedACUC(api_key: string, acuc: AuthCreditUsageChunk
   }
 }
 
-export async function getACUC(api_key: string, cacheOnly = false): Promise<AuthCreditUsageChunk | null> {
+export async function getACUC(
+  api_key: string,
+  cacheOnly = false,
+  useCache = true
+): Promise<AuthCreditUsageChunk | null> {
   const cacheKeyACUC = `acuc_${api_key}`;
 
-  const cachedACUC = await getValue(cacheKeyACUC);
+  if (useCache) {
+    const cachedACUC = await getValue(cacheKeyACUC);
+    if (cachedACUC !== null) {
+      return JSON.parse(cachedACUC);
+    }
+  }
 
-  if (cachedACUC !== null) {
-    return JSON.parse(cachedACUC);
-  } else if (!cacheOnly) {
-    const { data, error } =
-      await supabase_service.rpc("auth_credit_usage_chunk", { input_key: api_key });
+  if (!cacheOnly) {
+    let data;
+    let error;
+    let retries = 0;
+    const maxRetries = 5;
 
-    if (error) {
-      throw new Error("Failed to retrieve authentication and credit usage data: " + JSON.stringify(error));
+    while (retries < maxRetries) {
+      ({ data, error } = await supabase_service.rpc(
+        "auth_credit_usage_chunk_test_21_credit_pack",
+        { input_key: api_key }
+      ));
+
+      if (!error) {
+        break;
+      }
+
+      Logger.warn(
+        `Failed to retrieve authentication and credit usage data after ${retries}, trying again...`
+      );
+      retries++;
+      if (retries === maxRetries) {
+        throw new Error(
+          "Failed to retrieve authentication and credit usage data after 3 attempts: " +
+            JSON.stringify(error)
+        );
+      }
+
+      // Wait for a short time before retrying
+      await new Promise((resolve) => setTimeout(resolve, 200));
     }
 
-    const chunk: AuthCreditUsageChunk | null = data.length === 0
-      ? null
-      : data[0].team_id === null
-        ? null
-        : data[0];
+    const chunk: AuthCreditUsageChunk | null =
+      data.length === 0 ? null : data[0].team_id === null ? null : data[0];
 
     // NOTE: Should we cache null chunks? - mogery
-    if (chunk !== null) {
+    if (chunk !== null && useCache) {
       setCachedACUC(api_key, chunk);
     }
 
+    // console.log(chunk);
+
     return chunk;
   } else {

@@ -100,6 +134,13 @@ export async function getACUC(api_key: string, cacheOnly = false): Promise<AuthC
   }
 }
 
+export async function clearACUC(
+  api_key: string,
+): Promise<void> {
+  const cacheKeyACUC = `acuc_${api_key}`;
+  await deleteKey(cacheKeyACUC);
+}
+
 export async function authenticateUser(
   req,
   res,

@@ -132,7 +173,11 @@ export async function supaAuthenticateUser(
   plan?: PlanType;
   chunk?: AuthCreditUsageChunk;
 }> {
-  const authHeader = req.headers.authorization ?? (req.headers["sec-websocket-protocol"] ? `Bearer ${req.headers["sec-websocket-protocol"]}` : null);
+  const authHeader =
+    req.headers.authorization ??
+    (req.headers["sec-websocket-protocol"]
+      ? `Bearer ${req.headers["sec-websocket-protocol"]}`
+      : null);
   if (!authHeader) {
     return { success: false, error: "Unauthorized", status: 401 };
   }

@@ -162,7 +207,7 @@ export async function supaAuthenticateUser(
        rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
      } else {
        rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
      }
      teamId = "preview";
    } else {
      normalizedApi = parseApi(token);
apps/api/src/controllers/v0/admin/acuc-cache-clear.ts (new file, 22 lines)

@@ -0,0 +1,22 @@
+import { Request, Response } from "express";
+import { supabase_service } from "../../../services/supabase";
+import { clearACUC } from "../../auth";
+import { Logger } from "../../../lib/logger";
+
+export async function acucCacheClearController(req: Request, res: Response) {
+  try {
+    const team_id: string = req.body.team_id;
+
+    const keys = await supabase_service
+      .from("api_keys")
+      .select("*")
+      .eq("team_id", team_id);
+
+    await Promise.all(keys.data.map((x) => clearACUC(x.key)));
+
+    res.json({ ok: true });
+  } catch (error) {
+    Logger.error(`Error clearing ACUC cache via API route: ${error}`);
+    res.status(500).json({ error: "Internal server error" });
+  }
+}
[file name not captured]

@@ -60,7 +60,7 @@ export async function crawlStatusController(req: Request, res: Response) {
     }));
 
     // Filter out failed jobs
-    jobsWithStatuses = jobsWithStatuses.filter(x => x.status !== "failed");
+    jobsWithStatuses = jobsWithStatuses.filter(x => x.status !== "failed" && x.status !== "unknown");
 
     // Sort jobs by timestamp
     jobsWithStatuses.sort((a, b) => a.job.timestamp - b.job.timestamp);
apps/api/src/controllers/v1/batch-scrape.ts (new file, 103 lines)

@@ -0,0 +1,103 @@
+import { Response } from "express";
+import { v4 as uuidv4 } from "uuid";
+import {
+  BatchScrapeRequest,
+  batchScrapeRequestSchema,
+  CrawlResponse,
+  legacyExtractorOptions,
+  legacyScrapeOptions,
+  RequestWithAuth,
+} from "./types";
+import {
+  addCrawlJobs,
+  lockURLs,
+  saveCrawl,
+  StoredCrawl,
+} from "../../lib/crawl-redis";
+import { logCrawl } from "../../services/logging/crawl_log";
+import { getScrapeQueue } from "../../services/queue-service";
+import { getJobPriority } from "../../lib/job-priority";
+
+export async function batchScrapeController(
+  req: RequestWithAuth<{}, CrawlResponse, BatchScrapeRequest>,
+  res: Response<CrawlResponse>
+) {
+  req.body = batchScrapeRequestSchema.parse(req.body);
+
+  const id = uuidv4();
+
+  await logCrawl(id, req.auth.team_id);
+
+  let { remainingCredits } = req.account;
+  const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
+  if(!useDbAuthentication){
+    remainingCredits = Infinity;
+  }
+
+  const pageOptions = legacyScrapeOptions(req.body);
+  const extractorOptions = req.body.extract ? legacyExtractorOptions(req.body.extract) : undefined;
+
+
+  const sc: StoredCrawl = {
+    crawlerOptions: null,
+    pageOptions,
+    team_id: req.auth.team_id,
+    createdAt: Date.now(),
+    plan: req.auth.plan,
+  };
+
+  await saveCrawl(id, sc);
+
+  let jobPriority = 20;
+
+  // If it is over 1000, we need to get the job priority,
+  // otherwise we can use the default priority of 20
+  if(req.body.urls.length > 1000){
+    // set base to 21
+    jobPriority = await getJobPriority({plan: req.auth.plan, team_id: req.auth.team_id, basePriority: 21})
+  }
+
+  const jobs = req.body.urls.map((x) => {
+    const uuid = uuidv4();
+    return {
+      name: uuid,
+      data: {
+        url: x,
+        mode: "single_urls",
+        team_id: req.auth.team_id,
+        plan: req.auth.plan,
+        crawlerOptions: null,
+        pageOptions,
+        extractorOptions,
+        origin: "api",
+        crawl_id: id,
+        sitemapped: true,
+        v1: true,
+      },
+      opts: {
+        jobId: uuid,
+        priority: 20,
+      },
+    };
+  });
+
+  await lockURLs(
+    id,
+    jobs.map((x) => x.data.url)
+  );
+  await addCrawlJobs(
+    id,
+    jobs.map((x) => x.opts.jobId)
+  );
+  await getScrapeQueue().addBulk(jobs);
+
+  const protocol = process.env.ENV === "local" ? req.protocol : "https";
+
+  return res.status(200).json({
+    success: true,
+    id,
+    url: `${protocol}://${req.get("host")}/v1/batch/scrape/${id}`,
+  });
+}
[file name not captured]

@@ -97,12 +97,23 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusPara
   let jobIDs = await getCrawlJobs(req.params.jobId);
   let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
   const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
-  jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
-  // filter out failed jobs
-  jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
-  // filter the job statues
-  jobStatuses = jobStatuses.filter(x => x[1] !== "failed");
-  const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
+
+  const throttledJobsSet = new Set(throttledJobs);
+
+  const validJobStatuses = [];
+  const validJobIDs = [];
+
+  for (const [id, status] of jobStatuses) {
+    if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") {
+      validJobStatuses.push([id, status]);
+      validJobIDs.push(id);
+    }
+  }
+
+  const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
+
+  jobIDs = validJobIDs; // Use validJobIDs instead of jobIDs for further processing
+
   const doneJobs = await getJobs(doneJobIDs);
   const data = doneJobs.map(x => x.returnvalue);
[file name not captured]

@@ -44,7 +44,7 @@ export async function getJobs(ids: string[]) {
   return jobs;
 }
 
-export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>) {
+export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>, isBatch = false) {
   const sc = await getCrawl(req.params.jobId);
   if (!sc) {
     return res.status(404).json({ success: false, error: "Job not found" });

@@ -60,12 +60,24 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusPara
   let jobIDs = await getCrawlJobs(req.params.jobId);
   let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
   const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
-  jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
-  // filter out failed jobs
-  jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
-  // filter the job statues
-  jobStatuses = jobStatuses.filter(x => x[1] !== "failed");
-  const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
+
+  const throttledJobsSet = new Set(throttledJobs);
+
+  const validJobStatuses = [];
+  const validJobIDs = [];
+
+  for (const [id, status] of jobStatuses) {
+    if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") {
+      validJobStatuses.push([id, status]);
+      validJobIDs.push(id);
+    }
+  }
+
+  const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
+
+  // Use validJobIDs instead of jobIDs for further processing
+  jobIDs = validJobIDs;
+
   const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId);
   const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);

@@ -100,7 +112,7 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusPara
 
   const data = doneJobs.map(x => x.returnvalue);
 
-  const nextURL = new URL(`${req.protocol}://${req.get("host")}/v1/crawl/${req.params.jobId}`);
+  const nextURL = new URL(`${req.protocol}://${req.get("host")}/v1/${isBatch ? "batch/scrape" : "crawl"}/${req.params.jobId}`);
 
   nextURL.searchParams.set("skip", (start + data.length).toString());
 
[file name not captured]

@@ -78,7 +78,7 @@ export async function crawlController(
   const crawler = crawlToCrawler(id, sc);
 
   try {
-    sc.robots = await crawler.getRobotsTxt();
+    sc.robots = await crawler.getRobotsTxt(pageOptions.skipTlsVerification);
   } catch (e) {
     Logger.debug(
       `[Crawl] Failed to get robots.txt (this is probably fine!): ${JSON.stringify(
[file name not captured]

@@ -63,7 +63,7 @@ export async function mapController(
   const maxPages = Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage);
 
   const cacheKey = `fireEngineMap:${mapUrl}`;
-  const cachedResult = await redis.get(cacheKey);
+  const cachedResult = null;
 
   let allResults: any[];
   let pagePromises: Promise<any>[];
[file name not captured]

@@ -139,7 +139,7 @@ export async function scrapeController(
     crawlerOptions: {},
     pageOptions: pageOptions,
     origin: origin,
-    extractor_options: { mode: "markdown" },
+    extractor_options: extractorOptions,
     num_tokens: numTokens,
   });
 
[file name not captured]

@@ -4,6 +4,7 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
 import { Action, ExtractorOptions, PageOptions } from "../../lib/entities";
 import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
 import { PlanType } from "../../types";
+import { countries } from "../../lib/validate-country";
 
 export type Format =
   | "markdown"

@@ -108,6 +109,28 @@ export const scrapeOptions = z.object({
   extract: extractOptions.optional(),
   parsePDF: z.boolean().default(true),
   actions: actionsSchema.optional(),
+  // New
+  location: z.object({
+    country: z.string().optional().refine(
+      (val) => !val || Object.keys(countries).includes(val.toUpperCase()),
+      {
+        message: "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
+      }
+    ).transform(val => val ? val.toUpperCase() : 'US'),
+    languages: z.string().array().optional(),
+  }).optional(),
+
+  // Deprecated
+  geolocation: z.object({
+    country: z.string().optional().refine(
+      (val) => !val || Object.keys(countries).includes(val.toUpperCase()),
+      {
+        message: "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
+      }
+    ).transform(val => val ? val.toUpperCase() : 'US'),
+    languages: z.string().array().optional(),
+  }).optional(),
+  skipTlsVerification: z.boolean().default(false),
 }).strict(strictMessage)
 
 

@@ -132,19 +155,29 @@ export const scrapeRequestSchema = scrapeOptions.extend({
   return obj;
 });
 
-// export type ScrapeRequest = {
-//   url: string;
-//   formats?: Format[];
-//   headers?: { [K: string]: string };
-//   includeTags?: string[];
-//   excludeTags?: string[];
-//   onlyMainContent?: boolean;
-//   timeout?: number;
-//   waitFor?: number;
-// }
-
 export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
 
+export const batchScrapeRequestSchema = scrapeOptions.extend({
+  urls: url.array(),
+  origin: z.string().optional().default("api"),
+}).strict(strictMessage).refine(
+  (obj) => {
+    const hasExtractFormat = obj.formats?.includes("extract");
+    const hasExtractOptions = obj.extract !== undefined;
+    return (hasExtractFormat && hasExtractOptions) || (!hasExtractFormat && !hasExtractOptions);
+  },
+  {
+    message: "When 'extract' format is specified, 'extract' options must be provided, and vice versa",
+  }
+).transform((obj) => {
+  if ((obj.formats?.includes("extract") || obj.extract) && !obj.timeout) {
+    return { ...obj, timeout: 60000 };
+  }
+  return obj;
+});
+
+export type BatchScrapeRequest = z.infer<typeof batchScrapeRequestSchema>;
+
 const crawlerOptions = z.object({
   includePaths: z.string().array().default([]),
   excludePaths: z.string().array().default([]),

@@ -250,6 +283,8 @@ export type Document = {
     sourceURL?: string;
     statusCode?: number;
     error?: string;
+    [key: string]: string | string[] | number | undefined;
+
   };
 };
 

@@ -340,6 +375,8 @@ export type AuthCreditUsageChunk = {
   coupons: any[];
   adjusted_credits_used: number; // credits this period minus coupons used
   remaining_credits: number;
+  sub_user_id: string | null;
+  total_credits_sum: number;
 };
 
 export interface RequestWithMaybeACUC<

@@ -421,6 +458,8 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
     fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
     parsePDF: x.parsePDF,
     actions: x.actions as Action[], // no strict null checking grrrr - mogery
+    geolocation: x.location ?? x.geolocation,
+    skipTlsVerification: x.skipTlsVerification
   };
 }
 
[file name not captured]

@@ -20,6 +20,7 @@ import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
 import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
 import { ZodError } from "zod";
 import { v4 as uuidv4 } from "uuid";
+import dns from 'node:dns';
 
 const { createBullBoard } = require("@bull-board/api");
 const { BullAdapter } = require("@bull-board/api/bullAdapter");

@@ -28,13 +29,13 @@ const { ExpressAdapter } = require("@bull-board/express");
 const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
 Logger.info(`Number of CPUs: ${numCPUs} available`);
 
-const cacheable = new CacheableLookup({
-  // this is important to avoid querying local hostnames see https://github.com/szmarczak/cacheable-lookup readme
-  lookup:false
-});
+const cacheable = new CacheableLookup()
 
+// Install cacheable lookup for all other requests
 cacheable.install(http.globalAgent);
-cacheable.install(https.globalAgent)
+cacheable.install(https.globalAgent);
 
 const ws = expressWs(express());
 const app = ws.app;
[file name not captured]

@@ -6,7 +6,13 @@ export function numTokensFromString(message: string, model: string): number {
   const encoder = encoding_for_model(model as TiktokenModel);
 
   // Encode the message into tokens
-  const tokens = encoder.encode(message);
+  let tokens: Uint32Array;
+  try {
+    tokens = encoder.encode(message);
+  } catch (error) {
+    message = message.replace("<|endoftext|>", "");
+    tokens = encoder.encode(message);
+  }
 
   // Free the encoder resources after use
   encoder.free();
[file name not captured]

@@ -3,7 +3,7 @@ import { redisConnection } from "../services/queue-service";
 import { Logger } from "./logger";
 
 export type StoredCrawl = {
-  originUrl: string;
+  originUrl?: string;
   crawlerOptions: any;
   pageOptions: any;
   team_id: string;
[file name not captured]

@@ -51,6 +51,10 @@ export type PageOptions = {
   disableJsDom?: boolean; // beta
   atsv?: boolean; // anti-bot solver, beta
   actions?: Action[]; // beta
+  geolocation?: {
+    country?: string;
+  };
+  skipTlsVerification?: boolean;
 };
 
 export type ExtractorOptions = {
apps/api/src/lib/validate-country.ts (new file, 2261 lines)

File diff suppressed because it is too large.
[file name not captured]

@@ -112,7 +112,7 @@ export async function runWebScraper({
   }
 
   // remove docs with empty content
-  const filteredDocs = crawlerOptions.returnOnlyUrls
+  const filteredDocs = crawlerOptions?.returnOnlyUrls
     ? docs.map((doc) => {
         if (doc.metadata.sourceURL) {
           return { url: doc.metadata.sourceURL };

@@ -121,8 +121,13 @@ export async function runWebScraper({
     : docs;
 
   if(is_scrape === false) {
-    billTeam(team_id, undefined, filteredDocs.length).catch(error => {
-      Logger.error(`Failed to bill team ${team_id} for ${filteredDocs.length} credits: ${error}`);
+    let creditsToBeBilled = 1; // Assuming 1 credit per document
+    if (extractorOptions && (extractorOptions.mode === "llm-extraction" || extractorOptions.mode === "extract")) {
+      creditsToBeBilled = 5;
+    }
+
+    billTeam(team_id, undefined, creditsToBeBilled * filteredDocs.length).catch(error => {
+      Logger.error(`Failed to bill team ${team_id} for ${creditsToBeBilled * filteredDocs.length} credits: ${error}`);
       // Optionally, you could notify an admin or add to a retry queue here
     });
   }
[file name not captured]

@@ -6,6 +6,8 @@ import {
   cleanBefore24hCompleteJobsController,
   queuesController,
 } from "../controllers/v0/admin/queue";
+import { acucCacheClearController } from "../controllers/v0/admin/acuc-cache-clear";
+import { wrap } from "./v1";
 
 export const adminRouter = express.Router();
 

@@ -33,3 +35,8 @@ adminRouter.get(
   `/admin/${process.env.BULL_AUTH_KEY}/autoscaler`,
   autoscalerController
 );
+
+adminRouter.post(
+  `/admin/${process.env.BULL_AUTH_KEY}/acuc-cache-clear`,
+  wrap(acucCacheClearController),
+);
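The hunks above wire the new admin route: a POST to `/admin/${BULL_AUTH_KEY}/acuc-cache-clear` wrapped around `acucCacheClearController`, which reads `team_id` from the JSON body and clears the cached credit-usage chunk for every API key of that team. A minimal sketch of invoking it, assuming the hosted API URL and an exported `BULL_AUTH_KEY` shell variable (both assumptions, not part of the diff):

```bash
# Sketch only: clear cached auth/credit-usage chunks for one team.
# The host, $BULL_AUTH_KEY env var, and YOUR_TEAM_ID are assumptions for illustration.
curl -X POST "https://api.firecrawl.dev/admin/$BULL_AUTH_KEY/acuc-cache-clear" \
    -H 'Content-Type: application/json' \
    -d '{"team_id": "YOUR_TEAM_ID"}'
```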
[file name not captured]

@@ -17,6 +17,7 @@ import { crawlCancelController } from "../controllers/v1/crawl-cancel";
 import { Logger } from "../lib/logger";
 import { scrapeStatusController } from "../controllers/v1/scrape-status";
 import { concurrencyCheckController } from "../controllers/v1/concurrency-check";
+import { batchScrapeController } from "../controllers/v1/batch-scrape";
 // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
 // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
 // import { searchController } from "../../src/controllers/v1/search";

@@ -29,14 +30,14 @@ function checkCreditsMiddleware(minimum?: number): (req: RequestWithAuth, res: R
   return (req, res, next) => {
     (async () => {
       if (!minimum && req.body) {
-        minimum = (req.body as any)?.limit ?? 1;
+        minimum = (req.body as any)?.limit ?? (req.body as any)?.urls?.length ?? 1;
       }
       const { success, remainingCredits, chunk } = await checkTeamCredits(req.acuc, req.auth.team_id, minimum);
       req.acuc = chunk;
       if (!success) {
         Logger.error(`Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum, remainingCredits })}`);
         if (!res.headersSent) {
-          return res.status(402).json({ success: false, error: "Insufficient credits. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing." });
+          return res.status(402).json({ success: false, error: "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value." });
         }
       }
       req.account = { remainingCredits };

@@ -94,7 +95,7 @@ function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
   next();
 }
 
-function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
+export function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
   return (req, res, next) => {
     controller(req, res)
       .catch(err => next(err))

@@ -122,6 +123,15 @@ v1Router.post(
   wrap(crawlController)
 );
 
+v1Router.post(
+  "/batch/scrape",
+  authMiddleware(RateLimiterMode.Crawl),
+  checkCreditsMiddleware(),
+  blocklistMiddleware,
+  idempotencyMiddleware,
+  wrap(batchScrapeController)
+);
+
 v1Router.post(
   "/map",
   authMiddleware(RateLimiterMode.Map),

@@ -136,6 +146,13 @@ v1Router.get(
   wrap(crawlStatusController)
 );
 
+v1Router.get(
+  "/batch/scrape/:jobId",
+  authMiddleware(RateLimiterMode.CrawlStatus),
+  // Yes, it uses the same controller as the normal crawl status controller
+  wrap((req:any, res):any => crawlStatusController(req, res, true))
+);
+
 v1Router.get(
   "/scrape/:jobId",
   wrap(scrapeStatusController)
[file name not captured]

@@ -9,7 +9,7 @@ import robotsParser from "robots-parser";
 import { getURLDepth } from "./utils/maxDepthUtils";
 import { axiosTimeout } from "../../../src/lib/timeout";
 import { Logger } from "../../../src/lib/logger";
+import https from "https";
 export class WebCrawler {
   private jobId: string;
   private initialUrl: string;

@@ -136,13 +136,23 @@ export class WebCrawler {
           return false;
         }
 
+        if (this.isFile(link)) {
+          return false;
+        }
+
         return true;
       })
       .slice(0, limit);
   }
 
-  public async getRobotsTxt(): Promise<string> {
-    const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout });
+  public async getRobotsTxt(skipTlsVerification = false): Promise<string> {
+    let extraArgs = {};
+    if(skipTlsVerification) {
+      extraArgs["httpsAgent"] = new https.Agent({
+        rejectUnauthorized: false
+      });
+    }
+    const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout, ...extraArgs });
     return response.data;
   }
 

@@ -478,7 +488,14 @@ export class WebCrawler {
       ".webp",
       ".inc"
     ];
-    return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
+
+    try {
+      const urlWithoutQuery = url.split('?')[0].toLowerCase();
+      return fileExtensions.some((ext) => urlWithoutQuery.endsWith(ext));
+    } catch (error) {
+      Logger.error(`Error processing URL in isFile: ${error}`);
+      return false;
+    }
   }
 
   private isSocialMediaOrEmail(url: string): boolean {
[file name not captured]

@@ -593,6 +593,8 @@ export class WebScraperDataProvider {
       disableJsDom: options.pageOptions?.disableJsDom ?? false,
       atsv: options.pageOptions?.atsv ?? false,
       actions: options.pageOptions?.actions ?? undefined,
+      geolocation: options.pageOptions?.geolocation ?? undefined,
+      skipTlsVerification: options.pageOptions?.skipTlsVerification ?? false,
     };
     this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
     this.replaceAllPathsWithAbsolutePaths =
[file name not captured]

@@ -28,7 +28,7 @@ export async function scrapWithFireEngine({
   waitFor = 0,
   screenshot = false,
   fullPageScreenshot = false,
-  pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
+  pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false, geolocation: { country: "US" }, skipTlsVerification: false },
   fireEngineOptions = {},
   headers,
   options,

@@ -40,7 +40,7 @@ export async function scrapWithFireEngine({
   waitFor?: number;
   screenshot?: boolean;
   fullPageScreenshot?: boolean;
-  pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
+  pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean, geolocation?: { country?: string }, skipTlsVerification?: boolean };
   fireEngineOptions?: FireEngineOptions;
   headers?: Record<string, string>;
   options?: any;

@@ -118,6 +118,8 @@ export async function scrapWithFireEngine({
         ...fireEngineOptionsParam,
         atsv: pageOptions?.atsv ?? false,
         scrollXPaths: pageOptions?.scrollXPaths ?? [],
+        geolocation: pageOptions?.geolocation,
+        skipTlsVerification: pageOptions?.skipTlsVerification ?? false,
         actions: actions,
       },
       {
[file name not captured]

@@ -156,6 +156,8 @@ export async function scrapSingleUrl(
       disableJsDom: pageOptions.disableJsDom ?? false,
       atsv: pageOptions.atsv ?? false,
       actions: pageOptions.actions ?? undefined,
+      geolocation: pageOptions.geolocation ?? undefined,
+      skipTlsVerification: pageOptions.skipTlsVerification ?? false,
     }
 
   if (extractorOptions) {

@@ -207,14 +209,15 @@ export async function scrapSingleUrl(
       if (action.type === "click" || action.type === "write" || action.type === "press") {
         const result: Action[] = [];
         // Don't add a wait if the previous action is a wait
-        if (index === 0 || array[index - 1].type !== "wait") {
-          result.push({ type: "wait", milliseconds: 1200 } as Action);
-        }
+        // if (index === 0 || array[index - 1].type !== "wait") {
+        //   result.push({ type: "wait", milliseconds: 1200 } as Action);
+        // }
+        // Fire-engine now handles wait times automatically, leaving the code here for now
         result.push(action);
         // Don't add a wait if the next action is a wait
-        if (index === array.length - 1 || array[index + 1].type !== "wait") {
-          result.push({ type: "wait", milliseconds: 1200 } as Action);
-        }
+        // if (index === array.length - 1 || array[index + 1].type !== "wait") {
+        //   result.push({ type: "wait", milliseconds: 1200 } as Action);
+        // }
         return result;
       }
       return [action as Action];
[file name not captured]

@@ -3,10 +3,8 @@ export const excludeNonMainTags = [
   "footer",
   "nav",
   "aside",
-  ".header",
   ".top",
   ".navbar",
-  "#header",
   ".footer",
   ".bottom",
   "#footer",

@@ -39,8 +37,6 @@ export const excludeNonMainTags = [
   "#search",
   ".share",
   "#share",
-  ".widget",
-  "#widget",
   ".cookie",
   "#cookie"
 ];
@@ -34,6 +34,7 @@ interface Metadata {
   sourceURL?: string;
   pageStatusCode?: number;
   pageError?: string;
+  [key: string]: string | string[] | number | undefined;
 }

 export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
@@ -70,40 +71,78 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
   let pageStatusCode: number | null = null;
   let pageError: string | null = null;

+  const customMetadata: Record<string, string | string[]> = {};

   try {
+    // TODO: remove this as it is redundant with the below implementation
     title = soup("title").text() || null;
     description = soup('meta[name="description"]').attr("content") || null;

-    // Assuming the language is part of the URL as per the regex pattern
-    language = soup('html').attr('lang') || null;
+    language = soup("html").attr("lang") || null;

     keywords = soup('meta[name="keywords"]').attr("content") || null;
     robots = soup('meta[name="robots"]').attr("content") || null;
     ogTitle = soup('meta[property="og:title"]').attr("content") || null;
-    ogDescription = soup('meta[property="og:description"]').attr("content") || null;
+    ogDescription =
+      soup('meta[property="og:description"]').attr("content") || null;
     ogUrl = soup('meta[property="og:url"]').attr("content") || null;
     ogImage = soup('meta[property="og:image"]').attr("content") || null;
     ogAudio = soup('meta[property="og:audio"]').attr("content") || null;
-    ogDeterminer = soup('meta[property="og:determiner"]').attr("content") || null;
+    ogDeterminer =
+      soup('meta[property="og:determiner"]').attr("content") || null;
     ogLocale = soup('meta[property="og:locale"]').attr("content") || null;
-    ogLocaleAlternate = soup('meta[property="og:locale:alternate"]').map((i, el) => soup(el).attr("content")).get() || null;
+    ogLocaleAlternate =
+      soup('meta[property="og:locale:alternate"]')
+        .map((i, el) => soup(el).attr("content"))
+        .get() || null;
     ogSiteName = soup('meta[property="og:site_name"]').attr("content") || null;
     ogVideo = soup('meta[property="og:video"]').attr("content") || null;
-    articleSection = soup('meta[name="article:section"]').attr("content") || null;
+    articleSection =
+      soup('meta[name="article:section"]').attr("content") || null;
     articleTag = soup('meta[name="article:tag"]').attr("content") || null;
-    publishedTime = soup('meta[property="article:published_time"]').attr("content") || null;
-    modifiedTime = soup('meta[property="article:modified_time"]').attr("content") || null;
-    dctermsKeywords = soup('meta[name="dcterms.keywords"]').attr("content") || null;
+    publishedTime =
+      soup('meta[property="article:published_time"]').attr("content") || null;
+    modifiedTime =
+      soup('meta[property="article:modified_time"]').attr("content") || null;
+    dctermsKeywords =
+      soup('meta[name="dcterms.keywords"]').attr("content") || null;
     dcDescription = soup('meta[name="dc.description"]').attr("content") || null;
     dcSubject = soup('meta[name="dc.subject"]').attr("content") || null;
-    dctermsSubject = soup('meta[name="dcterms.subject"]').attr("content") || null;
-    dctermsAudience = soup('meta[name="dcterms.audience"]').attr("content") || null;
+    dctermsSubject =
+      soup('meta[name="dcterms.subject"]').attr("content") || null;
+    dctermsAudience =
+      soup('meta[name="dcterms.audience"]').attr("content") || null;
     dcType = soup('meta[name="dc.type"]').attr("content") || null;
     dctermsType = soup('meta[name="dcterms.type"]').attr("content") || null;
     dcDate = soup('meta[name="dc.date"]').attr("content") || null;
-    dcDateCreated = soup('meta[name="dc.date.created"]').attr("content") || null;
-    dctermsCreated = soup('meta[name="dcterms.created"]').attr("content") || null;
+    dcDateCreated =
+      soup('meta[name="dc.date.created"]').attr("content") || null;
+    dctermsCreated =
+      soup('meta[name="dcterms.created"]').attr("content") || null;
+
+    try {
+      // Extract all meta tags for custom metadata
+      soup("meta").each((i, elem) => {
+        try {
+          const name = soup(elem).attr("name") || soup(elem).attr("property");
+          const content = soup(elem).attr("content");
+
+          if (name && content) {
+            if (customMetadata[name] === undefined) {
+              customMetadata[name] = content;
+            } else if (Array.isArray(customMetadata[name])) {
+              (customMetadata[name] as string[]).push(content);
+            } else {
+              customMetadata[name] = [customMetadata[name] as string, content];
+            }
+          }
+        } catch (error) {
+          Logger.error(`Error extracting custom metadata (in): ${error}`);
+        }
+      });
+    } catch (error) {
+      Logger.error(`Error extracting custom metadata: ${error}`);
+    }
   } catch (error) {
     Logger.error(`Error extracting metadata: ${error}`);
   }
@@ -141,5 +180,6 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
     ...(sourceURL ? { sourceURL } : {}),
     ...(pageStatusCode ? { pageStatusCode } : {}),
     ...(pageError ? { pageError } : {}),
+    ...customMetadata,
   };
 }
@@ -1,5 +1,5 @@
 import axios, { AxiosResponse } from "axios";
-import fs from "fs";
+import fs from "fs/promises";
 import { createReadStream, createWriteStream } from "node:fs";
 import FormData from "form-data";
 import dotenv from "dotenv";
@@ -15,7 +15,7 @@ export async function fetchAndProcessPdf(url: string, parsePDF: boolean): Promis
   try {
     const { tempFilePath, pageStatusCode, pageError } = await downloadPdf(url);
     const content = await processPdfToText(tempFilePath, parsePDF);
-    fs.unlinkSync(tempFilePath); // Clean up the temporary file
+    await fs.unlink(tempFilePath); // Clean up the temporary file
     return { content, pageStatusCode, pageError };
   } catch (error) {
     Logger.error(`Failed to fetch and process PDF: ${error.message}`);
@@ -120,7 +120,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
     }
   } else {
     try {
-      content = fs.readFileSync(filePath, "utf-8");
+      content = await fs.readFile(filePath, "utf-8");
     } catch (error) {
       Logger.error(`Failed to read PDF file: ${error}`);
       content = "";
@@ -131,7 +131,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro

 async function processPdf(file: string) {
   try {
-    const fileContent = fs.readFileSync(file);
+    const fileContent = await fs.readFile(file);
     const data = await pdf(fileContent);
     return data.text;
   } catch (error) {
@@ -6,6 +6,7 @@ import { Logger } from "../lib/logger";

 dotenv.config();

+
 export async function fireEngineMap(
   q: string,
   options: {
@@ -41,11 +42,12 @@ export async function fireEngineMap(
     url: `${process.env.FIRE_ENGINE_BETA_URL}/search`,
     headers: {
       "Content-Type": "application/json",
+      "X-Disable-Cache": "true"
     },
     data: data,
   };
   const response = await axios(config);
-  if (response && response) {
+  if (response && response.data) {
     return response.data;
   } else {
     return [];
@@ -2,6 +2,7 @@ import { Logger } from "../../src/lib/logger";
 import { SearchResult } from "../../src/lib/entities";
 import { googleSearch } from "./googlesearch";
 import { fireEngineMap } from "./fireEngine";
+import { searchapi_search } from "./searchapi";
 import { serper_search } from "./serper";

 export async function search({
@@ -30,7 +31,16 @@ export async function search({
   timeout?: number;
 }): Promise<SearchResult[]> {
   try {
+    if (process.env.SEARCHAPI_API_KEY) {
+      return await searchapi_search(query, {
+        num_results,
+        tbs,
+        filter,
+        lang,
+        country,
+        location
+      });
+    }
     if (process.env.SERPER_API_KEY) {
       return await serper_search(query, {
         num_results,
60  apps/api/src/search/searchapi.ts  Normal file
@@ -0,0 +1,60 @@
+import axios from "axios";
+import dotenv from "dotenv";
+import { SearchResult } from "../../src/lib/entities";
+
+dotenv.config();
+
+interface SearchOptions {
+  tbs?: string;
+  filter?: string;
+  lang?: string;
+  country?: string;
+  location?: string;
+  num_results: number;
+  page?: number;
+}
+
+export async function searchapi_search(q: string, options: SearchOptions): Promise<SearchResult[]> {
+  const params = {
+    q: q,
+    hl: options.lang,
+    gl: options.country,
+    location: options.location,
+    num: options.num_results,
+    page: options.page ?? 1,
+    engine: process.env.SEARCHAPI_ENGINE || "google",
+  };
+
+  const url = `https://www.searchapi.io/api/v1/search`;
+
+  try {
+    const response = await axios.get(url, {
+      headers: {
+        "Authorization": `Bearer ${process.env.SEARCHAPI_API_KEY}`,
+        "Content-Type": "application/json",
+        "X-SearchApi-Source": "Firecrawl",
+      },
+      params: params,
+    });
+
+    if (response.status === 401) {
+      throw new Error("Unauthorized. Please check your API key.");
+    }
+
+    const data = response.data;
+
+    if (data && Array.isArray(data.organic_results)) {
+      return data.organic_results.map((a: any) => ({
+        url: a.link,
+        title: a.title,
+        description: a.snippet,
+      }));
+    } else {
+      return [];
+    }
+  } catch (error) {
+    console.error(`There was an error searching for content: ${error.message}`);
+    return [];
+  }
+}
176  apps/api/src/services/billing/auto_charge.ts  Normal file
@@ -0,0 +1,176 @@
|
||||||
|
// Import necessary dependencies and types
|
||||||
|
import { AuthCreditUsageChunk } from "../../controllers/v1/types";
|
||||||
|
import { getACUC, setCachedACUC } from "../../controllers/auth";
|
||||||
|
import { redlock } from "../redlock";
|
||||||
|
import { supabase_service } from "../supabase";
|
||||||
|
import { createPaymentIntent } from "./stripe";
|
||||||
|
import { issueCredits } from "./issue_credits";
|
||||||
|
import { sendNotification } from "../notification/email_notification";
|
||||||
|
import { NotificationType } from "../../types";
|
||||||
|
import { deleteKey, getValue, setValue } from "../redis";
|
||||||
|
import { sendSlackWebhook } from "../alerts/slack";
|
||||||
|
import { Logger } from "../../lib/logger";
|
||||||
|
|
||||||
|
// Define the number of credits to be added during auto-recharge
|
||||||
|
const AUTO_RECHARGE_CREDITS = 1000;
|
||||||
|
const AUTO_RECHARGE_COOLDOWN = 300; // 5 minutes in seconds
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempt to automatically charge a user's account when their credit balance falls below a threshold
|
||||||
|
* @param chunk The user's current usage data
|
||||||
|
* @param autoRechargeThreshold The credit threshold that triggers auto-recharge
|
||||||
|
*/
|
||||||
|
export async function autoCharge(
|
||||||
|
chunk: AuthCreditUsageChunk,
|
||||||
|
autoRechargeThreshold: number
|
||||||
|
): Promise<{ success: boolean; message: string; remainingCredits: number; chunk: AuthCreditUsageChunk }> {
|
||||||
|
const resource = `auto-recharge:${chunk.team_id}`;
|
||||||
|
const cooldownKey = `auto-recharge-cooldown:${chunk.team_id}`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Check if the team is in the cooldown period
|
||||||
|
// Another check to prevent race conditions, double charging - cool down of 5 minutes
|
||||||
|
const cooldownValue = await getValue(cooldownKey);
|
||||||
|
if (cooldownValue) {
|
||||||
|
Logger.info(`Auto-recharge for team ${chunk.team_id} is in cooldown period`);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
message: "Auto-recharge is in cooldown period",
|
||||||
|
remainingCredits: chunk.remaining_credits,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use a distributed lock to prevent concurrent auto-charge attempts
|
||||||
|
return await redlock.using([resource], 5000, async (signal) : Promise<{ success: boolean; message: string; remainingCredits: number; chunk: AuthCreditUsageChunk }> => {
|
||||||
|
// Recheck the condition inside the lock to prevent race conditions
|
||||||
|
const updatedChunk = await getACUC(chunk.api_key, false, false);
|
||||||
|
if (
|
||||||
|
updatedChunk &&
|
||||||
|
updatedChunk.remaining_credits < autoRechargeThreshold
|
||||||
|
) {
|
||||||
|
if (chunk.sub_user_id) {
|
||||||
|
// Fetch the customer's Stripe information
|
||||||
|
const { data: customer, error: customersError } =
|
||||||
|
await supabase_service
|
||||||
|
.from("customers")
|
||||||
|
.select("id, stripe_customer_id")
|
||||||
|
.eq("id", chunk.sub_user_id)
|
||||||
|
.single();
|
||||||
|
|
||||||
|
if (customersError) {
|
||||||
|
Logger.error(`Error fetching customer data: ${customersError}`);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
message: "Error fetching customer data",
|
||||||
|
remainingCredits: chunk.remaining_credits,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (customer && customer.stripe_customer_id) {
|
||||||
|
let issueCreditsSuccess = false;
|
||||||
|
// Attempt to create a payment intent
|
||||||
|
const paymentStatus = await createPaymentIntent(
|
||||||
|
chunk.team_id,
|
||||||
|
customer.stripe_customer_id
|
||||||
|
);
|
||||||
|
|
||||||
|
// If payment is successful or requires further action, issue credits
|
||||||
|
if (
|
||||||
|
paymentStatus.return_status === "succeeded" ||
|
||||||
|
paymentStatus.return_status === "requires_action"
|
||||||
|
) {
|
||||||
|
issueCreditsSuccess = await issueCredits(
|
||||||
|
chunk.team_id,
|
||||||
|
AUTO_RECHARGE_CREDITS
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record the auto-recharge transaction
|
||||||
|
await supabase_service.from("auto_recharge_transactions").insert({
|
||||||
|
team_id: chunk.team_id,
|
||||||
|
initial_payment_status: paymentStatus.return_status,
|
||||||
|
credits_issued: issueCreditsSuccess ? AUTO_RECHARGE_CREDITS : 0,
|
||||||
|
stripe_charge_id: paymentStatus.charge_id,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Send a notification if credits were successfully issued
|
||||||
|
if (issueCreditsSuccess) {
|
||||||
|
await sendNotification(
|
||||||
|
chunk.team_id,
|
||||||
|
NotificationType.AUTO_RECHARGE_SUCCESS,
|
||||||
|
chunk.sub_current_period_start,
|
||||||
|
chunk.sub_current_period_end,
|
||||||
|
chunk,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
|
// Set cooldown period
|
||||||
|
await setValue(cooldownKey, 'true', AUTO_RECHARGE_COOLDOWN);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset ACUC cache to reflect the new credit balance
|
||||||
|
const cacheKeyACUC = `acuc_${chunk.api_key}`;
|
||||||
|
await deleteKey(cacheKeyACUC);
|
||||||
|
|
||||||
|
if (process.env.SLACK_ADMIN_WEBHOOK_URL) {
|
||||||
|
const webhookCooldownKey = `webhook_cooldown_${chunk.team_id}`;
|
||||||
|
const isInCooldown = await getValue(webhookCooldownKey);
|
||||||
|
|
||||||
|
if (!isInCooldown) {
|
||||||
|
sendSlackWebhook(
|
||||||
|
`Auto-recharge: Team ${chunk.team_id}. ${AUTO_RECHARGE_CREDITS} credits added. Payment status: ${paymentStatus.return_status}.`,
|
||||||
|
false,
|
||||||
|
process.env.SLACK_ADMIN_WEBHOOK_URL
|
||||||
|
).catch((error) => {
|
||||||
|
Logger.debug(`Error sending slack notification: ${error}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Set cooldown for 1 hour
|
||||||
|
await setValue(webhookCooldownKey, 'true', 60 * 60);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
message: "Auto-recharge successful",
|
||||||
|
remainingCredits: chunk.remaining_credits + AUTO_RECHARGE_CREDITS,
|
||||||
|
chunk: {...chunk, remaining_credits: chunk.remaining_credits + AUTO_RECHARGE_CREDITS},
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
Logger.error("No Stripe customer ID found for user");
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
message: "No Stripe customer ID found for user",
|
||||||
|
remainingCredits: chunk.remaining_credits,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Logger.error("No sub_user_id found in chunk");
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
message: "No sub_user_id found in chunk",
|
||||||
|
remainingCredits: chunk.remaining_credits,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
message: "No need to auto-recharge",
|
||||||
|
remainingCredits: chunk.remaining_credits,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error(`Failed to acquire lock for auto-recharge: ${error}`);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
message: "Failed to acquire lock for auto-recharge",
|
||||||
|
remainingCredits: chunk.remaining_credits,
|
||||||
|
chunk,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
|
@@ -6,24 +6,40 @@ import { Logger } from "../../lib/logger";
 import * as Sentry from "@sentry/node";
 import { AuthCreditUsageChunk } from "../../controllers/v1/types";
 import { getACUC, setCachedACUC } from "../../controllers/auth";
+import { issueCredits } from "./issue_credits";
+import { redlock } from "../redlock";
+import { autoCharge } from "./auto_charge";
+import { getValue, setValue } from "../redis";

 const FREE_CREDITS = 500;

 /**
  * If you do not know the subscription_id in the current context, pass subscription_id as undefined.
  */
-export async function billTeam(team_id: string, subscription_id: string | null | undefined, credits: number) {
+export async function billTeam(
+  team_id: string,
+  subscription_id: string | null | undefined,
+  credits: number
+) {
   return withAuth(supaBillTeam)(team_id, subscription_id, credits);
 }
-export async function supaBillTeam(team_id: string, subscription_id: string, credits: number) {
+export async function supaBillTeam(
+  team_id: string,
+  subscription_id: string,
+  credits: number
+) {
   if (team_id === "preview") {
     return { success: true, message: "Preview team, no credits used" };
   }
   Logger.info(`Billing team ${team_id} for ${credits} credits`);

-  const { data, error } =
-    await supabase_service.rpc("bill_team", { _team_id: team_id, sub_id: subscription_id ?? null, fetch_subscription: subscription_id === undefined, credits });
+  const { data, error } = await supabase_service.rpc("bill_team", {
+    _team_id: team_id,
+    sub_id: subscription_id ?? null,
+    fetch_subscription: subscription_id === undefined,
+    credits,
+  });

   if (error) {
     Sentry.captureException(error);
     Logger.error("Failed to bill team: " + JSON.stringify(error));
@@ -31,53 +47,126 @@ export async function supaBillTeam(team_id: string, subscription_id: string, cre
   }

   (async () => {
-    for (const apiKey of (data ?? []).map(x => x.api_key)) {
-      await setCachedACUC(apiKey, acuc => (acuc ? {
-        ...acuc,
-        credits_used: acuc.credits_used + credits,
-        adjusted_credits_used: acuc.adjusted_credits_used + credits,
-        remaining_credits: acuc.remaining_credits - credits,
-      } : null));
+    for (const apiKey of (data ?? []).map((x) => x.api_key)) {
+      await setCachedACUC(apiKey, (acuc) =>
+        acuc
+          ? {
+              ...acuc,
+              credits_used: acuc.credits_used + credits,
+              adjusted_credits_used: acuc.adjusted_credits_used + credits,
+              remaining_credits: acuc.remaining_credits - credits,
+            }
+          : null
+      );
     }
   })();
 }

-export async function checkTeamCredits(chunk: AuthCreditUsageChunk, team_id: string, credits: number) {
-  return withAuth(supaCheckTeamCredits)(chunk, team_id, credits);
+export async function checkTeamCredits(
+  chunk: AuthCreditUsageChunk,
+  team_id: string,
+  credits: number
+): Promise<{ success: boolean; message: string; remainingCredits: number; chunk: AuthCreditUsageChunk }> {
+  const result = await withAuth(supaCheckTeamCredits)(chunk, team_id, credits);
+  return {
+    success: result.success,
+    message: result.message,
+    remainingCredits: result.remainingCredits,
+    chunk: chunk // Ensure chunk is always returned
+  };
 }

 // if team has enough credits for the operation, return true, else return false
-export async function supaCheckTeamCredits(chunk: AuthCreditUsageChunk, team_id: string, credits: number) {
+export async function supaCheckTeamCredits(
+  chunk: AuthCreditUsageChunk,
+  team_id: string,
+  credits: number
+) {
   // WARNING: chunk will be null if team_id is preview -- do not perform operations on it under ANY circumstances - mogery
   if (team_id === "preview") {
-    return { success: true, message: "Preview team, no credits used", remainingCredits: Infinity };
+    return {
+      success: true,
+      message: "Preview team, no credits used",
+      remainingCredits: Infinity,
+    };
   }

   const creditsWillBeUsed = chunk.adjusted_credits_used + credits;

+  // In case chunk.price_credits is undefined, set it to a large number to avoid mistakes
+  const totalPriceCredits = chunk.total_credits_sum ?? 100000000;
   // Removal of + credits
-  const creditUsagePercentage = creditsWillBeUsed / chunk.price_credits;
+  const creditUsagePercentage = chunk.adjusted_credits_used / totalPriceCredits;
+
+  let isAutoRechargeEnabled = false, autoRechargeThreshold = 1000;
+  const cacheKey = `team_auto_recharge_${team_id}`;
+  let cachedData = await getValue(cacheKey);
+  if (cachedData) {
+    const parsedData = JSON.parse(cachedData);
+    isAutoRechargeEnabled = parsedData.auto_recharge;
+    autoRechargeThreshold = parsedData.auto_recharge_threshold;
+  } else {
+    const { data, error } = await supabase_service
+      .from("teams")
+      .select("auto_recharge, auto_recharge_threshold")
+      .eq("id", team_id)
+      .single();
+
+    if (data) {
+      isAutoRechargeEnabled = data.auto_recharge;
+      autoRechargeThreshold = data.auto_recharge_threshold;
+      await setValue(cacheKey, JSON.stringify(data), 300); // Cache for 5 minutes (300 seconds)
+    }
+  }
+
+  if (isAutoRechargeEnabled && chunk.remaining_credits < autoRechargeThreshold) {
+    const autoChargeResult = await autoCharge(chunk, autoRechargeThreshold);
+    if (autoChargeResult.success) {
+      return {
+        success: true,
+        message: autoChargeResult.message,
+        remainingCredits: autoChargeResult.remainingCredits,
+        chunk: autoChargeResult.chunk,
+      };
+    }
+  }

   // Compare the adjusted total credits used with the credits allowed by the plan
-  if (creditsWillBeUsed > chunk.price_credits) {
-    sendNotification(
-      team_id,
-      NotificationType.LIMIT_REACHED,
-      chunk.sub_current_period_start,
-      chunk.sub_current_period_end
-    );
-    return { success: false, message: "Insufficient credits. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing.", remainingCredits: chunk.remaining_credits, chunk };
+  if (creditsWillBeUsed > totalPriceCredits) {
+    // Only notify if their actual credits (not what they will use) used is greater than the total price credits
+    if (chunk.adjusted_credits_used > totalPriceCredits) {
+      sendNotification(
+        team_id,
+        NotificationType.LIMIT_REACHED,
+        chunk.sub_current_period_start,
+        chunk.sub_current_period_end,
+        chunk
+      );
+    }
+    return {
+      success: false,
+      message:
+        "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing.",
+      remainingCredits: chunk.remaining_credits,
+      chunk,
+    };
   } else if (creditUsagePercentage >= 0.8 && creditUsagePercentage < 1) {
     // Send email notification for approaching credit limit
     sendNotification(
       team_id,
       NotificationType.APPROACHING_LIMIT,
       chunk.sub_current_period_start,
-      chunk.sub_current_period_end
+      chunk.sub_current_period_end,
+      chunk
     );
   }

-  return { success: true, message: "Sufficient credits available", remainingCredits: chunk.remaining_credits, chunk };
+  return {
+    success: true,
+    message: "Sufficient credits available",
+    remainingCredits: chunk.remaining_credits,
+    chunk,
+  };
 }

 // Count the total credits used by a team within the current billing period and return the remaining credits.
20  apps/api/src/services/billing/issue_credits.ts  Normal file
@@ -0,0 +1,20 @@
+import { Logger } from "../../lib/logger";
+import { supabase_service } from "../supabase";
+
+export async function issueCredits(team_id: string, credits: number) {
+  // Add an entry to supabase coupons
+  const { data, error } = await supabase_service.from("coupons").insert({
+    team_id: team_id,
+    credits: credits,
+    status: "active",
+    // indicates that this coupon was issued from auto recharge
+    from_auto_recharge: true,
+  });
+
+  if (error) {
+    Logger.error(`Error adding coupon: ${error}`);
+    return false;
+  }
+
+  return true;
+}
56  apps/api/src/services/billing/stripe.ts  Normal file
@@ -0,0 +1,56 @@
|
||||||
|
import { Logger } from "../../lib/logger";
|
||||||
|
import Stripe from "stripe";
|
||||||
|
|
||||||
|
const stripe = new Stripe(process.env.STRIPE_SECRET_KEY ?? "");
|
||||||
|
|
||||||
|
async function getCustomerDefaultPaymentMethod(customerId: string) {
|
||||||
|
const paymentMethods = await stripe.customers.listPaymentMethods(customerId, {
|
||||||
|
limit: 3,
|
||||||
|
});
|
||||||
|
return paymentMethods.data[0] ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
type ReturnStatus = "succeeded" | "requires_action" | "failed";
|
||||||
|
export async function createPaymentIntent(
|
||||||
|
team_id: string,
|
||||||
|
customer_id: string
|
||||||
|
): Promise<{ return_status: ReturnStatus; charge_id: string }> {
|
||||||
|
try {
|
||||||
|
const defaultPaymentMethod = await getCustomerDefaultPaymentMethod(customer_id);
|
||||||
|
if (!defaultPaymentMethod) {
|
||||||
|
Logger.error(`No default payment method found for customer: ${customer_id}`);
|
||||||
|
return { return_status: "failed", charge_id: "" };
|
||||||
|
}
|
||||||
|
const paymentIntent = await stripe.paymentIntents.create({
|
||||||
|
amount: 1100,
|
||||||
|
currency: "usd",
|
||||||
|
customer: customer_id,
|
||||||
|
description: "Firecrawl: Auto re-charge of 1000 credits",
|
||||||
|
payment_method_types: [defaultPaymentMethod?.type ?? "card"],
|
||||||
|
payment_method: defaultPaymentMethod?.id,
|
||||||
|
off_session: true,
|
||||||
|
confirm: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (paymentIntent.status === "succeeded") {
|
||||||
|
Logger.info(`Payment succeeded for team: ${team_id}`);
|
||||||
|
return { return_status: "succeeded", charge_id: paymentIntent.id };
|
||||||
|
} else if (
|
||||||
|
paymentIntent.status === "requires_action" ||
|
||||||
|
paymentIntent.status === "processing" ||
|
||||||
|
paymentIntent.status === "requires_capture"
|
||||||
|
) {
|
||||||
|
Logger.warn(`Payment requires further action for team: ${team_id}`);
|
||||||
|
return { return_status: "requires_action", charge_id: paymentIntent.id };
|
||||||
|
} else {
|
||||||
|
Logger.error(`Payment failed for team: ${team_id}`);
|
||||||
|
return { return_status: "failed", charge_id: paymentIntent.id };
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error(
|
||||||
|
`Failed to create or confirm PaymentIntent for team: ${team_id}`
|
||||||
|
);
|
||||||
|
console.error(error);
|
||||||
|
return { return_status: "failed", charge_id: "" };
|
||||||
|
}
|
||||||
|
}
|
|
@@ -70,7 +70,9 @@ export async function logJob(job: FirecrawlJob) {
         retry: job.retry,
       },
     };
-    posthog.capture(phLog);
+    if(job.mode !== "single_urls") {
+      posthog.capture(phLog);
+    }
   }
   if (error) {
     Logger.error(`Error logging job: ${error.message}`);
@@ -3,6 +3,9 @@ import { withAuth } from "../../lib/withAuth";
 import { Resend } from "resend";
 import { NotificationType } from "../../types";
 import { Logger } from "../../../src/lib/logger";
+import { sendSlackWebhook } from "../alerts/slack";
+import { getNotificationString } from "./notification_string";
+import { AuthCreditUsageChunk } from "../../controllers/v1/types";

 const emailTemplates: Record<
   NotificationType,
@@ -21,25 +24,37 @@ const emailTemplates: Record<
     subject: "Rate Limit Reached - Firecrawl",
     html: "Hey there,<br/><p>You've hit one of the Firecrawl endpoint's rate limit! Take a breather and try again in a few moments. If you need higher rate limits, consider upgrading your plan. Check out our <a href='https://firecrawl.dev/pricing'>pricing page</a> for more info.</p><p>If you have any questions, feel free to reach out to us at <a href='mailto:hello@firecrawl.com'>hello@firecrawl.com</a></p><br/>Thanks,<br/>Firecrawl Team<br/><br/>Ps. this email is only sent once every 7 days if you reach a rate limit.",
   },
+  [NotificationType.AUTO_RECHARGE_SUCCESS]: {
+    subject: "Auto recharge successful - Firecrawl",
+    html: "Hey there,<br/><p>Your account was successfully recharged with 1000 credits because your remaining credits were below the threshold. Consider upgrading your plan at <a href='https://firecrawl.dev/pricing'>firecrawl.dev/pricing</a> to avoid hitting the limit.</p><br/>Thanks,<br/>Firecrawl Team<br/>",
+  },
+  [NotificationType.AUTO_RECHARGE_FAILED]: {
+    subject: "Auto recharge failed - Firecrawl",
+    html: "Hey there,<br/><p>Your auto recharge failed. Please try again manually. If the issue persists, please reach out to us at <a href='mailto:hello@firecrawl.com'>hello@firecrawl.com</a></p><br/>Thanks,<br/>Firecrawl Team<br/>",
+  },
 };

 export async function sendNotification(
   team_id: string,
   notificationType: NotificationType,
   startDateString: string,
-  endDateString: string
+  endDateString: string,
+  chunk: AuthCreditUsageChunk,
+  bypassRecentChecks: boolean = false
 ) {
   return withAuth(sendNotificationInternal)(
     team_id,
     notificationType,
     startDateString,
-    endDateString
+    endDateString,
+    chunk,
+    bypassRecentChecks
   );
 }

-async function sendEmailNotification(
+export async function sendEmailNotification(
   email: string,
-  notificationType: NotificationType
+  notificationType: NotificationType,
 ) {
   const resend = new Resend(process.env.RESEND_API_KEY);

@ -66,80 +81,95 @@ export async function sendNotificationInternal(
|
||||||
team_id: string,
|
team_id: string,
|
||||||
notificationType: NotificationType,
|
notificationType: NotificationType,
|
||||||
startDateString: string,
|
startDateString: string,
|
||||||
endDateString: string
|
endDateString: string,
|
||||||
|
chunk: AuthCreditUsageChunk,
|
||||||
|
bypassRecentChecks: boolean = false
|
||||||
): Promise<{ success: boolean }> {
|
): Promise<{ success: boolean }> {
|
||||||
if (team_id === "preview") {
|
if (team_id === "preview") {
|
||||||
return { success: true };
|
return { success: true };
|
||||||
}
|
}
|
||||||
|
|
||||||
const fifteenDaysAgo = new Date();
|
if (!bypassRecentChecks) {
|
||||||
fifteenDaysAgo.setDate(fifteenDaysAgo.getDate() - 15);
|
const fifteenDaysAgo = new Date();
|
||||||
|
fifteenDaysAgo.setDate(fifteenDaysAgo.getDate() - 15);
|
||||||
|
|
||||||
const { data, error } = await supabase_service
|
const { data, error } = await supabase_service
|
||||||
.from("user_notifications")
|
|
||||||
.select("*")
|
|
||||||
.eq("team_id", team_id)
|
|
||||||
.eq("notification_type", notificationType)
|
|
||||||
.gte("sent_date", fifteenDaysAgo.toISOString());
|
|
||||||
|
|
||||||
if (error) {
|
|
||||||
Logger.debug(`Error fetching notifications: ${error}`);
|
|
||||||
return { success: false };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data.length !== 0) {
|
|
||||||
// Logger.debug(`Notification already sent for team_id: ${team_id} and notificationType: ${notificationType} in the last 15 days`);
|
|
||||||
return { success: false };
|
|
||||||
}
|
|
||||||
|
|
||||||
const { data: recentData, error: recentError } = await supabase_service
|
|
||||||
.from("user_notifications")
|
|
||||||
.select("*")
|
|
||||||
.eq("team_id", team_id)
|
|
||||||
.eq("notification_type", notificationType)
|
|
||||||
.gte("sent_date", startDateString)
|
|
||||||
.lte("sent_date", endDateString);
|
|
||||||
|
|
||||||
if (recentError) {
|
|
||||||
Logger.debug(`Error fetching recent notifications: ${recentError}`);
|
|
||||||
return { success: false };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (recentData.length !== 0) {
|
|
||||||
// Logger.debug(`Notification already sent for team_id: ${team_id} and notificationType: ${notificationType} within the specified date range`);
|
|
||||||
return { success: false };
|
|
||||||
} else {
|
|
||||||
console.log(`Sending notification for team_id: ${team_id} and notificationType: ${notificationType}`);
|
|
||||||
// get the emails from the user with the team_id
|
|
||||||
const { data: emails, error: emailsError } = await supabase_service
|
|
||||||
.from("users")
|
|
||||||
.select("email")
|
|
||||||
.eq("team_id", team_id);
|
|
||||||
|
|
||||||
if (emailsError) {
|
|
||||||
Logger.debug(`Error fetching emails: ${emailsError}`);
|
|
||||||
return { success: false };
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const email of emails) {
|
|
||||||
await sendEmailNotification(email.email, notificationType);
|
|
||||||
}
|
|
||||||
|
|
||||||
const { error: insertError } = await supabase_service
|
|
||||||
.from("user_notifications")
|
.from("user_notifications")
|
||||||
.insert([
|
.select("*")
|
||||||
{
|
.eq("team_id", team_id)
|
||||||
team_id: team_id,
|
.eq("notification_type", notificationType)
|
||||||
notification_type: notificationType,
|
.gte("sent_date", fifteenDaysAgo.toISOString());
|
||||||
sent_date: new Date().toISOString(),
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
|
|
||||||
if (insertError) {
|
if (error) {
|
||||||
Logger.debug(`Error inserting notification record: ${insertError}`);
|
Logger.debug(`Error fetching notifications: ${error}`);
|
||||||
return { success: false };
|
return { success: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
return { success: true };
|
if (data.length !== 0) {
|
||||||
|
return { success: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: observation: Free credits people are not receiving notifications
|
||||||
|
|
||||||
|
const { data: recentData, error: recentError } = await supabase_service
|
||||||
|
.from("user_notifications")
|
||||||
|
.select("*")
|
||||||
|
.eq("team_id", team_id)
|
||||||
|
.eq("notification_type", notificationType)
|
||||||
|
.gte("sent_date", startDateString)
|
||||||
|
.lte("sent_date", endDateString);
|
||||||
|
|
||||||
|
if (recentError) {
|
||||||
|
Logger.debug(`Error fetching recent notifications: ${recentError.message}`);
|
||||||
|
return { success: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (recentData.length !== 0) {
|
||||||
|
return { success: false };
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`Sending notification for team_id: ${team_id} and notificationType: ${notificationType}`);
|
||||||
|
// get the emails from the user with the team_id
|
||||||
|
const { data: emails, error: emailsError } = await supabase_service
|
||||||
|
.from("users")
|
||||||
|
.select("email")
|
||||||
|
.eq("team_id", team_id);
|
||||||
|
|
||||||
|
if (emailsError) {
|
||||||
|
Logger.debug(`Error fetching emails: ${emailsError}`);
|
||||||
|
return { success: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const email of emails) {
|
||||||
|
await sendEmailNotification(email.email, notificationType);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { error: insertError } = await supabase_service
|
||||||
|
.from("user_notifications")
|
||||||
|
.insert([
|
||||||
|
{
|
||||||
|
team_id: team_id,
|
||||||
|
notification_type: notificationType,
|
||||||
|
sent_date: new Date().toISOString(),
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (process.env.SLACK_ADMIN_WEBHOOK_URL && emails.length > 0) {
|
||||||
|
sendSlackWebhook(
|
||||||
|
`${getNotificationString(notificationType)}: Team ${team_id}, with email ${emails[0].email}. Number of credits used: ${chunk.adjusted_credits_used} | Number of credits in the plan: ${chunk.price_credits}`,
|
||||||
|
false,
|
||||||
|
process.env.SLACK_ADMIN_WEBHOOK_URL
|
||||||
|
).catch((error) => {
|
||||||
|
Logger.debug(`Error sending slack notification: ${error}`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (insertError) {
|
||||||
|
Logger.debug(`Error inserting notification record: ${insertError}`);
|
||||||
|
return { success: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { success: true };
|
||||||
}
|
}
|
||||||
21  apps/api/src/services/notification/notification_string.ts  Normal file
@@ -0,0 +1,21 @@
+import { NotificationType } from "../../types";
+
+// depending on the notification type, return the appropriate string
+export function getNotificationString(
+  notificationType: NotificationType
+): string {
+  switch (notificationType) {
+    case NotificationType.APPROACHING_LIMIT:
+      return "Approaching the limit (80%)";
+    case NotificationType.LIMIT_REACHED:
+      return "Limit reached (100%)";
+    case NotificationType.RATE_LIMIT_REACHED:
+      return "Rate limit reached";
+    case NotificationType.AUTO_RECHARGE_SUCCESS:
+      return "Auto-recharge successful";
+    case NotificationType.AUTO_RECHARGE_FAILED:
+      return "Auto-recharge failed";
+    default:
+      return "Unknown notification type";
+  }
+}
@@ -329,7 +329,8 @@ async function processJob(job: Job, token: string) {
         job.id as string,
         data,
         job.data.webhook,
-        job.data.v1
+        job.data.v1,
+        job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
       );
     }
     if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
@@ -339,7 +340,7 @@ async function processJob(job: Job, token: string) {
         data,
         job.data.webhook,
         job.data.v1,
-        "crawl.page",
+        job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
         true
       );
     }
@@ -365,7 +366,7 @@ async function processJob(job: Job, token: string) {

     const sc = (await getCrawl(job.data.crawl_id)) as StoredCrawl;

-    if (!job.data.sitemapped) {
+    if (!job.data.sitemapped && job.data.crawlerOptions !== null) {
       if (!sc.cancelled) {
         const crawler = crawlToCrawler(job.data.crawl_id, sc);

@@ -415,8 +416,6 @@ async function processJob(job: Job, token: string) {
     }

     if (await finishCrawl(job.data.crawl_id)) {
-
-
       if (!job.data.v1) {
         const jobIDs = await getCrawlJobs(job.data.crawl_id);

@@ -439,7 +438,7 @@ async function processJob(job: Job, token: string) {
           docs: [],
           time_taken: (Date.now() - sc.createdAt) / 1000,
           team_id: job.data.team_id,
-          mode: "crawl",
+          mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
           url: sc.originUrl,
           crawlerOptions: sc.crawlerOptions,
           pageOptions: sc.pageOptions,
@@ -469,7 +468,7 @@ async function processJob(job: Job, token: string) {
           data,
           job.data.webhook,
           job.data.v1,
-          "crawl.completed"
+          job.data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed"
         );
       }
     } else {
@@ -487,7 +486,7 @@ async function processJob(job: Job, token: string) {
          [],
          job.data.webhook,
          job.data.v1,
-         "crawl.completed"
+         job.data.crawlerOptions !== null ? "crawl.completed" : "batch_scrape.completed"
        );
      }

@@ -499,8 +498,8 @@ async function processJob(job: Job, token: string) {
         docs: [],
         time_taken: (Date.now() - sc.createdAt) / 1000,
         team_id: job.data.team_id,
-        mode: "crawl",
-        url: sc.originUrl,
+        mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
+        url: sc?.originUrl ?? (job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
         crawlerOptions: sc.crawlerOptions,
         pageOptions: sc.pageOptions,
         origin: job.data.origin,
@@ -556,7 +555,8 @@ async function processJob(job: Job, token: string) {
         job.data.crawl_id ?? (job.id as string),
         data,
         job.data.webhook,
-        job.data.v1
+        job.data.v1,
+        job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
       );
     }
     // if (job.data.v1) {
@@ -605,7 +605,7 @@ async function processJob(job: Job, token: string) {
       docs: [],
       time_taken: 0,
       team_id: job.data.team_id,
-      mode: "crawl",
+      mode: job.data.crawlerOptions !== null ? "crawl" : "batch_scrape",
       url: sc ? sc.originUrl : job.data.url,
       crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
       pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
@@ -130,6 +130,8 @@ export enum NotificationType {
   APPROACHING_LIMIT = "approachingLimit",
   LIMIT_REACHED = "limitReached",
   RATE_LIMIT_REACHED = "rateLimitReached",
+  AUTO_RECHARGE_SUCCESS = "autoRechargeSuccess",
+  AUTO_RECHARGE_FAILED = "autoRechargeFailed",
 }

 export type ScrapeLog = {
@@ -159,4 +161,4 @@ export type PlanType =
   | "";


-export type WebhookEventType = "crawl.page" | "crawl.started" | "crawl.completed" | "crawl.failed";
+export type WebhookEventType = "crawl.page" | "batch_scrape.page" | "crawl.started" | "crawl.completed" | "batch_scrape.completed" | "crawl.failed";
@@ -6,7 +6,7 @@
     "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
     "contact": {
       "name": "Firecrawl Support",
-      "url": "https://firecrawl.dev",
+      "url": "https://firecrawl.dev/support",
       "email": "support@firecrawl.dev"
     }
   },
@ -97,6 +97,127 @@
|
||||||
"description": "The prompt to use for the extraction without a schema (Optional)"
|
"description": "The prompt to use for the extraction without a schema (Optional)"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"actions": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Actions to perform on the page before grabbing the content",
|
||||||
|
"items": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"title": "Wait",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["wait"],
|
||||||
|
"description": "Wait for a specified amount of milliseconds"
|
||||||
|
},
|
||||||
|
"milliseconds": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"description": "Number of milliseconds to wait"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["type", "milliseconds"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"title": "Screenshot",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["screenshot"],
|
||||||
|
"description": "Take a screenshot"
|
||||||
|
},
|
||||||
|
"fullPage": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Should the screenshot be full-page or viewport sized?",
|
||||||
|
"default": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["type"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"title": "Click",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["click"],
|
||||||
|
"description": "Click on an element"
|
||||||
|
},
|
||||||
|
"selector": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Query selector to find the element by",
|
||||||
|
"example": "#load-more-button"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["type", "selector"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"title": "Write text",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["write"],
|
||||||
|
"description": "Write text into an input field"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Text to type",
|
||||||
|
"example": "Hello, world!"
|
||||||
|
},
|
||||||
|
"selector": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Query selector for the input field",
|
||||||
|
"example": "#search-input"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["type", "text", "selector"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"title": "Press a key",
|
||||||
|
"description": "Press a key on the page. See https://asawicki.info/nosense/doc/devices/keyboard/key_codes.html for key codes.",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["press"],
|
||||||
|
"description": "Press a key on the page"
|
||||||
|
},
|
||||||
|
"key": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Key to press",
|
||||||
|
"example": "Enter"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["type", "key"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"title": "Scroll",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["scroll"],
|
||||||
|
"description": "Scroll the page"
|
||||||
|
},
|
||||||
|
"direction": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["up", "down"],
|
||||||
|
"description": "Direction to scroll"
|
||||||
|
},
|
||||||
|
"amount": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Amount to scroll in pixels",
|
||||||
|
"minimum": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["type", "direction"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["url"]
|
"required": ["url"]
|
||||||
|
@@ -341,14 +462,14 @@
             "items": {
               "type": "string"
             },
-            "description": "URL patterns to exclude"
+            "description": "Specifies URL patterns to exclude from the crawl by comparing website paths against the provided regex patterns. For example, if you set \"excludePaths\": [\"blog/*\"] for the base URL firecrawl.dev, any results matching that pattern will be excluded, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."
           },
           "includePaths": {
             "type": "array",
             "items": {
               "type": "string"
             },
-            "description": "URL patterns to include"
+            "description": "Specifies URL patterns to include in the crawl by comparing website paths against the provided regex patterns. Only the paths that match the specified patterns will be included in the response. For example, if you set \"includePaths\": [\"blog/*\"] for the base URL firecrawl.dev, only results matching that pattern will be included, such as https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap."
           },
           "maxDepth": {
             "type": "integer",
@ -362,7 +483,7 @@
|
||||||
},
|
},
|
||||||
"limit": {
|
"limit": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Maximum number of pages to crawl",
|
"description": "Maximum number of pages to crawl. Default limit is 10000.",
|
||||||
"default": 10
|
"default": 10
|
||||||
},
|
},
|
||||||
"allowBackwardLinks": {
|
"allowBackwardLinks": {
|
||||||
|
@ -513,7 +634,7 @@
|
||||||
},
|
},
|
||||||
"search": {
|
"search": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Search query to use for mapping. During the Alpha phase, the 'smart' part of the search functionality is limited to 100 search results. However, if map finds more results, there is no limit applied."
|
"description": "Search query to use for mapping. During the Alpha phase, the 'smart' part of the search functionality is limited to 1000 search results. However, if map finds more results, there is no limit applied."
|
||||||
},
|
},
|
||||||
"ignoreSitemap": {
|
"ignoreSitemap": {
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
|
@@ -642,6 +763,21 @@
          },
          "description": "List of links on the page if `links` is in `formats`"
        },
        "actions": {
          "type": "object",
          "nullable": true,
          "description": "Results of the actions specified in the `actions` parameter. Only present if the `actions` parameter was provided in the request",
          "properties": {
            "screenshots": {
              "type": "array",
              "description": "Screenshot URLs, in the same order as the screenshot actions provided.",
              "items": {
                "type": "string",
                "format": "url"
              }
            }
          }
        },
        "metadata": {
          "type": "object",
          "properties": {
@@ -145,6 +145,46 @@ watch.addEventListener("done", state => {
});
```

### Batch scraping multiple URLs

To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the URLs to scrape and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.

```js
const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], {
  formats: ['markdown', 'html'],
})
```
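
The resolved value has the same shape as a completed batch scrape status, so, as a quick sketch (assuming the request succeeded and returned at least one document), the scraped content is available under `data`:

```js
// Each entry in `data` is a scraped document in the requested formats
console.log(batchScrapeResponse.data?.[0]?.markdown);
```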

#### Asynchronous batch scrape

To start a batch scrape asynchronously, use the `asyncBatchScrapeUrls` method. It takes the URLs to scrape and optional parameters as arguments. The `params` argument allows you to specify additional options for the scrape, such as the output formats. On success, it returns an ID that you can use to check the status of the batch scrape later.

```js
const asyncBatchScrapeResult = await app.asyncBatchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
```
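
A minimal sketch of checking on the job later, using the `checkBatchScrapeStatus` method added in this SDK (field names follow the `BatchScrapeStatusResponse` interface; this assumes the initiation call succeeded and returned an `id`):

```js
// Look the job up again by the ID returned above
const batchScrapeStatus = await app.checkBatchScrapeStatus(asyncBatchScrapeResult.id);
console.log(batchScrapeStatus.status); // "scraping" | "completed" | "failed" | "cancelled"
console.log(`${batchScrapeStatus.completed}/${batchScrapeStatus.total} pages scraped`);
```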

#### Batch scrape with WebSockets

To use batch scrape with WebSockets, use the `batchScrapeUrlsAndWatch` method. It takes the URLs to scrape and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.

```js
// Batch scrape multiple URLs with WebSockets:
const watch = await app.batchScrapeUrlsAndWatch(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });

watch.addEventListener("document", doc => {
  console.log("DOC", doc.detail);
});

watch.addEventListener("error", err => {
  console.error("ERR", err.detail.error);
});

watch.addEventListener("done", state => {
  console.log("DONE", state.detail.status);
});
```

## Error Handling

The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message. The examples above demonstrate how to handle these errors using `try/catch` blocks.
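
As a quick sketch of that pattern (wrapping the batch scrape call from above; any SDK method can be wrapped the same way):

```js
try {
  const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev'], { formats: ['markdown'] });
  console.log(batchScrapeResponse);
} catch (error) {
  console.error(error.message);
}
```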
@@ -1,6 +1,6 @@
{
-  "name": "firecrawl",
+  "name": "@mendable/firecrawl-js",
-  "version": "1.6.1",
+  "version": "1.7.2",
  "description": "JavaScript SDK for Firecrawl API",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -82,6 +82,10 @@ export interface CrawlScrapeOptions {
  onlyMainContent?: boolean;
  waitFor?: number;
  timeout?: number;
  location?: {
    country?: string;
    languages?: string[];
  };
}

export type Action = {
@@ -154,6 +158,17 @@ export interface CrawlResponse {
  error?: string;
}

/**
 * Response interface for batch scrape operations.
 * Defines the structure of the response received after initiating a batch scrape.
 */
export interface BatchScrapeResponse {
  id?: string;
  url?: string;
  success: true;
  error?: string;
}

/**
 * Response interface for job status checks.
 * Provides detailed status of a crawl job including progress and results.
@@ -169,6 +184,21 @@ export interface CrawlStatusResponse {
  data: FirecrawlDocument<undefined>[];
};

/**
 * Response interface for batch scrape job status checks.
 * Provides detailed status of a batch scrape job including progress and results.
 */
export interface BatchScrapeStatusResponse {
  success: true;
  status: "scraping" | "completed" | "failed" | "cancelled";
  completed: number;
  total: number;
  creditsUsed: number;
  expiresAt: Date;
  next?: string;
  data: FirecrawlDocument<undefined>[];
};

/**
 * Parameters for mapping operations.
 * Defines options for mapping URLs during a crawl.
@@ -493,6 +523,144 @@ export default class FirecrawlApp {
    return { success: false, error: "Internal server error." };
  }

  /**
   * Initiates a batch scrape job for multiple URLs using the Firecrawl API.
   * @param urls - The URLs to scrape.
   * @param params - Additional parameters for the scrape request.
   * @param pollInterval - Time in seconds for job status checks.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns The response from the batch scrape operation.
   */
  async batchScrapeUrls(
    urls: string[],
    params?: ScrapeParams,
    pollInterval: number = 2,
    idempotencyKey?: string
  ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
    const headers = this.prepareHeaders(idempotencyKey);
    let jsonData: any = { urls, ...(params ?? {}) };
    try {
      const response: AxiosResponse = await this.postRequest(
        this.apiUrl + `/v1/batch/scrape`,
        jsonData,
        headers
      );
      if (response.status === 200) {
        const id: string = response.data.id;
        return this.monitorJobStatus(id, headers, pollInterval);
      } else {
        this.handleError(response, "start batch scrape job");
      }
    } catch (error: any) {
      if (error.response?.data?.error) {
        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
      } else {
        throw new FirecrawlError(error.message, 500);
      }
    }
    return { success: false, error: "Internal server error." };
  }

  async asyncBatchScrapeUrls(
    urls: string[],
    params?: ScrapeParams,
    idempotencyKey?: string
  ): Promise<BatchScrapeResponse | ErrorResponse> {
    const headers = this.prepareHeaders(idempotencyKey);
    let jsonData: any = { urls, ...(params ?? {}) };
    try {
      const response: AxiosResponse = await this.postRequest(
        this.apiUrl + `/v1/batch/scrape`,
        jsonData,
        headers
      );
      if (response.status === 200) {
        return response.data;
      } else {
        this.handleError(response, "start batch scrape job");
      }
    } catch (error: any) {
      if (error.response?.data?.error) {
        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
      } else {
        throw new FirecrawlError(error.message, 500);
      }
    }
    return { success: false, error: "Internal server error." };
  }

  /**
   * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
   * @param urls - The URLs to scrape.
   * @param params - Additional parameters for the scrape request.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns A CrawlWatcher instance to monitor the batch scrape job.
   */
  async batchScrapeUrlsAndWatch(
    urls: string[],
    params?: ScrapeParams,
    idempotencyKey?: string,
  ) {
    const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);

    if (crawl.success && crawl.id) {
      const id = crawl.id;
      return new CrawlWatcher(id, this);
    }

    throw new FirecrawlError("Batch scrape job failed to start", 400);
  }

  /**
   * Checks the status of a batch scrape job using the Firecrawl API.
   * @param id - The ID of the batch scrape operation.
   * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
   * @returns The response containing the job status.
   */
  async checkBatchScrapeStatus(id?: string, getAllData = false): Promise<BatchScrapeStatusResponse | ErrorResponse> {
    if (!id) {
      throw new FirecrawlError("No batch scrape ID provided", 400);
    }

    const headers: AxiosRequestHeaders = this.prepareHeaders();
    try {
      const response: AxiosResponse = await this.getRequest(
        `${this.apiUrl}/v1/batch/scrape/${id}`,
        headers
      );
      if (response.status === 200) {
        let allData = response.data.data;
        if (getAllData && response.data.status === "completed") {
          let statusData = response.data;
          if ("data" in statusData) {
            let data = statusData.data;
            while ('next' in statusData) {
              statusData = (await this.getRequest(statusData.next, headers)).data;
              data = data.concat(statusData.data);
            }
            allData = data;
          }
        }
        return ({
          success: response.data.success,
          status: response.data.status,
          total: response.data.total,
          completed: response.data.completed,
          creditsUsed: response.data.creditsUsed,
          expiresAt: new Date(response.data.expiresAt),
          next: response.data.next,
          data: allData,
          error: response.data.error,
        })
      } else {
        this.handleError(response, "check batch scrape status");
      }
    } catch (error: any) {
      throw new FirecrawlError(error.message, 500);
    }
    return { success: false, error: "Internal server error." };
  }

  /**
   * Prepares the headers for an API request.
   * @param idempotencyKey - Optional key to ensure idempotency.
apps/js-sdk/package-lock.json (generated, 22 changes)
@@ -9,7 +9,7 @@
      "version": "1.0.0",
      "license": "ISC",
      "dependencies": {
-       "@mendable/firecrawl-js": "^1.0.3",
+       "@mendable/firecrawl-js": "^1.7.0-beta.2",
        "axios": "^1.6.8",
        "firecrawl": "^1.2.0",
        "ts-node": "^10.9.2",
@@ -423,31 +423,17 @@
      }
    },
    "node_modules/@mendable/firecrawl-js": {
-     "version": "1.2.2",
+     "version": "1.7.0-beta.2",
-     "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-1.2.2.tgz",
+     "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-1.7.0-beta.2.tgz",
-     "integrity": "sha512-2A1GzLD0bczlFIlcjxHcm/x8i76ndtV4EUzOfc81oOJ/HbycE2mbT6EUthoL+r4s5A8yO3bKr9o/GxmEn456VA==",
+     "integrity": "sha512-6L5r6BOuMPjLgSDq85xs2IpVgX9Tb/EdesKZvmtFucoaFZzIsgCQb0ZfSvwaRmqTkj53o+7eSgCcm+gsnR/yeQ==",
      "dependencies": {
        "axios": "^1.6.8",
-       "dotenv": "^16.4.5",
        "isows": "^1.0.4",
        "typescript-event-target": "^1.1.1",
-       "uuid": "^9.0.1",
        "zod": "^3.23.8",
        "zod-to-json-schema": "^3.23.0"
      }
    },
-   "node_modules/@mendable/firecrawl-js/node_modules/uuid": {
-     "version": "9.0.1",
-     "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
-     "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
-     "funding": [
-       "https://github.com/sponsors/broofa",
-       "https://github.com/sponsors/ctavan"
-     ],
-     "bin": {
-       "uuid": "dist/bin/uuid"
-     }
-   },
    "node_modules/@tsconfig/node10": {
      "version": "1.0.11",
      "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz",
@@ -11,7 +11,7 @@
  "author": "",
  "license": "ISC",
  "dependencies": {
-   "@mendable/firecrawl-js": "^1.0.3",
+   "@mendable/firecrawl-js": "1.7.1",
    "axios": "^1.6.8",
    "firecrawl": "^1.2.0",
    "ts-node": "^10.9.2",
@@ -36,7 +36,6 @@ crawl_status = app.crawl_url(
    'limit': 100,
    'scrapeOptions': {'formats': ['markdown', 'html']}
  },
- wait_until_done=True,
  poll_interval=30
)
print(crawl_status)
@@ -150,6 +149,69 @@ async def start_crawl_and_watch():
    await start_crawl_and_watch()
```

### Scraping multiple URLs in batch

To batch scrape multiple URLs, use the `batch_scrape_urls` method. It takes the URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the scraper, such as the output formats.

```python
idempotency_key = str(uuid.uuid4()) # optional idempotency key
batch_scrape_result = app.batch_scrape_urls(['firecrawl.dev', 'mendable.ai'], {'formats': ['markdown', 'html']}, 2, idempotency_key)
print(batch_scrape_result)
```
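
The returned dictionary has the same shape as a completed batch scrape status (see `check_batch_scrape_status` below), so, as a quick sketch, the scraped documents are available under `data`:

```python
# Each entry in 'data' is a scraped page in the requested formats
print(batch_scrape_result['data'][0]['markdown'])
```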

### Asynchronous batch scrape

To run a batch scrape asynchronously, use the `async_batch_scrape_urls` method. It takes the URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the scraper, such as the output formats.

```python
batch_scrape_result = app.async_batch_scrape_urls(['firecrawl.dev', 'mendable.ai'], {'formats': ['markdown', 'html']})
print(batch_scrape_result)
```

### Checking batch scrape status

To check the status of an asynchronous batch scrape job, use the `check_batch_scrape_status` method. It takes the job ID as a parameter and returns the current status of the batch scrape job.

```python
id = batch_scrape_result['id']
status = app.check_batch_scrape_status(id)
```
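
As a sketch, the returned dictionary exposes the progress fields this method reports (status, completed, total, and the scraped `data` once the job finishes):

```python
print(status['status'])  # e.g. 'scraping' or 'completed'
print(f"{status['completed']}/{status['total']} pages scraped")
```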

### Batch scrape with WebSockets

To use batch scrape with WebSockets, use the `batch_scrape_urls_and_watch` method. It takes the URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the scraper, such as the output formats.

```python
# inside an async function...
nest_asyncio.apply()

# Define event handlers
def on_document(detail):
    print("DOC", detail)

def on_error(detail):
    print("ERR", detail['error'])

def on_done(detail):
    print("DONE", detail['status'])

# Function to start the batch scrape and watch process
async def start_crawl_and_watch():
    # Initiate the batch scrape job and get the watcher
    watcher = app.batch_scrape_urls_and_watch(['firecrawl.dev', 'mendable.ai'], {'formats': ['markdown', 'html']})

    # Add event listeners
    watcher.add_event_listener("document", on_document)
    watcher.add_event_listener("error", on_error)
    watcher.add_event_listener("done", on_done)

    # Start the watcher
    await watcher.connect()

# Run the event loop
await start_crawl_and_watch()
```

## Error Handling

The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
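
As a quick sketch of handling such an exception (wrapping the batch scrape call from above; any SDK method can be handled the same way):

```python
try:
    batch_scrape_result = app.batch_scrape_urls(['firecrawl.dev'], {'formats': ['markdown']})
except Exception as e:
    print(f"Batch scrape failed: {e}")
```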
@@ -9,6 +9,23 @@ app = FirecrawlApp(api_key="fc-")
scrape_result = app.scrape_url('firecrawl.dev')
print(scrape_result['markdown'])

# Test batch scrape
urls = ['https://example.com', 'https://docs.firecrawl.dev']
batch_scrape_params = {
    'formats': ['markdown', 'html'],
}

# Synchronous batch scrape
batch_result = app.batch_scrape_urls(urls, batch_scrape_params)
print("Synchronous Batch Scrape Result:")
print(batch_result['data'][0]['markdown'])

# Asynchronous batch scrape
async_batch_result = app.async_batch_scrape_urls(urls, batch_scrape_params)
print("\nAsynchronous Batch Scrape Result:")
print(async_batch_result)

# Crawl a website:
idempotency_key = str(uuid.uuid4()) # optional idempotency key
crawl_result = app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
@@ -13,7 +13,7 @@ import os

from .firecrawl import FirecrawlApp

-__version__ = "1.3.0"
+__version__ = "1.4.0"

# Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl")
@@ -81,8 +81,10 @@ class FirecrawlApp:
            response = response.json()
            if response['success'] and 'data' in response:
                return response['data']
-           else:
+           elif "error" in response:
                raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
+           else:
+               raise Exception(f'Failed to scrape URL. Error: {response}')
        else:
            self._handle_error(response, 'scrape URL')
@@ -117,7 +119,14 @@ class FirecrawlApp:
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

        Returns:
-           Any: The crawl job ID or the crawl results if waiting until completion.
+           Dict[str, Any]: A dictionary containing the crawl results. The structure includes:
+               - 'success' (bool): Indicates if the crawl was successful.
+               - 'status' (str): The final status of the crawl job (e.g., 'completed').
+               - 'completed' (int): Number of scraped pages that completed.
+               - 'total' (int): Total number of scraped pages.
+               - 'creditsUsed' (int): Estimated number of API credits used for this crawl.
+               - 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the crawl data expires.
+               - 'data' (List[Dict]): List of all the scraped pages.

        Raises:
            Exception: If the crawl job initiation or monitoring fails.
@@ -146,7 +155,10 @@ class FirecrawlApp:
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

        Returns:
-           Dict[str, Any]: The response from the crawl initiation request.
+           Dict[str, Any]: A dictionary containing the crawl initiation response. The structure includes:
+               - 'success' (bool): Indicates if the crawl initiation was successful.
+               - 'id' (str): The unique identifier for the crawl job.
+               - 'url' (str): The URL to check the status of the crawl job.
        """
        endpoint = f'/v1/crawl'
        headers = self._prepare_headers(idempotency_key)
@@ -236,7 +248,7 @@ class FirecrawlApp:
            params (Optional[Dict[str, Any]]): Additional parameters for the map search.

        Returns:
-           Any: The result of the map search, typically a dictionary containing mapping data.
+           List[str]: A list of URLs discovered during the map search.
        """
        endpoint = f'/v1/map'
        headers = self._prepare_headers()
@@ -256,11 +268,130 @@ class FirecrawlApp:
            response = response.json()
            if response['success'] and 'links' in response:
                return response
-           else:
+           elif 'error' in response:
                raise Exception(f'Failed to map URL. Error: {response["error"]}')
+           else:
+               raise Exception(f'Failed to map URL. Error: {response}')
        else:
            self._handle_error(response, 'map')

    def batch_scrape_urls(self, urls: list[str],
                  params: Optional[Dict[str, Any]] = None,
                  poll_interval: Optional[int] = 2,
                  idempotency_key: Optional[str] = None) -> Any:
        """
        Initiate a batch scrape job for the specified URLs using the Firecrawl API.

        Args:
            urls (list[str]): The URLs to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
            poll_interval (Optional[int]): Time in seconds between status checks when waiting for job completion. Defaults to 2 seconds.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

        Returns:
            Dict[str, Any]: A dictionary containing the scrape results. The structure includes:
                - 'success' (bool): Indicates if the batch scrape was successful.
                - 'status' (str): The final status of the batch scrape job (e.g., 'completed').
                - 'completed' (int): Number of scraped pages that completed.
                - 'total' (int): Total number of scraped pages.
                - 'creditsUsed' (int): Estimated number of API credits used for this batch scrape.
                - 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the batch scrape data expires.
                - 'data' (List[Dict]): List of all the scraped pages.

        Raises:
            Exception: If the batch scrape job initiation or monitoring fails.
        """
        endpoint = f'/v1/batch/scrape'
        headers = self._prepare_headers(idempotency_key)
        json_data = {'urls': urls}
        if params:
            json_data.update(params)
        response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
        if response.status_code == 200:
            id = response.json().get('id')
            return self._monitor_job_status(id, headers, poll_interval)
        else:
            self._handle_error(response, 'start batch scrape job')

    def async_batch_scrape_urls(self, urls: list[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
        """
        Initiate a batch scrape job asynchronously.

        Args:
            urls (list[str]): The URLs to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

        Returns:
            Dict[str, Any]: A dictionary containing the batch scrape initiation response. The structure includes:
                - 'success' (bool): Indicates if the batch scrape initiation was successful.
                - 'id' (str): The unique identifier for the batch scrape job.
                - 'url' (str): The URL to check the status of the batch scrape job.
        """
        endpoint = f'/v1/batch/scrape'
        headers = self._prepare_headers(idempotency_key)
        json_data = {'urls': urls}
        if params:
            json_data.update(params)
        response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
        if response.status_code == 200:
            return response.json()
        else:
            self._handle_error(response, 'start batch scrape job')

    def batch_scrape_urls_and_watch(self, urls: list[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
        """
        Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket.

        Args:
            urls (list[str]): The URLs to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

        Returns:
            CrawlWatcher: An instance of CrawlWatcher to monitor the batch scrape job.
        """
        crawl_response = self.async_batch_scrape_urls(urls, params, idempotency_key)
        if crawl_response['success'] and 'id' in crawl_response:
            return CrawlWatcher(crawl_response['id'], self)
        else:
            raise Exception("Batch scrape job failed to start")

    def check_batch_scrape_status(self, id: str) -> Any:
        """
        Check the status of a batch scrape job using the Firecrawl API.

        Args:
            id (str): The ID of the batch scrape job.

        Returns:
            Any: The status of the batch scrape job.

        Raises:
            Exception: If the status check request fails.
        """
        endpoint = f'/v1/batch/scrape/{id}'

        headers = self._prepare_headers()
        response = self._get_request(f'{self.api_url}{endpoint}', headers)
        if response.status_code == 200:
            data = response.json()
            return {
                'success': True,
                'status': data.get('status'),
                'total': data.get('total'),
                'completed': data.get('completed'),
                'creditsUsed': data.get('creditsUsed'),
                'expiresAt': data.get('expiresAt'),
                'next': data.get('next'),
                'data': data.get('data'),
                'error': data.get('error')
            }
        else:
            self._handle_error(response, 'check batch scrape status')

    def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
        """
        Prepare the headers for API requests.
examples/claude_web_crawler/claude_web_crawler.py (new file, 166 lines)
@@ -0,0 +1,166 @@
|
||||||
|
import os
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
import json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import anthropic
|
||||||
|
import agentops
|
||||||
|
|
||||||
|
# ANSI color codes
|
||||||
|
class Colors:
|
||||||
|
CYAN = '\033[96m'
|
||||||
|
YELLOW = '\033[93m'
|
||||||
|
GREEN = '\033[92m'
|
||||||
|
RED = '\033[91m'
|
||||||
|
MAGENTA = '\033[95m'
|
||||||
|
BLUE = '\033[94m'
|
||||||
|
RESET = '\033[0m'
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Retrieve API keys from environment variables
|
||||||
|
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
|
||||||
|
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
|
||||||
|
|
||||||
|
# Initialize the FirecrawlApp and OpenAI client
|
||||||
|
app = FirecrawlApp(api_key=firecrawl_api_key)
|
||||||
|
client = anthropic.Anthropic(api_key=anthropic_api_key)
|
||||||
|
|
||||||
|
# Find the page that most likely contains the objective
|
||||||
|
def find_relevant_page_via_map(objective, url, app, client):
|
||||||
|
try:
|
||||||
|
print(f"{Colors.CYAN}Understood. The objective is: {objective}{Colors.RESET}")
|
||||||
|
print(f"{Colors.CYAN}Initiating search on the website: {url}{Colors.RESET}")
|
||||||
|
|
||||||
|
map_prompt = f"""
|
||||||
|
The map function generates a list of URLs from a website and it accepts a search parameter. Based on the objective of: {objective}, come up with a 1-2 word search parameter that will help us find the information we need. Only respond with 1-2 words nothing else.
|
||||||
|
"""
|
||||||
|
|
||||||
|
print(f"{Colors.YELLOW}Analyzing objective to determine optimal search parameter...{Colors.RESET}")
|
||||||
|
completion = client.messages.create(
|
||||||
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
max_tokens=1000,
|
||||||
|
temperature=0,
|
||||||
|
system="You are an expert web crawler. Respond with the best search parameter.",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": map_prompt
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
map_search_parameter = completion.content[0].text
|
||||||
|
print(f"{Colors.GREEN}Optimal search parameter identified: {map_search_parameter}{Colors.RESET}")
|
||||||
|
|
||||||
|
print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}")
|
||||||
|
map_website = app.map_url(url, params={"search": map_search_parameter})
|
||||||
|
print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}")
|
||||||
|
print(f"{Colors.GREEN}Located {len(map_website['links'])} relevant links.{Colors.RESET}")
|
||||||
|
return map_website['links']
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Scrape the top 3 pages and see if the objective is met, if so return in json format else return None
|
||||||
|
def find_objective_in_top_pages(map_website, objective, app, client):
|
||||||
|
try:
|
||||||
|
# Get top 2 links from the map result
|
||||||
|
top_links = map_website[:2]
|
||||||
|
print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}")
|
||||||
|
|
||||||
|
# Scrape the pages in batch
|
||||||
|
batch_scrape_result = app.batch_scrape_urls(top_links, {'formats': ['markdown']})
|
||||||
|
print(f"{Colors.GREEN}Batch page scraping completed successfully.{Colors.RESET}")
|
||||||
|
|
||||||
|
|
||||||
|
for scrape_result in batch_scrape_result['data']:
|
||||||
|
|
||||||
|
# Check if objective is met
|
||||||
|
check_prompt = f"""
|
||||||
|
Given the following scraped content and objective, determine if the objective is met.
|
||||||
|
If it is, extract the relevant information in a simple and concise JSON format. Use only the necessary fields and avoid nested structures if possible.
|
||||||
|
If the objective is not met with confidence, respond with 'Objective not met'.
|
||||||
|
|
||||||
|
Objective: {objective}
|
||||||
|
Scraped content: {scrape_result['markdown']}
|
||||||
|
|
||||||
|
Remember:
|
||||||
|
1. Only return JSON if you are confident the objective is fully met.
|
||||||
|
2. Keep the JSON structure as simple and flat as possible.
|
||||||
|
3. Do not include any explanations or markdown formatting in your response.
|
||||||
|
"""
|
||||||
|
|
||||||
|
completion = client.messages.create(
|
||||||
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
max_tokens=1000,
|
||||||
|
temperature=0,
|
||||||
|
system="You are an expert web crawler. Respond with the relevant information in JSON format.",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": check_prompt
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
result = completion.content[0].text
|
||||||
|
|
||||||
|
if result != "Objective not met":
|
||||||
|
print(f"{Colors.GREEN}Objective potentially fulfilled. Relevant information identified.{Colors.RESET}")
|
||||||
|
try:
|
||||||
|
return json.loads(result)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print(f"{Colors.RED}Error in parsing response. Proceeding to next page...{Colors.RESET}")
|
||||||
|
else:
|
||||||
|
print(f"{Colors.YELLOW}Objective not met on this page. Proceeding to next link...{Colors.RESET}")
|
||||||
|
|
||||||
|
print(f"{Colors.RED}All available pages analyzed. Objective not fulfilled in examined content.{Colors.RESET}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{Colors.RED}Error encountered during page analysis: {str(e)}{Colors.RESET}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Main function to execute the process
|
||||||
|
def main():
|
||||||
|
# Get user input
|
||||||
|
url = input(f"{Colors.BLUE}Enter the website to crawl: {Colors.RESET}")
|
||||||
|
if not url.strip():
|
||||||
|
url = "https://www.firecrawl.dev/"
|
||||||
|
|
||||||
|
objective = input(f"{Colors.BLUE}Enter your objective: {Colors.RESET}")
|
||||||
|
if not objective.strip():
|
||||||
|
objective = "find me the pricing plans"
|
||||||
|
|
||||||
|
print(f"{Colors.YELLOW}Initiating web crawling process...{Colors.RESET}")
|
||||||
|
# Find the relevant page
|
||||||
|
map_website = find_relevant_page_via_map(objective, url, app, client)
|
||||||
|
print(map_website)
|
||||||
|
|
||||||
|
if map_website:
|
||||||
|
print(f"{Colors.GREEN}Relevant pages identified. Proceeding with detailed analysis...{Colors.RESET}")
|
||||||
|
# Find objective in top pages
|
||||||
|
result = find_objective_in_top_pages(map_website, objective, app, client)
|
||||||
|
|
||||||
|
if result:
|
||||||
|
print(f"{Colors.GREEN}Objective successfully fulfilled. Extracted information:{Colors.RESET}")
|
||||||
|
print(f"{Colors.MAGENTA}{json.dumps(result, indent=2)}{Colors.RESET}")
|
||||||
|
else:
|
||||||
|
print(f"{Colors.RED}Unable to fulfill the objective with the available content.{Colors.RESET}")
|
||||||
|
else:
|
||||||
|
print(f"{Colors.RED}No relevant pages identified. Consider refining the search parameters or trying a different website.{Colors.RESET}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
agentops.init(os.getenv("AGENTOPS_API_KEY"))
|
||||||
|
main()
|
examples/grok_web_crawler/grok_web_crawler.py (new file, 150 lines)
@@ -0,0 +1,150 @@
|
||||||
|
import os
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
import json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# ANSI color codes
|
||||||
|
class Colors:
|
||||||
|
CYAN = '\033[96m'
|
||||||
|
YELLOW = '\033[93m'
|
||||||
|
GREEN = '\033[92m'
|
||||||
|
RED = '\033[91m'
|
||||||
|
MAGENTA = '\033[95m'
|
||||||
|
BLUE = '\033[94m'
|
||||||
|
RESET = '\033[0m'
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Retrieve API keys from environment variables
|
||||||
|
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
|
||||||
|
grok_api_key = os.getenv("GROK_API_KEY")
|
||||||
|
|
||||||
|
# Initialize the FirecrawlApp
|
||||||
|
app = FirecrawlApp(api_key=firecrawl_api_key)
|
||||||
|
|
||||||
|
# Function to make Grok API calls
|
||||||
|
def grok_completion(prompt):
|
||||||
|
url = "https://api.x.ai/v1/chat/completions"
|
||||||
|
headers = {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": f"Bearer {grok_api_key}"
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful assistant."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": prompt
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"model": "grok-beta",
|
||||||
|
"stream": False,
|
||||||
|
"temperature": 0
|
||||||
|
}
|
||||||
|
response = requests.post(url, headers=headers, json=data)
|
||||||
|
return response.json()['choices'][0]['message']['content']
|
||||||
|
|
||||||
|
# Find the page that most likely contains the objective
|
||||||
|
def find_relevant_page_via_map(objective, url, app):
|
||||||
|
try:
|
||||||
|
print(f"{Colors.CYAN}Understood. The objective is: {objective}{Colors.RESET}")
|
||||||
|
print(f"{Colors.CYAN}Initiating search on the website: {url}{Colors.RESET}")
|
||||||
|
|
||||||
|
map_prompt = f"""
|
||||||
|
The map function generates a list of URLs from a website and it accepts a search parameter. Based on the objective of: {objective}, come up with a 1-2 word search parameter that will help us find the information we need. Only respond with 1-2 words nothing else.
|
||||||
|
"""
|
||||||
|
|
||||||
|
print(f"{Colors.YELLOW}Analyzing objective to determine optimal search parameter...{Colors.RESET}")
|
||||||
|
map_search_parameter = grok_completion(map_prompt)
|
||||||
|
print(f"{Colors.GREEN}Optimal search parameter identified: {map_search_parameter}{Colors.RESET}")
|
||||||
|
|
||||||
|
print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}")
|
||||||
|
print(f"{Colors.MAGENTA}{map_search_parameter}{Colors.RESET}")
|
||||||
|
map_website = app.map_url(url, params={"search": map_search_parameter})
|
||||||
|
print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}")
|
||||||
|
print(f"{Colors.GREEN}Located {len(map_website['links'])} relevant links.{Colors.RESET}")
|
||||||
|
print(f"{Colors.MAGENTA}{map_website}{Colors.RESET}")
|
||||||
|
return map_website["links"]
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Scrape the top 3 pages and see if the objective is met, if so return in json format else return None
|
||||||
|
def find_objective_in_top_pages(map_website, objective, app):
|
||||||
|
try:
|
||||||
|
print(f"{Colors.MAGENTA}{map_website}{Colors.RESET}")
|
||||||
|
# Get top 3 links from the map result
|
||||||
|
top_links = map_website[:3] if isinstance(map_website, list) else []
|
||||||
|
print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}")
|
||||||
|
|
||||||
|
for link in top_links:
|
||||||
|
print(f"{Colors.YELLOW}Initiating scrape of page: {link}{Colors.RESET}")
|
||||||
|
# Scrape the page
|
||||||
|
scrape_result = app.scrape_url(link, params={'formats': ['markdown']})
|
||||||
|
print(f"{Colors.GREEN}Page scraping completed successfully.{Colors.RESET}")
|
||||||
|
|
||||||
|
|
||||||
|
# Check if objective is met
|
||||||
|
check_prompt = f"""
|
||||||
|
Given the following scraped content and objective, determine if the objective is met.
|
||||||
|
If it is, extract the relevant information in a simple and concise JSON format. Use only the necessary fields and avoid nested structures if possible.
|
||||||
|
If the objective is not met with confidence, respond with 'Objective not met'.
|
||||||
|
|
||||||
|
Objective: {objective}
|
||||||
|
Scraped content: {scrape_result['markdown']}
|
||||||
|
|
||||||
|
Remember:
|
||||||
|
1. Only return JSON if you are confident the objective is fully met.
|
||||||
|
2. Keep the JSON structure as simple and flat as possible.
|
||||||
|
3. Do not include any explanations or markdown formatting in your response.
|
||||||
|
"""
|
||||||
|
|
||||||
|
result = grok_completion(check_prompt)
|
||||||
|
print(f"{Colors.MAGENTA}{result}{Colors.RESET}")
|
||||||
|
if result != "Objective not met":
|
||||||
|
print(f"{Colors.GREEN}Objective potentially fulfilled. Relevant information identified.{Colors.RESET}")
|
||||||
|
try:
|
||||||
|
result = result.replace("```json", "").replace("```", "")
|
||||||
|
return json.loads(result)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print(f"{Colors.RED}Error in parsing response. Proceeding to next page...{Colors.RESET}")
|
||||||
|
else:
|
||||||
|
print(f"{Colors.YELLOW}Objective not met on this page. Proceeding to next link...{Colors.RESET}")
|
||||||
|
|
||||||
|
print(f"{Colors.RED}All available pages analyzed. Objective not fulfilled in examined content.{Colors.RESET}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{Colors.RED}Error encountered during page analysis: {str(e)}{Colors.RESET}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Main function to execute the process
|
||||||
|
def main():
|
||||||
|
# Get user input
|
||||||
|
url = input(f"{Colors.BLUE}Enter the website to crawl: {Colors.RESET}")
|
||||||
|
objective = input(f"{Colors.BLUE}Enter your objective: {Colors.RESET}")
|
||||||
|
|
||||||
|
print(f"{Colors.YELLOW}Initiating web crawling process...{Colors.RESET}")
|
||||||
|
# Find the relevant page
|
||||||
|
map_website = find_relevant_page_via_map(objective, url, app)
|
||||||
|
|
||||||
|
if map_website:
|
||||||
|
print(f"{Colors.GREEN}Relevant pages identified. Proceeding with detailed analysis...{Colors.RESET}")
|
||||||
|
# Find objective in top pages
|
||||||
|
result = find_objective_in_top_pages(map_website, objective, app)
|
||||||
|
|
||||||
|
if result:
|
||||||
|
print(f"{Colors.GREEN}Objective successfully fulfilled. Extracted information:{Colors.RESET}")
|
||||||
|
print(f"{Colors.MAGENTA}{json.dumps(result, indent=2)}{Colors.RESET}")
|
||||||
|
else:
|
||||||
|
print(f"{Colors.RED}Unable to fulfill the objective with the available content.{Colors.RESET}")
|
||||||
|
else:
|
||||||
|
print(f"{Colors.RED}No relevant pages identified. Consider refining the search parameters or trying a different website.{Colors.RESET}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
examples/openai_swarm_firecrawl/.env.example (new file, 2 lines)
@@ -0,0 +1,2 @@
OPENAI_API_KEY=
FIRECRAWL_API_KEY=
examples/openai_swarm_firecrawl/README.md (new file, 37 lines)
@@ -0,0 +1,37 @@
# Swarm Firecrawl Marketing Agent

A multi-agent system built on [OpenAI Swarm](https://github.com/openai/swarm) for AI-powered marketing strategies, using [Firecrawl](https://firecrawl.dev) for web scraping.

## Agents

1. User Interface: Manages user interactions
2. Website Scraper: Extracts clean LLM-ready content via the Firecrawl API
3. Analyst: Provides marketing insights
4. Campaign Idea: Generates marketing campaign concepts
5. Copywriter: Creates compelling marketing copy

## Requirements

- [Firecrawl](https://firecrawl.dev) API key
- [OpenAI](https://platform.openai.com/api-keys) API key

## Setup

1. Install the required packages:
```
pip install -r requirements.txt
```

2. Set up your environment variables in a `.env` file:
```
OPENAI_API_KEY=your_openai_api_key
FIRECRAWL_API_KEY=your_firecrawl_api_key
```

## Usage

Run the main script to start the interactive demo:

```
python main.py
```
examples/openai_swarm_firecrawl/main.py (new file, 108 lines)
@@ -0,0 +1,108 @@
|
||||||
|
import os
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
from swarm import Agent
|
||||||
|
from swarm.repl import run_demo_loop
|
||||||
|
import dotenv
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# Initialize FirecrawlApp and OpenAI
|
||||||
|
app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
|
||||||
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
def scrape_website(url):
|
||||||
|
"""Scrape a website using Firecrawl."""
|
||||||
|
scrape_status = app.scrape_url(
|
||||||
|
url,
|
||||||
|
params={'formats': ['markdown']}
|
||||||
|
)
|
||||||
|
return scrape_status
|
||||||
|
|
||||||
|
def generate_completion(role, task, content):
|
||||||
|
"""Generate a completion using OpenAI."""
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-4o-mini",
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": f"You are a {role}. {task}"},
|
||||||
|
{"role": "user", "content": content}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return response.choices[0].message.content
|
||||||
|
|
||||||
|
def analyze_website_content(content):
|
||||||
|
"""Analyze the scraped website content using OpenAI."""
|
||||||
|
analysis = generate_completion(
|
||||||
|
"marketing analyst",
|
||||||
|
"Analyze the following website content and provide key insights for marketing strategy.",
|
||||||
|
content
|
||||||
|
)
|
||||||
|
return {"analysis": analysis}
|
||||||
|
|
||||||
|
def generate_copy(brief):
|
||||||
|
"""Generate marketing copy based on a brief using OpenAI."""
|
||||||
|
copy = generate_completion(
|
||||||
|
"copywriter",
|
||||||
|
"Create compelling marketing copy based on the following brief.",
|
||||||
|
brief
|
||||||
|
)
|
||||||
|
return {"copy": copy}
|
||||||
|
|
||||||
|
def create_campaign_idea(target_audience, goals):
|
||||||
|
"""Create a campaign idea based on target audience and goals using OpenAI."""
|
||||||
|
campaign_idea = generate_completion(
|
||||||
|
"marketing strategist",
|
||||||
|
"Create an innovative campaign idea based on the target audience and goals provided.",
|
||||||
|
f"Target Audience: {target_audience}\nGoals: {goals}"
|
||||||
|
)
|
||||||
|
return {"campaign_idea": campaign_idea}
|
||||||
|
|
||||||
|
def handoff_to_copywriter():
|
||||||
|
"""Hand off the campaign idea to the copywriter agent."""
|
||||||
|
return copywriter_agent
|
||||||
|
|
||||||
|
def handoff_to_analyst():
|
||||||
|
"""Hand off the website content to the analyst agent."""
|
||||||
|
return analyst_agent
|
||||||
|
|
||||||
|
def handoff_to_campaign_idea():
|
||||||
|
"""Hand off the target audience and goals to the campaign idea agent."""
|
||||||
|
return campaign_idea_agent
|
||||||
|
|
||||||
|
def handoff_to_website_scraper():
|
||||||
|
"""Hand off the url to the website scraper agent."""
|
||||||
|
return website_scraper_agent
|
||||||
|
|
||||||
|
user_interface_agent = Agent(
|
||||||
|
name="User Interface Agent",
|
||||||
|
instructions="You are a user interface agent that handles all interactions with the user. You need to always start with a URL that the user wants to create a marketing strategy for. Ask clarification questions if needed. Be concise.",
|
||||||
|
functions=[handoff_to_website_scraper],
|
||||||
|
)
|
||||||
|
|
||||||
|
website_scraper_agent = Agent(
|
||||||
|
name="Website Scraper Agent",
|
||||||
|
instructions="You are a website scraper agent specialized in scraping website content.",
|
||||||
|
functions=[scrape_website, handoff_to_analyst],
|
||||||
|
)
|
||||||
|
|
||||||
|
analyst_agent = Agent(
|
||||||
|
name="Analyst Agent",
|
||||||
|
instructions="You are an analyst agent that examines website content and provides insights for marketing strategies. Be concise.",
|
||||||
|
functions=[analyze_website_content, handoff_to_campaign_idea],
|
||||||
|
)
|
||||||
|
|
||||||
|
campaign_idea_agent = Agent(
|
||||||
|
name="Campaign Idea Agent",
|
||||||
|
instructions="You are a campaign idea agent that creates innovative marketing campaign ideas based on website content and target audience. Be concise.",
|
||||||
|
functions=[create_campaign_idea, handoff_to_copywriter],
|
||||||
|
)
|
||||||
|
|
||||||
|
copywriter_agent = Agent(
|
||||||
|
name="Copywriter Agent",
|
||||||
|
instructions="You are a copywriter agent specialized in creating compelling marketing copy based on website content and campaign ideas. Be concise.",
|
||||||
|
functions=[generate_copy],
|
||||||
|
)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Run the demo loop with the user interface agent
|
||||||
|
run_demo_loop(user_interface_agent, stream=True)
|
examples/openai_swarm_firecrawl/requirements.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
firecrawl-py
openai
@@ -0,0 +1,3 @@
OPENAI_API_KEY=
FIRECRAWL_API_KEY=
SERP_API_KEY=
examples/openai_swarm_firecrawl_web_extractor/main.py (new file, 120 lines)
@@ -0,0 +1,120 @@
|
||||||
|
import os
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
from swarm import Agent
|
||||||
|
from swarm.repl import run_demo_loop
|
||||||
|
import dotenv
|
||||||
|
from serpapi import GoogleSearch
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# Initialize FirecrawlApp and OpenAI
|
||||||
|
app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
|
||||||
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
def search_google(query, objective):
|
||||||
|
"""Search Google using SerpAPI."""
|
||||||
|
print(f"Parameters: query={query}, objective={objective}")
|
||||||
|
search = GoogleSearch({"q": query, "api_key": os.getenv("SERP_API_KEY")})
|
||||||
|
results = search.get_dict().get("organic_results", [])
|
||||||
|
return {"objective": objective, "results": results}
|
||||||
|
|
||||||
|
def map_url_pages(url, objective):
|
||||||
|
"""Map a website's pages using Firecrawl."""
|
||||||
|
|
||||||
|
search_query = generate_completion(
|
||||||
|
"website search query generator",
|
||||||
|
f"Generate a 1-2 word search query for the website: {url} based on the objective",
|
||||||
|
"Objective: " + objective
|
||||||
|
)
|
||||||
|
print(f"Parameters: url={url}, objective={objective}, search_query={search_query}")
|
||||||
|
map_status = app.map_url(url, params={'search': search_query})
|
||||||
|
if map_status.get('status') == 'success':
|
||||||
|
links = map_status.get('links', [])
|
||||||
|
top_link = links[0] if links else None
|
||||||
|
return {"objective": objective, "results": [top_link] if top_link else []}
|
||||||
|
else:
|
||||||
|
return {"objective": objective, "results": []}
|
||||||
|
|
||||||
|
def scrape_url(url, objective):
|
||||||
|
"""Scrape a website using Firecrawl."""
|
||||||
|
print(f"Parameters: url={url}, objective={objective}")
|
||||||
|
scrape_status = app.scrape_url(
|
||||||
|
url,
|
||||||
|
params={'formats': ['markdown']}
|
||||||
|
)
|
||||||
|
return {"objective": objective, "results": scrape_status}
|
||||||
|
|
||||||
|
def analyze_website_content(content, objective):
|
||||||
|
"""Analyze the scraped website content using OpenAI."""
|
||||||
|
print(f"Parameters: content={content[:50]}..., objective={objective}")
|
||||||
|
analysis = generate_completion(
|
||||||
|
"website data extractor",
|
||||||
|
f"Analyze the following website content and extract a JSON object based on the objective.",
|
||||||
|
"Objective: " + objective + "\nContent: " + content
|
||||||
|
)
|
||||||
|
return {"objective": objective, "results": analysis}
|
||||||
|
|
||||||
|
def generate_completion(role, task, content):
|
||||||
|
"""Generate a completion using OpenAI."""
|
||||||
|
print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-4o",
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": f"You are a {role}. {task}"},
|
||||||
|
{"role": "user", "content": content}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return response.choices[0].message.content
|
||||||
|
|
||||||
|
def handoff_to_search_google():
|
||||||
|
"""Hand off the search query to the search google agent."""
|
||||||
|
return google_search_agent
|
||||||
|
|
||||||
|
def handoff_to_map_url():
|
||||||
|
"""Hand off the url to the map url agent."""
|
||||||
|
return map_url_agent
|
||||||
|
|
||||||
|
def handoff_to_website_scraper():
|
||||||
|
"""Hand off the url to the website scraper agent."""
|
||||||
|
return website_scraper_agent
|
||||||
|
|
||||||
|
def handoff_to_analyst():
|
||||||
|
"""Hand off the website content to the analyst agent."""
|
||||||
|
return analyst_agent
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
user_interface_agent = Agent(
|
||||||
|
name="User Interface Agent",
|
||||||
|
instructions="You are a user interface agent that handles all interactions with the user. You need to always start with an web data extraction objective that the user wants to achieve by searching the web, mapping the web pages, and extracting the content from a specific page. Be concise.",
|
||||||
|
functions=[handoff_to_search_google],
|
||||||
|
)
|
||||||
|
|
||||||
|
google_search_agent = Agent(
|
||||||
|
name="Google Search Agent",
|
||||||
|
instructions="You are a google search agent specialized in searching the web. Only search for the website not any specific page. When you are done, you must hand off to the map agent.",
|
||||||
|
functions=[search_google, handoff_to_map_url],
|
||||||
|
)
|
||||||
|
|
||||||
|
map_url_agent = Agent(
|
||||||
|
name="Map URL Agent",
|
||||||
|
instructions="You are a map url agent specialized in mapping the web pages. When you are done, you must hand off the results to the website scraper agent.",
|
||||||
|
functions=[map_url_pages, handoff_to_website_scraper],
|
||||||
|
)
|
||||||
|
|
||||||
|
website_scraper_agent = Agent(
|
||||||
|
name="Website Scraper Agent",
|
||||||
|
instructions="You are a website scraper agent specialized in scraping website content. When you are done, you must hand off the website content to the analyst agent to extract the data based on the objective.",
|
||||||
|
functions=[scrape_url, handoff_to_analyst],
|
||||||
|
)
|
||||||
|
|
||||||
|
analyst_agent = Agent(
|
||||||
|
name="Analyst Agent",
|
||||||
|
instructions="You are an analyst agent that examines website content and returns a JSON object. When you are done, you must return a JSON object.",
|
||||||
|
functions=[analyze_website_content],
|
||||||
|
)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Run the demo loop with the user interface agent
|
||||||
|
run_demo_loop(user_interface_agent, stream=True)
|
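The helper functions above can also be exercised directly, without the Swarm demo loop, to see the map, scrape, and analyze flow end to end. A minimal sketch, assuming the three API keys from the .env example are set, and using a placeholder URL and objective:

```python
# Direct use of the helpers defined above, bypassing run_demo_loop.
objective = "Collect the product's pricing tiers"                 # placeholder objective
mapped = map_url_pages("https://www.firecrawl.dev", objective)    # Firecrawl map
if mapped["results"]:
    scraped = scrape_url(mapped["results"][0], objective)         # Firecrawl scrape
    # The scrape result is assumed to expose a 'markdown' field, matching the
    # 'formats': ['markdown'] request made inside scrape_url above.
    page_markdown = scraped["results"].get("markdown", "")
    analysis = analyze_website_content(page_markdown, objective)  # GPT-4o extraction
    print(analysis["results"])
```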
@@ -0,0 +1,4 @@
firecrawl-py
openai
google-search-results
git+https://github.com/openai/swarm.git
3
examples/sales_web_crawler/.env.example
Normal file
@@ -0,0 +1,3 @@
OPENAI_API_KEY=
FIRECRAWL_API_KEY=
SERP_API_KEY=
78
examples/sales_web_crawler/app.py
Normal file
@@ -0,0 +1,78 @@
import csv
import json
import os

from dotenv import load_dotenv
from firecrawl import FirecrawlApp
from openai import OpenAI
from serpapi import GoogleSearch
from swarm import Agent
from swarm.repl import run_demo_loop

load_dotenv()

# Initialize FirecrawlApp and OpenAI
app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def crawl_and_analyze_url(url, objective):
    """Crawl a website using Firecrawl and analyze the content."""
    print(f"Parameters: url={url}, objective={objective}")
    # Crawl the website
    crawl_status = app.crawl_url(
        url,
        params={'limit': 10, 'scrapeOptions': {'formats': ['markdown']}},
        poll_interval=5
    )
    crawl_status = crawl_status['data']
    # Process each 'markdown' element individually
    combined_results = []
    for item in crawl_status:
        if 'markdown' in item:
            content = item['markdown']
            # Analyze the content
            analysis = generate_completion(
                "website data extractor",
                f"Analyze the following website content and extract a JSON object based on the objective. Do not write the ```json and ``` to denote a JSON when returning a response",
                "Objective: " + objective + "\nContent: " + content
            )
            # Parse the JSON result
            try:
                result = json.loads(analysis)
                combined_results.append(result)
            except json.JSONDecodeError:
                print(f"Could not parse JSON from analysis: {analysis}")
    # Combine the results
    return {"objective": objective, "results": combined_results}

def generate_completion(role, task, content):
    """Generate a completion using OpenAI."""
    print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": f"You are a {role}. {task}"},
            {"role": "user", "content": content}
        ]
    )
    return response.choices[0].message.content

def handoff_to_crawl_url():
    """Hand off the url to the crawl url agent."""
    return crawl_website_agent

user_interface_agent = Agent(
    name="User Interface Agent",
    instructions="You are a user interface agent that handles all interactions with the user. You need to always start by asking for a URL to crawl and the web data extraction objective. Be concise.",
    functions=[handoff_to_crawl_url],
)

crawl_website_agent = Agent(
    name="Crawl Website Agent",
    instructions="You are a crawl URL agent specialized in crawling web pages and analyzing their content. When you are done, you must print the results to the console.",
    functions=[crawl_and_analyze_url],
)

if __name__ == "__main__":
    # Run the demo loop with the user interface agent
    run_demo_loop(user_interface_agent, stream=True)
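One small observation on app.py above: it imports csv but never uses it. If the extracted records were meant to be persisted, a hedged sketch of a CSV export step could look like the following (the save_to_csv helper is illustrative and not part of the diff):

```python
import csv

def save_to_csv(results, path="leads.csv"):
    """Illustrative helper: write a list of flat JSON objects to one CSV file."""
    if not results:
        return
    # Collect the union of keys so rows with different fields still fit one header.
    fieldnames = sorted({key for row in results for key in row})
    with open(path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

# Example usage with the function defined in app.py:
# output = crawl_and_analyze_url("https://www.firecrawl.dev", "Collect contact info")
# save_to_csv(output["results"])
```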
4
examples/sales_web_crawler/requirements.txt
Normal file
@@ -0,0 +1,4 @@
firecrawl-py
openai
google-search-results
git+https://github.com/openai/swarm.git
@@ -98,7 +98,7 @@
    "source": [
     "# Create a cache with a 5 minute TTL\n",
     "cache = caching.CachedContent.create(\n",
-    "    model=\"models/gemini-1.5-pro-001\",\n",
+    "    model=\"models/gemini-1.5-pro-002\",\n",
     "    display_name=\"website crawl testing again\", # used to identify the cache\n",
     "    system_instruction=\"You are an expert at this website, and your job is to answer user's query based on the website you have access to.\",\n",
     "    contents=[text_file],\n",
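For context, the only functional change in this hunk is the cached-content model id, bumped from gemini-1.5-pro-001 to gemini-1.5-pro-002. A minimal sketch of the patched cell as plain Python, mirroring the notebook it belongs to and assuming crawl_result.txt already exists and GOOGLE_API_KEY is set:

```python
import datetime
import os

import google.generativeai as genai
from google.generativeai import caching

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Upload the crawl output; the notebook also waits for the file to finish processing.
text_file = genai.upload_file(path="crawl_result.txt")

# Cache the uploaded content against the bumped model version.
cache = caching.CachedContent.create(
    model="models/gemini-1.5-pro-002",  # previously models/gemini-1.5-pro-001
    display_name="website crawl testing again",
    system_instruction="You are an expert at this website, and your job is to answer user's query based on the website you have access to.",
    contents=[text_file],
    ttl=datetime.timedelta(minutes=15),
)
model = genai.GenerativeModel.from_cached_content(cached_content=cache)
```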
@@ -0,0 +1,166 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ericciarla/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import datetime\n",
    "import time\n",
    "import google.generativeai as genai\n",
    "from google.generativeai import caching\n",
    "from dotenv import load_dotenv\n",
    "from firecrawl import FirecrawlApp\n",
    "import json\n",
    "\n",
    "# Load environment variables\n",
    "load_dotenv()\n",
    "\n",
    "# Retrieve API keys from environment variables\n",
    "google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
    "firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
    "\n",
    "# Configure the Google Generative AI module with the API key\n",
    "genai.configure(api_key=google_api_key)\n",
    "\n",
    "# Initialize the FirecrawlApp with your API key\n",
    "app = FirecrawlApp(api_key=firecrawl_api_key)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No data returned from crawl.\n"
     ]
    }
   ],
   "source": [
    "# Crawl a website\n",
    "crawl_url = 'https://dify.ai/'\n",
    "params = {\n",
    "    'crawlOptions': {\n",
    "        'limit': 100\n",
    "    }\n",
    "}\n",
    "crawl_result = app.crawl_url(crawl_url, params=params)\n",
    "\n",
    "if crawl_result is not None:\n",
    "    # Convert crawl results to JSON format, excluding 'content' field from each entry\n",
    "    cleaned_crawl_result = [{k: v for k, v in entry.items() if k != 'content'} for entry in crawl_result]\n",
    "\n",
    "    # Save the modified results as a text file containing JSON data\n",
    "    with open('crawl_result.txt', 'w') as file:\n",
    "        file.write(json.dumps(cleaned_crawl_result, indent=4))\n",
    "else:\n",
    "    print(\"No data returned from crawl.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the crawl results text file using the Files API\n",
    "text_file = genai.upload_file(path=\"crawl_result.txt\")\n",
    "\n",
    "# Wait for the file to finish processing\n",
    "while text_file.state.name == \"PROCESSING\":\n",
    "    print('Waiting for file to be processed.')\n",
    "    time.sleep(2)\n",
    "    text_file = genai.get_file(text_file.name)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a cache with a 15 minute TTL\n",
    "cache = caching.CachedContent.create(\n",
    "    model=\"models/gemini-1.5-flash-002\",\n",
    "    display_name=\"website crawl testing again\", # used to identify the cache\n",
    "    system_instruction=\"You are an expert at this website, and your job is to answer user's query based on the website you have access to.\",\n",
    "    contents=[text_file],\n",
    "    ttl=datetime.timedelta(minutes=15),\n",
    ")\n",
    "# Construct a GenerativeModel which uses the created cache.\n",
    "model = genai.GenerativeModel.from_cached_content(cached_content=cache)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dify.AI utilizes the **Firecrawl** service for website scraping. This service can crawl and convert any website into clean markdown or structured data that's ready for use in building RAG applications. \n",
      "\n",
      "Here's how Firecrawl helps:\n",
      "\n",
      "* **Crawling and Conversion:** Firecrawl crawls the website and converts the content into a format that is easily understood by LLMs, such as markdown or structured data.\n",
      "* **Clean Output:** Firecrawl ensures the data is clean and free of errors, making it easier to use in Dify's RAG engine.\n",
      "* **Parallel Crawling:** Firecrawl efficiently crawls web pages in parallel, delivering results quickly.\n",
      "\n",
      "You can find Firecrawl on their website: [https://www.firecrawl.dev/](https://www.firecrawl.dev/)\n",
      "\n",
      "Firecrawl offers both a cloud service and an open-source software (OSS) edition. \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Query the model\n",
    "response = model.generate_content([\"What powers website scraping with Dify?\"])\n",
    "response_dict = response.to_dict()\n",
    "response_text = response_dict['candidates'][0]['content']['parts'][0]['text']\n",
    "print(response_text)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
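As a usage note, once the cache exists it can serve any number of follow-up questions about the crawled site within its TTL, without re-uploading the crawl output. A minimal sketch, assuming the model object created from the cached content in the notebook above:

```python
# Ask a follow-up question against the same cached crawl data
# (assumes `model` was built from the cached content as in the notebook above).
followup = model.generate_content(["Does Firecrawl offer an open-source edition?"])
followup_text = followup.to_dict()['candidates'][0]['content']['parts'][0]['text']
print(followup_text)
```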