From 8d467c8ca764afd287a9c27717e537e850781d74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 7 Nov 2024 20:57:33 +0100
Subject: [PATCH] `WebScraper` refactor into `scrapeURL` (#714)
* feat: use strictNullChecking
* feat: switch logger to Winston
* feat(scrapeURL): first batch
* fix(scrapeURL): error swallow
* fix(scrapeURL): add timeout to EngineResultsTracker
* fix(scrapeURL): report unexpected error to sentry
* chore: remove unused modules
* feat(transformers/coerce): warn when a format's response is missing
* feat(scrapeURL): feature flag priorities, engine quality sorting, PDF and DOCX support
* (add note)
* feat(scrapeURL): wip readme
* feat(scrapeURL): LLM extract
* feat(scrapeURL): better warnings
* fix(scrapeURL/engines/fire-engine;playwright): fix screenshot
* feat(scrapeURL): add forceEngine internal option
* feat(scrapeURL/engines): scrapingbee
* feat(scrapeURL/transformers): uploadScreenshot
* feat(scrapeURL): more intense tests
* bunch of stuff
* get rid of WebScraper (mostly)
* adapt batch scrape
* add staging deploy workflow
* fix yaml
* fix logger issues
* fix v1 test schema
* feat(scrapeURL/fire-engine/chrome-cdp): remove wait inserts on actions
* scrapeURL: v0 backwards compat
* logger fixes
* feat(scrapeURL): v0 returnOnlyUrls support
* fix(scrapeURL/v0): URL leniency
* fix(batch-scrape): ts non-nullable
* fix(scrapeURL/fire-engine/chromecdp): fix wait action
* fix(logger): remove error debug key
* feat(requests.http): use dotenv expression
* fix(scrapeURL/extractMetadata): extract custom metadata
* fix crawl option conversion
* feat(scrapeURL): Add retry logic to robustFetch (see the sketch after this list)
* fix(scrapeURL): crawl stuff
* fix(scrapeURL): LLM extract
* fix(scrapeURL/v0): search fix
* fix(tests/v0): grant larger response size to v0 crawl status
* feat(scrapeURL): basic fetch engine
* feat(scrapeURL): playwright engine
* feat(scrapeURL): add url-specific parameters
* Update readme and examples
* added e2e tests for most parameters; actions, location, and iframes are still to be done
* fixed type
* Nick:
* Update scrape.ts
* Update index.ts
* added actions and base64 check
* Nick: skipTls feature flag?
* 403
* todo
* todo
* fixes
* yeet headers from url specific params
* add warning when final engine has feature deficit
* expose engine results tracker for ScrapeEvents implementation
* ingest scrape events
* fixed some tests
* comment
* Update index.test.ts
* fixed rawHtml
* Update index.test.ts
* update comments
* move geolocation to global f-e option, fix removeBase64Images
* Nick:
* trim url-specific params
* Update index.ts
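
A minimal sketch of the retry behavior described in the robustFetch item above (hypothetical names and backoff values; the actual helper lives in apps/api/src/scraper/scrapeURL/lib/fetch.ts and its signature may differ):

// Hypothetical sketch, not the shipped implementation: retries transient
// failures with exponential backoff, as the robustFetch change describes.
async function robustFetchSketch(
  url: string,
  init: RequestInit = {},
  maxRetries = 3,
): Promise<Response> {
  let lastError: unknown;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const response = await fetch(url, init);
      // Retry only on transient upstream errors; return everything else as-is.
      if (attempt < maxRetries && [502, 503, 504].includes(response.status)) {
        lastError = new Error(`upstream returned ${response.status}`);
      } else {
        return response;
      }
    } catch (error) {
      // Network-level failure (DNS, reset, abort) -- retry.
      lastError = error;
    }
    // Exponential backoff between attempts: 500 ms, 1 s, 2 s, ...
    await new Promise((resolve) => setTimeout(resolve, 500 * 2 ** attempt));
  }
  throw lastError;
}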
---------
Co-authored-by: Eric Ciarla
Co-authored-by: rafaelmmiller <8574157+rafaelmmiller@users.noreply.github.com>
Co-authored-by: Nicolas
---
.github/archive/js-sdk.yml | 2 -
.github/archive/python-sdk.yml | 2 -
.github/archive/rust-sdk.yml | 2 -
.github/workflows/ci.yml | 2 -
.github/workflows/deploy-image-staging.yml | 32 +
CONTRIBUTING.md | 1 -
SELF_HOST.md | 1 -
apps/api/.env.example | 5 -
apps/api/jest.setup.js | 2 +-
apps/api/package.json | 10 +-
apps/api/pnpm-lock.yaml | 1772 ++---------------
apps/api/requests.http | 16 +-
apps/api/sharedLibs/go-html-to-md/.gitignore | 2 +
.../__tests__/e2e_full_withAuth/index.test.ts | 2 +-
.../src/__tests__/e2e_noAuth/index.test.ts | 1 -
.../__tests__/e2e_v1_withAuth/index.test.ts | 26 +-
.../e2e_v1_withAuth_all_params/index.test.ts | 603 ++++++
.../src/__tests__/e2e_withAuth/index.test.ts | 3 +-
apps/api/src/controllers/auth.ts | 50 +-
.../controllers/v0/admin/acuc-cache-clear.ts | 6 +-
apps/api/src/controllers/v0/admin/queue.ts | 18 +-
.../src/controllers/v0/admin/redis-health.ts | 16 +-
apps/api/src/controllers/v0/crawl-cancel.ts | 14 +-
apps/api/src/controllers/v0/crawl-status.ts | 20 +-
apps/api/src/controllers/v0/crawl.ts | 41 +-
apps/api/src/controllers/v0/crawlPreview.ts | 34 +-
apps/api/src/controllers/v0/keyAuth.ts | 7 +-
apps/api/src/controllers/v0/scrape.ts | 62 +-
apps/api/src/controllers/v0/search.ts | 38 +-
apps/api/src/controllers/v0/status.ts | 4 +-
apps/api/src/controllers/v1/batch-scrape.ts | 16 +-
apps/api/src/controllers/v1/crawl-cancel.ts | 6 +-
.../api/src/controllers/v1/crawl-status-ws.ts | 32 +-
apps/api/src/controllers/v1/crawl-status.ts | 17 +-
apps/api/src/controllers/v1/crawl.ts | 41 +-
apps/api/src/controllers/v1/map.ts | 25 +-
apps/api/src/controllers/v1/scrape-status.ts | 2 +-
apps/api/src/controllers/v1/scrape.ts | 64 +-
apps/api/src/controllers/v1/types.ts | 161 +-
apps/api/src/example.ts | 19 -
apps/api/src/index.ts | 33 +-
apps/api/src/lib/LLM-extraction/index.ts | 12 +-
apps/api/src/lib/LLM-extraction/models.ts | 2 +-
apps/api/src/lib/batch-process.ts | 2 +-
apps/api/src/lib/crawl-redis.ts | 15 +-
apps/api/src/lib/entities.ts | 5 +-
apps/api/src/lib/html-to-markdown.ts | 8 +-
apps/api/src/lib/job-priority.ts | 10 +-
apps/api/src/lib/load-testing-example.ts | 42 -
apps/api/src/lib/logger.ts | 119 +-
apps/api/src/lib/map-cosine.ts | 4 +-
apps/api/src/lib/scrape-events.ts | 12 +-
apps/api/src/lib/supabase-jobs.ts | 6 +-
apps/api/src/lib/withAuth.ts | 17 +-
apps/api/src/main/runWebScraper.ts | 164 +-
apps/api/src/routes/admin.ts | 2 +-
apps/api/src/routes/v1.ts | 25 +-
.../WebScraper/__tests__/crawler.test.ts | 160 --
.../WebScraper/__tests__/single_url.test.ts | 37 -
apps/api/src/scraper/WebScraper/crawler.ts | 197 +-
.../WebScraper/custom/handleCustomScraping.ts | 8 +-
apps/api/src/scraper/WebScraper/global.ts | 1 -
apps/api/src/scraper/WebScraper/index.ts | 743 -------
.../src/scraper/WebScraper/scrapers/fetch.ts | 89 -
.../scraper/WebScraper/scrapers/fireEngine.ts | 230 ---
.../scraper/WebScraper/scrapers/playwright.ts | 111 --
.../WebScraper/scrapers/scrapingBee.ts | 92 -
apps/api/src/scraper/WebScraper/single_url.ts | 506 -----
apps/api/src/scraper/WebScraper/sitemap.ts | 18 +-
.../utils/__tests__/docxProcessor.test.ts | 15 -
.../utils/__tests__/parseTable.test.ts | 128 --
.../utils/__tests__/pdfProcessor.test.ts | 19 -
.../__tests__/removeUnwantedElements.test.ts | 192 --
.../utils/__tests__/replacePaths.test.ts | 127 --
.../utils/__tests__/socialBlockList.test.ts | 66 -
.../src/scraper/WebScraper/utils/blocklist.ts | 4 +-
.../WebScraper/utils/custom/website_params.ts | 198 --
.../scraper/WebScraper/utils/docxProcessor.ts | 79 -
.../scraper/WebScraper/utils/excludeTags.ts | 42 -
.../WebScraper/utils/imageDescription.ts | 89 -
.../src/scraper/WebScraper/utils/metadata.ts | 185 --
.../scraper/WebScraper/utils/parseTable.ts | 74 -
.../scraper/WebScraper/utils/pdfProcessor.ts | 140 --
.../utils/removeUnwantedElements.ts | 82 -
.../scraper/WebScraper/utils/replacePaths.ts | 85 -
.../api/src/scraper/WebScraper/utils/utils.ts | 59 -
apps/api/src/scraper/scrapeURL/README.md | 25 +
.../scraper/scrapeURL/engines/docx/index.ts | 15 +
.../scraper/scrapeURL/engines/fetch/index.ts | 28 +
.../engines/fire-engine/checkStatus.ts | 107 +
.../scrapeURL/engines/fire-engine/delete.ts | 33 +
.../scrapeURL/engines/fire-engine/index.ts | 198 ++
.../scrapeURL/engines/fire-engine/scrape.ts | 94 +
.../src/scraper/scrapeURL/engines/index.ts | 295 +++
.../scraper/scrapeURL/engines/pdf/index.ts | 114 ++
.../scrapeURL/engines/playwright/index.ts | 42 +
.../scrapeURL/engines/scrapingbee/index.ts | 66 +
.../scrapeURL/engines/utils/downloadFile.ts | 45 +
.../engines/utils/specialtyHandler.ts | 14 +
apps/api/src/scraper/scrapeURL/error.ts | 34 +
apps/api/src/scraper/scrapeURL/index.ts | 320 +++
.../src/scraper/scrapeURL/lib/extractLinks.ts | 35 +
.../scraper/scrapeURL/lib/extractMetadata.ts | 132 ++
apps/api/src/scraper/scrapeURL/lib/fetch.ts | 144 ++
.../scrapeURL/lib/removeUnwantedElements.ts | 111 ++
.../scrapeURL/lib/urlSpecificParams.ts | 51 +
.../src/scraper/scrapeURL/scrapeURL.test.ts | 388 ++++
.../scraper/scrapeURL/transformers/index.ts | 130 ++
.../scrapeURL/transformers/llmExtract.ts | 131 ++
.../transformers/removeBase64Images.ts | 11 +
.../transformers/uploadScreenshot.ts | 26 +
apps/api/src/search/fireEngine.ts | 4 +-
apps/api/src/search/googlesearch.ts | 12 +-
apps/api/src/search/index.ts | 30 +-
apps/api/src/services/alerts/index.ts | 14 +-
apps/api/src/services/alerts/slack.ts | 6 +-
apps/api/src/services/billing/auto_charge.ts | 16 +-
.../src/services/billing/credit_billing.ts | 55 +-
.../api/src/services/billing/issue_credits.ts | 6 +-
apps/api/src/services/billing/stripe.ts | 12 +-
apps/api/src/services/idempotency/create.ts | 4 +-
apps/api/src/services/idempotency/validate.ts | 6 +-
apps/api/src/services/logging/crawl_log.ts | 4 +-
apps/api/src/services/logging/log_job.ts | 20 +-
apps/api/src/services/logging/scrape_log.ts | 8 +-
apps/api/src/services/logtail.ts | 20 -
.../notification/email_notification.ts | 26 +-
apps/api/src/services/posthog.ts | 10 +-
apps/api/src/services/queue-jobs.ts | 4 +-
apps/api/src/services/queue-service.ts | 6 +-
apps/api/src/services/queue-worker.ts | 167 +-
apps/api/src/services/rate-limiter.test.ts | 2 +-
apps/api/src/services/rate-limiter.ts | 2 +-
apps/api/src/services/redis.ts | 14 +-
apps/api/src/services/redlock.ts | 2 +-
apps/api/src/services/sentry.ts | 4 +-
apps/api/src/services/supabase.ts | 6 +-
apps/api/src/services/system-monitor.ts | 8 +-
apps/api/src/services/webhook.ts | 19 +-
apps/api/src/types.ts | 50 +-
apps/api/tsconfig.json | 5 +-
.../kubernetes/cluster-install/secret.yaml | 2 -
142 files changed, 4230 insertions(+), 6334 deletions(-)
create mode 100644 .github/workflows/deploy-image-staging.yml
create mode 100644 apps/api/sharedLibs/go-html-to-md/.gitignore
create mode 100644 apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
delete mode 100644 apps/api/src/example.ts
delete mode 100644 apps/api/src/lib/load-testing-example.ts
delete mode 100644 apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/global.ts
delete mode 100644 apps/api/src/scraper/WebScraper/index.ts
delete mode 100644 apps/api/src/scraper/WebScraper/scrapers/fetch.ts
delete mode 100644 apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
delete mode 100644 apps/api/src/scraper/WebScraper/scrapers/playwright.ts
delete mode 100644 apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts
delete mode 100644 apps/api/src/scraper/WebScraper/single_url.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/__tests__/docxProcessor.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/__tests__/parseTable.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/__tests__/pdfProcessor.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/__tests__/removeUnwantedElements.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/__tests__/replacePaths.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/custom/website_params.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/docxProcessor.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/excludeTags.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/imageDescription.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/metadata.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/parseTable.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/removeUnwantedElements.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/replacePaths.ts
delete mode 100644 apps/api/src/scraper/WebScraper/utils/utils.ts
create mode 100644 apps/api/src/scraper/scrapeURL/README.md
create mode 100644 apps/api/src/scraper/scrapeURL/engines/docx/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/fetch/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/utils/downloadFile.ts
create mode 100644 apps/api/src/scraper/scrapeURL/engines/utils/specialtyHandler.ts
create mode 100644 apps/api/src/scraper/scrapeURL/error.ts
create mode 100644 apps/api/src/scraper/scrapeURL/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/lib/extractLinks.ts
create mode 100644 apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts
create mode 100644 apps/api/src/scraper/scrapeURL/lib/fetch.ts
create mode 100644 apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts
create mode 100644 apps/api/src/scraper/scrapeURL/lib/urlSpecificParams.ts
create mode 100644 apps/api/src/scraper/scrapeURL/scrapeURL.test.ts
create mode 100644 apps/api/src/scraper/scrapeURL/transformers/index.ts
create mode 100644 apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts
create mode 100644 apps/api/src/scraper/scrapeURL/transformers/removeBase64Images.ts
create mode 100644 apps/api/src/scraper/scrapeURL/transformers/uploadScreenshot.ts
delete mode 100644 apps/api/src/services/logtail.ts
diff --git a/.github/archive/js-sdk.yml b/.github/archive/js-sdk.yml
index c84bb8b1..7ef096d4 100644
--- a/.github/archive/js-sdk.yml
+++ b/.github/archive/js-sdk.yml
@@ -8,7 +8,6 @@ env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
- LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
@@ -21,7 +20,6 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
- HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
HDX_NODE_BETA_MODE: 1
jobs:
diff --git a/.github/archive/python-sdk.yml b/.github/archive/python-sdk.yml
index 27449888..bdefeab6 100644
--- a/.github/archive/python-sdk.yml
+++ b/.github/archive/python-sdk.yml
@@ -8,7 +8,6 @@ env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
- LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
@@ -21,7 +20,6 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
- HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
HDX_NODE_BETA_MODE: 1
jobs:
diff --git a/.github/archive/rust-sdk.yml b/.github/archive/rust-sdk.yml
index 62deeaab..792e06c2 100644
--- a/.github/archive/rust-sdk.yml
+++ b/.github/archive/rust-sdk.yml
@@ -8,7 +8,6 @@ env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
- LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
@@ -21,7 +20,6 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
- HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
HDX_NODE_BETA_MODE: 1
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8a9a74cc..ef7d1cba 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,7 +12,6 @@ env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
- LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
@@ -25,7 +24,6 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
- HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
HDX_NODE_BETA_MODE: 1
FIRE_ENGINE_BETA_URL: ${{ secrets.FIRE_ENGINE_BETA_URL }}
USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }}
diff --git a/.github/workflows/deploy-image-staging.yml b/.github/workflows/deploy-image-staging.yml
new file mode 100644
index 00000000..e74aba9a
--- /dev/null
+++ b/.github/workflows/deploy-image-staging.yml
@@ -0,0 +1,32 @@
+name: STAGING Deploy Images to GHCR
+
+env:
+ DOTNET_VERSION: '6.0.x'
+
+on:
+ push:
+ branches:
+ - mog/webscraper-refactor
+ workflow_dispatch:
+
+jobs:
+ push-app-image:
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ working-directory: './apps/api'
+ steps:
+ - name: 'Checkout GitHub Action'
+ uses: actions/checkout@main
+
+ - name: 'Login to GitHub Container Registry'
+ uses: docker/login-action@v1
+ with:
+ registry: ghcr.io
+ username: ${{github.actor}}
+ password: ${{secrets.GITHUB_TOKEN}}
+
+ - name: 'Build Inventory Image'
+ run: |
+ docker build . --tag ghcr.io/mendableai/firecrawl-staging:latest
+ docker push ghcr.io/mendableai/firecrawl-staging:latest
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 73ccf0e6..b8c1f0a5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,7 +41,6 @@ TEST_API_KEY= # use if you've set up authentication and want to test with a real
SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
BULL_AUTH_KEY= @
-LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
diff --git a/SELF_HOST.md b/SELF_HOST.md
index 78228485..46e08db9 100644
--- a/SELF_HOST.md
+++ b/SELF_HOST.md
@@ -62,7 +62,6 @@ TEST_API_KEY= # use if you've set up authentication and want to test with a real
SCRAPING_BEE_API_KEY= # use if you'd like to use as a fallback scraper
OPENAI_API_KEY= # add for LLM-dependent features (e.g., image alt generation)
BULL_AUTH_KEY= @
-LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
diff --git a/apps/api/.env.example b/apps/api/.env.example
index 6ba49daa..d54c696d 100644
--- a/apps/api/.env.example
+++ b/apps/api/.env.example
@@ -33,8 +33,6 @@ SCRAPING_BEE_API_KEY=
# add for LLM dependednt features (image alt generation, etc.)
OPENAI_API_KEY=
BULL_AUTH_KEY=@
-# use if you're configuring basic logging with logtail
-LOGTAIL_KEY=
# set if you have a llamaparse key you'd like to use to parse pdfs
LLAMAPARSE_API_KEY=
# set if you'd like to send slack server health status messages
@@ -54,9 +52,6 @@ STRIPE_PRICE_ID_STANDARD_NEW_YEARLY=
STRIPE_PRICE_ID_GROWTH=
STRIPE_PRICE_ID_GROWTH_YEARLY=
-HYPERDX_API_KEY=
-HDX_NODE_BETA_MODE=1
-
# set if you'd like to use the fire engine closed beta
FIRE_ENGINE_BETA_URL=
diff --git a/apps/api/jest.setup.js b/apps/api/jest.setup.js
index c158ca42..0b3b09b7 100644
--- a/apps/api/jest.setup.js
+++ b/apps/api/jest.setup.js
@@ -1 +1 @@
-global.fetch = require('jest-fetch-mock');
+// global.fetch = require('jest-fetch-mock');
diff --git a/apps/api/package.json b/apps/api/package.json
index a0f9cf8e..bb4ea268 100644
--- a/apps/api/package.json
+++ b/apps/api/package.json
@@ -32,9 +32,11 @@
"@tsconfig/recommended": "^1.0.3",
"@types/body-parser": "^1.19.2",
"@types/cors": "^2.8.13",
+ "@types/escape-html": "^1.0.4",
"@types/express": "^4.17.17",
"@types/jest": "^29.5.12",
"@types/node": "^20.14.1",
+ "@types/pdf-parse": "^1.1.4",
"body-parser": "^1.20.1",
"express": "^4.18.2",
"jest": "^29.6.3",
@@ -53,9 +55,7 @@
"@bull-board/api": "^5.20.5",
"@bull-board/express": "^5.20.5",
"@devil7softwares/pos": "^1.0.2",
- "@dqbd/tiktoken": "^1.0.13",
- "@hyperdx/node-opentelemetry": "^0.8.1",
- "@logtail/node": "^0.4.12",
+ "@dqbd/tiktoken": "^1.0.16",
"@nangohq/node": "^0.40.8",
"@sentry/cli": "^2.33.1",
"@sentry/node": "^8.26.0",
@@ -78,6 +78,7 @@
"date-fns": "^3.6.0",
"dotenv": "^16.3.1",
"dotenv-cli": "^7.4.2",
+ "escape-html": "^1.0.3",
"express-rate-limit": "^7.3.1",
"express-ws": "^5.0.2",
"form-data": "^4.0.0",
@@ -92,6 +93,7 @@
"languagedetect": "^2.0.0",
"logsnag": "^1.0.0",
"luxon": "^3.4.3",
+ "marked": "^14.1.2",
"md5": "^2.3.0",
"moment": "^2.29.4",
"mongoose": "^8.4.4",
@@ -114,6 +116,8 @@
"typesense": "^1.5.4",
"unstructured-client": "^0.11.3",
"uuid": "^10.0.0",
+ "winston": "^3.14.2",
+ "winston-transport": "^4.8.0",
"wordpos": "^2.1.0",
"ws": "^8.18.0",
"xml2js": "^0.6.2",
diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml
index 095b507c..3350c74e 100644
--- a/apps/api/pnpm-lock.yaml
+++ b/apps/api/pnpm-lock.yaml
@@ -24,14 +24,8 @@ importers:
specifier: ^1.0.2
version: 1.0.2
'@dqbd/tiktoken':
- specifier: ^1.0.13
- version: 1.0.15
- '@hyperdx/node-opentelemetry':
- specifier: ^0.8.1
- version: 0.8.1
- '@logtail/node':
- specifier: ^0.4.12
- version: 0.4.21
+ specifier: ^1.0.16
+ version: 1.0.16
'@nangohq/node':
specifier: ^0.40.8
version: 0.40.8
@@ -98,6 +92,9 @@ importers:
dotenv-cli:
specifier: ^7.4.2
version: 7.4.2
+ escape-html:
+ specifier: ^1.0.3
+ version: 1.0.3
express-rate-limit:
specifier: ^7.3.1
version: 7.3.1(express@4.19.2)
@@ -140,6 +137,9 @@ importers:
luxon:
specifier: ^3.4.3
version: 3.4.4
+ marked:
+ specifier: ^14.1.2
+ version: 14.1.2
md5:
specifier: ^2.3.0
version: 2.3.0
@@ -206,6 +206,12 @@ importers:
uuid:
specifier: ^10.0.0
version: 10.0.0
+ winston:
+ specifier: ^3.14.2
+ version: 3.14.2
+ winston-transport:
+ specifier: ^4.8.0
+ version: 4.8.0
wordpos:
specifier: ^2.1.0
version: 2.1.0
@@ -237,6 +243,9 @@ importers:
'@types/cors':
specifier: ^2.8.13
version: 2.8.17
+ '@types/escape-html':
+ specifier: ^1.0.4
+ version: 1.0.4
'@types/express':
specifier: ^4.17.17
version: 4.17.21
@@ -246,6 +255,9 @@ importers:
'@types/node':
specifier: ^20.14.1
version: 20.14.1
+ '@types/pdf-parse':
+ specifier: ^1.1.4
+ version: 1.1.4
body-parser:
specifier: ^1.20.1
version: 1.20.2
@@ -494,42 +506,22 @@ packages:
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
engines: {node: '>=12'}
+ '@dabh/diagnostics@2.0.3':
+ resolution: {integrity: sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==}
+
'@devil7softwares/pos@1.0.2':
resolution: {integrity: sha512-49Ke26+++Ix8C5LChi4uga7aWgMuc5zV1NqjGxXxE7DralZwe+hvUuSJmBDWS+HHZaK9rFzLNdufV4HAvvOxPA==}
engines: {node: '>=0'}
deprecated: This package has been renamed to `fast-tag-pos`
- '@dqbd/tiktoken@1.0.15':
- resolution: {integrity: sha512-a6I67K1xUkuqcuwulobIJiLikkoE7egMaviI1Jg5bxSn2V7QGqXsGE3jTKr8UIOU/o74mAAd5TkeXFNBtaKF4A==}
+ '@dqbd/tiktoken@1.0.16':
+ resolution: {integrity: sha512-4uIrs5qxAwFVFFEP507HZIZhGOsgfaEMEWDXWalr+v+XP+wJwP60EVmkZtQyQe70IsKGVkx5umBxw4NfmU0pPg==}
'@flydotio/dockerfile@0.4.11':
resolution: {integrity: sha512-L52UAfrOhmAn3T4TxpeRofQOSO+Kctg+uraB4nLzo4mvvh+4Z7HYxSi7Dnq0Kirz+xx6fDIc4OMNT1EdaORecA==}
engines: {node: '>=16.0.0'}
hasBin: true
- '@grpc/grpc-js@1.10.10':
- resolution: {integrity: sha512-HPa/K5NX6ahMoeBv15njAc/sfF4/jmiXLar9UlC2UfHFKZzsCVLc3wbe7+7qua7w9VPh2/L6EBxyAV7/E8Wftg==}
- engines: {node: '>=12.10.0'}
-
- '@grpc/proto-loader@0.7.13':
- resolution: {integrity: sha512-AiXO/bfe9bmxBjxxtYxFAXGZvMaN5s8kO+jBHAJCON8rJoB5YS/D6X7ZNc6XQkuHNmyl4CYaMI1fJ/Gn27RGGw==}
- engines: {node: '>=6'}
- hasBin: true
-
- '@hyperdx/instrumentation-exception@0.1.0':
- resolution: {integrity: sha512-Jgk7JY5J07Mq9fgXApGVhSkS4+WdzzRcWLReAZhxgo46KShxE6w614mFqUSnuo+z6ghlehsy4ForViUfxrFyew==}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@hyperdx/instrumentation-sentry-node@0.1.0':
- resolution: {integrity: sha512-n8d/K/8M2owL2w4FNfV+lSVW6yoznEj5SdRCysV/ZIfyrZwpijiiSn7gkRcrOfKHmrxrupyp7DVg5L19cGuH6A==}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@hyperdx/node-opentelemetry@0.8.1':
- resolution: {integrity: sha512-wNw0yQf54j/9KXVWeEOu8G6C5FT5EFlrz4dcmscTkwCvo6fQOLRZa/NbGcqugt0LSFMc0/6/Q5RDWVqDpEn0LQ==}
- hasBin: true
-
'@ioredis/commands@1.2.0':
resolution: {integrity: sha512-Sx1pU8EM64o2BrqNpEO1CNLtKQwyhuXuqyfH7oGKCk+1a33d2r5saW8zNwm3j6BTExtjrv2BxTgzzkMwts6vGg==}
@@ -636,9 +628,6 @@ packages:
'@jridgewell/trace-mapping@0.3.9':
resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==}
- '@js-sdsl/ordered-map@4.4.2':
- resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==}
-
'@langchain/core@0.2.12':
resolution: {integrity: sha512-zaKvUcWU1Cxcpd/fxklygY6iUrxls10KTRzyHZGBAIKJq1JD/B10vX59YlFgBs7nqqVTEvaChfIE0O0e2qBttA==}
engines: {node: '>=18'}
@@ -651,28 +640,12 @@ packages:
resolution: {integrity: sha512-cXWgKE3sdWLSqAa8ykbCcUsUF1Kyr5J3HOWYGuobhPEycXW4WI++d5DhzdpL238mzoEXTi90VqfSCra37l5YqA==}
engines: {node: '>=18'}
- '@logtail/core@0.4.21':
- resolution: {integrity: sha512-QDq194+24bwi4e+a/pxyf4X67NewhTvBmh9iwM2NhbSVSQz4Fo8xQn1Ul8zuUrXETycu/Od2D8wT2tZFNFx/7A==}
-
- '@logtail/node@0.4.21':
- resolution: {integrity: sha512-zpwkhJgcYaM+vsjotHRJthc0ot1vP0CAVy+fwrkL8XjfdC3NHiWb6f0agQpHlqdRX8RTsAbcYpWNXKPpFB5U9Q==}
-
- '@logtail/tools@0.4.21':
- resolution: {integrity: sha512-xIaolScUwJEikllopGphxBX0lVlN/rA8pLAZiNCMNJXpPbwitoFKLW3w4qRuYdKoFCCJZKwOdwEqU2Fv0i9Cuw==}
-
- '@logtail/types@0.4.20':
- resolution: {integrity: sha512-nYsum10eJMTo+ySBlYXvSrvgD1NDCVUeOlxLBbelq3XUmHu9L48VNR3P0BOmhLamYCTEgjatTj0PyPLfjL1W9g==}
-
'@mixmark-io/domino@2.2.0':
resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==}
'@mongodb-js/saslprep@1.1.7':
resolution: {integrity: sha512-dCHW/oEX0KJ4NjDULBo3JiOaK5+6axtpBbS+ao2ZInoAL9/YRQLhXzSNAFz7hP4nzLkIqsfYAK/PDE3+XHny0Q==}
- '@msgpack/msgpack@2.8.0':
- resolution: {integrity: sha512-h9u4u/jiIRKbq25PM+zymTyW6bhTzELvOoUd+AvYriWOAKpLGnIamaET3pnHYoI5iYphAHBI4ayx0MehR+VVPQ==}
- engines: {node: '>= 10'}
-
'@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3':
resolution: {integrity: sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw==}
cpu: [arm64]
@@ -710,10 +683,6 @@ packages:
'@one-ini/wasm@0.1.1':
resolution: {integrity: sha512-XuySG1E38YScSJoMlqovLru4KTUNSjgVTIjyh7qMX6aNN5HY5Ct5LhRJdxO79JtTzKfzV/bnWpz+zquYrISsvw==}
- '@opentelemetry/api-logs@0.51.1':
- resolution: {integrity: sha512-E3skn949Pk1z2XtXu/lxf6QAZpawuTM/IUEXcAzpiUkTd73Hmvw26FiN3cJuTmkpM5hZzHwkomVdtrh/n/zzwA==}
- engines: {node: '>=14'}
-
'@opentelemetry/api-logs@0.52.1':
resolution: {integrity: sha512-qnSqB2DQ9TPP96dl8cDubDvrUyWc0/sK81xHTK8eSUspzDM3bsewX903qclQFvVhgStjRWdC5bLb3kQqMkfV5A==}
engines: {node: '>=14'}
@@ -722,567 +691,148 @@ packages:
resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
engines: {node: '>=8.0.0'}
- '@opentelemetry/auto-instrumentations-node@0.46.1':
- resolution: {integrity: sha512-s0CwmY9KYtPawOhV5YO2Gf62uVOQRNvT6Or8IZ0S4gr/kPVNhoMehTsQvqBwSWQfoFrkmW3KKOHiKJEp4dVGXg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.4.1
-
- '@opentelemetry/context-async-hooks@1.24.1':
- resolution: {integrity: sha512-R5r6DO4kgEOVBxFXhXjwospLQkv+sYxwCfjvoZBe7Zm6KKXAV9kDSJhi/D1BweowdZmO+sdbENLs374gER8hpQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
'@opentelemetry/context-async-hooks@1.25.1':
resolution: {integrity: sha512-UW/ge9zjvAEmRWVapOP0qyCvPulWU6cQxGxDbWEFfGOj1VBBZAuOqTo3X6yWmDTD3Xe15ysCZChHncr2xFMIfQ==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': '>=1.0.0 <1.10.0'
- '@opentelemetry/core@1.24.1':
- resolution: {integrity: sha512-wMSGfsdmibI88K9wB498zXY04yThPexo8jvwNNlm542HZB7XrrMRBbAyKJqG8qDRJwIBdBrPMi4V9ZPW/sqrcg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
'@opentelemetry/core@1.25.1':
resolution: {integrity: sha512-GeT/l6rBYWVQ4XArluLVB6WWQ8flHbdb6r2FCHC3smtdOAbrJBIv35tpV/yp9bmYUJf+xmZpu9DRTIeJVhFbEQ==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': '>=1.0.0 <1.10.0'
- '@opentelemetry/exporter-logs-otlp-http@0.51.1':
- resolution: {integrity: sha512-cd6GZ9IqCrmvOJwi1HjRR7o9ihF7xhZTekgxUsoyTsPF+SjKMsLF9ur6HeBYkYhk+YjZ1ken3XUMH47oUTvu8Q==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/exporter-metrics-otlp-http@0.51.1':
- resolution: {integrity: sha512-oFXvif9iksHUxrzG3P8ohMLt7xSrl+oDMqxD/3XXndU761RFAKSbRDpfrQs25U5D+A2aMV3qk+4kfUWdJhZ77g==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/exporter-metrics-otlp-proto@0.51.1':
- resolution: {integrity: sha512-jhj8xD6S4cydXGCuf2tp56+4QI0DbDH6g+0MiPPJVdXjxLj+iycQuqB2cwljWpByblFaOjyUsL/VKtm8C7sQ9A==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/exporter-trace-otlp-grpc@0.51.1':
- resolution: {integrity: sha512-P9+Hkszih95ITvldGZ+kXvj9HpD1QfS+PwooyHK72GYA+Bgm+yUSAsDkUkDms8+s9HW6poxURv3LcjaMuBBpVQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/exporter-trace-otlp-http@0.51.1':
- resolution: {integrity: sha512-n+LhLPsX07URh+HhV2SHVSvz1t4G/l/CE5BjpmhAPqeTceFac1VpyQkavWEJbvnK5bUEXijWt4LxAxFpt2fXyw==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/exporter-trace-otlp-proto@0.51.1':
- resolution: {integrity: sha512-SE9f0/6V6EeXC9i+WA4WFjS1EYgaBCpAnI5+lxWvZ7iO7EU1IvHvZhP6Kojr0nLldo83gqg6G7OWFqsID3uF+w==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/exporter-zipkin@1.24.1':
- resolution: {integrity: sha512-+Rl/VFmu2n6eaRMnVbyfZx1DqR/1KNyWebYuHyQBZaEAVIn/ZLgmofRpXN1X2nhJ4BNaptQUNxAstCYYz6dKoQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/instrumentation-amqplib@0.37.0':
- resolution: {integrity: sha512-XjOHeAOreh0XX4jlzTTUWWqu1dIGvMWM8yvd43JJdRMAmTZisezjKsxLjMEMIvF0PzQdoXwh9DiS9nYE4/QmpA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-aws-lambda@0.41.1':
- resolution: {integrity: sha512-/BLG+0DQr2tCILFGJKJH2Fg6eyjhqOlVflYpNddUEXnzyQ/PAhTdgirkqbICFgeSW2XYcEY9zXpuRldrVNw9cA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-aws-sdk@0.41.0':
- resolution: {integrity: sha512-7+8WMY0LQeqv6KIObXK+Py44qNFLeCU0ZLLxSZtXEbZ2wJlQISP1St65jRto0NV7isnZoyuOxb2+ZpypPPNv7Q==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-bunyan@0.38.0':
- resolution: {integrity: sha512-ThNcgTE22W7PKzTzz5qfGxb5Gf7rA3EORousYo2nJWHHcF6gqiMNv2+GXY3MdpjLBr8IgCfhtvbQdD6rlIPUpA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-cassandra-driver@0.38.0':
- resolution: {integrity: sha512-ML4Vw0it2uIpETfX6skuSIGLHF9D3TUKOfdfrk9lnrzzWSzg2aS6pl3UeepkQX4wXHdzlxVRB0USrUqsmxMd5Q==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-connect@0.36.1':
- resolution: {integrity: sha512-xI5Q/CMmzBmHshPnzzjD19ptFaYO/rQWzokpNio4QixZYWhJsa35QgRvN9FhPkwgtuJIbt/CWWAufJ3egJNHEA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-connect@0.38.0':
resolution: {integrity: sha512-2/nRnx3pjYEmdPIaBwtgtSviTKHWnDZN3R+TkRUnhIVrvBKVcq+I5B2rtd6mr6Fe9cHlZ9Ojcuh7pkNh/xdWWg==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-cucumber@0.6.0':
- resolution: {integrity: sha512-90eAF2JPSbPAsOuGfYyctYaoYXqy4Clbxt0j/uUgg6dto4oqwUw3AvTyHQEztLGxeXwEzC1EQigDtVPg5ZexYA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/instrumentation-dataloader@0.9.0':
- resolution: {integrity: sha512-fiyCOAw+tlbneok1x7P5UseoGW5nS60CWWx7NXzYW+WOexpSmDQQW7olttGa8fqE6/sVCoi1l+QdfVoETZi/NQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-dns@0.36.1':
- resolution: {integrity: sha512-NWRbQ7q0E3co/CNTWLZZvUzZoKhB1iTitY282IM8HDTXkA6VRssCfOcvaHw5ezOh23TJbAeYxmmpVj4hFvDPYQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-express@0.39.0':
- resolution: {integrity: sha512-AG8U7z7D0JcBu/7dDcwb47UMEzj9/FMiJV2iQZqrsZnxR3FjB9J9oIH2iszJYci2eUdp2WbdvtpD9RV/zmME5A==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-express@0.41.1':
resolution: {integrity: sha512-uRx0V3LPGzjn2bxAnV8eUsDT82vT7NTwI0ezEuPMBOTOsnPpGhWdhcdNdhH80sM4TrWrOfXm9HGEdfWE3TRIww==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-fastify@0.36.1':
- resolution: {integrity: sha512-3Nfm43PI0I+3EX+1YbSy6xbDu276R1Dh1tqAk68yd4yirnIh52Kd5B+nJ8CgHA7o3UKakpBjj6vSzi5vNCzJIA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-fastify@0.38.0':
resolution: {integrity: sha512-HBVLpTSYpkQZ87/Df3N0gAw7VzYZV3n28THIBrJWfuqw3Or7UqdhnjeuMIPQ04BKk3aZc0cWn2naSQObbh5vXw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-fs@0.12.0':
- resolution: {integrity: sha512-Waf+2hekJRxIwq1PmivxOWLdMOtYbY22hKr34gEtfbv2CArSv8FBJH4BmQxB9o5ZcwkdKu589qs009dbuSfNmQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-fs@0.14.0':
resolution: {integrity: sha512-pVc8P5AgliC1DphyyBUgsxXlm2XaPH4BpYvt7rAZDMIqUpRk8gs19SioABtKqqxvFzg5jPtgJfJsdxq0Y+maLw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-generic-pool@0.36.0':
- resolution: {integrity: sha512-CExAEqJvK8jYxrhN8cl6EaGg57EGJi+qsSKouLC5lndXi68gZLOKbZIMZg4pF0kNfp/D4BFaGmA6Ap7d5WoPTw==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-graphql@0.40.0':
- resolution: {integrity: sha512-LVRdEHWACWOczv2imD+mhUrLMxsEjPPi32vIZJT57zygR5aUiA4em8X3aiGOCycgbMWkIu8xOSGSxdx3JmzN+w==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-graphql@0.42.0':
resolution: {integrity: sha512-N8SOwoKL9KQSX7z3gOaw5UaTeVQcfDO1c21csVHnmnmGUoqsXbArK2B8VuwPWcv6/BC/i3io+xTo7QGRZ/z28Q==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-grpc@0.51.1':
- resolution: {integrity: sha512-coRTugFL7De/VNH/1NqPlxnfik87jS+jBXsny+Y/lMhXIA3x8t71IyL9ihuewkD+lNtIxIz6Y7Sq6kPuOqz5dQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-hapi@0.38.0':
- resolution: {integrity: sha512-ZcOqEuwuutTDYIjhDIStix22ECblG/i9pHje23QGs4Q4YS4RMaZ5hKCoQJxW88Z4K7T53rQkdISmoXFKDV8xMg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-hapi@0.40.0':
resolution: {integrity: sha512-8U/w7Ifumtd2bSN1OLaSwAAFhb9FyqWUki3lMMB0ds+1+HdSxYBe9aspEJEgvxAqOkrQnVniAPTEGf1pGM7SOw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-http@0.51.1':
- resolution: {integrity: sha512-6b3nZnFFEz/3xZ6w8bVxctPUWIPWiXuPQ725530JgxnN1cvYFd8CJ75PrHZNjynmzSSnqBkN3ef4R9N+RpMh8Q==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-http@0.52.1':
resolution: {integrity: sha512-dG/aevWhaP+7OLv4BQQSEKMJv8GyeOp3Wxl31NHqE8xo9/fYMfEljiZphUHIfyg4gnZ9swMyWjfOQs5GUQe54Q==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-ioredis@0.40.0':
- resolution: {integrity: sha512-Jv/fH7KhpWe4KBirsiqeUJIYrsdR2iu2l4nWhfOlRvaZ+zYIiLEzTQR6QhBbyRoAbU4OuYJzjWusOmmpGBnwng==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-ioredis@0.42.0':
resolution: {integrity: sha512-P11H168EKvBB9TUSasNDOGJCSkpT44XgoM6d3gRIWAa9ghLpYhl0uRkS8//MqPzcJVHr3h3RmfXIpiYLjyIZTw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-knex@0.36.1':
- resolution: {integrity: sha512-6bEuiI+yMf3D0+ZWZE2AKmXhIhBvZ0brdO/0A8lUqeqeS+sS4fTcjA1F2CclsCNxYWEgcs8o3QyQqPceBeVRlg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-koa@0.40.0':
- resolution: {integrity: sha512-dJc3H/bKMcgUYcQpLF+1IbmUKus0e5Fnn/+ru/3voIRHwMADT3rFSUcGLWSczkg68BCgz0vFWGDTvPtcWIFr7A==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-koa@0.42.0':
resolution: {integrity: sha512-H1BEmnMhho8o8HuNRq5zEI4+SIHDIglNB7BPKohZyWG4fWNuR7yM4GTlR01Syq21vODAS7z5omblScJD/eZdKw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-lru-memoizer@0.37.0':
- resolution: {integrity: sha512-dHLrn55qVWsHJQYdForPWPUWDk2HZ2jjzkT+WoQSqpYT1j4HxfoiLfBTF+I3EbEYFAJnDRmRAUfA6nU5GPdCLQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-memcached@0.36.0':
- resolution: {integrity: sha512-5efkT8ZfN8il5z+yfKYFGm2YR3mhlhaJoGfNOAylKE/6tUH3WDTTWaP7nrURtWGc+fuvDktcEch18Se8qsGS7w==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-mongodb@0.43.0':
- resolution: {integrity: sha512-bMKej7Y76QVUD3l55Q9YqizXybHUzF3pujsBFjqbZrRn2WYqtsDtTUlbCK7fvXNPwFInqZ2KhnTqd0gwo8MzaQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-mongodb@0.46.0':
resolution: {integrity: sha512-VF/MicZ5UOBiXrqBslzwxhN7TVqzu1/LN/QDpkskqM0Zm0aZ4CVRbUygL8d7lrjLn15x5kGIe8VsSphMfPJzlA==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-mongoose@0.38.1':
- resolution: {integrity: sha512-zaeiasdnRjXe6VhYCBMdkmAVh1S5MmXC/0spet+yqoaViGnYst/DOxPvhwg3yT4Yag5crZNWsVXnA538UjP6Ow==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-mongoose@0.40.0':
resolution: {integrity: sha512-niRi5ZUnkgzRhIGMOozTyoZIvJKNJyhijQI4nF4iFSb+FUx2v5fngfR+8XLmdQAO7xmsD8E5vEGdDVYVtKbZew==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-mysql2@0.38.1':
- resolution: {integrity: sha512-qkpHMgWSDTYVB1vlZ9sspf7l2wdS5DDq/rbIepDwX5BA0N0068JTQqh0CgAh34tdFqSCnWXIhcyOXC2TtRb0sg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-mysql2@0.40.0':
resolution: {integrity: sha512-0xfS1xcqUmY7WE1uWjlmI67Xg3QsSUlNT+AcXHeA4BDUPwZtWqF4ezIwLgpVZfHOnkAEheqGfNSWd1PIu3Wnfg==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-mysql@0.38.1':
- resolution: {integrity: sha512-+iBAawUaTfX/HAlvySwozx0C2B6LBfNPXX1W8Z2On1Uva33AGkw2UjL9XgIg1Pj4eLZ9R4EoJ/aFz+Xj4E/7Fw==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-mysql@0.40.0':
resolution: {integrity: sha512-d7ja8yizsOCNMYIJt5PH/fKZXjb/mS48zLROO4BzZTtDfhNCl2UM/9VIomP2qkGIFVouSJrGr/T00EzY7bPtKA==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-nestjs-core@0.37.1':
- resolution: {integrity: sha512-ebYQjHZEmGHWEALwwDGhSQVLBaurFnuLIkZD5igPXrt7ohfF4lc5/4al1LO+vKc0NHk8SJWStuRueT86ISA8Vg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-nestjs-core@0.39.0':
resolution: {integrity: sha512-mewVhEXdikyvIZoMIUry8eb8l3HUjuQjSjVbmLVTt4NQi35tkpnHQrG9bTRBrl3403LoWZ2njMPJyg4l6HfKvA==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-net@0.36.0':
- resolution: {integrity: sha512-rZlbSgwAJys8lpug+xIeAdO98ypYMAPVqrHqc4AHuUl5S4MULHEcjGLMZLoE/guEGO4xAQ5XUezpRFGM1SAnsg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-pg@0.41.0':
- resolution: {integrity: sha512-BSlhpivzBD77meQNZY9fS4aKgydA8AJBzv2dqvxXFy/Hq64b7HURgw/ztbmwFeYwdF5raZZUifiiNSMLpOJoSA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-pg@0.43.0':
resolution: {integrity: sha512-og23KLyoxdnAeFs1UWqzSonuCkePUzCX30keSYigIzJe/6WSYA8rnEI5lobcxPEzg+GcU06J7jzokuEHbjVJNw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-pino@0.39.0':
- resolution: {integrity: sha512-uA17F2iP77o3NculB63QD2zv3jkJ093Gfb0GxHLEqTIqpYs1ToJ53ybWwjJwqFByxk7GrliaxaxVtWC23PKzBg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-redis-4@0.39.0':
- resolution: {integrity: sha512-Zpfqfi83KeKgVQ0C2083GZPon3ZPYQ5E59v9FAbhubtOoUb9Rh7n111YD8FPW3sgx6JKp1odXmBmfQhWCaTOpQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation-redis-4@0.41.0':
resolution: {integrity: sha512-H7IfGTqW2reLXqput4yzAe8YpDC0fmVNal95GHMLOrS89W+qWUKIqxolSh63hJyfmwPSFwXASzj7wpSk8Az+Dg==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation-redis@0.39.1':
- resolution: {integrity: sha512-HUjTerD84jRJnSyDrRPqn6xQ7K91o9qLflRPZqzRvq0GRj5PMfc6TJ/z3q/ayWy/2Kzffhrp7HCIVp0u0TkgUg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-restify@0.38.0':
- resolution: {integrity: sha512-VYK47Z9GBaZX5MQLL7kZDdzQDdyUtHRD4J/GSr6kdwmIpdpUQXLsV3EnboeB8P+BlpucF57FyJKE8yWTOEMfnA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-router@0.37.0':
- resolution: {integrity: sha512-+OPcm7C9I5oPqnpStE+1WkdPWjRx0k5XKratxQmIDFZrmhRcqvMte3vrrzE/OBPg9iqh2tKrSe0y7+0sRfTJyQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-runtime-node@0.4.0':
- resolution: {integrity: sha512-/NOgUF5gf3T5c3GMyy6fnQxaVzbOf9j2xcetgymIIX2HSN3Gk7o64G7KDvwHwhaa20ZiF0QDLb3m4AT+tn9eRg==}
- engines: {node: '>=14.10.0'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-socket.io@0.39.0':
- resolution: {integrity: sha512-4J2ehk5mJyDT6j2yJCOuPxAjit5QB1Fwzhx0LID5jjvhI9LxzZIGDNAPTTHyghSiaRDeNMzceXKkkEQJkg2MNw==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-tedious@0.10.1':
- resolution: {integrity: sha512-maSXMxgS0szU52khQzAROV4nWr+3M8mZajMQOc3/7tYjo+Q3HlWAowOuagPvp4pwROK4x6oDaFYlY+ZSj1qjYA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
- '@opentelemetry/instrumentation-undici@0.2.0':
- resolution: {integrity: sha512-RH9WdVRtpnyp8kvya2RYqKsJouPxvHl7jKPsIfrbL8u2QCKloAGi0uEqDHoOS15ZRYPQTDXZ7d8jSpUgSQmvpA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.7.0
-
- '@opentelemetry/instrumentation-winston@0.37.0':
- resolution: {integrity: sha512-vOx55fxdNjo2XojJf8JN4jP7VVvQCh7UQzzQ2Q2FpGJpt8Z3EErKaY8xOBkOuJH0TtL/Q72rmIn9c+mRG46BxA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation@0.46.0':
resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/instrumentation@0.51.1':
- resolution: {integrity: sha512-JIrvhpgqY6437QIqToyozrUG1h5UhwHkaGK/WAX+fkrpyPtc+RO5FkRtUd9BH0MibabHHvqsnBGKfKVijbmp8w==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.3.0
-
'@opentelemetry/instrumentation@0.52.1':
resolution: {integrity: sha512-uXJbYU/5/MBHjMp1FqrILLRuiJCs3Ofk0MeRDk8g1S1gD47U8X3JnSwcMO1rtRo1x1a7zKaQHaoYu49p/4eSKw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
- '@opentelemetry/otlp-exporter-base@0.51.1':
- resolution: {integrity: sha512-UYlnOYyDdzo1Gw559EHCzru0RwhvuXCwoH8jGo9J4gO1TE58GjnEmIjomMsKBCym3qWNJfIQXw+9SZCV0DdQNg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/otlp-grpc-exporter-base@0.51.1':
- resolution: {integrity: sha512-ZAS+4pq8o7dsugGTwV9s6JMKSxi+guIHdn0acOv0bqj26e9pWDFx5Ky+bI0aY46uR9Y0JyXqY+KAEYM/SO3DFA==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/otlp-proto-exporter-base@0.51.1':
- resolution: {integrity: sha512-gxxxwfk0inDMb5DLeuxQ3L8TtptxSiTNHE4nnAJH34IQXAVRhXSXW1rK8PmDKDngRPIZ6J7ncUCjjIn8b+AgqQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/otlp-transformer@0.51.1':
- resolution: {integrity: sha512-OppYOXwV9LQqqtYUCywqoOqX/JT9LQ5/FMuPZ//eTkvuHdUC4ZMwz2c6uSoT2R90GWvvGnF1iEqTGyTT3xAt2Q==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.3.0 <1.9.0'
-
- '@opentelemetry/propagation-utils@0.30.10':
- resolution: {integrity: sha512-hhTW8pFp9PSyosYzzuUL9rdm7HF97w3OCyElufFHyUnYnKkCBbu8ne2LyF/KSdI/xZ81ubxWZs78hX4S7pLq5g==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/propagator-aws-xray@1.25.1':
- resolution: {integrity: sha512-soZQdO9EAROMwa9bL2C0VLadbrfRjSA9t7g6X8sL0X1B8V59pzOayYMyTW9qTECn9uuJV98A7qOnJm6KH6yk8w==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.10.0'
-
- '@opentelemetry/propagator-b3@1.24.1':
- resolution: {integrity: sha512-nda97ZwhpZKyUJTXqQuKzNhPMUgMLunbbGWn8kroBwegn+nh6OhtyGkrVQsQLNdVKJl0KeB5z0ZgeWszrYhwFw==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
- '@opentelemetry/propagator-jaeger@1.24.1':
- resolution: {integrity: sha512-7bRBJn3FG1l195A1m+xXRHvgzAOBsfmRi9uZ5Da18oTh7BLmNDiA8+kpk51FpTsU1PCikPVpRDNPhKVB6lyzZg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
'@opentelemetry/redis-common@0.36.2':
resolution: {integrity: sha512-faYX1N0gpLhej/6nyp6bgRjzAKXn5GOEMYY7YhciSfCoITAktLUtQ36d24QEWNA1/WA1y6qQunCe0OhHRkVl9g==}
engines: {node: '>=14'}
- '@opentelemetry/resource-detector-alibaba-cloud@0.28.10':
- resolution: {integrity: sha512-TZv/1Y2QCL6sJ+X9SsPPBXe4786bc/Qsw0hQXFsNTbJzDTGGUmOAlSZ2qPiuqAd4ZheUYfD+QA20IvAjUz9Hhg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/resource-detector-aws@1.5.1':
- resolution: {integrity: sha512-+IUh4gAwJf49vOJM6PIjmgOapRH5zr21ZpFnNU0QZmxRi52AXVhZN7A89pKW6GAQheWnVQLD7iUN87ieYt70tw==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/resource-detector-azure@0.2.9':
- resolution: {integrity: sha512-16Z6kyrmszoa7J1uj1kbSAgZuk11K07yEDj6fa3I9XBf8Debi8y4K8ex94kpxbCfEraWagXji3bCWvaq3k4dRg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/resource-detector-container@0.3.11':
- resolution: {integrity: sha512-22ndMDakxX+nuhAYwqsciexV8/w26JozRUV0FN9kJiqSWtA1b5dCVtlp3J6JivG5t8kDN9UF5efatNnVbqRT9Q==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/resource-detector-gcp@0.29.10':
- resolution: {integrity: sha512-rm2HKJ9lsdoVvrbmkr9dkOzg3Uk0FksXNxvNBgrCprM1XhMoJwThI5i0h/5sJypISUAJlEeJS6gn6nROj/NpkQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': ^1.0.0
-
- '@opentelemetry/resources@1.24.1':
- resolution: {integrity: sha512-cyv0MwAaPF7O86x5hk3NNgenMObeejZFLJJDVuSeSMIsknlsj3oOZzRv3qSzlwYomXsICfBeFFlxwHQte5mGXQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
'@opentelemetry/resources@1.25.1':
resolution: {integrity: sha512-pkZT+iFYIZsVn6+GzM0kSX+u3MSLCY9md+lIJOoKl/P+gJFfxJte/60Usdp8Ce4rOs8GduUpSPNe1ddGyDT1sQ==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': '>=1.0.0 <1.10.0'
- '@opentelemetry/sdk-logs@0.51.1':
- resolution: {integrity: sha512-ULQQtl82b673PpZc5/0EtH4V+BrwVOgKJZEB7tYZnGTG3I98tQVk89S9/JSixomDr++F4ih+LSJTCqIKBz+MQQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.4.0 <1.9.0'
- '@opentelemetry/api-logs': '>=0.39.1'
-
- '@opentelemetry/sdk-metrics@1.24.1':
- resolution: {integrity: sha512-FrAqCbbGao9iKI+Mgh+OsC9+U2YMoXnlDHe06yH7dvavCKzE3S892dGtX54+WhSFVxHR/TMRVJiK/CV93GR0TQ==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.3.0 <1.9.0'
-
'@opentelemetry/sdk-metrics@1.25.1':
resolution: {integrity: sha512-9Mb7q5ioFL4E4dDrc4wC/A3NTHDat44v4I3p2pLPSxRvqUbDIQyMVr9uK+EU69+HWhlET1VaSrRzwdckWqY15Q==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': '>=1.3.0 <1.10.0'
- '@opentelemetry/sdk-node@0.51.1':
- resolution: {integrity: sha512-GgmNF9C+6esr8PIJxCqHw84rEOkYm6XdFWZ2+Wyc3qaUt92ACoN7uSw5iKNvaUq62W0xii1wsGxwHzyENtPP8w==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.3.0 <1.9.0'
-
- '@opentelemetry/sdk-trace-base@1.24.1':
- resolution: {integrity: sha512-zz+N423IcySgjihl2NfjBf0qw1RWe11XIAWVrTNOSSI6dtSPJiVom2zipFB2AEEtJWpv0Iz6DY6+TjnyTV5pWg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
'@opentelemetry/sdk-trace-base@1.25.1':
resolution: {integrity: sha512-C8k4hnEbc5FamuZQ92nTOp8X/diCY56XUTnMiv9UTuJitCzaNNHAVsdm5+HLCdI8SLQsLWIrG38tddMxLVoftw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': '>=1.0.0 <1.10.0'
- '@opentelemetry/sdk-trace-node@1.24.1':
- resolution: {integrity: sha512-/FZX8uWaGIAwsDhqI8VvQ+qWtfMNlXjaFYGc+vmxgdRFppCSSIRwrPyIhJO1qx61okyYhoyxVEZAfoiNxrfJCg==}
- engines: {node: '>=14'}
- peerDependencies:
- '@opentelemetry/api': '>=1.0.0 <1.9.0'
-
- '@opentelemetry/semantic-conventions@1.24.1':
- resolution: {integrity: sha512-VkliWlS4/+GHLLW7J/rVBA00uXus1SWvwFvcUDxDwmFxYfg/2VI6ekwdXS28cjI8Qz2ky2BzG8OUHo+WeYIWqw==}
- engines: {node: '>=14'}
-
'@opentelemetry/semantic-conventions@1.25.1':
resolution: {integrity: sha512-ZDjMJJQRlyk8A1KZFCc+bCbsyrn1wTwdNt56F7twdfUfnHUZUq77/WfONCj8p72NZOyP7pNTdUWSTYC3GTbuuQ==}
engines: {node: '>=14'}
@@ -1306,36 +856,6 @@ packages:
'@prisma/instrumentation@5.17.0':
resolution: {integrity: sha512-c1Sle4ji8aasMcYfBBHFM56We4ljfenVtRmS8aY06BllS7SoU6SmJBwG7vil+GHiR0Yrh+t9iBwt4AY0Jr4KNQ==}
- '@protobufjs/aspromise@1.1.2':
- resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==}
-
- '@protobufjs/base64@1.1.2':
- resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==}
-
- '@protobufjs/codegen@2.0.4':
- resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==}
-
- '@protobufjs/eventemitter@1.1.0':
- resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==}
-
- '@protobufjs/fetch@1.1.0':
- resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==}
-
- '@protobufjs/float@1.0.2':
- resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==}
-
- '@protobufjs/inquire@1.1.0':
- resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==}
-
- '@protobufjs/path@1.1.2':
- resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==}
-
- '@protobufjs/pool@1.1.0':
- resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==}
-
- '@protobufjs/utf8@1.1.0':
- resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==}
-
'@puppeteer/browsers@2.2.3':
resolution: {integrity: sha512-bJ0UBsk0ESOs6RFcLXOt99a3yTDcOKlzfjad+rhFwdaG1Lu/Wzq58GHYCDTlZ9z6mldf4g+NTb+TXEfe0PpnsQ==}
engines: {node: '>=18'}
@@ -1423,10 +943,6 @@ packages:
engines: {node: '>= 10'}
hasBin: true
- '@sentry/core@8.13.0':
- resolution: {integrity: sha512-N9Qg4ZGxZWp8eb2eUUHVVKgjBLtFIjS805nG92s6yJmkvOpKm6mLtcUaT/iDf3Hta6nG+xRkhbE3r+Z4cbXG8w==}
- engines: {node: '>=14.18'}
-
'@sentry/core@8.26.0':
resolution: {integrity: sha512-g/tVmTZD4GNbLFf++hKJfBpcCAtduFEMLnbfa9iT/QEZjlmP+EzY+GsH9bafM5VsNe8DiOUp+kJKWtShzlVdBA==}
engines: {node: '>=14.18'}
@@ -1450,18 +966,10 @@ packages:
engines: {node: '>=14.18'}
hasBin: true
- '@sentry/types@8.13.0':
- resolution: {integrity: sha512-r63s/H5gvQnQM9tTGBXz2xErUbxZALh4e2Lg/1aHj4zIvGLBjA2z5qWsh6TEZYbpmgAyGShLDr6+rWeUVf9yBQ==}
- engines: {node: '>=14.18'}
-
'@sentry/types@8.26.0':
resolution: {integrity: sha512-zKmh6SWsJh630rpt7a9vP4Cm4m1C2gDTUqUiH565CajCL/4cePpNWYrNwalSqsOSL7B9OrczA1+n6a6XvND+ng==}
engines: {node: '>=14.18'}
- '@sentry/utils@8.13.0':
- resolution: {integrity: sha512-PxV0v9VbGWH9zP37P5w2msLUFDr287nYjoY2XVF+RSolyiTs1CQNI5ZMUO3o4MsSac/dpXxjyrZXQd72t/jRYA==}
- engines: {node: '>=14.18'}
-
'@sentry/utils@8.26.0':
resolution: {integrity: sha512-xvlPU9Hd2BlyT+FhWHGNwnxWqdVRk2AHnDtVcW4Ma0Ri5EwS+uy4Jeik5UkSv8C5RVb9VlxFmS8LN3I1MPJsLw==}
engines: {node: '>=14.18'}
@@ -1613,12 +1121,6 @@ packages:
'@tsconfig/recommended@1.0.6':
resolution: {integrity: sha512-0IKu9GHYF1NGTJiYgfWwqnOQSlnE9V9R7YohHNNf0/fj/SyOZWzdd06JFr0fLpg1Mqw0kGbYg8w5xdkSqLKM9g==}
- '@types/accepts@1.3.7':
- resolution: {integrity: sha512-Pay9fq2lM2wXPWbteBsRAGiWH2hig4ZE2asK+mm7kUzlxRTfL961rj89I6zV/E3PcIkDqyuBEcMxFT7rccugeQ==}
-
- '@types/aws-lambda@8.10.122':
- resolution: {integrity: sha512-vBkIh9AY22kVOCEKo5CJlyCgmSWvasC+SWUxL/x/vOwRobMpI/HG1xp/Ae3AqmSiZeLUbOhW0FCD3ZjqqUxmXw==}
-
'@types/babel__core@7.20.5':
resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==}
@@ -1634,24 +1136,18 @@ packages:
'@types/body-parser@1.19.5':
resolution: {integrity: sha512-fB3Zu92ucau0iQ0JMCFQE7b/dv8Ot07NI3KaZIkIUNXq82k4eBAqUaneXfleGY9JWskeS9y+u0nXMyspcuQrCg==}
- '@types/bunyan@1.8.9':
- resolution: {integrity: sha512-ZqS9JGpBxVOvsawzmVt30sP++gSQMTejCkIAQ3VdadOcRE8izTyW66hufvwLeH+YEGP6Js2AW7Gz+RMyvrEbmw==}
-
'@types/connect@3.4.36':
resolution: {integrity: sha512-P63Zd/JUGq+PdrM1lv0Wv5SBYeA2+CORvbrXbngriYY0jzLUWfQMQQxOhjONEz/wlHOAxOdY7CY65rgQdTjq2w==}
'@types/connect@3.4.38':
resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==}
- '@types/content-disposition@0.5.8':
- resolution: {integrity: sha512-QVSSvno3dE0MgO76pJhmv4Qyi/j0Yk9pBp0Y7TJ2Tlj+KCgJWY6qX7nnxCOLkZ3VYRSIk1WTxCvwUSdx6CCLdg==}
-
- '@types/cookies@0.9.0':
- resolution: {integrity: sha512-40Zk8qR147RABiQ7NQnBzWzDcjKzNrntB5BAmeGCb2p/MIyOE+4BVvc17wumsUqUw00bJYqoXFHYygQnEFh4/Q==}
-
'@types/cors@2.8.17':
resolution: {integrity: sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==}
+ '@types/escape-html@1.0.4':
+ resolution: {integrity: sha512-qZ72SFTgUAZ5a7Tj6kf2SHLetiH5S6f8G5frB2SPQ3EyF02kxdyBFf4Tz4banE3xCgGnKgWLt//a6VuYHKYJTg==}
+
'@types/express-serve-static-core@4.19.3':
resolution: {integrity: sha512-KOzM7MhcBFlmnlr/fzISFF5vGWVSvN6fTd4T+ExOt08bA/dA5kpSzY52nMsI1KDFmUREpJelPYyuslLRSjjgCg==}
@@ -1664,9 +1160,6 @@ packages:
'@types/graceful-fs@4.1.9':
resolution: {integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==}
- '@types/http-assert@1.5.5':
- resolution: {integrity: sha512-4+tE/lwdAahgZT1g30Jkdm9PzFRde0xwxBNUyRsCitRvCQB90iuA2uJYdUnhnANRcqGXaWOGY4FEoxeElNAK2g==}
-
'@types/http-errors@2.0.4':
resolution: {integrity: sha512-D0CFMMtydbJAegzOyHjtiKPLlvnm3iTZyZRSZoLq2mRhDdmLfIWOCYPfQJ4cu2erKghU++QvjcUjp/5h7hESpA==}
@@ -1682,21 +1175,6 @@ packages:
'@types/jest@29.5.12':
resolution: {integrity: sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==}
- '@types/keygrip@1.0.6':
- resolution: {integrity: sha512-lZuNAY9xeJt7Bx4t4dx0rYCDqGPW8RXhQZK1td7d4H6E9zYbLoOtjBvfwdTKpsyxQI/2jv+armjX/RW+ZNpXOQ==}
-
- '@types/koa-compose@3.2.8':
- resolution: {integrity: sha512-4Olc63RY+MKvxMwVknCUDhRQX1pFQoBZ/lXcRLP69PQkEpze/0cr8LNqJQe5NFb/b19DWi2a5bTi2VAlQzhJuA==}
-
- '@types/koa@2.14.0':
- resolution: {integrity: sha512-DTDUyznHGNHAl+wd1n0z1jxNajduyTh8R53xoewuerdBzGo6Ogj6F2299BFtrexJw4NtgjsI5SMPCmV9gZwGXA==}
-
- '@types/koa__router@12.0.3':
- resolution: {integrity: sha512-5YUJVv6NwM1z7m6FuYpKfNLTZ932Z6EF6xy2BbtpJSyn13DKNQEkXVffFVSnJHxvwwWh2SAeumpjAYUELqgjyw==}
-
- '@types/memcached@2.2.10':
- resolution: {integrity: sha512-AM9smvZN55Gzs2wRrqeMHVP7KE8KWgCJO/XL5yCly2xF6EKa4YlbpK+cLSAH4NG/Ah64HrlegmGqW8kYws7Vxg==}
-
'@types/mime@1.3.5':
resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==}
@@ -1712,6 +1190,9 @@ packages:
'@types/node@20.14.1':
resolution: {integrity: sha512-T2MzSGEu+ysB/FkWfqmhV3PLyQlowdptmmgD20C6QxsS8Fmv5SjpZ1ayXaEC0S21/h5UJ9iA6W/5vSNU5l00OA==}
+ '@types/pdf-parse@1.1.4':
+ resolution: {integrity: sha512-+gbBHbNCVGGYw1S9lAIIvrHW47UYOhMIFUsJcMkMrzy1Jf0vulBN3XQIjPgnoOXveMuHnF3b57fXROnY/Or7eg==}
+
'@types/pg-pool@2.0.4':
resolution: {integrity: sha512-qZAvkv1K3QbmHHFYSNRYPkRjOWRLBYrL4B9c+wG0GSVGBw0NtJwPcgx/DSddeDJvRGMHCEQ4VMEVfuJ/0gZ3XQ==}
@@ -1739,15 +1220,9 @@ packages:
'@types/shimmer@1.0.5':
resolution: {integrity: sha512-9Hp0ObzwwO57DpLFF0InUjUm/II8GmKAvzbefxQTihCb7KI6yc9yzf0nLc4mVdby5N4DRCgQM2wCup9KTieeww==}
- '@types/stack-trace@0.0.29':
- resolution: {integrity: sha512-TgfOX+mGY/NyNxJLIbDWrO9DjGoVSW9+aB8H2yy1fy32jsvxijhmyJI9fDFgvz3YP4lvJaq9DzdR/M1bOgVc9g==}
-
'@types/stack-utils@2.0.3':
resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==}
- '@types/tedious@4.0.14':
- resolution: {integrity: sha512-KHPsfX/FoVbUGbyYvk1q9MMQHLPeRZhRJZdO45Q4YjvFkv4hMNghCWTvy7rdKessBsmtz4euWCWAB6/tVpI1Iw==}
-
'@types/triple-beam@1.3.5':
resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==}
@@ -1974,9 +1449,6 @@ packages:
resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==}
engines: {node: '>=10.0.0'}
- bignumber.js@9.1.2:
- resolution: {integrity: sha512-2/mKyZH9K85bzOEfhXDBFZTGd1CTs+5IHpeFQo9luiBG7hghdC851Pj2WAhb6E3R6b9tZj/XKhbg4fum+Kepug==}
-
bin-links@4.0.4:
resolution: {integrity: sha512-cMtq4W5ZsEwcutJrVId+a/tjt8GSbS+h0oNkdl6+6rBuEv8Ot33Bevj5KPm40t309zuhVic8NjpuL42QCiJWWA==}
engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
@@ -1988,9 +1460,6 @@ packages:
binary-search@1.3.6:
resolution: {integrity: sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==}
- bl@4.1.0:
- resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==}
-
bluebird@3.4.7:
resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==}
@@ -2118,22 +1587,10 @@ packages:
cjs-module-lexer@1.3.1:
resolution: {integrity: sha512-a3KdPAANPbNE4ZUv9h6LckSl9zLsYOP4MBmhIPkRaeyybt+r4UghLvq+xw/YwUcC1gqylCkL4rdVs3Lwupjm4Q==}
- cli-cursor@3.1.0:
- resolution: {integrity: sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw==}
- engines: {node: '>=8'}
-
- cli-spinners@2.9.2:
- resolution: {integrity: sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==}
- engines: {node: '>=6'}
-
cliui@8.0.1:
resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==}
engines: {node: '>=12'}
- clone@1.0.4:
- resolution: {integrity: sha512-JQHZ2QMW6l3aH/j6xCqQThY/9OH4D/9ls34cgkUBiEeocRTU04tHfKPBsUK1PqZCUQM7GiA0IIXJSuXHI64Kbg==}
- engines: {node: '>=0.8'}
-
cluster-key-slot@1.1.2:
resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==}
engines: {node: '>=0.10.0'}
@@ -2165,6 +1622,15 @@ packages:
color-name@1.1.4:
resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==}
+ color-string@1.9.1:
+ resolution: {integrity: sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==}
+
+ color@3.2.1:
+ resolution: {integrity: sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==}
+
+ colorspace@1.1.4:
+ resolution: {integrity: sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==}
+
combined-stream@1.0.8:
resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==}
engines: {node: '>= 0.8'}
@@ -2323,17 +1789,10 @@ packages:
resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==}
engines: {node: '>=0.10.0'}
- defaults@1.0.4:
- resolution: {integrity: sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A==}
-
define-data-property@1.1.4:
resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==}
engines: {node: '>= 0.4'}
- define-lazy-prop@2.0.0:
- resolution: {integrity: sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==}
- engines: {node: '>=8'}
-
degenerator@5.0.1:
resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==}
engines: {node: '>= 14'}
@@ -2440,6 +1899,9 @@ packages:
emoji-regex@9.2.2:
resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==}
+ enabled@2.0.0:
+ resolution: {integrity: sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==}
+
encodeurl@1.0.2:
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
engines: {node: '>= 0.8'}
@@ -2542,9 +2004,6 @@ packages:
resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==}
engines: {node: '>= 0.10.0'}
- extend@3.0.2:
- resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==}
-
extract-zip@2.0.1:
resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==}
engines: {node: '>= 10.17.0'}
@@ -2597,6 +2056,9 @@ packages:
resolution: {integrity: sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==}
hasBin: true
+ fn.name@1.1.0:
+ resolution: {integrity: sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==}
+
follow-redirects@1.15.6:
resolution: {integrity: sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==}
engines: {node: '>=4.0'}
@@ -2651,14 +2113,6 @@ packages:
function-bind@1.1.2:
resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
- gaxios@6.7.0:
- resolution: {integrity: sha512-DSrkyMTfAnAm4ks9Go20QGOcXEyW/NmZhvTYBU2rb4afBB393WIMQPWPEDMl/k8xqiNN9HYq2zao3oWXsdl2Tg==}
- engines: {node: '>=14'}
-
- gcp-metadata@6.1.0:
- resolution: {integrity: sha512-Jh/AIwwgaxan+7ZUUmRLCjtchyDiqh4KjBJ5tW3plBZb5iL/BPcso8A5DlzeD9qlw0duCamnNdpFjxwaT0KyKg==}
- engines: {node: '>=14'}
-
generic-pool@3.9.0:
resolution: {integrity: sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==}
engines: {node: '>= 4'}
@@ -2831,9 +2285,6 @@ packages:
import-in-the-middle@1.7.1:
resolution: {integrity: sha512-1LrZPDtW+atAxH42S6288qyDFNQ2YCty+2mxEPRtfazH6Z5QwkaBSTS2ods7hnVJioF6rkRfNoA6A/MstpFXLg==}
- import-in-the-middle@1.7.4:
- resolution: {integrity: sha512-Lk+qzWmiQuRPPulGQeK5qq0v32k2bHnWrRPFgqyvhw7Kkov5L6MOLOIU3pcWeujc9W4q54Cp3Q2WV16eQkc7Bg==}
-
import-local@3.1.0:
resolution: {integrity: sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==}
engines: {node: '>=8'}
@@ -2871,6 +2322,9 @@ packages:
is-arrayish@0.2.1:
resolution: {integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==}
+ is-arrayish@0.3.2:
+ resolution: {integrity: sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==}
+
is-binary-path@2.1.0:
resolution: {integrity: sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==}
engines: {node: '>=8'}
@@ -2881,11 +2335,6 @@ packages:
is-core-module@2.13.1:
resolution: {integrity: sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==}
- is-docker@2.2.1:
- resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==}
- engines: {node: '>=8'}
- hasBin: true
-
is-extglob@2.1.1:
resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
engines: {node: '>=0.10.0'}
@@ -2902,10 +2351,6 @@ packages:
resolution: {integrity: sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==}
engines: {node: '>=0.10.0'}
- is-interactive@1.0.0:
- resolution: {integrity: sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w==}
- engines: {node: '>=8'}
-
is-number@7.0.0:
resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
engines: {node: '>=0.12.0'}
@@ -2922,14 +2367,6 @@ packages:
resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==}
engines: {node: '>=8'}
- is-unicode-supported@0.1.0:
- resolution: {integrity: sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==}
- engines: {node: '>=10'}
-
- is-wsl@2.2.0:
- resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==}
- engines: {node: '>=8'}
-
isarray@1.0.0:
resolution: {integrity: sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==}
@@ -3138,9 +2575,6 @@ packages:
engines: {node: '>=4'}
hasBin: true
- json-bigint@1.0.0:
- resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==}
-
json-parse-even-better-errors@2.3.1:
resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==}
@@ -3151,9 +2585,6 @@ packages:
json-schema-traverse@1.0.0:
resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==}
- json-stringify-safe@5.0.1:
- resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==}
-
json5@2.2.3:
resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==}
engines: {node: '>=6'}
@@ -3184,6 +2615,9 @@ packages:
koffi@2.9.0:
resolution: {integrity: sha512-KCsuJ2gM58n6bNdR2Z7gqsh/3TchxxQFbVgax2/UvAjRTgwNSYAJDx9E3jrkBP4jEDHWRCfE47Y2OG+/fiSvEw==}
+ kuler@2.0.0:
+ resolution: {integrity: sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==}
+
langchain@0.2.8:
resolution: {integrity: sha512-kb2IOMA71xH8e6EXFg0l4S+QSMC/c796pj1+7mPBkR91HHwoyHZhFRrBaZv4tV+Td+Ba91J2uEDBmySklZLpNQ==}
engines: {node: '>=18'}
@@ -3378,24 +2812,12 @@ packages:
resolution: {integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==}
engines: {node: '>=8'}
- lodash.camelcase@4.3.0:
- resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==}
-
lodash.defaults@4.2.0:
resolution: {integrity: sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==}
lodash.isarguments@3.1.0:
resolution: {integrity: sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==}
- lodash.isobject@3.0.2:
- resolution: {integrity: sha512-3/Qptq2vr7WeJbB4KHUSKlq8Pl7ASXi3UG6CMbBm8WRtXi8+GHm7mKaU3urfpSEzWe2wCIChs6/sdocUsTKJiA==}
-
- lodash.isplainobject@4.0.6:
- resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==}
-
- lodash.isstring@4.0.1:
- resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
-
lodash.memoize@4.1.2:
resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==}
@@ -3408,14 +2830,14 @@ packages:
lodash@4.17.21:
resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
- log-symbols@4.1.0:
- resolution: {integrity: sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==}
- engines: {node: '>=10'}
-
logform@2.6.0:
resolution: {integrity: sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ==}
engines: {node: '>= 12.0.0'}
+ logform@2.6.1:
+ resolution: {integrity: sha512-CdaO738xRapbKIMVn2m4F6KTj4j7ooJ8POVnebSgKo3KBz5axNXRAL7ZdRjIV6NOr2Uf4vjtRkxrFETOioCqSA==}
+ engines: {node: '>= 12.0.0'}
+
loglevel@1.9.1:
resolution: {integrity: sha512-hP3I3kCrDIMuRwAwHltphhDM1r8i55H33GgqjXbrisuJhF4kRhW1dNuxsRklp4bXl8DSdLaNLuiL4A/LWRfxvg==}
engines: {node: '>= 0.6.0'}
@@ -3423,9 +2845,6 @@ packages:
logsnag@1.0.0:
resolution: {integrity: sha512-HMzjh75OR5EVY7Be4Rw8TcDTIY5UPsrXF1HvQ6EzDi21x5cQcDzi4Ts0Y/ruPCbxKY2KG17YjeeTzErXFewFBg==}
- long@5.2.3:
- resolution: {integrity: sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==}
-
loose-envify@1.4.0:
resolution: {integrity: sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==}
hasBin: true
@@ -3467,6 +2886,11 @@ packages:
engines: {node: '>=12.0.0'}
hasBin: true
+ marked@14.1.2:
+ resolution: {integrity: sha512-f3r0yqpz31VXiDB/wj9GaOB0a2PRLQl6vJmXiFrniNwjkKdvakqJRULhjFKJpxOchlCRiG5fcacoUZY5Xa6PEQ==}
+ engines: {node: '>= 18'}
+ hasBin: true
+
md5@2.3.0:
resolution: {integrity: sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==}
@@ -3736,14 +3160,13 @@ packages:
once@1.4.0:
resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
+ one-time@1.0.0:
+ resolution: {integrity: sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==}
+
onetime@5.1.2:
resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
engines: {node: '>=6'}
- open@8.4.2:
- resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
- engines: {node: '>=12'}
-
openai@3.3.0:
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
@@ -3772,10 +3195,6 @@ packages:
option@0.2.4:
resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==}
- ora@5.4.1:
- resolution: {integrity: sha512-5b6Y85tPxZZ7QytO+BQzysW31HJku27cRIlkbAXaNx+BdcVi+LlRFmVXzeF6a7JCwJpyw5c4b+YSVImQIrBpuQ==}
- engines: {node: '>=10'}
-
p-finally@1.0.0:
resolution: {integrity: sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==}
engines: {node: '>=4'}
@@ -3919,9 +3338,6 @@ packages:
resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==}
engines: {node: '>=8.6'}
- pino-abstract-transport@1.2.0:
- resolution: {integrity: sha512-Guhh8EZfPCfH+PMXAb6rKOjGQEoy0xlAIn+irODG5kgfYV+BQ0rGYYWTIel3P5mmyXqkYkPmdIkywsn6QKUR1Q==}
-
pirates@4.0.6:
resolution: {integrity: sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==}
engines: {node: '>= 6'}
@@ -3986,10 +3402,6 @@ packages:
proto-list@1.2.4:
resolution: {integrity: sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA==}
- protobufjs@7.3.2:
- resolution: {integrity: sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==}
- engines: {node: '>=12.0.0'}
-
proxy-addr@2.0.7:
resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
engines: {node: '>= 0.10'}
@@ -4139,10 +3551,6 @@ packages:
resolution: {integrity: sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==}
hasBin: true
- restore-cursor@3.1.0:
- resolution: {integrity: sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA==}
- engines: {node: '>=8'}
-
retry@0.13.1:
resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==}
engines: {node: '>= 4'}
@@ -4213,10 +3621,6 @@ packages:
resolution: {integrity: sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==}
engines: {node: '>= 0.8.0'}
- serialize-error@8.1.0:
- resolution: {integrity: sha512-3NnuWfM6vBYoy5gZFvHiYsVbafvI9vZv/+jlIigFn4oP4zjNPK3LhcY0xSCgeb1a5L8jO71Mit9LlNoi2UfDDQ==}
- engines: {node: '>=10'}
-
serve-static@1.15.0:
resolution: {integrity: sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==}
engines: {node: '>= 0.8.0'}
@@ -4259,6 +3663,9 @@ packages:
resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==}
engines: {node: '>=14'}
+ simple-swizzle@0.2.2:
+ resolution: {integrity: sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==}
+
simple-update-notifier@1.1.0:
resolution: {integrity: sha512-VpsrsJSUcJEseSbMHkrsrAVSdvVS5I96Qo1QAQ4FxQ9wXFcB+pjj7FB7/us9+GcgfW4ziHtYMc1J0PLczb55mg==}
engines: {node: '>=8.10.0'}
@@ -4422,6 +3829,9 @@ packages:
text-decoder@1.1.0:
resolution: {integrity: sha512-TmLJNj6UgX8xcUZo4UDStGQtDiTzF7BzWlzn9g7UWrjkpHr5uJTK1ld16wZ3LXb2vb6jH8qU89dW5whuMdXYdw==}
+ text-hex@1.0.0:
+ resolution: {integrity: sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==}
+
through@2.3.8:
resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
@@ -4509,10 +3919,6 @@ packages:
resolution: {integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==}
engines: {node: '>=4'}
- type-fest@0.20.2:
- resolution: {integrity: sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==}
- engines: {node: '>=10'}
-
type-fest@0.21.3:
resolution: {integrity: sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==}
engines: {node: '>=10'}
@@ -4615,9 +4021,6 @@ packages:
walker@1.0.8:
resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==}
- wcwidth@1.0.1:
- resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==}
-
web-streams-polyfill@3.3.3:
resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==}
engines: {node: '>= 8'}
@@ -4652,8 +4055,12 @@ packages:
engines: {node: '>= 8'}
hasBin: true
- winston-transport@4.7.0:
- resolution: {integrity: sha512-ajBj65K5I7denzer2IYW6+2bNIVqLGDHqDw3Ow8Ohh+vdW+rv4MZ6eiDvHoKhfJFZ2auyN8byXieDDJ96ViONg==}
+ winston-transport@4.8.0:
+ resolution: {integrity: sha512-qxSTKswC6llEMZKgCQdaWgDuMJQnhuvF5f2Nk3SNXc4byfQ+voo2mX1Px9dkNOuR8p0KAjfPG29PuYUSIb+vSA==}
+ engines: {node: '>= 12.0.0'}
+
+ winston@3.14.2:
+ resolution: {integrity: sha512-CO8cdpBB2yqzEf8v895L+GNKYJiEq8eKlHU38af3snQBQ+sdAIUepjMSguOIJC7ICbzm0ZI+Af2If4vIJrtmOg==}
engines: {node: '>= 12.0.0'}
wordnet-db@3.1.14:
@@ -5038,9 +4445,15 @@ snapshots:
dependencies:
'@jridgewell/trace-mapping': 0.3.9
+ '@dabh/diagnostics@2.0.3':
+ dependencies:
+ colorspace: 1.1.4
+ enabled: 2.0.0
+ kuler: 2.0.0
+
'@devil7softwares/pos@1.0.2': {}
- '@dqbd/tiktoken@1.0.15': {}
+ '@dqbd/tiktoken@1.0.16': {}
'@flydotio/dockerfile@0.4.11':
dependencies:
@@ -5050,83 +4463,6 @@ snapshots:
shell-quote: 1.8.1
yargs: 17.7.2
- '@grpc/grpc-js@1.10.10':
- dependencies:
- '@grpc/proto-loader': 0.7.13
- '@js-sdsl/ordered-map': 4.4.2
-
- '@grpc/proto-loader@0.7.13':
- dependencies:
- lodash.camelcase: 4.3.0
- long: 5.2.3
- protobufjs: 7.3.2
- yargs: 17.7.2
-
- '@hyperdx/instrumentation-exception@0.1.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@sentry/core': 8.13.0
- '@sentry/types': 8.13.0
- '@sentry/utils': 8.13.0
- json-stringify-safe: 5.0.1
- shimmer: 1.2.1
- tslib: 2.6.3
- transitivePeerDependencies:
- - supports-color
-
- '@hyperdx/instrumentation-sentry-node@0.1.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- json-stringify-safe: 5.0.1
- shimmer: 1.2.1
- tslib: 2.6.3
- transitivePeerDependencies:
- - supports-color
-
- '@hyperdx/node-opentelemetry@0.8.1':
- dependencies:
- '@hyperdx/instrumentation-exception': 0.1.0(@opentelemetry/api@1.9.0)
- '@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/auto-instrumentations-node': 0.46.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-logs-otlp-http': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-metrics-otlp-proto': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-trace-otlp-proto': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-http': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-runtime-node': 0.4.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-metrics': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-node': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- cli-spinners: 2.9.2
- json-stringify-safe: 5.0.1
- lodash.isobject: 3.0.2
- lodash.isplainobject: 4.0.6
- lodash.isstring: 4.0.1
- node-fetch: 2.7.0
- open: 8.4.2
- ora: 5.4.1
- pino-abstract-transport: 1.2.0
- semver: 7.6.2
- shimmer: 1.2.1
- tslib: 2.6.3
- winston-transport: 4.7.0
- transitivePeerDependencies:
- - encoding
- - supports-color
-
'@ioredis/commands@1.2.0': {}
'@isaacs/cliui@8.0.2':
@@ -5336,8 +4672,6 @@ snapshots:
'@jridgewell/resolve-uri': 3.1.2
'@jridgewell/sourcemap-codec': 1.4.15
- '@js-sdsl/ordered-map@4.4.2': {}
-
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
dependencies:
ansi-styles: 5.2.0
@@ -5375,39 +4709,12 @@ snapshots:
- langchain
- openai
- '@logtail/core@0.4.21':
- dependencies:
- '@logtail/tools': 0.4.21
- '@logtail/types': 0.4.20
- serialize-error: 8.1.0
-
- '@logtail/node@0.4.21':
- dependencies:
- '@logtail/core': 0.4.21
- '@logtail/types': 0.4.20
- '@msgpack/msgpack': 2.8.0
- '@types/stack-trace': 0.0.29
- cross-fetch: 3.1.8
- minimatch: 3.1.2
- serialize-error: 8.1.0
- stack-trace: 0.0.10
- transitivePeerDependencies:
- - encoding
-
- '@logtail/tools@0.4.21':
- dependencies:
- '@logtail/types': 0.4.20
-
- '@logtail/types@0.4.20': {}
-
'@mixmark-io/domino@2.2.0': {}
'@mongodb-js/saslprep@1.1.7':
dependencies:
sparse-bitfield: 3.0.3
- '@msgpack/msgpack@2.8.0': {}
-
'@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3':
optional: true
@@ -5434,210 +4741,21 @@ snapshots:
'@one-ini/wasm@0.1.1': {}
- '@opentelemetry/api-logs@0.51.1':
- dependencies:
- '@opentelemetry/api': 1.9.0
-
'@opentelemetry/api-logs@0.52.1':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/api@1.9.0': {}
- '@opentelemetry/auto-instrumentations-node@0.46.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-amqplib': 0.37.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-aws-lambda': 0.41.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-aws-sdk': 0.41.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-bunyan': 0.38.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-cassandra-driver': 0.38.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-connect': 0.36.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-cucumber': 0.6.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-dataloader': 0.9.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-dns': 0.36.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-express': 0.39.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-fastify': 0.36.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-fs': 0.12.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-generic-pool': 0.36.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-graphql': 0.40.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-grpc': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-hapi': 0.38.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-http': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-ioredis': 0.40.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-knex': 0.36.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-koa': 0.40.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-lru-memoizer': 0.37.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-memcached': 0.36.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-mongodb': 0.43.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-mongoose': 0.38.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-mysql': 0.38.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-mysql2': 0.38.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-nestjs-core': 0.37.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-net': 0.36.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-pg': 0.41.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-pino': 0.39.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-redis': 0.39.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-redis-4': 0.39.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-restify': 0.38.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-router': 0.37.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-socket.io': 0.39.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-tedious': 0.10.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-undici': 0.2.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation-winston': 0.37.0(@opentelemetry/api@1.9.0)
- '@opentelemetry/resource-detector-alibaba-cloud': 0.28.10(@opentelemetry/api@1.9.0)
- '@opentelemetry/resource-detector-aws': 1.5.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resource-detector-azure': 0.2.9(@opentelemetry/api@1.9.0)
- '@opentelemetry/resource-detector-container': 0.3.11(@opentelemetry/api@1.9.0)
- '@opentelemetry/resource-detector-gcp': 0.29.10(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-node': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - encoding
- - supports-color
-
- '@opentelemetry/context-async-hooks@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
-
'@opentelemetry/context-async-hooks@1.25.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
- '@opentelemetry/core@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/semantic-conventions': 1.24.1
-
'@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/semantic-conventions': 1.25.1
- '@opentelemetry/exporter-logs-otlp-http@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/exporter-metrics-otlp-http@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/exporter-metrics-otlp-proto@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-metrics-otlp-http': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-proto-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/exporter-trace-otlp-grpc@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@grpc/grpc-js': 1.10.10
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-grpc-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/exporter-trace-otlp-http@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/exporter-trace-otlp-proto@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-proto-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/exporter-zipkin@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.24.1
-
- '@opentelemetry/instrumentation-amqplib@0.37.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-aws-lambda@0.41.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/propagator-aws-xray': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@types/aws-lambda': 8.10.122
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-aws-sdk@0.41.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/propagation-utils': 0.30.10(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-bunyan@0.38.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@types/bunyan': 1.8.9
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-cassandra-driver@0.38.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-connect@0.36.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@types/connect': 3.4.36
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-connect@0.38.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5648,39 +4766,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-cucumber@0.6.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-dataloader@0.9.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-dns@0.36.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- semver: 7.6.2
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-express@0.39.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-express@0.41.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5690,15 +4775,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-fastify@0.36.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-fastify@0.38.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5708,14 +4784,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-fs@0.12.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-fs@0.14.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5724,21 +4792,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-generic-pool@0.36.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-graphql@0.40.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-graphql@0.42.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5746,23 +4799,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-grpc@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.24.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-hapi@0.38.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-hapi@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5772,16 +4808,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-http@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.24.1
- semver: 7.6.2
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-http@0.52.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5792,15 +4818,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-ioredis@0.40.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/redis-common': 0.36.2
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-ioredis@0.42.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5810,25 +4827,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-knex@0.36.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-koa@0.40.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@types/koa': 2.14.0
- '@types/koa__router': 12.0.3
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-koa@0.42.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5838,31 +4836,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-lru-memoizer@0.37.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-memcached@0.36.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@types/memcached': 2.2.10
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-mongodb@0.43.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-metrics': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-mongodb@0.46.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5872,15 +4845,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-mongoose@0.38.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-mongoose@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5890,15 +4854,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-mysql2@0.38.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-mysql2@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5908,15 +4863,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-mysql@0.38.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@types/mysql': 2.15.22
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-mysql@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5926,14 +4872,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-nestjs-core@0.37.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-nestjs-core@0.39.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5942,25 +4880,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-net@0.36.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-pg@0.41.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0)
- '@types/pg': 8.6.1
- '@types/pg-pool': 2.0.4
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-pg@0.43.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5972,22 +4891,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-pino@0.39.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-redis-4@0.39.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/redis-common': 0.36.2
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation-redis-4@0.41.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -5997,72 +4900,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/instrumentation-redis@0.39.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/redis-common': 0.36.2
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-restify@0.38.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-router@0.37.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-runtime-node@0.4.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-socket.io@0.39.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-tedious@0.10.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- '@types/tedious': 4.0.14
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-undici@0.2.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/instrumentation-winston@0.37.0(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -6075,18 +4912,6 @@ snapshots:
- supports-color
optional: true
- '@opentelemetry/instrumentation@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@types/shimmer': 1.0.5
- import-in-the-middle: 1.7.4
- require-in-the-middle: 7.3.0
- semver: 7.6.2
- shimmer: 1.2.1
- transitivePeerDependencies:
- - supports-color
-
'@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -6099,119 +4924,14 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@opentelemetry/otlp-exporter-base@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/otlp-grpc-exporter-base@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@grpc/grpc-js': 1.10.10
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- protobufjs: 7.3.2
-
- '@opentelemetry/otlp-proto-exporter-base@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0)
- protobufjs: 7.3.2
-
- '@opentelemetry/otlp-transformer@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/propagation-utils@0.30.10(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
-
- '@opentelemetry/propagator-aws-xray@1.25.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/propagator-b3@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/propagator-jaeger@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
-
'@opentelemetry/redis-common@0.36.2': {}
- '@opentelemetry/resource-detector-alibaba-cloud@0.28.10(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
-
- '@opentelemetry/resource-detector-aws@1.5.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
-
- '@opentelemetry/resource-detector-azure@0.2.9(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
-
- '@opentelemetry/resource-detector-container@0.3.11(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
-
- '@opentelemetry/resource-detector-gcp@0.29.10(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.25.1
- gcp-metadata: 6.1.0
- transitivePeerDependencies:
- - encoding
- - supports-color
-
- '@opentelemetry/resources@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.24.1
-
'@opentelemetry/resources@1.25.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.25.1
- '@opentelemetry/sdk-logs@0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
-
- '@opentelemetry/sdk-metrics@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- lodash.merge: 4.6.2
-
'@opentelemetry/sdk-metrics@1.25.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -6219,32 +4939,6 @@ snapshots:
'@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
lodash.merge: 4.6.2
- '@opentelemetry/sdk-node@0.51.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/api-logs': 0.51.1
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-trace-otlp-grpc': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-trace-otlp-http': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-trace-otlp-proto': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/exporter-zipkin': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-node': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.24.1
- transitivePeerDependencies:
- - supports-color
-
- '@opentelemetry/sdk-trace-base@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/semantic-conventions': 1.24.1
-
'@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@@ -6252,18 +4946,6 @@ snapshots:
'@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.25.1
- '@opentelemetry/sdk-trace-node@1.24.1(@opentelemetry/api@1.9.0)':
- dependencies:
- '@opentelemetry/api': 1.9.0
- '@opentelemetry/context-async-hooks': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/propagator-b3': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/propagator-jaeger': 1.24.1(@opentelemetry/api@1.9.0)
- '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0)
- semver: 7.6.2
-
- '@opentelemetry/semantic-conventions@1.24.1': {}
-
'@opentelemetry/semantic-conventions@1.25.1': {}
'@opentelemetry/sql-common@0.40.1(@opentelemetry/api@1.9.0)':
@@ -6290,29 +4972,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@protobufjs/aspromise@1.1.2': {}
-
- '@protobufjs/base64@1.1.2': {}
-
- '@protobufjs/codegen@2.0.4': {}
-
- '@protobufjs/eventemitter@1.1.0': {}
-
- '@protobufjs/fetch@1.1.0':
- dependencies:
- '@protobufjs/aspromise': 1.1.2
- '@protobufjs/inquire': 1.1.0
-
- '@protobufjs/float@1.0.2': {}
-
- '@protobufjs/inquire@1.1.0': {}
-
- '@protobufjs/path@1.1.2': {}
-
- '@protobufjs/pool@1.1.0': {}
-
- '@protobufjs/utf8@1.1.0': {}
-
'@puppeteer/browsers@2.2.3':
dependencies:
debug: 4.3.4
@@ -6405,11 +5064,6 @@ snapshots:
- encoding
- supports-color
- '@sentry/core@8.13.0':
- dependencies:
- '@sentry/types': 8.13.0
- '@sentry/utils': 8.13.0
-
'@sentry/core@8.26.0':
dependencies:
'@sentry/types': 8.26.0
@@ -6473,14 +5127,8 @@ snapshots:
transitivePeerDependencies:
- supports-color
- '@sentry/types@8.13.0': {}
-
'@sentry/types@8.26.0': {}
- '@sentry/utils@8.13.0':
- dependencies:
- '@sentry/types': 8.13.0
-
'@sentry/utils@8.26.0':
dependencies:
'@sentry/types': 8.26.0
@@ -6631,12 +5279,6 @@ snapshots:
'@tsconfig/recommended@1.0.6': {}
- '@types/accepts@1.3.7':
- dependencies:
- '@types/node': 20.14.1
-
- '@types/aws-lambda@8.10.122': {}
-
'@types/babel__core@7.20.5':
dependencies:
'@babel/parser': 7.24.6
@@ -6663,10 +5305,6 @@ snapshots:
'@types/connect': 3.4.38
'@types/node': 20.14.1
- '@types/bunyan@1.8.9':
- dependencies:
- '@types/node': 20.14.1
-
'@types/connect@3.4.36':
dependencies:
'@types/node': 20.14.1
@@ -6675,19 +5313,12 @@ snapshots:
dependencies:
'@types/node': 20.14.1
- '@types/content-disposition@0.5.8': {}
-
- '@types/cookies@0.9.0':
- dependencies:
- '@types/connect': 3.4.38
- '@types/express': 4.17.21
- '@types/keygrip': 1.0.6
- '@types/node': 20.14.1
-
'@types/cors@2.8.17':
dependencies:
'@types/node': 20.14.1
+ '@types/escape-html@1.0.4': {}
+
'@types/express-serve-static-core@4.19.3':
dependencies:
'@types/node': 20.14.1
@@ -6712,8 +5343,6 @@ snapshots:
dependencies:
'@types/node': 20.14.1
- '@types/http-assert@1.5.5': {}
-
'@types/http-errors@2.0.4': {}
'@types/istanbul-lib-coverage@2.0.6': {}
@@ -6731,31 +5360,6 @@ snapshots:
expect: 29.7.0
pretty-format: 29.7.0
- '@types/keygrip@1.0.6': {}
-
- '@types/koa-compose@3.2.8':
- dependencies:
- '@types/koa': 2.14.0
-
- '@types/koa@2.14.0':
- dependencies:
- '@types/accepts': 1.3.7
- '@types/content-disposition': 0.5.8
- '@types/cookies': 0.9.0
- '@types/http-assert': 1.5.5
- '@types/http-errors': 2.0.4
- '@types/keygrip': 1.0.6
- '@types/koa-compose': 3.2.8
- '@types/node': 20.14.1
-
- '@types/koa__router@12.0.3':
- dependencies:
- '@types/koa': 2.14.0
-
- '@types/memcached@2.2.10':
- dependencies:
- '@types/node': 20.14.1
-
'@types/mime@1.3.5': {}
'@types/mysql@2.15.22':
@@ -6775,6 +5379,8 @@ snapshots:
dependencies:
undici-types: 5.26.5
+ '@types/pdf-parse@1.1.4': {}
+
'@types/pg-pool@2.0.4':
dependencies:
'@types/pg': 8.6.1
@@ -6806,14 +5412,8 @@ snapshots:
'@types/shimmer@1.0.5': {}
- '@types/stack-trace@0.0.29': {}
-
'@types/stack-utils@2.0.3': {}
- '@types/tedious@4.0.14':
- dependencies:
- '@types/node': 20.14.1
-
'@types/triple-beam@1.3.5': {}
'@types/uuid@9.0.8': {}
@@ -7064,8 +5664,6 @@ snapshots:
basic-ftp@5.0.5: {}
- bignumber.js@9.1.2: {}
-
bin-links@4.0.4:
dependencies:
cmd-shim: 6.0.3
@@ -7077,12 +5675,6 @@ snapshots:
binary-search@1.3.6: {}
- bl@4.1.0:
- dependencies:
- buffer: 5.7.1
- inherits: 2.0.4
- readable-stream: 3.6.2
-
bluebird@3.4.7: {}
body-parser@1.20.2:
@@ -7243,20 +5835,12 @@ snapshots:
cjs-module-lexer@1.3.1: {}
- cli-cursor@3.1.0:
- dependencies:
- restore-cursor: 3.1.0
-
- cli-spinners@2.9.2: {}
-
cliui@8.0.1:
dependencies:
string-width: 4.2.3
strip-ansi: 6.0.1
wrap-ansi: 7.0.0
- clone@1.0.4: {}
-
cluster-key-slot@1.1.2: {}
cmd-shim@6.0.3: {}
@@ -7279,6 +5863,21 @@ snapshots:
color-name@1.1.4: {}
+ color-string@1.9.1:
+ dependencies:
+ color-name: 1.1.4
+ simple-swizzle: 0.2.2
+
+ color@3.2.1:
+ dependencies:
+ color-convert: 1.9.3
+ color-string: 1.9.1
+
+ colorspace@1.1.4:
+ dependencies:
+ color: 3.2.1
+ text-hex: 1.0.0
+
combined-stream@1.0.8:
dependencies:
delayed-stream: 1.0.0
@@ -7407,18 +6006,12 @@ snapshots:
deepmerge@4.3.1: {}
- defaults@1.0.4:
- dependencies:
- clone: 1.0.4
-
define-data-property@1.1.4:
dependencies:
es-define-property: 1.0.0
es-errors: 1.3.0
gopd: 1.0.1
- define-lazy-prop@2.0.0: {}
-
degenerator@5.0.1:
dependencies:
ast-types: 0.13.4
@@ -7508,6 +6101,8 @@ snapshots:
emoji-regex@9.2.2: {}
+ enabled@2.0.0: {}
+
encodeurl@1.0.2: {}
end-of-stream@1.4.4:
@@ -7628,8 +6223,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- extend@3.0.2: {}
-
extract-zip@2.0.1:
dependencies:
debug: 4.3.4
@@ -7692,6 +6285,8 @@ snapshots:
flat@5.0.2: {}
+ fn.name@1.1.0: {}
+
follow-redirects@1.15.6: {}
foreground-child@3.2.1:
@@ -7740,25 +6335,6 @@ snapshots:
function-bind@1.1.2: {}
- gaxios@6.7.0:
- dependencies:
- extend: 3.0.2
- https-proxy-agent: 7.0.5
- is-stream: 2.0.1
- node-fetch: 2.7.0
- uuid: 10.0.0
- transitivePeerDependencies:
- - encoding
- - supports-color
-
- gcp-metadata@6.1.0:
- dependencies:
- gaxios: 6.7.0
- json-bigint: 1.0.0
- transitivePeerDependencies:
- - encoding
- - supports-color
-
generic-pool@3.9.0: {}
gensync@1.0.0-beta.2: {}
@@ -7978,13 +6554,6 @@ snapshots:
module-details-from-path: 1.0.3
optional: true
- import-in-the-middle@1.7.4:
- dependencies:
- acorn: 8.12.0
- acorn-import-attributes: 1.9.5(acorn@8.12.0)
- cjs-module-lexer: 1.3.1
- module-details-from-path: 1.0.3
-
import-local@3.1.0:
dependencies:
pkg-dir: 4.2.0
@@ -8026,6 +6595,8 @@ snapshots:
is-arrayish@0.2.1: {}
+ is-arrayish@0.3.2: {}
+
is-binary-path@2.1.0:
dependencies:
binary-extensions: 2.3.0
@@ -8036,8 +6607,6 @@ snapshots:
dependencies:
hasown: 2.0.2
- is-docker@2.2.1: {}
-
is-extglob@2.1.1: {}
is-fullwidth-code-point@3.0.0: {}
@@ -8048,8 +6617,6 @@ snapshots:
dependencies:
is-extglob: 2.1.1
- is-interactive@1.0.0: {}
-
is-number@7.0.0: {}
is-plain-obj@2.1.0: {}
@@ -8058,12 +6625,6 @@ snapshots:
is-stream@2.0.1: {}
- is-unicode-supported@0.1.0: {}
-
- is-wsl@2.2.0:
- dependencies:
- is-docker: 2.2.1
-
isarray@1.0.0: {}
isexe@2.0.0: {}
@@ -8476,18 +7037,12 @@ snapshots:
jsesc@2.5.2: {}
- json-bigint@1.0.0:
- dependencies:
- bignumber.js: 9.1.2
-
json-parse-even-better-errors@2.3.1: {}
json-schema-to-zod@2.3.0: {}
json-schema-traverse@1.0.0: {}
- json-stringify-safe@5.0.1: {}
-
json5@2.2.3: {}
jsonfile@6.1.0:
@@ -8513,6 +7068,8 @@ snapshots:
koffi@2.9.0: {}
+ kuler@2.0.0: {}
+
langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
@@ -8579,18 +7136,10 @@ snapshots:
dependencies:
p-locate: 4.1.0
- lodash.camelcase@4.3.0: {}
-
lodash.defaults@4.2.0: {}
lodash.isarguments@3.1.0: {}
- lodash.isobject@3.0.2: {}
-
- lodash.isplainobject@4.0.6: {}
-
- lodash.isstring@4.0.1: {}
-
lodash.memoize@4.1.2: {}
lodash.merge@4.6.2: {}
@@ -8599,11 +7148,6 @@ snapshots:
lodash@4.17.21: {}
- log-symbols@4.1.0:
- dependencies:
- chalk: 4.1.2
- is-unicode-supported: 0.1.0
-
logform@2.6.0:
dependencies:
'@colors/colors': 1.6.0
@@ -8613,6 +7157,15 @@ snapshots:
safe-stable-stringify: 2.4.3
triple-beam: 1.4.1
+ logform@2.6.1:
+ dependencies:
+ '@colors/colors': 1.6.0
+ '@types/triple-beam': 1.3.5
+ fecha: 4.2.3
+ ms: 2.1.3
+ safe-stable-stringify: 2.4.3
+ triple-beam: 1.4.1
+
loglevel@1.9.1: {}
logsnag@1.0.0:
@@ -8621,8 +7174,6 @@ snapshots:
transitivePeerDependencies:
- encoding
- long@5.2.3: {}
-
loose-envify@1.4.0:
dependencies:
js-tokens: 4.0.0
@@ -8670,6 +7221,8 @@ snapshots:
underscore: 1.13.6
xmlbuilder: 10.1.1
+ marked@14.1.2: {}
+
md5@2.3.0:
dependencies:
charenc: 0.0.2
@@ -8937,16 +7490,14 @@ snapshots:
dependencies:
wrappy: 1.0.2
+ one-time@1.0.0:
+ dependencies:
+ fn.name: 1.1.0
+
onetime@5.1.2:
dependencies:
mimic-fn: 2.1.0
- open@8.4.2:
- dependencies:
- define-lazy-prop: 2.0.0
- is-docker: 2.2.1
- is-wsl: 2.2.0
-
openai@3.3.0:
dependencies:
axios: 0.26.1
@@ -8985,18 +7536,6 @@ snapshots:
option@0.2.4: {}
- ora@5.4.1:
- dependencies:
- bl: 4.1.0
- chalk: 4.1.2
- cli-cursor: 3.1.0
- cli-spinners: 2.9.2
- is-interactive: 1.0.0
- is-unicode-supported: 0.1.0
- log-symbols: 4.1.0
- strip-ansi: 6.0.1
- wcwidth: 1.0.1
-
p-finally@1.0.0: {}
p-limit@2.3.0:
@@ -9148,11 +7687,6 @@ snapshots:
picomatch@2.3.1: {}
- pino-abstract-transport@1.2.0:
- dependencies:
- readable-stream: 4.5.2
- split2: 4.2.0
-
pirates@4.0.6: {}
pkg-dir@4.2.0:
@@ -9221,21 +7755,6 @@ snapshots:
proto-list@1.2.4: {}
- protobufjs@7.3.2:
- dependencies:
- '@protobufjs/aspromise': 1.1.2
- '@protobufjs/base64': 1.1.2
- '@protobufjs/codegen': 2.0.4
- '@protobufjs/eventemitter': 1.1.0
- '@protobufjs/fetch': 1.1.0
- '@protobufjs/float': 1.0.2
- '@protobufjs/inquire': 1.1.0
- '@protobufjs/path': 1.1.2
- '@protobufjs/pool': 1.1.0
- '@protobufjs/utf8': 1.1.0
- '@types/node': 20.14.1
- long: 5.2.3
-
proxy-addr@2.0.7:
dependencies:
forwarded: 0.2.0
@@ -9417,11 +7936,6 @@ snapshots:
path-parse: 1.0.7
supports-preserve-symlinks-flag: 1.0.0
- restore-cursor@3.1.0:
- dependencies:
- onetime: 5.1.2
- signal-exit: 3.0.7
-
retry@0.13.1: {}
rimraf@5.0.7:
@@ -9490,10 +8004,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
- serialize-error@8.1.0:
- dependencies:
- type-fest: 0.20.2
-
serve-static@1.15.0:
dependencies:
encodeurl: 1.0.2
@@ -9539,6 +8049,10 @@ snapshots:
signal-exit@4.1.0: {}
+ simple-swizzle@0.2.2:
+ dependencies:
+ is-arrayish: 0.3.2
+
simple-update-notifier@1.1.0:
dependencies:
semver: 7.0.0
@@ -9725,6 +8239,8 @@ snapshots:
dependencies:
b4a: 1.6.6
+ text-hex@1.0.0: {}
+
through@2.3.8: {}
tmpl@1.0.5: {}
@@ -9795,8 +8311,6 @@ snapshots:
type-detect@4.0.8: {}
- type-fest@0.20.2: {}
-
type-fest@0.21.3: {}
type-is@1.6.18:
@@ -9880,10 +8394,6 @@ snapshots:
dependencies:
makeerror: 1.0.12
- wcwidth@1.0.1:
- dependencies:
- defaults: 1.0.4
-
web-streams-polyfill@3.3.3: {}
web-streams-polyfill@4.0.0-beta.3: {}
@@ -9912,12 +8422,26 @@ snapshots:
dependencies:
isexe: 2.0.0
- winston-transport@4.7.0:
+ winston-transport@4.8.0:
dependencies:
- logform: 2.6.0
- readable-stream: 3.6.2
+ logform: 2.6.1
+ readable-stream: 4.5.2
triple-beam: 1.4.1
+ winston@3.14.2:
+ dependencies:
+ '@colors/colors': 1.6.0
+ '@dabh/diagnostics': 2.0.3
+ async: 3.2.5
+ is-stream: 2.0.1
+ logform: 2.6.0
+ one-time: 1.0.0
+ readable-stream: 3.6.2
+ safe-stable-stringify: 2.4.3
+ stack-trace: 0.0.10
+ triple-beam: 1.4.1
+ winston-transport: 4.8.0
+
wordnet-db@3.1.14: {}
wordpos@2.1.0:
diff --git a/apps/api/requests.http b/apps/api/requests.http
index 3e7bd2b7..809bae7b 100644
--- a/apps/api/requests.http
+++ b/apps/api/requests.http
@@ -1,15 +1,15 @@
### Crawl Website
POST http://localhost:3002/v0/scrape HTTP/1.1
-Authorization: Bearer fc-
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
- "url":"corterix.com"
+ "url":"firecrawl.dev"
}
### Check Job Status
GET http://localhost:3002/v1/crawl/1dd0f924-a36f-4b96-94ea-32ed954dac67 HTTP/1.1
-Authorization: Bearer fc-
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
### Check Job Status
@@ -18,7 +18,7 @@ GET http://localhost:3002/v0/jobs/active HTTP/1.1
### Scrape Website
POST http://localhost:3002/v0/crawl HTTP/1.1
-Authorization: Bearer fc-
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
@@ -45,7 +45,7 @@ content-type: application/json
### Scrape Website
POST http://localhost:3002/v0/scrape HTTP/1.1
-Authorization: Bearer
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
@@ -56,12 +56,12 @@ content-type: application/json
### Check Job Status
GET http://localhost:3002/v0/crawl/status/a6053912-d602-4709-841f-3d2cb46fea0a HTTP/1.1
-Authorization: Bearer
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
### Get Job Result
POST https://api.firecrawl.dev/v0/crawl HTTP/1.1
-Authorization: Bearer
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
@@ -70,7 +70,7 @@ content-type: application/json
### Check Job Status
GET https://api.firecrawl.dev/v0/crawl/status/cfcb71ac-23a3-4da5-bd85-d4e58b871d66
-Authorization: Bearer
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
### Get Active Jobs Count
GET http://localhost:3002/serverHealthCheck
diff --git a/apps/api/sharedLibs/go-html-to-md/.gitignore b/apps/api/sharedLibs/go-html-to-md/.gitignore
new file mode 100644
index 00000000..bdab47c6
--- /dev/null
+++ b/apps/api/sharedLibs/go-html-to-md/.gitignore
@@ -0,0 +1,2 @@
+html-to-markdown.so
+html-to-markdown.h
\ No newline at end of file
diff --git a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
index b1708abc..dec77131 100644
--- a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
@@ -844,7 +844,7 @@ describe("E2E Tests for API Routes", () => {
expect(crawlInitResponse.statusCode).toBe(200);
expect(crawlInitResponse.body).toHaveProperty("jobId");
- let crawlStatus: string;
+ let crawlStatus: string = "scraping";
let crawlData = [];
while (crawlStatus !== "completed") {
const statusResponse = await request(TEST_URL)
diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts
index acb22780..83f676b8 100644
--- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts
@@ -20,7 +20,6 @@ describe("E2E Tests for API Routes with No Authentication", () => {
process.env.SCRAPING_BEE_API_KEY = "";
process.env.OPENAI_API_KEY = "";
process.env.BULL_AUTH_KEY = "";
- process.env.LOGTAIL_KEY = "";
process.env.PLAYWRIGHT_MICROSERVICE_URL = "";
process.env.LLAMAPARSE_API_KEY = "";
process.env.TEST_API_KEY = "";
diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
index a4163472..8c3d1731 100644
--- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
@@ -1,7 +1,7 @@
import request from "supertest";
import { configDotenv } from "dotenv";
import {
- ScrapeRequest,
+ ScrapeRequestInput,
ScrapeResponseRequestTest,
} from "../../controllers/v1/types";
@@ -44,7 +44,7 @@ describe("E2E Tests for v1 API Routes", () => {
});
it.concurrent("should throw error for blocklisted URL", async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://facebook.com/fake-test",
};
@@ -73,7 +73,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent(
"should return a successful response with a valid API key",
async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
};
@@ -125,7 +125,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent(
"should return a successful response with a valid API key",
async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://arxiv.org/abs/2410.04840",
};
@@ -167,7 +167,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent(
"should return a successful response with a valid API key and includeHtml set to true",
async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
formats: ["markdown", "html"],
};
@@ -194,7 +194,7 @@ describe("E2E Tests for v1 API Routes", () => {
30000
);
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://arxiv.org/pdf/astro-ph/9301001.pdf"
// formats: ["markdown", "html"],
};
@@ -217,7 +217,7 @@ describe("E2E Tests for v1 API Routes", () => {
}, 60000);
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://arxiv.org/pdf/astro-ph/9301001"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -240,7 +240,7 @@ describe("E2E Tests for v1 API Routes", () => {
}, 60000);
it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://www.scrapethissite.com/",
onlyMainContent: false // default is true
};
@@ -261,7 +261,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(responseWithoutRemoveTags.body.data.markdown).toContain("[FAQ](/faq/)"); // .nav
expect(responseWithoutRemoveTags.body.data.markdown).toContain("Hartley Brody 2023"); // #footer
- const scrapeRequestWithRemoveTags: ScrapeRequest = {
+ const scrapeRequestWithRemoveTags: ScrapeRequestInput = {
url: "https://www.scrapethissite.com/",
excludeTags: ['.nav', '#footer', 'strong'],
onlyMainContent: false // default is true
@@ -407,7 +407,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent(
"should return a successful response with a valid API key and includeHtml set to true",
async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
formats: ["html","rawHtml"],
};
@@ -438,7 +438,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent(
"should return a successful response with waitFor",
async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://ycombinator.com/companies",
formats: ["markdown"],
waitFor: 8000
@@ -471,7 +471,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent(
"should return a successful response with a valid links on page",
async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
formats: ["links"],
};
@@ -672,7 +672,7 @@ describe("POST /v1/crawl", () => {
});
it.concurrent("should throw error for blocklisted URL", async () => {
- const scrapeRequest: ScrapeRequest = {
+ const scrapeRequest: ScrapeRequestInput = {
url: "https://facebook.com/fake-test",
};
diff --git a/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
new file mode 100644
index 00000000..5c7feb1f
--- /dev/null
+++ b/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
@@ -0,0 +1,603 @@
+import request from "supertest";
+import { configDotenv } from "dotenv";
+import {
+ ScrapeRequest,
+ ScrapeResponseRequestTest,
+} from "../../controllers/v1/types";
+
+configDotenv();
+const FIRECRAWL_API_URL = "http://127.0.0.1:3002";
+const E2E_TEST_SERVER_URL = "http://firecrawl-e2e-test.vercel.app"; // @rafaelsideguide/firecrawl-e2e-test
+
+describe("E2E Tests for v1 API Routes", () => {
+
+ it.concurrent('should return a successful response for a scrape with 403 page', async () => {
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post('/v1/scrape')
+ .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+ .set('Content-Type', 'application/json')
+ .send({ url: 'https://httpstat.us/403' });
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty('data');
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).toHaveProperty('markdown');
+ expect(response.body.data).toHaveProperty('metadata');
+ expect(response.body.data.metadata.statusCode).toBe(403);
+ }, 30000);
+
+ it.concurrent("should handle 'formats:markdown (default)' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data).toHaveProperty("markdown");
+
+ expect(response.body.data.markdown).toContain("This page is used for end-to-end (e2e) testing with Firecrawl.");
+ expect(response.body.data.markdown).toContain("Content with id #content-1");
+ // expect(response.body.data.markdown).toContain("Loading...");
+ expect(response.body.data.markdown).toContain("Click me!");
+ expect(response.body.data.markdown).toContain("Power your AI apps with clean data crawled from any website. It's also open-source."); // firecrawl.dev inside an iframe
+ expect(response.body.data.markdown).toContain("This content loads only when you see it. Don't blink! 👼"); // the browser always scroll to the bottom
+ expect(response.body.data.markdown).not.toContain("Header"); // Only main content is returned by default
+ expect(response.body.data.markdown).not.toContain("footer"); // Only main content is returned by default
+ expect(response.body.data.markdown).not.toContain("This content is only visible on mobile");
+ },
+ 30000);
+
+ it.concurrent("should handle 'formats:html' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ formats: ["html"]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+
+ expect(response.body.data).not.toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("html");
+
+ expect(response.body.data.html).not.toContain("");
+ expect(response.body.data.html).toContain("This page is used for end-to-end (e2e) testing with Firecrawl.
");
+ },
+ 30000);
+
+ it.concurrent("should handle 'rawHtml' in 'formats' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ formats: ["rawHtml"]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data).not.toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("rawHtml");
+
+ expect(response.body.data.rawHtml).toContain(">This page is used for end-to-end (e2e) testing with Firecrawl.
");
+ expect(response.body.data.rawHtml).toContain(">Header");
+ },
+ 30000);
+
+ // - TODO: tests for links
+ // - TODO: tests for screenshot
+ // - TODO: tests for screenshot@fullPage
+
+ it.concurrent("should handle 'headers' parameter correctly", async () => {
+ // @ts-ignore
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ headers: { "e2e-header-test": "firecrawl" }
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data.markdown).toContain("e2e-header-test: firecrawl");
+ }, 30000);
+
+ it.concurrent("should handle 'includeTags' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ includeTags: ['#content-1']
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data.markdown).not.toContain("This page is used for end-to-end (e2e) testing with Firecrawl.
");
+ expect(response.body.data.markdown).toContain("Content with id #content-1");
+ },
+ 30000);
+
+ it.concurrent("should handle 'excludeTags' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ excludeTags: ['#content-1']
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data.markdown).toContain("This page is used for end-to-end (e2e) testing with Firecrawl.");
+ expect(response.body.data.markdown).not.toContain("Content with id #content-1");
+ },
+ 30000);
+
+ it.concurrent("should handle 'onlyMainContent' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ formats: ["html", "markdown"],
+ onlyMainContent: false
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data.markdown).toContain("This page is used for end-to-end (e2e) testing with Firecrawl.");
+ expect(response.body.data.html).toContain("");
+ },
+ 30000);
+
+ it.concurrent("should handle 'timeout' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ timeout: 500
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(408);
+
+ if (!("error" in response.body)) {
+ throw new Error("Expected response body to have 'error' property");
+ }
+ expect(response.body.error).toBe("Request timed out");
+ expect(response.body.success).toBe(false);
+ }, 30000);
+
+
+ it.concurrent("should handle 'mobile' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ mobile: true
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data.markdown).toContain("This content is only visible on mobile");
+ },
+ 30000);
+
+ it.concurrent("should handle 'parsePDF' parameter correctly",
+ async () => {
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001.pdf'});
+ await new Promise((r) => setTimeout(r, 6000));
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty('data');
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ expect(response.body.data.markdown).toContain('arXiv:astro-ph/9301001v1 7 Jan 1993');
+ expect(response.body.data.markdown).not.toContain('h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm');
+
+ const responseNoParsePDF: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001.pdf', parsePDF: false });
+ await new Promise((r) => setTimeout(r, 6000));
+
+ expect(responseNoParsePDF.statusCode).toBe(200);
+ expect(responseNoParsePDF.body).toHaveProperty('data');
+ if (!("data" in responseNoParsePDF.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(responseNoParsePDF.body.data.markdown).toContain('h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm');
+ },
+ 30000);
+
+ // it.concurrent("should handle 'location' parameter correctly",
+ // async () => {
+ // const scrapeRequest: ScrapeRequest = {
+ // url: "https://roastmywebsite.ai",
+ // location: {
+ // country: "US",
+ // languages: ["en"]
+ // }
+ // };
+
+ // const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ // .post("/v1/scrape")
+ // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ // .set("Content-Type", "application/json")
+ // .send(scrapeRequest);
+
+ // expect(response.statusCode).toBe(200);
+ // // Add assertions to verify location is handled correctly
+ // },
+ // 30000);
+
+ it.concurrent("should handle 'skipTlsVerification' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: "https://expired.badssl.com/",
+ timeout: 120000
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+ console.log("Error1a")
+ // console.log(response.body)
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data.metadata.pageStatusCode).toBe(500);
+ console.log("Error?")
+
+ const scrapeRequestWithSkipTlsVerification = {
+ url: "https://expired.badssl.com/",
+ skipTlsVerification: true,
+ timeout: 120000
+
+ } as ScrapeRequest;
+
+ const responseWithSkipTlsVerification: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequestWithSkipTlsVerification);
+
+ console.log("Error1b")
+ // console.log(responseWithSkipTlsVerification.body)
+ expect(responseWithSkipTlsVerification.statusCode).toBe(200);
+ if (!("data" in responseWithSkipTlsVerification.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ // console.log(responseWithSkipTlsVerification.body.data)
+ expect(responseWithSkipTlsVerification.body.data.markdown).toContain("badssl.com");
+ },
+ 60000);
+
+ it.concurrent("should handle 'removeBase64Images' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ removeBase64Images: true
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ // console.log(response.body.data.markdown)
+ // - TODO: not working for every image
+ // expect(response.body.data.markdown).toContain("Image-Removed");
+ },
+ 30000);
+
+ it.concurrent("should handle 'action wait' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ actions: [{
+ type: "wait",
+ milliseconds: 10000
+ }]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data.markdown).not.toContain("Loading...");
+ expect(response.body.data.markdown).toContain("Content loaded after 5 seconds!");
+ },
+ 30000);
+
+ // screenshot
+ it.concurrent("should handle 'action screenshot' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ actions: [{
+ type: "screenshot"
+ }]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ if (!response.body.data.actions?.screenshots) {
+ throw new Error("Expected response body to have screenshots array");
+ }
+ expect(response.body.data.actions.screenshots[0].length).toBeGreaterThan(0);
+ expect(response.body.data.actions.screenshots[0]).toContain("https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-");
+
+ // TODO compare screenshot with expected screenshot
+ },
+ 30000);
+
+ it.concurrent("should handle 'action screenshot@fullPage' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ actions: [{
+ type: "screenshot",
+ fullPage: true
+ },
+ {
+ type:"scrape"
+ }]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ // console.log(response.body.data.actions?.screenshots[0])
+ if (!response.body.data.actions?.screenshots) {
+ throw new Error("Expected response body to have screenshots array");
+ }
+ expect(response.body.data.actions.screenshots[0].length).toBeGreaterThan(0);
+ expect(response.body.data.actions.screenshots[0]).toContain("https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-");
+
+ if (!response.body.data.actions?.scrapes) {
+ throw new Error("Expected response body to have scrapes array");
+ }
+ expect(response.body.data.actions.scrapes[0].url).toBe("https://firecrawl-e2e-test.vercel.app/");
+ expect(response.body.data.actions.scrapes[0].html).toContain("This page is used for end-to-end (e2e) testing with Firecrawl.");
+ // TODO compare screenshot with expected full page screenshot
+ },
+ 30000);
+
+ it.concurrent("should handle 'action click' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ actions: [{
+ type: "click",
+ selector: "#click-me"
+ }]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data.markdown).not.toContain("Click me!");
+ expect(response.body.data.markdown).toContain("Text changed after click!");
+ },
+ 30000);
+
+ it.concurrent("should handle 'action write' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ formats: ["html"],
+ actions: [{
+ type: "click",
+ selector: "#input-1"
+ },
+ {
+ type: "write",
+ text: "Hello, world!"
+ }
+ ]} as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+
+ // TODO: fix this test (need to fix fire-engine first)
+ // uncomment the following line:
+ // expect(response.body.data.html).toContain("");
+ },
+ 30000);
+
+ // TODO: fix this test (need to fix fire-engine first)
+ it.concurrent("should handle 'action pressKey' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ formats: ["markdown"],
+ actions: [
+ {
+ type: "press",
+ key: "ArrowDown"
+ }
+ ]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ // // TODO: fix this test (need to fix fire-engine first)
+ // // right now response.body is: { success: false, error: '(Internal server error) - null' }
+ // expect(response.statusCode).toBe(200);
+ // if (!("data" in response.body)) {
+ // throw new Error("Expected response body to have 'data' property");
+ // }
+ // expect(response.body.data.markdown).toContain("Last Key Clicked: ArrowDown")
+ },
+ 30000);
+
+ // TODO: fix this test (need to fix fire-engine first)
+ it.concurrent("should handle 'action scroll' parameter correctly",
+ async () => {
+ const scrapeRequest = {
+ url: E2E_TEST_SERVER_URL,
+ formats: ["markdown"],
+ actions: [
+ {
+ type: "click",
+ selector: "#scroll-bottom-loader"
+ },
+ {
+ type: "scroll",
+ direction: "down",
+ amount: 2000
+ }
+ ]
+ } as ScrapeRequest;
+
+ const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ // TODO: uncomment this tests
+ // expect(response.statusCode).toBe(200);
+ // if (!("data" in response.body)) {
+ // throw new Error("Expected response body to have 'data' property");
+ // }
+ //
+ // expect(response.body.data.markdown).toContain("You have reached the bottom!")
+ },
+ 30000);
+
+ // TODO: test scrape action
+
+});
\ No newline at end of file
diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
index 26caf63e..1646843f 100644
--- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
@@ -776,7 +776,8 @@ describe("E2E Tests for v0 API Routes", () => {
await new Promise((r) => setTimeout(r, 10000));
const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .maxResponseSize(4000000000);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts
index c9f693c5..de74fed0 100644
--- a/apps/api/src/controllers/auth.ts
+++ b/apps/api/src/controllers/auth.ts
@@ -9,9 +9,8 @@ import {
import { supabase_service } from "../services/supabase";
import { withAuth } from "../lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible";
-import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
import { sendNotification } from "../services/notification/email_notification";
-import { Logger } from "../lib/logger";
+import { logger } from "../lib/logger";
import { redlock } from "../services/redlock";
import { deleteKey, getValue } from "../services/redis";
import { setValue } from "../services/redis";
@@ -40,8 +39,8 @@ function normalizedApiIsUuid(potentialUuid: string): boolean {
export async function setCachedACUC(
api_key: string,
acuc:
- | AuthCreditUsageChunk
- | ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk)
+ | AuthCreditUsageChunk | null
+ | ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk | null)
) {
const cacheKeyACUC = `acuc_${api_key}`;
const redLockKey = `lock_${cacheKeyACUC}`;
@@ -49,7 +48,7 @@ export async function setCachedACUC(
try {
await redlock.using([redLockKey], 10000, {}, async (signal) => {
if (typeof acuc === "function") {
- acuc = acuc(JSON.parse(await getValue(cacheKeyACUC)));
+ acuc = acuc(JSON.parse(await getValue(cacheKeyACUC) ?? "null"));
if (acuc === null) {
if (signal.aborted) {
@@ -69,7 +68,7 @@ export async function setCachedACUC(
await setValue(cacheKeyACUC, JSON.stringify(acuc), 600, true);
});
} catch (error) {
- Logger.error(`Error updating cached ACUC ${cacheKeyACUC}: ${error}`);
+ logger.error(`Error updating cached ACUC ${cacheKeyACUC}: ${error}`);
}
}
@@ -103,7 +102,7 @@ export async function getACUC(
break;
}
- Logger.warn(
+ logger.warn(
`Failed to retrieve authentication and credit usage data after ${retries}, trying again...`
);
retries++;
@@ -146,33 +145,14 @@ export async function authenticateUser(
res,
mode?: RateLimiterMode
): Promise {
- return withAuth(supaAuthenticateUser)(req, res, mode);
-}
-
-function setTrace(team_id: string, api_key: string) {
- try {
- setTraceAttributes({
- team_id,
- api_key,
- });
- } catch (error) {
- Sentry.captureException(error);
- Logger.error(`Error setting trace attributes: ${error.message}`);
- }
+ return withAuth(supaAuthenticateUser, { success: true, chunk: null, team_id: "bypass" })(req, res, mode);
}
export async function supaAuthenticateUser(
req,
res,
mode?: RateLimiterMode
-): Promise<{
- success: boolean;
- team_id?: string;
- error?: string;
- status?: number;
- plan?: PlanType;
- chunk?: AuthCreditUsageChunk;
-}> {
+): Promise {
const authHeader =
req.headers.authorization ??
(req.headers["sec-websocket-protocol"]
@@ -200,7 +180,7 @@ export async function supaAuthenticateUser(
let teamId: string | null = null;
let priceId: string | null = null;
- let chunk: AuthCreditUsageChunk;
+ let chunk: AuthCreditUsageChunk | null = null;
if (token == "this_is_just_a_preview_token") {
if (mode == RateLimiterMode.CrawlStatus) {
@@ -233,8 +213,6 @@ export async function supaAuthenticateUser(
priceId = chunk.price_id;
const plan = getPlanByPriceId(priceId);
- // HyperDX Logging
- setTrace(teamId, normalizedApi);
subscriptionData = {
team_id: teamId,
plan,
@@ -291,7 +269,7 @@ export async function supaAuthenticateUser(
try {
await rateLimiter.consume(team_endpoint_token);
} catch (rateLimiterRes) {
- Logger.error(`Rate limit exceeded: ${rateLimiterRes}`);
+ logger.error(`Rate limit exceeded: ${rateLimiterRes}`);
const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);
@@ -318,7 +296,7 @@ export async function supaAuthenticateUser(
mode === RateLimiterMode.CrawlStatus ||
mode === RateLimiterMode.Search)
) {
- return { success: true, team_id: "preview" };
+ return { success: true, team_id: "preview", chunk: null };
// check the origin of the request and make sure its from firecrawl.dev
// const origin = req.headers.origin;
// if (origin && origin.includes("firecrawl.dev")){
@@ -333,12 +311,12 @@ export async function supaAuthenticateUser(
return {
success: true,
- team_id: subscriptionData.team_id,
- plan: (subscriptionData.plan ?? "") as PlanType,
+ team_id: teamId ?? undefined,
+ plan: (subscriptionData?.plan ?? "") as PlanType,
chunk,
};
}
-function getPlanByPriceId(price_id: string): PlanType {
+function getPlanByPriceId(price_id: string | null): PlanType {
switch (price_id) {
case process.env.STRIPE_PRICE_ID_STARTER:
return "starter";
diff --git a/apps/api/src/controllers/v0/admin/acuc-cache-clear.ts b/apps/api/src/controllers/v0/admin/acuc-cache-clear.ts
index 876ca98a..75acd60a 100644
--- a/apps/api/src/controllers/v0/admin/acuc-cache-clear.ts
+++ b/apps/api/src/controllers/v0/admin/acuc-cache-clear.ts
@@ -1,7 +1,7 @@
import { Request, Response } from "express";
import { supabase_service } from "../../../services/supabase";
import { clearACUC } from "../../auth";
-import { Logger } from "../../../lib/logger";
+import { logger } from "../../../lib/logger";
export async function acucCacheClearController(req: Request, res: Response) {
try {
@@ -12,11 +12,11 @@ export async function acucCacheClearController(req: Request, res: Response) {
.select("*")
.eq("team_id", team_id);
- await Promise.all(keys.data.map((x) => clearACUC(x.key)));
+ await Promise.all((keys.data ?? []).map((x) => clearACUC(x.key)));
res.json({ ok: true });
} catch (error) {
- Logger.error(`Error clearing ACUC cache via API route: ${error}`);
+ logger.error(`Error clearing ACUC cache via API route: ${error}`);
res.status(500).json({ error: "Internal server error" });
}
}
diff --git a/apps/api/src/controllers/v0/admin/queue.ts b/apps/api/src/controllers/v0/admin/queue.ts
index 71748002..6ef8a992 100644
--- a/apps/api/src/controllers/v0/admin/queue.ts
+++ b/apps/api/src/controllers/v0/admin/queue.ts
@@ -1,7 +1,7 @@
import { Request, Response } from "express";
import { Job } from "bullmq";
-import { Logger } from "../../../lib/logger";
+import { logger } from "../../../lib/logger";
import { getScrapeQueue } from "../../../services/queue-service";
import { checkAlerts } from "../../../services/alerts";
import { sendSlackWebhook } from "../../../services/alerts/slack";
@@ -10,7 +10,7 @@ export async function cleanBefore24hCompleteJobsController(
req: Request,
res: Response
) {
- Logger.info("🐂 Cleaning jobs older than 24h");
+ logger.info("🐂 Cleaning jobs older than 24h");
try {
const scrapeQueue = getScrapeQueue();
const batchSize = 10;
@@ -31,7 +31,7 @@ export async function cleanBefore24hCompleteJobsController(
).flat();
const before24hJobs =
completedJobs.filter(
- (job) => job.finishedOn < Date.now() - 24 * 60 * 60 * 1000
+ (job) => job.finishedOn !== undefined && job.finishedOn < Date.now() - 24 * 60 * 60 * 1000
) || [];
let count = 0;
@@ -45,12 +45,12 @@ export async function cleanBefore24hCompleteJobsController(
await job.remove();
count++;
} catch (jobError) {
- Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}`);
+ logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}`);
}
}
return res.status(200).send(`Removed ${count} completed jobs.`);
} catch (error) {
- Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`);
+ logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`);
return res.status(500).send("Failed to clean jobs");
}
}
@@ -60,7 +60,7 @@ export async function checkQueuesController(req: Request, res: Response) {
await checkAlerts();
return res.status(200).send("Alerts initialized");
} catch (error) {
- Logger.debug(`Failed to initialize alerts: ${error}`);
+ logger.debug(`Failed to initialize alerts: ${error}`);
return res.status(500).send("Failed to initialize alerts");
}
}
@@ -81,7 +81,7 @@ export async function queuesController(req: Request, res: Response) {
noActiveJobs,
});
} catch (error) {
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
@@ -165,7 +165,7 @@ export async function autoscalerController(req: Request, res: Response) {
}
if (targetMachineCount !== activeMachines) {
- Logger.info(
+ logger.info(
`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`
);
@@ -193,7 +193,7 @@ export async function autoscalerController(req: Request, res: Response) {
count: activeMachines,
});
} catch (error) {
- Logger.error(error);
+ logger.error(error);
return res.status(500).send("Failed to initialize autoscaler");
}
}
diff --git a/apps/api/src/controllers/v0/admin/redis-health.ts b/apps/api/src/controllers/v0/admin/redis-health.ts
index dc58d745..dc587606 100644
--- a/apps/api/src/controllers/v0/admin/redis-health.ts
+++ b/apps/api/src/controllers/v0/admin/redis-health.ts
@@ -1,6 +1,6 @@
import { Request, Response } from "express";
import Redis from "ioredis";
-import { Logger } from "../../../lib/logger";
+import { logger } from "../../../lib/logger";
import { redisRateLimitClient } from "../../../services/rate-limiter";
export async function redisHealthController(req: Request, res: Response) {
@@ -10,14 +10,14 @@ export async function redisHealthController(req: Request, res: Response) {
return await operation();
} catch (error) {
if (attempt === retries) throw error;
- Logger.warn(`Attempt ${attempt} failed: ${error.message}. Retrying...`);
+ logger.warn(`Attempt ${attempt} failed: ${error.message}. Retrying...`);
await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds before retrying
}
}
};
try {
- const queueRedis = new Redis(process.env.REDIS_URL);
+ const queueRedis = new Redis(process.env.REDIS_URL!);
const testKey = "test";
const testValue = "test";
@@ -29,7 +29,7 @@ export async function redisHealthController(req: Request, res: Response) {
queueRedisHealth = await retryOperation(() => queueRedis.get(testKey));
await retryOperation(() => queueRedis.del(testKey));
} catch (error) {
- Logger.error(`queueRedis health check failed: ${error}`);
+ logger.error(`queueRedis health check failed: ${error}`);
queueRedisHealth = null;
}
@@ -42,7 +42,7 @@ export async function redisHealthController(req: Request, res: Response) {
);
await retryOperation(() => redisRateLimitClient.del(testKey));
} catch (error) {
- Logger.error(`redisRateLimitClient health check failed: ${error}`);
+ logger.error(`redisRateLimitClient health check failed: ${error}`);
redisRateLimitHealth = null;
}
@@ -56,10 +56,10 @@ export async function redisHealthController(req: Request, res: Response) {
healthStatus.queueRedis === "healthy" &&
healthStatus.redisRateLimitClient === "healthy"
) {
- Logger.info("Both Redis instances are healthy");
+ logger.info("Both Redis instances are healthy");
return res.status(200).json({ status: "healthy", details: healthStatus });
} else {
- Logger.info(
+ logger.info(
`Redis instances health check: ${JSON.stringify(healthStatus)}`
);
// await sendSlackWebhook(
@@ -73,7 +73,7 @@ export async function redisHealthController(req: Request, res: Response) {
.json({ status: "unhealthy", details: healthStatus });
}
} catch (error) {
- Logger.error(`Redis health check failed: ${error}`);
+ logger.error(`Redis health check failed: ${error}`);
// await sendSlackWebhook(
// `[REDIS DOWN] Redis instances health check: ${error.message}`,
// true
diff --git a/apps/api/src/controllers/v0/crawl-cancel.ts b/apps/api/src/controllers/v0/crawl-cancel.ts
index efcd454a..e81064f2 100644
--- a/apps/api/src/controllers/v0/crawl-cancel.ts
+++ b/apps/api/src/controllers/v0/crawl-cancel.ts
@@ -2,7 +2,7 @@ import { Request, Response } from "express";
import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../../src/types";
import { supabase_service } from "../../../src/services/supabase";
-import { Logger } from "../../../src/lib/logger";
+import { logger } from "../../../src/lib/logger";
import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
import * as Sentry from "@sentry/node";
import { configDotenv } from "dotenv";
@@ -12,15 +12,17 @@ export async function crawlCancelController(req: Request, res: Response) {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
- const { success, team_id, error, status } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
RateLimiterMode.CrawlStatus
);
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+ const { team_id } = auth;
+
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ error: "Job not found" });
@@ -46,7 +48,7 @@ export async function crawlCancelController(req: Request, res: Response) {
sc.cancelled = true;
await saveCrawl(req.params.jobId, sc);
} catch (error) {
- Logger.error(error);
+ logger.error(error);
}
res.json({
@@ -54,7 +56,7 @@ export async function crawlCancelController(req: Request, res: Response) {
});
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
diff --git a/apps/api/src/controllers/v0/crawl-status.ts b/apps/api/src/controllers/v0/crawl-status.ts
index 66522bcf..7b6e610a 100644
--- a/apps/api/src/controllers/v0/crawl-status.ts
+++ b/apps/api/src/controllers/v0/crawl-status.ts
@@ -2,15 +2,17 @@ import { Request, Response } from "express";
import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../../src/types";
import { getScrapeQueue } from "../../../src/services/queue-service";
-import { Logger } from "../../../src/lib/logger";
+import { logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { supabaseGetJobsByCrawlId } from "../../../src/lib/supabase-jobs";
import * as Sentry from "@sentry/node";
import { configDotenv } from "dotenv";
+import { Job } from "bullmq";
+import { toLegacyDocument } from "../v1/types";
configDotenv();
export async function getJobs(crawlId: string, ids: string[]) {
- const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
+ const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x) as Job[];
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobsByCrawlId(crawlId);
@@ -32,15 +34,17 @@ export async function getJobs(crawlId: string, ids: string[]) {
export async function crawlStatusController(req: Request, res: Response) {
try {
- const { success, team_id, error, status } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
RateLimiterMode.CrawlStatus
);
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+ const { team_id } = auth;
+
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ error: "Job not found" });
@@ -90,12 +94,12 @@ export async function crawlStatusController(req: Request, res: Response) {
status: jobStatus,
current: jobStatuses.filter(x => x === "completed" || x === "failed").length,
total: jobs.length,
- data: jobStatus === "completed" ? data : null,
- partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
+ data: jobStatus === "completed" ? data.map(x => toLegacyDocument(x, sc.internalOptions)) : null,
+ partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null).map(x => toLegacyDocument(x, sc.internalOptions)),
});
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
diff --git a/apps/api/src/controllers/v0/crawl.ts b/apps/api/src/controllers/v0/crawl.ts
index fb412147..d502d142 100644
--- a/apps/api/src/controllers/v0/crawl.ts
+++ b/apps/api/src/controllers/v0/crawl.ts
@@ -9,24 +9,28 @@ import { validateIdempotencyKey } from "../../../src/services/idempotency/valida
import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../../src/lib/logger";
+import { logger } from "../../../src/lib/logger";
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import * as Sentry from "@sentry/node";
import { getJobPriority } from "../../lib/job-priority";
+import { fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
+import { ZodError } from "zod";
export async function crawlController(req: Request, res: Response) {
try {
- const { success, team_id, error, status, plan, chunk } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
RateLimiterMode.Crawl
);
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+ const { team_id, plan, chunk } = auth;
+
if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) {
@@ -35,7 +39,7 @@ export async function crawlController(req: Request, res: Response) {
try {
createIdempotencyKey(req);
} catch (error) {
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
@@ -77,7 +81,7 @@ export async function crawlController(req: Request, res: Response) {
// TODO: need to do this to v1
crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit);
- let url = req.body.url;
+ let url = urlSchema.parse(req.body.url);
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
@@ -123,7 +127,7 @@ export async function crawlController(req: Request, res: Response) {
// documents: docs,
// });
// } catch (error) {
- // Logger.error(error);
+ // logger.error(error);
// return res.status(500).json({ error: error.message });
// }
// }
@@ -132,10 +136,13 @@ export async function crawlController(req: Request, res: Response) {
await logCrawl(id, team_id);
+ const { scrapeOptions, internalOptions } = fromLegacyScrapeOptions(pageOptions, undefined, undefined);
+
const sc: StoredCrawl = {
originUrl: url,
crawlerOptions,
- pageOptions,
+ scrapeOptions,
+ internalOptions,
team_id,
plan,
createdAt: Date.now(),
@@ -170,10 +177,11 @@ export async function crawlController(req: Request, res: Response) {
data: {
url,
mode: "single_urls",
- crawlerOptions: crawlerOptions,
+ crawlerOptions,
+ scrapeOptions,
+ internalOptions,
team_id,
plan,
- pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
sitemapped: true,
@@ -208,10 +216,11 @@ export async function crawlController(req: Request, res: Response) {
{
url,
mode: "single_urls",
- crawlerOptions: crawlerOptions,
+ crawlerOptions,
+ scrapeOptions,
+ internalOptions,
team_id,
- plan,
- pageOptions: pageOptions,
+ plan: plan!,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
},
@@ -226,7 +235,9 @@ export async function crawlController(req: Request, res: Response) {
res.json({ jobId: id });
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
- return res.status(500).json({ error: error.message });
+ logger.error(error);
+ return res.status(500).json({ error: error instanceof ZodError
+ ? "Invalid URL"
+ : error.message });
}
}
diff --git a/apps/api/src/controllers/v0/crawlPreview.ts b/apps/api/src/controllers/v0/crawlPreview.ts
index e9f6e806..8b82bef8 100644
--- a/apps/api/src/controllers/v0/crawlPreview.ts
+++ b/apps/api/src/controllers/v0/crawlPreview.ts
@@ -3,15 +3,16 @@ import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../../src/types";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../../src/lib/logger";
+import { logger } from "../../../src/lib/logger";
import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { addScrapeJob } from "../../../src/services/queue-jobs";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import * as Sentry from "@sentry/node";
+import { fromLegacyScrapeOptions } from "../v1/types";
export async function crawlPreviewController(req: Request, res: Response) {
try {
- const { success, error, status, team_id:a, plan } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
RateLimiterMode.Preview
@@ -19,10 +20,12 @@ export async function crawlPreviewController(req: Request, res: Response) {
const team_id = "preview";
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+ const { plan } = auth;
+
let url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
@@ -71,7 +74,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
// documents: docs,
// });
// } catch (error) {
- // Logger.error(error);
+ // logger.error(error);
// return res.status(500).json({ error: error.message });
// }
// }
@@ -84,10 +87,13 @@ export async function crawlPreviewController(req: Request, res: Response) {
robots = await this.getRobotsTxt();
} catch (_) {}
+ const { scrapeOptions, internalOptions } = fromLegacyScrapeOptions(pageOptions, undefined, undefined);
+
const sc: StoredCrawl = {
originUrl: url,
crawlerOptions,
- pageOptions,
+ scrapeOptions,
+ internalOptions,
team_id,
plan,
robots,
@@ -107,10 +113,11 @@ export async function crawlPreviewController(req: Request, res: Response) {
await addScrapeJob({
url,
mode: "single_urls",
- crawlerOptions: crawlerOptions,
team_id,
- plan,
- pageOptions: pageOptions,
+ plan: plan!,
+ crawlerOptions,
+ scrapeOptions,
+ internalOptions,
origin: "website-preview",
crawl_id: id,
sitemapped: true,
@@ -123,10 +130,11 @@ export async function crawlPreviewController(req: Request, res: Response) {
await addScrapeJob({
url,
mode: "single_urls",
- crawlerOptions: crawlerOptions,
team_id,
- plan,
- pageOptions: pageOptions,
+ plan: plan!,
+ crawlerOptions,
+ scrapeOptions,
+ internalOptions,
origin: "website-preview",
crawl_id: id,
}, {}, jobId);
@@ -136,7 +144,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
res.json({ jobId: id });
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
diff --git a/apps/api/src/controllers/v0/keyAuth.ts b/apps/api/src/controllers/v0/keyAuth.ts
index b70d672a..63915302 100644
--- a/apps/api/src/controllers/v0/keyAuth.ts
+++ b/apps/api/src/controllers/v0/keyAuth.ts
@@ -8,13 +8,14 @@ import { authenticateUser } from "../auth";
export const keyAuthController = async (req: Request, res: Response) => {
try {
// make sure to authenticate user first, Bearer
- const { success, team_id, error, status } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res
);
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+
// if success, return success: true
return res.status(200).json({ success: true });
} catch (error) {
diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts
index 5e6b7c6f..05fb3c41 100644
--- a/apps/api/src/controllers/v0/scrape.ts
+++ b/apps/api/src/controllers/v0/scrape.ts
@@ -7,7 +7,7 @@ import {
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
import { logJob } from "../../services/logging/log_job";
-import { Document } from "../../lib/entities";
+import { Document, fromLegacyCombo, toLegacyDocument, url as urlSchema } from "../v1/types";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from "../../lib/LLM-extraction/helpers";
import {
@@ -19,9 +19,11 @@ import {
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { getScrapeQueue } from "../../services/queue-service";
import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
import * as Sentry from "@sentry/node";
import { getJobPriority } from "../../lib/job-priority";
+import { fromLegacyScrapeOptions } from "../v1/types";
+import { ZodError } from "zod";
export async function scrapeHelper(
jobId: string,
@@ -35,10 +37,10 @@ export async function scrapeHelper(
): Promise<{
success: boolean;
error?: string;
- data?: Document;
+ data?: Document | { url: string };
returnCode: number;
}> {
- const url = req.body.url;
+ const url = urlSchema.parse(req.body.url);
if (typeof url !== "string") {
return { success: false, error: "Url is required", returnCode: 400 };
}
@@ -54,15 +56,16 @@ export async function scrapeHelper(
const jobPriority = await getJobPriority({ plan, team_id, basePriority: 10 });
+ const { scrapeOptions, internalOptions } = fromLegacyCombo(pageOptions, extractorOptions, timeout, crawlerOptions);
+
await addScrapeJob(
{
url,
mode: "single_urls",
- crawlerOptions,
team_id,
- pageOptions,
- plan,
- extractorOptions,
+ scrapeOptions,
+ internalOptions,
+ plan: plan!,
origin: req.body.origin ?? defaultOrigin,
is_scrape: true,
},
@@ -81,7 +84,7 @@ export async function scrapeHelper(
},
async (span) => {
try {
- doc = (await waitForJob(jobId, timeout))[0];
+ doc = (await waitForJob(jobId, timeout));
} catch (e) {
if (e instanceof Error && e.message.startsWith("Job wait")) {
span.setAttribute("timedOut", true);
@@ -149,7 +152,7 @@ export async function scrapeHelper(
return {
success: true,
- data: doc,
+ data: toLegacyDocument(doc, internalOptions),
returnCode: 200,
};
}
@@ -158,15 +161,17 @@ export async function scrapeController(req: Request, res: Response) {
try {
let earlyReturn = false;
// make sure to authenticate user first, Bearer
- const { success, team_id, error, status, plan, chunk } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
RateLimiterMode.Scrape
);
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+ const { team_id, plan, chunk } = auth;
+
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions };
const extractorOptions = {
@@ -200,7 +205,7 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(402).json({ error: "Insufficient credits. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing" });
}
} catch (error) {
- Logger.error(error);
+ logger.error(error);
earlyReturn = true;
return res.status(500).json({
error:
@@ -224,8 +229,8 @@ export async function scrapeController(req: Request, res: Response) {
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const numTokens =
- result.data && result.data.markdown
- ? numTokensFromString(result.data.markdown, "gpt-3.5-turbo")
+ result.data && (result.data as Document).markdown
+ ? numTokensFromString((result.data as Document).markdown!, "gpt-3.5-turbo")
: 0;
if (result.success) {
@@ -246,7 +251,7 @@ export async function scrapeController(req: Request, res: Response) {
if (creditsToBeBilled > 0) {
// billing for doc done on queue end, bill only for llm extraction
billTeam(team_id, chunk?.sub_id, creditsToBeBilled).catch(error => {
- Logger.error(`Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`);
+ logger.error(`Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`);
// Optionally, you could notify an admin or add to a retry queue here
});
}
@@ -254,17 +259,19 @@ export async function scrapeController(req: Request, res: Response) {
let doc = result.data;
if (!pageOptions || !pageOptions.includeRawHtml) {
- if (doc && doc.rawHtml) {
- delete doc.rawHtml;
+ if (doc && (doc as Document).rawHtml) {
+ delete (doc as Document).rawHtml;
}
}
if(pageOptions && pageOptions.includeExtract) {
- if(!pageOptions.includeMarkdown && doc && doc.markdown) {
- delete doc.markdown;
+ if(!pageOptions.includeMarkdown && doc && (doc as Document).markdown) {
+ delete (doc as Document).markdown;
}
}
+ const { scrapeOptions } = fromLegacyScrapeOptions(pageOptions, extractorOptions, timeout);
+
logJob({
job_id: jobId,
success: result.success,
@@ -276,21 +283,22 @@ export async function scrapeController(req: Request, res: Response) {
mode: "scrape",
url: req.body.url,
crawlerOptions: crawlerOptions,
- pageOptions: pageOptions,
+ scrapeOptions,
origin: origin,
- extractor_options: extractorOptions,
num_tokens: numTokens,
});
return res.status(result.returnCode).json(result);
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({
error:
- typeof error === "string"
- ? error
- : error?.message ?? "Internal Server Error",
+ error instanceof ZodError
+ ? "Invalid URL"
+ : typeof error === "string"
+ ? error
+ : error?.message ?? "Internal Server Error",
});
}
}
diff --git a/apps/api/src/controllers/v0/search.ts b/apps/api/src/controllers/v0/search.ts
index 67cff8eb..e0102406 100644
--- a/apps/api/src/controllers/v0/search.ts
+++ b/apps/api/src/controllers/v0/search.ts
@@ -1,5 +1,4 @@
import { Request, Response } from "express";
-import { WebScraperDataProvider } from "../../scraper/WebScraper";
import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
@@ -8,21 +7,23 @@ import { PageOptions, SearchOptions } from "../../lib/entities";
import { search } from "../../search";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
import { getScrapeQueue } from "../../services/queue-service";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import * as Sentry from "@sentry/node";
import { getJobPriority } from "../../lib/job-priority";
+import { Job } from "bullmq";
+import { Document, fromLegacyCombo, fromLegacyScrapeOptions, toLegacyDocument } from "../v1/types";
export async function searchHelper(
jobId: string,
req: Request,
team_id: string,
- subscription_id: string,
+ subscription_id: string | null | undefined,
crawlerOptions: any,
pageOptions: PageOptions,
searchOptions: SearchOptions,
- plan: PlanType
+ plan: PlanType | undefined
): Promise<{
success: boolean;
error?: string;
@@ -35,8 +36,8 @@ export async function searchHelper(
return { success: false, error: "Query is required", returnCode: 400 };
}
- const tbs = searchOptions.tbs ?? null;
- const filter = searchOptions.filter ?? null;
+ const tbs = searchOptions.tbs ?? undefined;
+ const filter = searchOptions.filter ?? undefined;
let num_results = Math.min(searchOptions.limit ?? 7, 10);
if (team_id === "d97c4ceb-290b-4957-8432-2b2a02727d95") {
@@ -57,11 +58,12 @@ export async function searchHelper(
});
let justSearch = pageOptions.fetchPageContent === false;
-
+
+ const { scrapeOptions, internalOptions } = fromLegacyCombo(pageOptions, undefined, 60000, crawlerOptions);
if (justSearch) {
billTeam(team_id, subscription_id, res.length).catch(error => {
- Logger.error(`Failed to bill team ${team_id} for ${res.length} credits: ${error}`);
+ logger.error(`Failed to bill team ${team_id} for ${res.length} credits: ${error}`);
// Optionally, you could notify an admin or add to a retry queue here
});
return { success: true, data: res, returnCode: 200 };
@@ -88,9 +90,9 @@ export async function searchHelper(
data: {
url,
mode: "single_urls",
- crawlerOptions: crawlerOptions,
team_id: team_id,
- pageOptions: pageOptions,
+ scrapeOptions,
+ internalOptions,
},
opts: {
jobId: uuid,
@@ -104,7 +106,7 @@ export async function searchHelper(
await addScrapeJob(job.data as any, {}, job.opts.jobId, job.opts.priority)
}
- const docs = (await Promise.all(jobDatas.map(x => waitForJob(x.opts.jobId, 60000)))).map(x => x[0]);
+ const docs = (await Promise.all(jobDatas.map(x => waitForJob(x.opts.jobId, 60000)))).map(x => toLegacyDocument(x, internalOptions));
if (docs.length === 0) {
return { success: true, error: "No search results found", returnCode: 200 };
@@ -115,7 +117,7 @@ export async function searchHelper(
// make sure doc.content is not empty
const filteredDocs = docs.filter(
- (doc: { content?: string }) => doc && doc.content && doc.content.trim().length > 0
+ (doc: any) => doc && doc.content && doc.content.trim().length > 0
);
if (filteredDocs.length === 0) {
@@ -132,14 +134,15 @@ export async function searchHelper(
export async function searchController(req: Request, res: Response) {
try {
// make sure to authenticate user first, Bearer
- const { success, team_id, error, status, plan, chunk } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
RateLimiterMode.Search
);
- if (!success) {
- return res.status(status).json({ error });
+ if (!auth.success) {
+ return res.status(auth.status).json({ error: auth.error });
}
+ const { team_id, plan, chunk } = auth;
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? {
includeHtml: req.body.pageOptions?.includeHtml ?? false,
@@ -162,7 +165,7 @@ export async function searchController(req: Request, res: Response) {
}
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: "Internal server error" });
}
const startTime = new Date().getTime();
@@ -189,7 +192,6 @@ export async function searchController(req: Request, res: Response) {
mode: "search",
url: req.body.query,
crawlerOptions: crawlerOptions,
- pageOptions: pageOptions,
origin: origin,
});
return res.status(result.returnCode).json(result);
@@ -199,7 +201,7 @@ export async function searchController(req: Request, res: Response) {
}
Sentry.captureException(error);
- Logger.error(error);
+ logger.error("Unhandled error occurred in search", { error });
return res.status(500).json({ error: error.message });
}
}
diff --git a/apps/api/src/controllers/v0/status.ts b/apps/api/src/controllers/v0/status.ts
index bf8d2834..c5eafc2d 100644
--- a/apps/api/src/controllers/v0/status.ts
+++ b/apps/api/src/controllers/v0/status.ts
@@ -1,5 +1,5 @@
import { Request, Response } from "express";
-import { Logger } from "../../../src/lib/logger";
+import { logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { getJobs } from "./crawl-status";
import * as Sentry from "@sentry/node";
@@ -37,7 +37,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Response) {
});
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
diff --git a/apps/api/src/controllers/v1/batch-scrape.ts b/apps/api/src/controllers/v1/batch-scrape.ts
index 286163df..9c6a288c 100644
--- a/apps/api/src/controllers/v1/batch-scrape.ts
+++ b/apps/api/src/controllers/v1/batch-scrape.ts
@@ -4,8 +4,6 @@ import {
BatchScrapeRequest,
batchScrapeRequestSchema,
CrawlResponse,
- legacyExtractorOptions,
- legacyScrapeOptions,
RequestWithAuth,
} from "./types";
import {
@@ -29,19 +27,16 @@ export async function batchScrapeController(
await logCrawl(id, req.auth.team_id);
- let { remainingCredits } = req.account;
+ let { remainingCredits } = req.account!;
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if(!useDbAuthentication){
remainingCredits = Infinity;
}
- const pageOptions = legacyScrapeOptions(req.body);
- const extractorOptions = req.body.extract ? legacyExtractorOptions(req.body.extract) : undefined;
-
-
const sc: StoredCrawl = {
crawlerOptions: null,
- pageOptions,
+ scrapeOptions: req.body,
+ internalOptions: {},
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
@@ -64,10 +59,9 @@ export async function batchScrapeController(
url: x,
mode: "single_urls" as const,
team_id: req.auth.team_id,
- plan: req.auth.plan,
+ plan: req.auth.plan!,
crawlerOptions: null,
- pageOptions,
- extractorOptions,
+ scrapeOptions: req.body,
origin: "api",
crawl_id: id,
sitemapped: true,
diff --git a/apps/api/src/controllers/v1/crawl-cancel.ts b/apps/api/src/controllers/v1/crawl-cancel.ts
index f8fba824..958318b5 100644
--- a/apps/api/src/controllers/v1/crawl-cancel.ts
+++ b/apps/api/src/controllers/v1/crawl-cancel.ts
@@ -1,6 +1,6 @@
import { Response } from "express";
import { supabase_service } from "../../services/supabase";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
import { getCrawl, saveCrawl } from "../../lib/crawl-redis";
import * as Sentry from "@sentry/node";
import { configDotenv } from "dotenv";
@@ -36,7 +36,7 @@ export async function crawlCancelController(req: RequestWithAuth<{ jobId: string
sc.cancelled = true;
await saveCrawl(req.params.jobId, sc);
} catch (error) {
- Logger.error(error);
+ logger.error(error);
}
res.json({
@@ -44,7 +44,7 @@ export async function crawlCancelController(req: RequestWithAuth<{ jobId: string
});
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
}
diff --git a/apps/api/src/controllers/v1/crawl-status-ws.ts b/apps/api/src/controllers/v1/crawl-status-ws.ts
index 3738e3a2..18222edc 100644
--- a/apps/api/src/controllers/v1/crawl-status-ws.ts
+++ b/apps/api/src/controllers/v1/crawl-status-ws.ts
@@ -1,14 +1,15 @@
import { authMiddleware } from "../../routes/v1";
import { RateLimiterMode } from "../../types";
import { authenticateUser } from "../auth";
-import { CrawlStatusParams, CrawlStatusResponse, Document, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
+import { CrawlStatusParams, CrawlStatusResponse, Document, ErrorResponse, RequestWithAuth } from "./types";
import { WebSocket } from "ws";
import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength, getThrottledJobs, isCrawlFinished, isCrawlFinishedLocked } from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { getJob, getJobs } from "./crawl-status";
import * as Sentry from "@sentry/node";
+import { Job, JobState } from "bullmq";
type ErrorMessage = {
type: "error",
@@ -56,7 +57,7 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>) {
@@ -70,15 +71,14 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>) {
      const notDoneJobIDs = jobIDs.filter(x => !doneJobIDs.includes(x));
const jobStatuses = await Promise.all(notDoneJobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)]));
- const newlyDoneJobIDs = jobStatuses.filter(x => x[1] === "completed" || x[1] === "failed").map(x => x[0]);
-
- for (const jobID of newlyDoneJobIDs) {
- const job = await getJob(jobID);
+ const newlyDoneJobIDs: string[] = jobStatuses.filter(x => x[1] === "completed" || x[1] === "failed").map(x => x[0]);
+ const newlyDoneJobs: Job[] = (await Promise.all(newlyDoneJobIDs.map(x => getJob(x)))).filter(x => x !== undefined) as Job[]
+ for (const job of newlyDoneJobs) {
if (job.returnvalue) {
send(ws, {
type: "document",
- data: legacyDocumentConverter(job.returnvalue),
+ data: job.returnvalue,
})
} else {
return close(ws, 3000, { type: "error", error: job.failedReason });
@@ -100,8 +100,8 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>) {
-        data: data.map(x => legacyDocumentConverter(x)),
+ data: data,
}
});
@@ -139,19 +139,21 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>) {
try {
- const { success, team_id, error, status, plan } = await authenticateUser(
+ const auth = await authenticateUser(
req,
null,
RateLimiterMode.CrawlStatus,
);
- if (!success) {
+ if (!auth.success) {
return close(ws, 3000, {
type: "error",
- error,
+ error: auth.error,
});
}
+ const { team_id, plan } = auth;
+
req.auth = { team_id, plan };
await crawlStatusWS(ws, req);
@@ -170,7 +172,7 @@ export async function crawlStatusWSController(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
}
}
- Logger.error("Error occurred in WebSocket! (" + req.path + ") -- ID " + id + " -- " + verbose);
+ logger.error("Error occurred in WebSocket! (" + req.path + ") -- ID " + id + " -- " + verbose);
return close(ws, 1011, {
type: "error",
error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id
diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts
index a8d78293..f150ddc4 100644
--- a/apps/api/src/controllers/v1/crawl-status.ts
+++ b/apps/api/src/controllers/v1/crawl-status.ts
@@ -1,9 +1,10 @@
import { Response } from "express";
-import { CrawlStatusParams, CrawlStatusResponse, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
+import { CrawlStatusParams, CrawlStatusResponse, ErrorResponse, RequestWithAuth } from "./types";
import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength, getThrottledJobs } from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { supabaseGetJobById, supabaseGetJobsById } from "../../lib/supabase-jobs";
import { configDotenv } from "dotenv";
+import { Job, JobState } from "bullmq";
configDotenv();
export async function getJob(id: string) {
@@ -24,7 +25,7 @@ export async function getJob(id: string) {
}
export async function getJobs(ids: string[]) {
- const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
+ const jobs: (Job & { id: string })[] = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x) as (Job & {id: string})[];
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobsById(ids);
@@ -63,8 +64,8 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>) {
  if (data.length > 0) {
- if (!doneJobs[0].data.pageOptions.includeRawHtml) {
+ if (!doneJobs[0].data.scrapeOptions.formats.includes("rawHtml")) {
for (let ii = 0; ii < doneJobs.length; ii++) {
if (data[ii]) {
delete data[ii].rawHtml;
@@ -142,7 +143,7 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>) {
-    data: data.map(x => legacyDocumentConverter(x)),
+ data: data,
});
}
diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts
index 53cc04e8..f9f60e71 100644
--- a/apps/api/src/controllers/v1/crawl.ts
+++ b/apps/api/src/controllers/v1/crawl.ts
@@ -4,9 +4,8 @@ import {
CrawlRequest,
crawlRequestSchema,
CrawlResponse,
- legacyCrawlerOptions,
- legacyScrapeOptions,
RequestWithAuth,
+ toLegacyCrawlerOptions,
} from "./types";
import {
addCrawlJob,
@@ -20,9 +19,10 @@ import {
import { logCrawl } from "../../services/logging/crawl_log";
import { getScrapeQueue } from "../../services/queue-service";
import { addScrapeJob } from "../../services/queue-jobs";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
import { getJobPriority } from "../../lib/job-priority";
import { callWebhook } from "../../services/webhook";
+import { scrapeOptions as scrapeOptionsSchema } from "./types";
export async function crawlController(
req: RequestWithAuth<{}, CrawlResponse, CrawlRequest>,
@@ -34,18 +34,22 @@ export async function crawlController(
await logCrawl(id, req.auth.team_id);
- let { remainingCredits } = req.account;
+ let { remainingCredits } = req.account!;
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if(!useDbAuthentication){
remainingCredits = Infinity;
}
- const crawlerOptions = legacyCrawlerOptions(req.body);
- const pageOptions = legacyScrapeOptions(req.body.scrapeOptions);
+ const crawlerOptions = {
+ ...req.body,
+ url: undefined,
+ scrapeOptions: undefined,
+ };
+ const scrapeOptions = req.body.scrapeOptions;
// TODO: @rafa, is this right? copied from v0
- if (Array.isArray(crawlerOptions.includes)) {
- for (const x of crawlerOptions.includes) {
+ if (Array.isArray(crawlerOptions.includePaths)) {
+ for (const x of crawlerOptions.includePaths) {
try {
new RegExp(x);
} catch (e) {
@@ -54,8 +58,8 @@ export async function crawlController(
}
}
- if (Array.isArray(crawlerOptions.excludes)) {
- for (const x of crawlerOptions.excludes) {
+ if (Array.isArray(crawlerOptions.excludePaths)) {
+ for (const x of crawlerOptions.excludePaths) {
try {
new RegExp(x);
} catch (e) {
@@ -68,8 +72,9 @@ export async function crawlController(
const sc: StoredCrawl = {
originUrl: req.body.url,
- crawlerOptions,
- pageOptions,
+ crawlerOptions: toLegacyCrawlerOptions(crawlerOptions),
+ scrapeOptions,
+ internalOptions: {},
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
@@ -78,9 +83,9 @@ export async function crawlController(
const crawler = crawlToCrawler(id, sc);
try {
- sc.robots = await crawler.getRobotsTxt(pageOptions.skipTlsVerification);
+ sc.robots = await crawler.getRobotsTxt(scrapeOptions.skipTlsVerification);
} catch (e) {
- Logger.debug(
+ logger.debug(
`[Crawl] Failed to get robots.txt (this is probably fine!): ${JSON.stringify(
e
)}`
@@ -112,7 +117,7 @@ export async function crawlController(
team_id: req.auth.team_id,
plan: req.auth.plan,
crawlerOptions,
- pageOptions,
+ scrapeOptions,
origin: "api",
crawl_id: id,
sitemapped: true,
@@ -142,10 +147,10 @@ export async function crawlController(
{
url: req.body.url,
mode: "single_urls",
- crawlerOptions: crawlerOptions,
team_id: req.auth.team_id,
- plan: req.auth.plan,
- pageOptions: pageOptions,
+ crawlerOptions,
+ scrapeOptions: scrapeOptionsSchema.parse(scrapeOptions),
+ plan: req.auth.plan!,
origin: "api",
crawl_id: id,
webhook: req.body.webhook,
diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts
index 5ed3dd51..45856543 100644
--- a/apps/api/src/controllers/v1/map.ts
+++ b/apps/api/src/controllers/v1/map.ts
@@ -1,9 +1,9 @@
import { Response } from "express";
import { v4 as uuidv4 } from "uuid";
import {
- legacyCrawlerOptions,
mapRequestSchema,
RequestWithAuth,
+ scrapeOptions,
} from "./types";
import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
import { MapResponse, MapRequest } from "./types";
@@ -18,11 +18,11 @@ import { fireEngineMap } from "../../search/fireEngine";
import { billTeam } from "../../services/billing/credit_billing";
import { logJob } from "../../services/logging/log_job";
import { performCosineSimilarity } from "../../lib/map-cosine";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
import Redis from "ioredis";
configDotenv();
-const redis = new Redis(process.env.REDIS_URL);
+const redis = new Redis(process.env.REDIS_URL!);
// Max Links that /map can return
const MAX_MAP_LIMIT = 5000;
@@ -44,8 +44,12 @@ export async function mapController(
const sc: StoredCrawl = {
originUrl: req.body.url,
- crawlerOptions: legacyCrawlerOptions(req.body),
- pageOptions: {},
+ crawlerOptions: {
+ ...req.body,
+ scrapeOptions: undefined,
+ },
+ scrapeOptions: scrapeOptions.parse({}),
+ internalOptions: {},
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
@@ -65,8 +69,8 @@ export async function mapController(
const cacheKey = `fireEngineMap:${mapUrl}`;
const cachedResult = null;
- let allResults: any[];
-  let pagePromises: Promise<any>[];
+ let allResults: any[] = [];
+  let pagePromises: Promise<any>[] = [];
if (cachedResult) {
allResults = JSON.parse(cachedResult);
@@ -139,7 +143,7 @@ export async function mapController(
return null;
}
})
- .filter((x) => x !== null);
+ .filter((x) => x !== null) as string[];
// allows for subdomains to be included
links = links.filter((x) => isSameDomain(x, req.body.url));
@@ -153,7 +157,7 @@ export async function mapController(
links = removeDuplicateUrls(links);
billTeam(req.auth.team_id, req.acuc?.sub_id, 1).catch((error) => {
- Logger.error(
+ logger.error(
`Failed to bill team ${req.auth.team_id} for 1 credit: ${error}`
);
// Optionally, you could notify an admin or add to a retry queue here
@@ -175,9 +179,8 @@ export async function mapController(
mode: "map",
url: req.body.url,
crawlerOptions: {},
- pageOptions: {},
+ scrapeOptions: {},
origin: req.body.origin,
- extractor_options: { mode: "markdown" },
num_tokens: 0,
});
diff --git a/apps/api/src/controllers/v1/scrape-status.ts b/apps/api/src/controllers/v1/scrape-status.ts
index 5e0aecb6..db50f7d3 100644
--- a/apps/api/src/controllers/v1/scrape-status.ts
+++ b/apps/api/src/controllers/v1/scrape-status.ts
@@ -12,7 +12,7 @@ export async function scrapeStatusController(req: any, res: any) {
const job = await supabaseGetJobByIdOnlyData(req.params.jobId);
- if(job.team_id !== "41bdbfe1-0579-4d9b-b6d5-809f16be12f5"){
+ if(job?.team_id !== "41bdbfe1-0579-4d9b-b6d5-809f16be12f5"){
return res.status(403).json({
success: false,
error: "You are not allowed to access this resource.",
diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts
index 6fa855f7..3f6609af 100644
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@@ -1,10 +1,7 @@
-import { Request, Response } from "express";
-import { Logger } from "../../lib/logger";
+import { Response } from "express";
+import { logger } from "../../lib/logger";
import {
Document,
- legacyDocumentConverter,
- legacyExtractorOptions,
- legacyScrapeOptions,
RequestWithAuth,
ScrapeRequest,
scrapeRequestSchema,
@@ -12,7 +9,6 @@ import {
} from "./types";
import { billTeam } from "../../services/billing/credit_billing";
import { v4 as uuidv4 } from "uuid";
-import { numTokensFromString } from "../../lib/LLM-extraction/helpers";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { logJob } from "../../services/logging/log_job";
import { getJobPriority } from "../../lib/job-priority";
@@ -28,8 +24,6 @@ export async function scrapeController(
const origin = req.body.origin;
const timeout = req.body.timeout;
- const pageOptions = legacyScrapeOptions(req.body);
- const extractorOptions = req.body.extract ? legacyExtractorOptions(req.body.extract) : undefined;
const jobId = uuidv4();
const startTime = new Date().getTime();
@@ -43,11 +37,10 @@ export async function scrapeController(
{
url: req.body.url,
mode: "single_urls",
- crawlerOptions: {},
team_id: req.auth.team_id,
- plan: req.auth.plan,
- pageOptions,
- extractorOptions,
+ scrapeOptions: req.body,
+ internalOptions: {},
+ plan: req.auth.plan!,
origin: req.body.origin,
is_scrape: true,
},
@@ -56,13 +49,13 @@ export async function scrapeController(
jobPriority
);
- const totalWait = (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds : 0) + a, 0);
+ const totalWait = (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 0 : 0) + a, 0);
- let doc: any | undefined;
+ let doc: Document;
try {
- doc = (await waitForJob(jobId, timeout + totalWait))[0];
+ doc = await waitForJob(jobId, timeout + totalWait); // TODO: better types for this
} catch (e) {
- Logger.error(`Error in scrapeController: ${e}`);
+ logger.error(`Error in scrapeController: ${e}`);
if (e instanceof Error && e.message.startsWith("Job wait")) {
return res.status(408).json({
success: false,
@@ -71,34 +64,19 @@ export async function scrapeController(
} else {
return res.status(500).json({
success: false,
- error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
- extractorOptions && extractorOptions.mode !== "markdown"
- ? " - Could be due to LLM parsing issues"
- : ""
- }`,
+ error: `(Internal server error) - ${e && e?.message ? e.message : e}`,
});
}
}
await getScrapeQueue().remove(jobId);
- if (!doc) {
- console.error("!!! PANIC DOC IS", doc);
- return res.status(200).json({
- success: true,
- warning: "No page found",
- data: doc,
- });
- }
-
- delete doc.index;
- delete doc.provider;
-
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const numTokens =
- doc && doc.markdown
- ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
+ doc && doc.extract
+ // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
+ ? 0 // TODO: fix
: 0;
let creditsToBeBilled = 1; // Assuming 1 credit per document
@@ -111,22 +89,16 @@ export async function scrapeController(
}
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(error => {
- Logger.error(`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`);
+ logger.error(`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`);
// Optionally, you could notify an admin or add to a retry queue here
});
- if (!pageOptions || !pageOptions.includeRawHtml) {
+ if (!req.body.formats.includes("rawHtml")) {
if (doc && doc.rawHtml) {
delete doc.rawHtml;
}
}
- if(pageOptions && pageOptions.includeExtract) {
- if(!pageOptions.includeMarkdown && doc && doc.markdown) {
- delete doc.markdown;
- }
- }
-
logJob({
job_id: jobId,
success: true,
@@ -137,16 +109,14 @@ export async function scrapeController(
team_id: req.auth.team_id,
mode: "scrape",
url: req.body.url,
- crawlerOptions: {},
- pageOptions: pageOptions,
+ scrapeOptions: req.body,
origin: origin,
- extractor_options: extractorOptions,
num_tokens: numTokens,
});
return res.status(200).json({
success: true,
- data: legacyDocumentConverter(doc),
+ data: doc,
scrape_id: origin?.includes("website") ? jobId : undefined,
});
}
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index 9585175e..82b24c22 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -1,10 +1,11 @@
import { Request, Response } from "express";
import { z } from "zod";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
-import { Action, ExtractorOptions, PageOptions } from "../../lib/entities";
import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
import { PlanType } from "../../types";
import { countries } from "../../lib/validate-country";
+import { ExtractorOptions, PageOptions, ScrapeActionContent, Document as V0Document } from "../../lib/entities";
+import { InternalOptions } from "../../scraper/scrapeURL";
export type Format =
| "markdown"
@@ -167,6 +168,7 @@ export const scrapeRequestSchema = scrapeOptions.extend({
});
export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
+export type ScrapeRequestInput = z.input<typeof scrapeRequestSchema>;
export const batchScrapeRequestSchema = scrapeOptions.extend({
urls: url.array(),
@@ -240,7 +242,7 @@ export const mapRequestSchema = crawlerOptions.extend({
includeSubdomains: z.boolean().default(true),
search: z.string().optional(),
ignoreSitemap: z.boolean().default(false),
- limit: z.number().min(1).max(5000).default(5000).optional(),
+ limit: z.number().min(1).max(5000).default(5000),
}).strict(strictMessage);
// export type MapRequest = {
@@ -252,13 +254,14 @@ export type MapRequest = z.infer<typeof mapRequestSchema>;
export type Document = {
markdown?: string;
- extract?: string;
+ extract?: any;
html?: string;
rawHtml?: string;
links?: string[];
screenshot?: string;
actions?: {
- screenshots: string[];
+ screenshots?: string[];
+ scrapes?: ScrapeActionContent[];
};
warning?: string;
metadata: {
@@ -291,11 +294,11 @@ export type Document = {
publishedTime?: string;
articleTag?: string;
articleSection?: string;
+ url?: string;
sourceURL?: string;
statusCode?: number;
error?: string;
[key: string]: string | string[] | number | undefined;
-
};
};
@@ -366,7 +369,7 @@ export type CrawlStatusResponse =
type AuthObject = {
team_id: string;
- plan: PlanType;
+ plan: PlanType | undefined;
};
type Account = {
@@ -439,7 +442,7 @@ export interface ResponseWithSentry<
sentry?: string,
}
-export function legacyCrawlerOptions(x: CrawlerOptions) {
+export function toLegacyCrawlerOptions(x: CrawlerOptions) {
return {
includes: x.includePaths,
excludes: x.excludePaths,
@@ -453,68 +456,90 @@ export function legacyCrawlerOptions(x: CrawlerOptions) {
};
}
-export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
+export function fromLegacyCrawlerOptions(x: any): { crawlOptions: CrawlerOptions; internalOptions: InternalOptions } {
return {
- includeMarkdown: x.formats.includes("markdown"),
- includeHtml: x.formats.includes("html"),
- includeRawHtml: x.formats.includes("rawHtml"),
- includeExtract: x.formats.includes("extract"),
- onlyIncludeTags: x.includeTags,
- removeTags: x.excludeTags,
- onlyMainContent: x.onlyMainContent,
- waitFor: x.waitFor,
- headers: x.headers,
- includeLinks: x.formats.includes("links"),
- screenshot: x.formats.includes("screenshot"),
- fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
- parsePDF: x.parsePDF,
- actions: x.actions as Action[], // no strict null checking grrrr - mogery
- geolocation: x.location ?? x.geolocation,
- skipTlsVerification: x.skipTlsVerification,
- removeBase64Images: x.removeBase64Images,
- mobile: x.mobile,
- };
-}
-
-export function legacyExtractorOptions(x: ExtractOptions): ExtractorOptions {
- return {
- mode: x.mode ? "llm-extraction" : "markdown",
- extractionPrompt: x.prompt ?? "Based on the information on the page, extract the information from the schema.",
- extractionSchema: x.schema,
- userPrompt: x.prompt ?? "",
- };
-}
-
-export function legacyDocumentConverter(doc: any): Document {
- if (doc === null || doc === undefined) return null;
-
- if (doc.metadata) {
- if (doc.metadata.screenshot) {
- doc.screenshot = doc.metadata.screenshot;
- delete doc.metadata.screenshot;
- }
-
- if (doc.metadata.fullPageScreenshot) {
- doc.fullPageScreenshot = doc.metadata.fullPageScreenshot;
- delete doc.metadata.fullPageScreenshot;
- }
- }
-
- return {
- markdown: doc.markdown,
- links: doc.linksOnPage,
- rawHtml: doc.rawHtml,
- html: doc.html,
- extract: doc.llm_extraction,
- screenshot: doc.screenshot ?? doc.fullPageScreenshot,
- actions: doc.actions ?? undefined,
- warning: doc.warning ?? undefined,
- metadata: {
- ...doc.metadata,
- pageError: undefined,
- pageStatusCode: undefined,
- error: doc.metadata?.pageError,
- statusCode: doc.metadata?.pageStatusCode,
+ crawlOptions: crawlerOptions.parse({
+ includePaths: x.includes,
+ excludePaths: x.excludes,
+ limit: x.maxCrawledLinks ?? x.limit,
+ maxDepth: x.maxDepth,
+ allowBackwardLinks: x.allowBackwardCrawling,
+ allowExternalLinks: x.allowExternalContentLinks,
+ ignoreSitemap: x.ignoreSitemap,
+ // TODO: returnOnlyUrls support
+ }),
+ internalOptions: {
+ v0CrawlOnlyUrls: x.returnOnlyUrls,
},
};
}
+
+export function fromLegacyScrapeOptions(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined): { scrapeOptions: ScrapeOptions, internalOptions: InternalOptions } {
+ return {
+ scrapeOptions: scrapeOptions.parse({
+ formats: [
+ (pageOptions.includeMarkdown ?? true) ? "markdown" as const : null,
+ (pageOptions.includeHtml ?? false) ? "html" as const : null,
+ (pageOptions.includeRawHtml ?? false) ? "rawHtml" as const : null,
+ (pageOptions.screenshot ?? false) ? "screenshot" as const : null,
+ (pageOptions.fullPageScreenshot ?? false) ? "screenshot@fullPage" as const : null,
+ (extractorOptions !== undefined && extractorOptions.mode.includes("llm-extraction")) ? "extract" as const : null,
+ "links"
+ ].filter(x => x !== null),
+ waitFor: pageOptions.waitFor,
+ headers: pageOptions.headers,
+ includeTags: (typeof pageOptions.onlyIncludeTags === "string" ? [pageOptions.onlyIncludeTags] : pageOptions.onlyIncludeTags),
+ excludeTags: (typeof pageOptions.removeTags === "string" ? [pageOptions.removeTags] : pageOptions.removeTags),
+ onlyMainContent: pageOptions.onlyMainContent ?? false,
+ timeout: timeout,
+ parsePDF: pageOptions.parsePDF,
+ actions: pageOptions.actions,
+ location: pageOptions.geolocation,
+ skipTlsVerification: pageOptions.skipTlsVerification,
+ removeBase64Images: pageOptions.removeBase64Images,
+ extract: extractorOptions !== undefined && extractorOptions.mode.includes("llm-extraction") ? {
+ systemPrompt: extractorOptions.extractionPrompt,
+ prompt: extractorOptions.userPrompt,
+ schema: extractorOptions.extractionSchema,
+ } : undefined,
+ mobile: pageOptions.mobile,
+ }),
+ internalOptions: {
+ atsv: pageOptions.atsv,
+ v0DisableJsDom: pageOptions.disableJsDom,
+ v0UseFastMode: pageOptions.useFastMode,
+ },
+ // TODO: fallback, fetchPageContent, replaceAllPathsWithAbsolutePaths, includeLinks
+ }
+}
+
+export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, internalOptions: InternalOptions} {
+ const { scrapeOptions, internalOptions: i1 } = fromLegacyScrapeOptions(pageOptions, extractorOptions, timeout);
+ const { internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
+ return { scrapeOptions, internalOptions: Object.assign(i1, i2) };
+}
+
+export function toLegacyDocument(document: Document, internalOptions: InternalOptions): V0Document | { url: string; } {
+ if (internalOptions.v0CrawlOnlyUrls) {
+ return { url: document.metadata.sourceURL! };
+ }
+
+ return {
+ content: document.markdown!,
+ markdown: document.markdown!,
+ html: document.html,
+ rawHtml: document.rawHtml,
+ linksOnPage: document.links,
+ llm_extraction: document.extract,
+ metadata: {
+ ...document.metadata,
+ error: undefined,
+ statusCode: undefined,
+ pageError: document.metadata.error,
+ pageStatusCode: document.metadata.statusCode,
+ screenshot: document.screenshot,
+ },
+ actions: document.actions ,
+ warning: document.warning,
+ }
+}
diff --git a/apps/api/src/example.ts b/apps/api/src/example.ts
deleted file mode 100644
index edf0faef..00000000
--- a/apps/api/src/example.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { WebScraperDataProvider } from "./scraper/WebScraper";
-
-async function example() {
- const example = new WebScraperDataProvider();
-
- await example.setOptions({
- jobId: "TEST",
- mode: "crawl",
- urls: ["https://mendable.ai"],
- crawlerOptions: {},
- });
- const docs = await example.getDocuments(false);
- docs.map((doc) => {
- console.log(doc.metadata.sourceURL);
- });
- console.log(docs.length);
-}
-
-// example();
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index 5ccbb9cc..7f7ec036 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -6,28 +6,24 @@ import bodyParser from "body-parser";
import cors from "cors";
import { getScrapeQueue } from "./services/queue-service";
import { v0Router } from "./routes/v0";
-import { initSDK } from "@hyperdx/node-opentelemetry";
import os from "os";
-import { Logger } from "./lib/logger";
+import { logger } from "./lib/logger";
import { adminRouter } from "./routes/admin";
-import { ScrapeEvents } from "./lib/scrape-events";
import http from 'node:http';
import https from 'node:https';
import CacheableLookup from 'cacheable-lookup';
import { v1Router } from "./routes/v1";
import expressWs from "express-ws";
-import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
import { ZodError } from "zod";
import { v4 as uuidv4 } from "uuid";
-import dns from 'node:dns';
const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter");
const { ExpressAdapter } = require("@bull-board/express");
const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
-Logger.info(`Number of CPUs: ${numCPUs} available`);
+logger.info(`Number of CPUs: ${numCPUs} available`);
const cacheable = new CacheableLookup()
@@ -55,7 +51,6 @@ const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({
serverAdapter: serverAdapter,
});
-
app.use(
`/admin/${process.env.BULL_AUTH_KEY}/queues`,
serverAdapter.getRouter()
@@ -78,15 +73,10 @@ app.use(adminRouter);
const DEFAULT_PORT = process.env.PORT ?? 3002;
const HOST = process.env.HOST ?? "localhost";
-// HyperDX OpenTelemetry
-if (process.env.ENV === "production") {
- initSDK({ consoleCapture: true, additionalInstrumentations: [] });
-}
-
function startServer(port = DEFAULT_PORT) {
const server = app.listen(Number(port), HOST, () => {
- Logger.info(`Worker ${process.pid} listening on port ${port}`);
- Logger.info(
+ logger.info(`Worker ${process.pid} listening on port ${port}`);
+ logger.info(
`For the Queue UI, open: http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`
);
});
@@ -103,7 +93,6 @@ app.get(`/serverHealthCheck`, async (req, res) => {
const [waitingJobs] = await Promise.all([
scrapeQueue.getWaitingCount(),
]);
-
const noWaitingJobs = waitingJobs === 0;
// 200 if no active jobs, 503 if there are active jobs
return res.status(noWaitingJobs ? 200 : 500).json({
@@ -111,7 +100,7 @@ app.get(`/serverHealthCheck`, async (req, res) => {
});
} catch (error) {
Sentry.captureException(error);
- Logger.error(error);
+ logger.error(error);
return res.status(500).json({ error: error.message });
}
});
@@ -140,7 +129,7 @@ app.get("/serverHealthCheck/notify", async (req, res) => {
// Re-check the waiting jobs count after the timeout
waitingJobsCount = await getWaitingJobsCount();
if (waitingJobsCount >= treshold) {
- const slackWebhookUrl = process.env.SLACK_WEBHOOK_URL;
+ const slackWebhookUrl = process.env.SLACK_WEBHOOK_URL!;
const message = {
text: `⚠️ Warning: The number of active jobs (${waitingJobsCount}) has exceeded the threshold (${treshold}) for more than ${
timeout / 60000
@@ -156,14 +145,14 @@ app.get("/serverHealthCheck/notify", async (req, res) => {
});
if (!response.ok) {
- Logger.error("Failed to send Slack notification");
+ logger.error("Failed to send Slack notification");
}
}
}, timeout);
}
} catch (error) {
Sentry.captureException(error);
- Logger.debug(error);
+ logger.debug(error);
}
};
@@ -178,7 +167,7 @@ app.get("/is-production", (req, res) => {
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response, next: NextFunction) => {
if (err instanceof ZodError) {
if (Array.isArray(err.errors) && err.errors.find(x => x.message === "URL uses unsupported protocol")) {
- Logger.warn("Unsupported protocol error: " + JSON.stringify(req.body));
+ logger.warn("Unsupported protocol error: " + JSON.stringify(req.body));
}
res.status(400).json({ success: false, error: "Bad Request", details: err.errors });
@@ -206,11 +195,11 @@ app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response
}
}
- Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
+ logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
res.status(500).json({ success: false, error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id });
});
-Logger.info(`Worker ${process.pid} started`);
+logger.info(`Worker ${process.pid} started`);
// const sq = getScrapeQueue();
diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts
index d05f9bd7..430dc1d4 100644
--- a/apps/api/src/lib/LLM-extraction/index.ts
+++ b/apps/api/src/lib/LLM-extraction/index.ts
@@ -4,19 +4,19 @@ const ajv = new Ajv(); // Initialize AJV for JSON schema validation
import { generateOpenAICompletions } from "./models";
import { Document, ExtractorOptions } from "../entities";
-import { Logger } from "../logger";
+import { logger } from "../logger";
// Generate completion using OpenAI
export async function generateCompletions(
documents: Document[],
- extractionOptions: ExtractorOptions,
+ extractionOptions: ExtractorOptions | undefined,
mode: "markdown" | "raw-html"
): Promise<Document[]> {
// const schema = zodToJsonSchema(options.schema)
- const schema = extractionOptions.extractionSchema;
- const systemPrompt = extractionOptions.extractionPrompt;
- const prompt = extractionOptions.userPrompt;
+ const schema = extractionOptions?.extractionSchema;
+ const systemPrompt = extractionOptions?.extractionPrompt;
+ const prompt = extractionOptions?.userPrompt;
const switchVariable = "openAI"; // Placholder, want to think more about how we abstract the model provider
@@ -51,7 +51,7 @@ export async function generateCompletions(
return completionResult;
} catch (error) {
- Logger.error(`Error generating completions: ${error}`);
+ logger.error(`Error generating completions: ${error}`);
throw error;
}
default:
diff --git a/apps/api/src/lib/LLM-extraction/models.ts b/apps/api/src/lib/LLM-extraction/models.ts
index 23147b12..f777dce9 100644
--- a/apps/api/src/lib/LLM-extraction/models.ts
+++ b/apps/api/src/lib/LLM-extraction/models.ts
@@ -95,7 +95,7 @@ export async function generateOpenAICompletions({
try {
llmExtraction = JSON.parse(
- jsonCompletion.choices[0].message.content.trim()
+ (jsonCompletion.choices[0].message.content ?? "").trim()
);
} catch (e) {
throw new Error("Invalid JSON");
diff --git a/apps/api/src/lib/batch-process.ts b/apps/api/src/lib/batch-process.ts
index 30289fd0..802d1eb1 100644
--- a/apps/api/src/lib/batch-process.ts
+++ b/apps/api/src/lib/batch-process.ts
@@ -3,7 +3,7 @@ export async function batchProcess<T>(
batchSize: number,
  asyncFunction: (item: T, index: number) => Promise<void>
): Promise<void> {
- const batches = [];
+ const batches: T[][] = [];
for (let i = 0; i < array.length; i += batchSize) {
const batch = array.slice(i, i + batchSize);
batches.push(batch);
diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts
index 379bc179..41cbb07c 100644
--- a/apps/api/src/lib/crawl-redis.ts
+++ b/apps/api/src/lib/crawl-redis.ts
@@ -1,13 +1,16 @@
+import { InternalOptions } from "../scraper/scrapeURL";
+import { ScrapeOptions } from "../controllers/v1/types";
import { WebCrawler } from "../scraper/WebScraper/crawler";
import { redisConnection } from "../services/queue-service";
-import { Logger } from "./logger";
+import { logger } from "./logger";
export type StoredCrawl = {
originUrl?: string;
crawlerOptions: any;
- pageOptions: any;
+  scrapeOptions: Omit<ScrapeOptions, "timeout">;
+ internalOptions: InternalOptions;
team_id: string;
- plan: string;
+ plan?: string;
robots?: string;
cancelled?: boolean;
createdAt: number;
@@ -100,7 +103,7 @@ export async function lockURL(id: string, sc: StoredCrawl, url: string): Promise<boolean> {
urlO.hash = "";
url = urlO.href;
} catch (error) {
- Logger.warn("Failed to normalize URL " + JSON.stringify(url) + ": " + error);
+ logger.warn("Failed to normalize URL " + JSON.stringify(url) + ": " + error);
}
const res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0
@@ -117,7 +120,7 @@ export async function lockURLs(id: string, urls: string[]): Promise<boolean> {
urlO.hash = "";
return urlO.href;
} catch (error) {
- Logger.warn("Failed to normalize URL " + JSON.stringify(url) + ": " + error);
+ logger.warn("Failed to normalize URL " + JSON.stringify(url) + ": " + error);
}
return url;
@@ -131,7 +134,7 @@ export async function lockURLs(id: string, urls: string[]): Promise<boolean> {
export function crawlToCrawler(id: string, sc: StoredCrawl): WebCrawler {
const crawler = new WebCrawler({
jobId: id,
- initialUrl: sc.originUrl,
+ initialUrl: sc.originUrl!,
includes: sc.crawlerOptions?.includes ?? [],
excludes: sc.crawlerOptions?.excludes ?? [],
maxCrawledLinks: sc.crawlerOptions?.maxCrawledLinks ?? 1000,
diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts
index 2c8f376d..9b68f425 100644
--- a/apps/api/src/lib/entities.ts
+++ b/apps/api/src/lib/entities.ts
@@ -1,3 +1,5 @@
+import type { Document as V1Document } from "../controllers/v1/types";
+
export interface Progress {
current: number;
total: number;
@@ -129,7 +131,8 @@ export class Document {
provider?: string;
warning?: string;
actions?: {
- screenshots: string[];
+ screenshots?: string[];
+ scrapes?: ScrapeActionContent[];
}
index?: number;
diff --git a/apps/api/src/lib/html-to-markdown.ts b/apps/api/src/lib/html-to-markdown.ts
index 396b7fe7..8800d916 100644
--- a/apps/api/src/lib/html-to-markdown.ts
+++ b/apps/api/src/lib/html-to-markdown.ts
@@ -5,7 +5,7 @@ import "../services/sentry"
import * as Sentry from "@sentry/node";
import dotenv from 'dotenv';
-import { Logger } from './logger';
+import { logger } from './logger';
dotenv.config();
// TODO: add a timeout to the Go parser
@@ -40,7 +40,7 @@ class GoMarkdownConverter {
}
}
-export async function parseMarkdown(html: string): Promise<string> {
+export async function parseMarkdown(html: string | null | undefined): Promise<string> {
if (!html) {
return '';
}
@@ -52,12 +52,12 @@ export async function parseMarkdown(html: string | null | undefined): Promise<string> {
markdownContent = processMultiLineLinks(markdownContent);
markdownContent = removeSkipToContentLinks(markdownContent);
- Logger.info(`HTML to Markdown conversion using Go parser successful`);
+ logger.info(`HTML to Markdown conversion using Go parser successful`);
return markdownContent;
}
} catch (error) {
Sentry.captureException(error);
- Logger.error(`Error converting HTML to Markdown with Go parser: ${error}`);
+ logger.error(`Error converting HTML to Markdown with Go parser: ${error}`);
}
// Fallback to TurndownService if Go parser fails or is not enabled
diff --git a/apps/api/src/lib/job-priority.ts b/apps/api/src/lib/job-priority.ts
index 9d046052..27e45230 100644
--- a/apps/api/src/lib/job-priority.ts
+++ b/apps/api/src/lib/job-priority.ts
@@ -1,6 +1,6 @@
import { redisConnection } from "../../src/services/queue-service";
import { PlanType } from "../../src/types";
-import { Logger } from "./logger";
+import { logger } from "./logger";
const SET_KEY_PREFIX = "limit_team_id:";
export async function addJobPriority(team_id, job_id) {
@@ -13,7 +13,7 @@ export async function addJobPriority(team_id, job_id) {
// This approach will reset the expiration time to 60 seconds every time a new job is added to the set.
await redisConnection.expire(setKey, 60);
} catch (e) {
- Logger.error(`Add job priority (sadd) failed: ${team_id}, ${job_id}`);
+ logger.error(`Add job priority (sadd) failed: ${team_id}, ${job_id}`);
}
}
@@ -24,7 +24,7 @@ export async function deleteJobPriority(team_id, job_id) {
// remove job_id from the set
await redisConnection.srem(setKey, job_id);
} catch (e) {
- Logger.error(`Delete job priority (srem) failed: ${team_id}, ${job_id}`);
+ logger.error(`Delete job priority (srem) failed: ${team_id}, ${job_id}`);
}
}
@@ -33,7 +33,7 @@ export async function getJobPriority({
team_id,
basePriority = 10,
}: {
- plan: PlanType;
+ plan: PlanType | undefined;
team_id: string;
basePriority?: number;
}): Promise<number> {
@@ -95,7 +95,7 @@ export async function getJobPriority({
);
}
} catch (e) {
- Logger.error(
+ logger.error(
`Get job priority failed: ${team_id}, ${plan}, ${basePriority}`
);
return basePriority;
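With plan widened to PlanType | undefined, callers that have not resolved the team's plan yet can still request a priority. A hedged usage sketch; the team id and base priority are illustrative:

import { getJobPriority } from "./job-priority";

async function priorityForUnknownPlan(): Promise<number> {
  // Falls back to basePriority if the Redis lookup throws (see the catch above).
  return await getJobPriority({
    plan: undefined,      // now allowed by the widened type
    team_id: "team_123",  // hypothetical team id
    basePriority: 10,
  });
}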
diff --git a/apps/api/src/lib/load-testing-example.ts b/apps/api/src/lib/load-testing-example.ts
deleted file mode 100644
index 01b61db9..00000000
--- a/apps/api/src/lib/load-testing-example.ts
+++ /dev/null
@@ -1,42 +0,0 @@
-// import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url";
-
-// const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
-
-// const scrapInBatches = async (
-// urls: string[],
-// batchSize: number,
-// delayMs: number
-// ) => {
-// let successCount = 0;
-// let errorCount = 0;
-
-// for (let i = 0; i < urls.length; i += batchSize) {
-// const batch = urls
-// .slice(i, i + batchSize)
-// .map((url) => scrapWithFireEngine(url));
-// try {
-// const results = await Promise.all(batch);
-// results.forEach((data, index) => {
-// if (data.trim() === "") {
-// errorCount++;
-// } else {
-// successCount++;
-// console.log(
-// `Scraping result ${i + index + 1}:`,
-// data.trim().substring(0, 20) + "..."
-// );
-// }
-// });
-// } catch (error) {
-// console.error("Error during scraping:", error);
-// }
-// await delay(delayMs);
-// }
-
-// console.log(`Total successful scrapes: ${successCount}`);
-// console.log(`Total errored scrapes: ${errorCount}`);
-// };
-// function run() {
-// const urls = Array.from({ length: 200 }, () => "https://scrapethissite.com");
-// scrapInBatches(urls, 10, 1000);
-// }
diff --git a/apps/api/src/lib/logger.ts b/apps/api/src/lib/logger.ts
index 7eca1ef0..33aa949b 100644
--- a/apps/api/src/lib/logger.ts
+++ b/apps/api/src/lib/logger.ts
@@ -1,57 +1,82 @@
+import * as winston from "winston";
+
import { configDotenv } from "dotenv";
+import Transport from "winston-transport";
configDotenv();
-enum LogLevel {
- NONE = 'NONE', // No logs will be output.
- ERROR = 'ERROR', // For logging error messages that indicate a failure in a specific operation.
- WARN = 'WARN', // For logging potentially harmful situations that are not necessarily errors.
- INFO = 'INFO', // For logging informational messages that highlight the progress of the application.
- DEBUG = 'DEBUG', // For logging detailed information on the flow through the system, primarily used for debugging.
- TRACE = 'TRACE' // For logging more detailed information than the DEBUG level.
-}
-export class Logger {
- static colors = {
- ERROR: '\x1b[31m%s\x1b[0m', // Red
- WARN: '\x1b[33m%s\x1b[0m', // Yellow
- INFO: '\x1b[34m%s\x1b[0m', // Blue
- DEBUG: '\x1b[36m%s\x1b[0m', // Cyan
- TRACE: '\x1b[35m%s\x1b[0m' // Magenta
- };
-
- static log (message: string, level: LogLevel) {
- const logLevel: LogLevel = LogLevel[process.env.LOGGING_LEVEL as keyof typeof LogLevel] || LogLevel.TRACE;
- const levels = [LogLevel.NONE, LogLevel.ERROR, LogLevel.WARN, LogLevel.INFO, LogLevel.DEBUG, LogLevel.TRACE];
- const currentLevelIndex = levels.indexOf(logLevel);
- const messageLevelIndex = levels.indexOf(level);
-
- if (currentLevelIndex >= messageLevelIndex) {
- const color = Logger.colors[level];
- console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`);
-
- // const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
- // if (useDbAuthentication) {
- // save to supabase? another place?
- // supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean });
- // }
+const logFormat = winston.format.printf(info =>
+ `${info.timestamp} ${info.level} [${info.metadata.module ?? ""}:${info.metadata.method ?? ""}]: ${info.message} ${info.level.includes("error") || info.level.includes("warn") ? JSON.stringify(
+ info.metadata,
+ (_, value) => {
+ if (value instanceof Error) {
+ return {
+ ...value,
+ name: value.name,
+ message: value.message,
+ stack: value.stack,
+ cause: value.cause,
+ }
+ } else {
+ return value;
+ }
}
- }
- static error(message: string | any) {
- Logger.log(message, LogLevel.ERROR);
+ ) : ""}`
+)
+
+export const logger = winston.createLogger({
+ level: process.env.LOGGING_LEVEL?.toLowerCase() ?? "debug",
+ format: winston.format.json({
+ replacer(key, value) {
+ if (value instanceof Error) {
+ return {
+ ...value,
+ name: value.name,
+ message: value.message,
+ stack: value.stack,
+ cause: value.cause,
+ }
+ } else {
+ return value;
+ }
+ }
+ }),
+ transports: [
+ new winston.transports.Console({
+ format: winston.format.combine(
+ winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
+ winston.format.metadata({ fillExcept: ["message", "level", "timestamp"] }),
+ ...(((process.env.ENV === "production" && process.env.SENTRY_ENVIRONMENT === "dev") || (process.env.ENV !== "production")) ? [winston.format.colorize(), logFormat] : []),
+ ),
+ }),
+ ],
+});
+
+export type ArrayTransportOptions = Transport.TransportStreamOptions & {
+ array: any[];
+ scrapeId?: string;
+};
+
+export class ArrayTransport extends Transport {
+ private array: any[];
+ private scrapeId?: string;
+
+ constructor(opts: ArrayTransportOptions) {
+ super(opts);
+ this.array = opts.array;
+ this.scrapeId = opts.scrapeId;
}
- static warn(message: string) {
- Logger.log(message, LogLevel.WARN);
- }
+ log(info, next) {
+ setImmediate(() => {
+ this.emit("logged", info);
+ });
- static info(message: string) {
- Logger.log(message, LogLevel.INFO);
- }
+ if (this.scrapeId !== undefined && info.scrapeId !== this.scrapeId) {
+ return next();
+ }
- static debug(message: string) {
- Logger.log(message, LogLevel.DEBUG);
- }
+ this.array.push(info);
- static trace(message: string) {
- Logger.log(message, LogLevel.TRACE);
+ next();
}
-}
+}
\ No newline at end of file
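The Winston logger above replaces the static Logger class; module and method metadata drive the console format, and ArrayTransport lets a single scrape collect its own log lines. A minimal consumption sketch, assuming the child-logger metadata keys shown in logFormat; the scrape id and messages are illustrative:

import { logger, ArrayTransport } from "./logger";

const capturedLogs: any[] = [];
logger.add(new ArrayTransport({ array: capturedLogs, scrapeId: "job-123" }));

const log = logger.child({ module: "scrapeURL", method: "example", scrapeId: "job-123" });
log.info("starting scrape");
log.warn("engine fell back", { engine: "fetch" }); // extra metadata is serialized for warn/error
// capturedLogs now holds only the entries whose scrapeId matched "job-123"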
diff --git a/apps/api/src/lib/map-cosine.ts b/apps/api/src/lib/map-cosine.ts
index db2491a9..2a089548 100644
--- a/apps/api/src/lib/map-cosine.ts
+++ b/apps/api/src/lib/map-cosine.ts
@@ -1,4 +1,4 @@
-import { Logger } from "./logger";
+import { logger } from "./logger";
export function performCosineSimilarity(links: string[], searchQuery: string) {
try {
@@ -40,7 +40,7 @@ export function performCosineSimilarity(links: string[], searchQuery: string) {
links = a.map((item) => item.link);
return links;
} catch (error) {
- Logger.error(`Error performing cosine similarity: ${error}`);
+ logger.error(`Error performing cosine similarity: ${error}`);
return links;
}
}
diff --git a/apps/api/src/lib/scrape-events.ts b/apps/api/src/lib/scrape-events.ts
index ad70dfef..83873a58 100644
--- a/apps/api/src/lib/scrape-events.ts
+++ b/apps/api/src/lib/scrape-events.ts
@@ -1,8 +1,8 @@
import { Job } from "bullmq";
-import type { baseScrapers } from "../scraper/WebScraper/single_url";
import { supabase_service as supabase } from "../services/supabase";
-import { Logger } from "./logger";
+import { logger } from "./logger";
import { configDotenv } from "dotenv";
+import { Engine } from "../scraper/scrapeURL/engines";
configDotenv();
export type ScrapeErrorEvent = {
@@ -15,7 +15,7 @@ export type ScrapeScrapeEvent = {
type: "scrape",
url: string,
worker?: string,
- method: (typeof baseScrapers)[number],
+ method: Engine,
result: null | {
success: boolean,
response_code?: number,
@@ -49,7 +49,7 @@ export class ScrapeEvents {
}).select().single();
return (result.data as any).id;
} catch (error) {
- // Logger.error(`Error inserting scrape event: ${error}`);
+ // logger.error(`Error inserting scrape event: ${error}`);
return null;
}
}
@@ -69,7 +69,7 @@ export class ScrapeEvents {
}
}).eq("id", logId);
} catch (error) {
- Logger.error(`Error updating scrape result: ${error}`);
+ logger.error(`Error updating scrape result: ${error}`);
}
}
@@ -81,7 +81,7 @@ export class ScrapeEvents {
worker: process.env.FLY_MACHINE_ID,
});
} catch (error) {
- Logger.error(`Error logging job event: ${error}`);
+ logger.error(`Error logging job event: ${error}`);
}
}
}
diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts
index c418a6e0..c9be72a3 100644
--- a/apps/api/src/lib/supabase-jobs.ts
+++ b/apps/api/src/lib/supabase-jobs.ts
@@ -1,5 +1,5 @@
import { supabase_service } from "../services/supabase";
-import { Logger } from "./logger";
+import { logger } from "./logger";
import * as Sentry from "@sentry/node";
/**
@@ -37,7 +37,7 @@ export const supabaseGetJobsById = async (jobIds: string[]) => {
.in("job_id", jobIds);
if (error) {
- Logger.error(`Error in supabaseGetJobsById: ${error}`);
+ logger.error(`Error in supabaseGetJobsById: ${error}`);
Sentry.captureException(error);
return [];
}
@@ -61,7 +61,7 @@ export const supabaseGetJobsByCrawlId = async (crawlId: string) => {
.eq("crawl_id", crawlId)
if (error) {
- Logger.error(`Error in supabaseGetJobsByCrawlId: ${error}`);
+ logger.error(`Error in supabaseGetJobsByCrawlId: ${error}`);
Sentry.captureException(error);
return [];
}
diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts
index b45b8973..a6cd539d 100644
--- a/apps/api/src/lib/withAuth.ts
+++ b/apps/api/src/lib/withAuth.ts
@@ -1,30 +1,25 @@
import { AuthResponse } from "../../src/types";
-import { Logger } from "./logger";
+import { logger } from "./logger";
import * as Sentry from "@sentry/node";
import { configDotenv } from "dotenv";
configDotenv();
let warningCount = 0;
-export function withAuth<T, U extends any[]>(
- originalFunction: (...args: U) => Promise<T>
+export function withAuth<T, U extends any[]>(
+ originalFunction: (...args: U) => Promise<T>,
+ mockSuccess: T,
) {
return async function (...args: U): Promise<T> {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
if (warningCount < 5) {
- Logger.warn("You're bypassing authentication");
+ logger.warn("You're bypassing authentication");
warningCount++;
}
return { success: true } as T;
} else {
- try {
- return await originalFunction(...args);
- } catch (error) {
- Sentry.captureException(error);
- Logger.error(`Error in withAuth function: ${error}`);
- return { success: false, error: error.message } as T;
- }
+ return await originalFunction(...args);
}
};
}
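withAuth now takes an explicit mockSuccess value to return when DB authentication is disabled, and it no longer swallows errors from the wrapped function; they propagate to the caller. A hedged usage sketch; checkTeamCreditsRaw is a hypothetical wrapped function, not part of this patch:

import { withAuth } from "./withAuth";

async function checkTeamCreditsRaw(team_id: string): Promise<{ success: boolean; error?: string }> {
  // a real DB-backed check would live here
  return { success: true };
}

// The second argument is returned verbatim when USE_DB_AUTHENTICATION !== 'true';
// otherwise the wrapped function runs and any error it throws now reaches the caller.
export const checkTeamCredits = withAuth(checkTeamCreditsRaw, { success: true });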
diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts
index 8bd0c12c..90d4a47f 100644
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@@ -1,151 +1,127 @@
import { Job } from "bullmq";
import {
- CrawlResult,
WebScraperOptions,
RunWebScraperParams,
RunWebScraperResult,
} from "../types";
-import { WebScraperDataProvider } from "../scraper/WebScraper";
-import { DocumentUrl, Progress } from "../lib/entities";
import { billTeam } from "../services/billing/credit_billing";
-import { Document } from "../lib/entities";
+import { Document } from "../controllers/v1/types";
import { supabase_service } from "../services/supabase";
-import { Logger } from "../lib/logger";
+import { logger } from "../lib/logger";
import { ScrapeEvents } from "../lib/scrape-events";
import { configDotenv } from "dotenv";
+import { EngineResultsTracker, scrapeURL, ScrapeUrlResponse } from "../scraper/scrapeURL";
+import { Engine } from "../scraper/scrapeURL/engines";
configDotenv();
export async function startWebScraperPipeline({
job,
token,
}: {
- job: Job<WebScraperOptions>;
+ job: Job<WebScraperOptions> & { id: string };
token: string;
}) {
- let partialDocs: Document[] = [];
return (await runWebScraper({
url: job.data.url,
mode: job.data.mode,
- crawlerOptions: job.data.crawlerOptions,
- extractorOptions: job.data.extractorOptions,
- pageOptions: {
- ...job.data.pageOptions,
+ scrapeOptions: {
+ ...job.data.scrapeOptions,
...(job.data.crawl_id ? ({
- includeRawHtml: true,
+ formats: job.data.scrapeOptions.formats.concat(["rawHtml"]),
}): {}),
},
- inProgress: (progress) => {
- Logger.debug(`🐂 Job in progress ${job.id}`);
- if (progress.currentDocument) {
- partialDocs.push(progress.currentDocument);
- if (partialDocs.length > 50) {
- partialDocs = partialDocs.slice(-50);
- }
- // job.updateProgress({ ...progress, partialDocs: partialDocs });
- }
- },
- onSuccess: (result, mode) => {
- Logger.debug(`🐂 Job completed ${job.id}`);
- saveJob(job, result, token, mode);
- },
- onError: (error) => {
- Logger.error(`🐂 Job failed ${job.id}`);
- ScrapeEvents.logJobEvent(job, "failed");
- job.moveToFailed(error, token, false);
- },
+ internalOptions: job.data.internalOptions,
+ // onSuccess: (result, mode) => {
+ // logger.debug(`🐂 Job completed ${job.id}`);
+ // saveJob(job, result, token, mode);
+ // },
+ // onError: (error) => {
+ // logger.error(`🐂 Job failed ${job.id}`);
+ // ScrapeEvents.logJobEvent(job, "failed");
+ // },
team_id: job.data.team_id,
bull_job_id: job.id.toString(),
priority: job.opts.priority,
is_scrape: job.data.is_scrape ?? false,
- })) as { success: boolean; message: string; docs: Document[] };
+ }));
}
export async function runWebScraper({
url,
mode,
- crawlerOptions,
- pageOptions,
- extractorOptions,
- inProgress,
- onSuccess,
- onError,
+ scrapeOptions,
+ internalOptions,
+ // onSuccess,
+ // onError,
team_id,
bull_job_id,
priority,
is_scrape=false,
-}: RunWebScraperParams): Promise<RunWebScraperResult> {
+}: RunWebScraperParams): Promise<ScrapeUrlResponse> {
+ let response: ScrapeUrlResponse | undefined = undefined;
+ let engines: EngineResultsTracker = {};
try {
- const provider = new WebScraperDataProvider();
- if (mode === "crawl") {
- await provider.setOptions({
- jobId: bull_job_id,
- mode: mode,
- urls: [url],
- extractorOptions,
- crawlerOptions: crawlerOptions,
- pageOptions: pageOptions,
- bullJobId: bull_job_id,
- priority,
- });
- } else {
- await provider.setOptions({
- jobId: bull_job_id,
- mode: mode,
- urls: url.split(","),
- extractorOptions,
- crawlerOptions: crawlerOptions,
- pageOptions: pageOptions,
- priority,
- teamId: team_id
- });
+ response = await scrapeURL(bull_job_id, url, scrapeOptions, { priority, ...internalOptions });
+ if (!response.success) {
+ if (response.error instanceof Error) {
+ throw response.error;
+ } else {
+ throw new Error("scrapeURL error: " + (Array.isArray(response.error) ? JSON.stringify(response.error) : typeof response.error === "object" ? JSON.stringify({ ...response.error }) : response.error));
+ }
}
- const docs = (await provider.getDocuments(false, (progress: Progress) => {
- inProgress(progress);
- })) as Document[];
-
- if (docs.length === 0) {
- return {
- success: true,
- message: "No pages found",
- docs: [],
- };
- }
-
- // remove docs with empty content
- const filteredDocs = crawlerOptions?.returnOnlyUrls
- ? docs.map((doc) => {
- if (doc.metadata.sourceURL) {
- return { url: doc.metadata.sourceURL };
- }
- })
- : docs;
if(is_scrape === false) {
let creditsToBeBilled = 1; // Assuming 1 credit per document
- if (extractorOptions && (extractorOptions.mode === "llm-extraction" || extractorOptions.mode === "extract")) {
+ if (scrapeOptions.extract) {
creditsToBeBilled = 5;
}
- billTeam(team_id, undefined, creditsToBeBilled * filteredDocs.length).catch(error => {
- Logger.error(`Failed to bill team ${team_id} for ${creditsToBeBilled * filteredDocs.length} credits: ${error}`);
+ billTeam(team_id, undefined, creditsToBeBilled).catch(error => {
+ logger.error(`Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`);
// Optionally, you could notify an admin or add to a retry queue here
});
}
-
-
// This is where the returnvalue from the job is set
- onSuccess(filteredDocs, mode);
+ // onSuccess(response.document, mode);
- // this return doesn't matter too much for the job completion result
- return { success: true, message: "", docs: filteredDocs };
+ engines = response.engines;
+ return response;
} catch (error) {
- onError(error);
- return { success: false, message: error.message, docs: [] };
+ engines = response !== undefined ? response.engines : ((typeof error === "object" && error !== null ? (error as any).results ?? {} : {}));
+
+ if (response !== undefined) {
+ return {
+ ...response,
+ success: false,
+ error,
+ }
+ } else {
+ return { success: false, error, logs: ["no logs -- error coming from runWebScraper"], engines };
+ }
+ // onError(error);
+ } finally {
+ const engineOrder = Object.entries(engines).sort((a, b) => a[1].startedAt - b[1].startedAt).map(x => x[0]) as Engine[];
+
+ for (const engine of engineOrder) {
+ const result = engines[engine] as Exclude<EngineResultsTracker[Engine], undefined>;
+ ScrapeEvents.insert(bull_job_id, {
+ type: "scrape",
+ url,
+ method: engine,
+ result: {
+ success: result.state === "success",
+ response_code: (result.state === "success" ? result.result.statusCode : undefined),
+ response_size: (result.state === "success" ? result.result.html.length : undefined),
+ error: (result.state === "error" ? result.error : result.state === "timeout" ? "Timed out" : undefined),
+ time_taken: result.finishedAt - result.startedAt,
+ },
+ });
+ }
}
}
-const saveJob = async (job: Job, result: any, token: string, mode: string) => {
+const saveJob = async (job: Job, result: any, token: string, mode: string, engines?: EngineResultsTracker) => {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
@@ -173,6 +149,6 @@ const saveJob = async (job: Job, result: any, token: string, mode: string) => {
}
ScrapeEvents.logJobEvent(job, "completed");
} catch (error) {
- Logger.error(`🐂 Failed to update job status: ${error}`);
+ logger.error(`🐂 Failed to update job status: ${error}`);
}
};
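runWebScraper now delegates to scrapeURL and returns its ScrapeUrlResponse (a single document, logs, and an engine results tracker) instead of a docs array, and engine attempts are logged to ScrapeEvents in the finally block regardless of outcome. A hedged sketch of a caller consuming the new return shape; option values are illustrative and the scrapeOptions shape is assumed:

import { runWebScraper } from "./runWebScraper";

async function scrapeOnce() {
  const result = await runWebScraper({
    url: "https://example.com",
    mode: "single_urls",
    scrapeOptions: { formats: ["markdown"] } as any, // assumed ScrapeOptions shape
    internalOptions: {},
    team_id: "team_123",     // hypothetical ids
    bull_job_id: "job_123",
    priority: 10,
    is_scrape: true,
  });

  if (result.success) {
    console.log(result.document.metadata?.sourceURL);
  } else {
    console.error("scrape failed", result.error);
  }
}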
diff --git a/apps/api/src/routes/admin.ts b/apps/api/src/routes/admin.ts
index 88159060..ac61519a 100644
--- a/apps/api/src/routes/admin.ts
+++ b/apps/api/src/routes/admin.ts
@@ -6,8 +6,8 @@ import {
cleanBefore24hCompleteJobsController,
queuesController,
} from "../controllers/v0/admin/queue";
-import { acucCacheClearController } from "../controllers/v0/admin/acuc-cache-clear";
import { wrap } from "./v1";
+import { acucCacheClearController } from "../controllers/v0/admin/acuc-cache-clear";
export const adminRouter = express.Router();
diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts
index 4e4b6052..3eaace3b 100644
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@@ -14,7 +14,7 @@ import expressWs from "express-ws";
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
import { crawlCancelController } from "../controllers/v1/crawl-cancel";
-import { Logger } from "../lib/logger";
+import { logger } from "../lib/logger";
import { scrapeStatusController } from "../controllers/v1/scrape-status";
import { concurrencyCheckController } from "../controllers/v1/concurrency-check";
import { batchScrapeController } from "../controllers/v1/batch-scrape";
@@ -32,10 +32,12 @@ function checkCreditsMiddleware(minimum?: number): (req: RequestWithAuth, res: R
if (!minimum && req.body) {
minimum = (req.body as any)?.limit ?? (req.body as any)?.urls?.length ?? 1;
}
- const { success, remainingCredits, chunk } = await checkTeamCredits(req.acuc, req.auth.team_id, minimum);
- req.acuc = chunk;
+ const { success, remainingCredits, chunk } = await checkTeamCredits(req.acuc, req.auth.team_id, minimum ?? 1);
+ if (chunk) {
+ req.acuc = chunk;
+ }
if (!success) {
- Logger.error(`Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum, remainingCredits })}`);
+ logger.error(`Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum, remainingCredits })}`);
if (!res.headersSent) {
return res.status(402).json({ success: false, error: "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value." });
}
@@ -50,20 +52,27 @@ function checkCreditsMiddleware(minimum?: number): (req: RequestWithAuth, res: R
export function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
return (req, res, next) => {
(async () => {
- const { success, team_id, error, status, plan, chunk } = await authenticateUser(
+ const auth = await authenticateUser(
req,
res,
rateLimiterMode,
);
- if (!success) {
+ if (!auth.success) {
if (!res.headersSent) {
- return res.status(status).json({ success: false, error });
+ return res.status(auth.status).json({ success: false, error: auth.error });
+ } else {
+ return;
}
}
+ const { team_id, plan, chunk } = auth;
+
req.auth = { team_id, plan };
- req.acuc = chunk;
+ req.acuc = chunk ?? undefined;
+ if (chunk) {
+ req.account = { remainingCredits: chunk.remaining_credits };
+ }
next();
})()
.catch(err => next(err));
diff --git a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
index 20419ffa..eba0ddb4 100644
--- a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
+++ b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
@@ -2,7 +2,6 @@
import { WebCrawler } from '../crawler';
import axios from 'axios';
import robotsParser from 'robots-parser';
-import { getAdjustedMaxDepth } from '../utils/maxDepthUtils';
jest.mock('axios');
jest.mock('robots-parser');
@@ -35,165 +34,6 @@ describe('WebCrawler', () => {
});
});
- it('should filter out links that exceed maxDepth param of 2 based on enterURL depth of 0 ', async () => {
- const initialUrl = 'http://example.com'; // Set initial URL for this test
- const enteredMaxCrawledDepth = 2;
- maxCrawledDepth = getAdjustedMaxDepth(initialUrl, enteredMaxCrawledDepth);
-
-
- crawler = new WebCrawler({
- jobId: "TEST",
- initialUrl: initialUrl,
- includes: [],
- excludes: [],
- limit: 100,
- maxCrawledDepth: maxCrawledDepth, // Set maxDepth for testing
- });
-
- // Mock sitemap fetching function to return controlled links
- crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
- initialUrl, // depth 0
- initialUrl + '/page1', // depth 1
- initialUrl + '/page1/page2', // depth 2
- initialUrl + '/page1/page2/page3' // depth 3, should be filtered out
- ]);
-
- const results = await crawler.start(undefined, undefined, undefined, undefined, undefined, maxCrawledDepth);
- expect(results).toEqual([
- { url: initialUrl, html: '' },
- { url: initialUrl + '/page1', html: '' },
- { url: initialUrl + '/page1/page2', html: '' }
- ]);
-
-
- // Ensure that the link with depth 3 is not included
- expect(results.some(r => r.url === initialUrl + '/page1/page2/page3')).toBe(false);
- });
-
- it('should filter out links that exceed maxDepth param of 0 based on enterURL depth of 0 ', async () => {
- const initialUrl = 'http://example.com'; // Set initial URL for this test
- const enteredMaxCrawledDepth = 0;
- maxCrawledDepth = getAdjustedMaxDepth(initialUrl, enteredMaxCrawledDepth);
-
-
- crawler = new WebCrawler({
- jobId: "TEST",
- initialUrl: initialUrl,
- includes: [],
- excludes: [],
- limit: 100,
- maxCrawledDepth: maxCrawledDepth, // Set maxDepth for testing
- });
-
- // Mock sitemap fetching function to return controlled links
- crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
- initialUrl, // depth 0
- initialUrl + '/page1', // depth 1
- initialUrl + '/page1/page2', // depth 2
- initialUrl + '/page1/page2/page3' // depth 3, should be filtered out
- ]);
-
- const results = await crawler.start(undefined, undefined, undefined, undefined, undefined, maxCrawledDepth);
- expect(results).toEqual([
- { url: initialUrl, html: '' },
- ]);
- });
-
- it('should filter out links that exceed maxDepth param of 1 based on enterURL depth of 1 ', async () => {
- const initialUrl = 'http://example.com/page1'; // Set initial URL for this test
- const enteredMaxCrawledDepth = 1;
- maxCrawledDepth = getAdjustedMaxDepth(initialUrl, enteredMaxCrawledDepth);
-
-
- crawler = new WebCrawler({
- jobId: "TEST",
- initialUrl: initialUrl,
- includes: [],
- excludes: [],
- limit: 100,
- maxCrawledDepth: maxCrawledDepth, // Set maxDepth for testing
- });
-
- // Mock sitemap fetching function to return controlled links
- crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
- initialUrl, // depth 0
- initialUrl + '/page2', // depth 1
- initialUrl + '/page2/page3', // depth 2
- initialUrl + '/page2/page3/page4' // depth 3, should be filtered out
- ]);
-
- const results = await crawler.start(undefined, undefined, undefined, undefined, undefined, maxCrawledDepth);
- expect(results).toEqual([
- { url: initialUrl, html: '' },
- { url: initialUrl + '/page2', html: '' }
- ]);
- });
-
- it('should filter out links that exceed maxDepth param of 1 based on enterURL depth of 2 ', async () => {
- const initialUrl = 'http://example.com/page1'; // Set initial URL for this test
- const enteredMaxCrawledDepth = 2;
- maxCrawledDepth = getAdjustedMaxDepth(initialUrl, enteredMaxCrawledDepth);
-
-
- crawler = new WebCrawler({
- jobId: "TEST",
- initialUrl: initialUrl,
- includes: [],
- excludes: [],
- limit: 100,
- maxCrawledDepth: maxCrawledDepth, // Set maxDepth for testing
- });
-
- // Mock sitemap fetching function to return controlled links
- crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
- initialUrl, // depth 0
- initialUrl + '/page2', // depth 1
- initialUrl + '/page2/page3', // depth 2
- initialUrl + '/page2/page3/page4' // depth 3, should be filtered out
- ]);
-
- const results = await crawler.start(undefined, undefined, undefined, undefined, undefined, maxCrawledDepth);
- expect(results).toEqual([
- { url: initialUrl, html: '' },
- { url: initialUrl + '/page2', html: '' },
- { url: initialUrl + '/page2/page3', html: '' }
- ]);
- });
-
- it('should handle allowBackwardCrawling option correctly', async () => {
- const initialUrl = 'https://mendable.ai/blog';
-
- // Setup the crawler with the specific test case options
- const crawler = new WebCrawler({
- jobId: "TEST",
- initialUrl: initialUrl,
- includes: [],
- excludes: [],
- limit: 100,
- maxCrawledDepth: 3, // Example depth
- allowBackwardCrawling: true
- });
-
- // Mock the sitemap fetching function to simulate backward crawling
- crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
- initialUrl,
- 'https://mendable.ai', // backward link
- initialUrl + '/page1',
- initialUrl + '/page1/page2'
- ]);
-
- const results = await crawler.start();
- expect(results).toEqual([
- { url: initialUrl, html: '' },
- { url: 'https://mendable.ai', html: '' }, // Expect the backward link to be included
- { url: initialUrl + '/page1', html: '' },
- { url: initialUrl + '/page1/page2', html: '' }
- ]);
-
- // Check that the backward link is included if allowBackwardCrawling is true
- expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
- });
-
it('should respect the limit parameter by not returning more links than specified', async () => {
const initialUrl = 'http://example.com';
const limit = 2; // Set a limit for the number of links
diff --git a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
deleted file mode 100644
index 02c8a7e0..00000000
--- a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-import { scrapSingleUrl } from '../single_url';
-import { PageOptions } from '../../../lib/entities';
-
-
-jest.mock('../single_url', () => {
- const originalModule = jest.requireActual('../single_url');
- originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>');
-
- return originalModule;
-});
-
-describe('scrapSingleUrl', () => {
- it('should handle includeHtml option correctly', async () => {
- const url = 'https://roastmywebsite.ai';
- const pageOptionsWithHtml: PageOptions = { includeHtml: true };
- const pageOptionsWithoutHtml: PageOptions = { includeHtml: false };
-
- const resultWithHtml = await scrapSingleUrl("TEST", url, pageOptionsWithHtml);
- const resultWithoutHtml = await scrapSingleUrl("TEST", url, pageOptionsWithoutHtml);
-
- expect(resultWithHtml.html).toBeDefined();
- expect(resultWithoutHtml.html).toBeUndefined();
- }, 10000);
-});
-
-it('should return a list of links on the firecrawl.ai page', async () => {
- const url = 'https://flutterbricks.com';
- const pageOptions: PageOptions = { includeHtml: true };
-
- const result = await scrapSingleUrl("TEST", url, pageOptions);
-
- // Check if the result contains a list of links
- expect(result.linksOnPage).toBeDefined();
- expect(Array.isArray(result.linksOnPage)).toBe(true);
- expect(result.linksOnPage.length).toBeGreaterThan(0);
- expect(result.linksOnPage).toContain('https://flutterbricks.com/features')
-}, 15000);
diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index 1ae166b4..9e3f7cd2 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -2,13 +2,10 @@ import axios, { AxiosError } from "axios";
import cheerio, { load } from "cheerio";
import { URL } from "url";
import { getLinksFromSitemap } from "./sitemap";
-import async from "async";
-import { CrawlerOptions, PageOptions, Progress } from "../../lib/entities";
-import { scrapSingleUrl } from "./single_url";
import robotsParser from "robots-parser";
import { getURLDepth } from "./utils/maxDepthUtils";
import { axiosTimeout } from "../../../src/lib/timeout";
-import { Logger } from "../../../src/lib/logger";
+import { logger } from "../../../src/lib/logger";
import https from "https";
export class WebCrawler {
private jobId: string;
@@ -73,7 +70,7 @@ export class WebCrawler {
try {
url = new URL(link.trim(), this.baseUrl);
} catch (error) {
- Logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
+ logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
return false;
}
const path = url.pathname;
@@ -132,7 +129,7 @@ export class WebCrawler {
const isAllowed = this.robots.isAllowed(link, "FireCrawlAgent") ?? true;
// Check if the link is disallowed by robots.txt
if (!isAllowed) {
- Logger.debug(`Link disallowed by robots.txt: ${link}`);
+ logger.debug(`Link disallowed by robots.txt: ${link}`);
return false;
}
@@ -161,7 +158,7 @@ export class WebCrawler {
}
public async tryGetSitemap(): Promise<{ url: string; html: string; }[] | null> {
- Logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
+ logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl);
if (sitemapLinks.length > 0) {
let filteredLinks = this.filterLinks(sitemapLinks, this.limit, this.maxCrawledDepth);
@@ -170,115 +167,6 @@ export class WebCrawler {
return null;
}
- public async start(
- inProgress?: (progress: Progress) => void,
- pageOptions?: PageOptions,
- crawlerOptions?: CrawlerOptions,
- concurrencyLimit: number = 5,
- limit: number = 10000,
- maxDepth: number = 10
- ): Promise<{ url: string, html: string }[]> {
-
- Logger.debug(`Crawler starting with ${this.initialUrl}`);
- // Fetch and parse robots.txt
- try {
- const txt = await this.getRobotsTxt();
- this.importRobotsTxt(txt);
- Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`);
- } catch (error) {
- Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
- }
-
- if (!crawlerOptions?.ignoreSitemap){
- const sm = await this.tryGetSitemap();
- if (sm !== null) {
- return sm;
- }
- }
-
- const urls = await this.crawlUrls(
- [this.initialUrl],
- pageOptions,
- concurrencyLimit,
- inProgress
- );
-
- if (
- urls.length === 0 &&
- this.filterLinks([this.initialUrl], limit, this.maxCrawledDepth).length > 0
- ) {
- return [{ url: this.initialUrl, html: "" }];
- }
-
- // make sure to run include exclude here again
- const filteredUrls = this.filterLinks(urls.map(urlObj => urlObj.url), limit, this.maxCrawledDepth);
- return filteredUrls.map(url => ({ url, html: urls.find(urlObj => urlObj.url === url)?.html || "" }));
- }
-
- private async crawlUrls(
- urls: string[],
- pageOptions: PageOptions,
- concurrencyLimit: number,
- inProgress?: (progress: Progress) => void,
- ): Promise<{ url: string, html: string }[]> {
- const queue = async.queue(async (task: string, callback) => {
- Logger.debug(`Crawling ${task}`);
- if (this.crawledUrls.size >= Math.min(this.maxCrawledLinks, this.limit)) {
- if (callback && typeof callback === "function") {
- callback();
- }
- return;
- }
- const newUrls = await this.crawl(task, pageOptions);
- // add the initial url if not already added
- // if (this.visited.size === 1) {
- // let normalizedInitial = this.initialUrl;
- // if (!normalizedInitial.endsWith("/")) {
- // normalizedInitial = normalizedInitial + "/";
- // }
- // if (!newUrls.some(page => page.url === this.initialUrl)) {
- // newUrls.push({ url: this.initialUrl, html: "" });
- // }
- // }
-
- newUrls.forEach((page) => this.crawledUrls.set(page.url, page.html));
-
- if (inProgress && newUrls.length > 0) {
- inProgress({
- current: this.crawledUrls.size,
- total: Math.min(this.maxCrawledLinks, this.limit),
- status: "SCRAPING",
- currentDocumentUrl: newUrls[newUrls.length - 1].url,
- });
- } else if (inProgress) {
- inProgress({
- current: this.crawledUrls.size,
- total: Math.min(this.maxCrawledLinks, this.limit),
- status: "SCRAPING",
- currentDocumentUrl: task,
- });
- }
- await this.crawlUrls(newUrls.map((p) => p.url), pageOptions, concurrencyLimit, inProgress);
- if (callback && typeof callback === "function") {
- callback();
- }
- }, concurrencyLimit);
-
- Logger.debug(`🐂 Pushing ${urls.length} URLs to the queue`);
- queue.push(
- urls.filter(
- (url) =>
- !this.visited.has(url) && this.robots.isAllowed(url, "FireCrawlAgent")
- ),
- (err) => {
- if (err) Logger.error(`🐂 Error pushing URLs to the queue: ${err}`);
- }
- );
- await queue.drain();
- Logger.debug(`🐂 Crawled ${this.crawledUrls.size} URLs, Queue drained.`);
- return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html }));
- }
-
public filterURL(href: string, url: string): string | null {
let fullUrl = href;
if (!href.startsWith("http")) {
@@ -346,79 +234,9 @@ export class WebCrawler {
return links;
}
- async crawl(url: string, pageOptions: PageOptions): Promise<{url: string, html: string, pageStatusCode?: number, pageError?: string}[]> {
- if (this.visited.has(url) || !this.robots.isAllowed(url, "FireCrawlAgent")) {
- return [];
- }
- this.visited.add(url);
-
- if (!url.startsWith("http")) {
- url = "https://" + url;
- }
- if (url.endsWith("/")) {
- url = url.slice(0, -1);
- }
-
- if (this.isFile(url) || this.isSocialMediaOrEmail(url)) {
- return [];
- }
-
- try {
- let content: string = "";
- let pageStatusCode: number;
- let pageError: string | undefined = undefined;
-
- // If it is the first link, fetch with single url
- if (this.visited.size === 1) {
- const page = await scrapSingleUrl(this.jobId, url, { ...pageOptions, includeHtml: true });
- content = page.html ?? "";
- pageStatusCode = page.metadata?.pageStatusCode;
- pageError = page.metadata?.pageError || undefined;
- } else {
- const response = await axios.get(url, { timeout: axiosTimeout });
- content = response.data ?? "";
- pageStatusCode = response.status;
- pageError = response.statusText != "OK" ? response.statusText : undefined;
- }
-
- const $ = load(content);
- let links: { url: string, html: string, pageStatusCode?: number, pageError?: string }[] = [];
-
- // Add the initial URL to the list of links
- if (this.visited.size === 1) {
- links.push({ url, html: content, pageStatusCode, pageError });
- }
-
- links.push(...this.extractLinksFromHTML(content, url).map(url => ({ url, html: content, pageStatusCode, pageError })));
-
- if (this.visited.size === 1) {
- return links;
- }
-
- // Create a new list to return to avoid modifying the visited list
- return links.filter((link) => !this.visited.has(link.url));
- } catch (error) {
- return [];
- }
- }
-
private isRobotsAllowed(url: string): boolean {
return (this.robots ? (this.robots.isAllowed(url, "FireCrawlAgent") ?? true) : true)
}
- private normalizeCrawlUrl(url: string): string {
- try{
- const urlObj = new URL(url);
- urlObj.searchParams.sort(); // Sort query parameters to normalize
- return urlObj.toString();
- } catch (error) {
- return url;
- }
- }
-
- private matchesIncludes(url: string): boolean {
- if (this.includes.length === 0 || this.includes[0] == "") return true;
- return this.includes.some((pattern) => new RegExp(pattern).test(url));
- }
private matchesExcludes(url: string, onlyDomains: boolean = false): boolean {
return this.excludes.some((pattern) => {
@@ -503,7 +321,7 @@ export class WebCrawler {
const urlWithoutQuery = url.split('?')[0].toLowerCase();
return fileExtensions.some((ext) => urlWithoutQuery.endsWith(ext));
} catch (error) {
- Logger.error(`Error processing URL in isFile: ${error}`);
+ logger.error(`Error processing URL in isFile: ${error}`);
return false;
}
}
@@ -524,7 +342,6 @@ export class WebCrawler {
return socialMediaOrEmail.some((ext) => url.includes(ext));
}
- //
- private async tryFetchSitemapLinks(url: string): Promise<string[]> {
const normalizeUrl = (url: string) => {
url = url.replace(/^https?:\/\//, "").replace(/^www\./, "");
@@ -546,7 +363,7 @@ export class WebCrawler {
sitemapLinks = await getLinksFromSitemap({ sitemapUrl });
}
} catch (error) {
- Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
+ logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
} else {
@@ -565,7 +382,7 @@ export class WebCrawler {
sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' });
}
} catch (error) {
- Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
+ logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
} else {
diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
index e5841978..48aa2ffd 100644
--- a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
+++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
@@ -1,4 +1,4 @@
-import { Logger } from "../../../lib/logger";
+import { logger } from "../../../lib/logger";
export async function handleCustomScraping(
text: string,
@@ -6,7 +6,7 @@ export async function handleCustomScraping(
): Promise<{ scraper: string; url: string; waitAfterLoad?: number, pageOptions?: { scrollXPaths?: string[] } } | null> {
// Check for Readme Docs special case
if (text.includes(' {
- throw new Error("Method not implemented.");
- }
-
- private async convertUrlsToDocuments(
- urls: string[],
- inProgress?: (progress: Progress) => void,
- allHtmls?: string[]
- ): Promise<Document[]> {
- const totalUrls = urls.length;
- let processedUrls = 0;
-
- const results: (Document | null)[] = new Array(urls.length).fill(null);
- for (let i = 0; i < urls.length; i += this.concurrentRequests) {
- const batchUrls = urls.slice(i, i + this.concurrentRequests);
- await Promise.all(
- batchUrls.map(async (url, index) => {
- const existingHTML = allHtmls ? allHtmls[i + index] : "";
- const result = await scrapSingleUrl(
- this.jobId,
- url,
- this.pageOptions,
- this.extractorOptions,
- existingHTML,
- this.priority,
- this.teamId,
- );
- processedUrls++;
- if (inProgress) {
- inProgress({
- current: processedUrls,
- total: totalUrls,
- status: "SCRAPING",
- currentDocumentUrl: url,
- currentDocument: { ...result, index: processedUrls },
- });
- }
-
- results[i + index] = result;
- })
- );
- }
- return results.filter((result) => result !== null) as Document[];
- }
-
- async getDocuments(
- useCaching: boolean = false,
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- this.validateInitialUrl();
- if (!useCaching) {
- return this.processDocumentsWithoutCache(inProgress);
- }
-
- return this.processDocumentsWithCache(inProgress);
- }
-
- private validateInitialUrl(): void {
- if (this.urls[0].trim() === "") {
- throw new Error("Url is required");
- }
- }
-
- /**
- * Process documents without cache handling each mode
- * @param inProgress inProgress
- * @returns documents
- */
- private async processDocumentsWithoutCache(
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- switch (this.mode) {
- case "crawl":
- return this.handleCrawlMode(inProgress);
- case "single_urls":
- return this.handleSingleUrlsMode(inProgress);
- case "sitemap":
- return this.handleSitemapMode(inProgress);
- default:
- return [];
- }
- }
-
- private async cleanIrrelevantPath(links: string[]) {
- return links.filter((link) => {
- const normalizedInitialUrl = new URL(this.urls[0]);
- const normalizedLink = new URL(link);
-
- // Normalize the hostname to account for www and non-www versions
- const initialHostname = normalizedInitialUrl.hostname.replace(
- /^www\./,
- ""
- );
- const linkHostname = normalizedLink.hostname.replace(/^www\./, "");
-
- // Ensure the protocol and hostname match, and the path starts with the initial URL's path
- return (
- linkHostname === initialHostname &&
- normalizedLink.pathname.startsWith(normalizedInitialUrl.pathname)
- );
- });
- }
-
- private async handleCrawlMode(
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- let includes: string[];
- if (Array.isArray(this.includes)) {
- if (this.includes[0] != "") {
- includes = this.includes;
- }
- } else {
- includes = this.includes.split(',');
- }
-
- let excludes: string[];
- if (Array.isArray(this.excludes)) {
- if (this.excludes[0] != "") {
- excludes = this.excludes;
- }
- } else {
- excludes = this.excludes.split(',');
- }
-
- const crawler = new WebCrawler({
- jobId: this.jobId,
- initialUrl: this.urls[0],
- includes,
- excludes,
- maxCrawledLinks: this.maxCrawledLinks,
- maxCrawledDepth: getAdjustedMaxDepth(this.urls[0], this.maxCrawledDepth),
- limit: this.limit,
- generateImgAltText: this.generateImgAltText,
- allowBackwardCrawling: this.allowBackwardCrawling,
- allowExternalContentLinks: this.allowExternalContentLinks,
- });
-
- let links = await crawler.start(
- inProgress,
- this.pageOptions,
- {
- ignoreSitemap: this.ignoreSitemap,
- },
- 5,
- this.limit,
- this.maxCrawledDepth
- );
-
- let allLinks = links.map((e) => e.url);
- const allHtmls = links.map((e) => e.html);
-
- if (this.returnOnlyUrls) {
- return this.returnOnlyUrlsResponse(allLinks, inProgress);
- }
-
- let documents = [];
- // check if fast mode is enabled and there is html inside the links
- if (this.crawlerMode === "fast" && links.some((link) => link.html)) {
- documents = await this.processLinks(allLinks, inProgress, allHtmls);
- } else {
- documents = await this.processLinks(allLinks, inProgress);
- }
-
- return this.cacheAndFinalizeDocuments(documents, allLinks);
- }
-
- private async handleSingleUrlsMode(
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- const links = this.urls;
-
- let documents = await this.processLinks(links, inProgress);
- return documents;
- }
-
- private async handleSitemapMode(
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- let links = await getLinksFromSitemap({ sitemapUrl: this.urls[0] });
- links = await this.cleanIrrelevantPath(links);
-
- if (this.returnOnlyUrls) {
- return this.returnOnlyUrlsResponse(links, inProgress);
- }
-
- let documents = await this.processLinks(links, inProgress);
- return this.cacheAndFinalizeDocuments(documents, links);
- }
-
- private async returnOnlyUrlsResponse(
- links: string[],
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- inProgress?.({
- current: links.length,
- total: links.length,
- status: "COMPLETED",
- currentDocumentUrl: this.urls[0],
- });
- return links.map((url) => ({
- content: "",
- html: this.pageOptions?.includeHtml ? "" : undefined,
- markdown: "",
- metadata: { sourceURL: url, pageStatusCode: 200 },
- }));
- }
-
- private async processLinks(
- links: string[],
- inProgress?: (progress: Progress) => void,
- allHtmls?: string[]
- ): Promise<Document[]> {
- const pdfLinks = links.filter((link) => link.endsWith(".pdf"));
- const docLinks = links.filter(
- (link) => link.endsWith(".doc") || link.endsWith(".docx")
- );
-
- const [pdfDocuments, docxDocuments] = await Promise.all([
- this.fetchPdfDocuments(pdfLinks),
- this.fetchDocxDocuments(docLinks),
- ]);
-
- links = links.filter(
- (link) => !pdfLinks.includes(link) && !docLinks.includes(link)
- );
-
- let [documents, sitemapData] = await Promise.all([
- this.convertUrlsToDocuments(links, inProgress, allHtmls),
- this.mode === "single_urls" && links.length > 0
- ? this.getSitemapDataForSingleUrl(this.urls[0], links[0], 1500).catch(
- (error) => {
- Logger.debug(`Failed to fetch sitemap data: ${error}`);
- return null;
- }
- )
- : Promise.resolve(null),
- ]);
-
- if (this.mode === "single_urls" && documents.length > 0) {
- documents[0].metadata.sitemap = sitemapData ?? undefined;
- } else {
- documents = await this.getSitemapData(this.urls[0], documents);
- }
-
- if (this.pageOptions.includeMarkdown) {
- documents = this.applyPathReplacements(documents);
- }
-
- if (!this.pageOptions.includeHtml) {
- for (let document of documents) {
- delete document.html;
- }
- }
-
- // documents = await this.applyImgAltText(documents);
- if (this.mode === "single_urls" && this.pageOptions.includeExtract) {
- const extractionMode = this.extractorOptions?.mode ?? "markdown";
- const completionMode = extractionMode === "llm-extraction-from-raw-html" ? "raw-html" : "markdown";
-
- if (
- extractionMode === "llm-extraction" ||
- extractionMode === "llm-extraction-from-markdown" ||
- extractionMode === "llm-extraction-from-raw-html"
- ) {
- documents = await generateCompletions(
- documents,
- this.extractorOptions,
- completionMode
- );
- }
- }
- return documents.concat(pdfDocuments).concat(docxDocuments);
- }
-
- private async fetchPdfDocuments(pdfLinks: string[]): Promise<Document[]> {
- return Promise.all(
- pdfLinks.map(async (pdfLink) => {
- const timer = Date.now();
- const logInsertPromise = ScrapeEvents.insert(this.jobId, {
- type: "scrape",
- url: pdfLink,
- worker: process.env.FLY_MACHINE_ID,
- method: "pdf-scrape",
- result: null,
- });
-
- const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
- pdfLink,
- this.pageOptions.parsePDF
- );
-
- const insertedLogId = await logInsertPromise;
- ScrapeEvents.updateScrapeResult(insertedLogId, {
- response_size: content.length,
- success: !(pageStatusCode && pageStatusCode >= 400) && !!content && (content.trim().length >= 100),
- error: pageError,
- response_code: pageStatusCode,
- time_taken: Date.now() - timer,
- });
- return {
- content: content,
- markdown: content,
- metadata: { sourceURL: pdfLink, pageStatusCode, pageError },
- provider: "web-scraper",
- };
- })
- );
- }
- private async fetchDocxDocuments(docxLinks: string[]): Promise<Document[]> {
- return Promise.all(
- docxLinks.map(async (docxLink) => {
- const timer = Date.now();
- const logInsertPromise = ScrapeEvents.insert(this.jobId, {
- type: "scrape",
- url: docxLink,
- worker: process.env.FLY_MACHINE_ID,
- method: "docx-scrape",
- result: null,
- });
-
- const { content, pageStatusCode, pageError } = await fetchAndProcessDocx(
- docxLink
- );
-
- const insertedLogId = await logInsertPromise;
- ScrapeEvents.updateScrapeResult(insertedLogId, {
- response_size: content.length,
- success: !(pageStatusCode && pageStatusCode >= 400) && !!content && (content.trim().length >= 100),
- error: pageError,
- response_code: pageStatusCode,
- time_taken: Date.now() - timer,
- });
-
- return {
- content,
- metadata: { sourceURL: docxLink, pageStatusCode, pageError },
- provider: "web-scraper",
- };
- })
- );
- }
-
- private applyPathReplacements(documents: Document[]): Document[] {
- if (this.replaceAllPathsWithAbsolutePaths) {
- documents = replacePathsWithAbsolutePaths(documents);
- }
- return replaceImgPathsWithAbsolutePaths(documents);
- }
-
- private async applyImgAltText(documents: Document[]): Promise<Document[]> {
- return this.generateImgAltText
- ? this.generatesImgAltText(documents)
- : documents;
- }
-
- private async cacheAndFinalizeDocuments(
- documents: Document[],
- links: string[]
- ): Promise<Document[]> {
- // await this.setCachedDocuments(documents, links);
- documents = this.removeChildLinks(documents);
- return documents.splice(0, this.limit);
- }
-
- private async processDocumentsWithCache(
- inProgress?: (progress: Progress) => void
- ): Promise<Document[]> {
- let documents = await this.getCachedDocuments(
- this.urls.slice(0, this.limit)
- );
- if (documents.length < this.limit) {
- const newDocuments: Document[] = await this.getDocuments(
- false,
- inProgress
- );
- documents = this.mergeNewDocuments(documents, newDocuments);
- }
- documents = this.filterDocsExcludeInclude(documents);
- documents = this.filterDepth(documents);
- documents = this.removeChildLinks(documents);
- return documents.splice(0, this.limit);
- }
-
- private mergeNewDocuments(
- existingDocuments: Document[],
- newDocuments: Document[]
- ): Document[] {
- newDocuments.forEach((doc) => {
- if (
- !existingDocuments.some(
- (d) =>
- this.normalizeUrl(d.metadata.sourceURL) ===
- this.normalizeUrl(doc.metadata?.sourceURL)
- )
- ) {
- existingDocuments.push(doc);
- }
- });
- return existingDocuments;
- }
-
- private filterDocsExcludeInclude(documents: Document[]): Document[] {
- return documents.filter((document) => {
- const url = new URL(document.metadata.sourceURL);
- const path = url.pathname;
-
- if (!Array.isArray(this.excludes)) {
- this.excludes = this.excludes.split(',');
- }
-
- if (this.excludes.length > 0 && this.excludes[0] !== "") {
- // Check if the link should be excluded
- if (
- this.excludes.some((excludePattern) =>
- new RegExp(excludePattern).test(path)
- )
- ) {
- return false;
- }
- }
-
- if (!Array.isArray(this.includes)) {
- this.includes = this.includes.split(',');
- }
-
- if (this.includes.length > 0 && this.includes[0] !== "") {
- // Check if the link matches the include patterns, if any are specified
- if (this.includes.length > 0) {
- return this.includes.some((includePattern) =>
- new RegExp(includePattern).test(path)
- );
- }
- }
- return true;
- });
- }
-
- private normalizeUrl(url: string): string {
- if (url.includes("//www.")) {
- return url.replace("//www.", "//");
- }
- return url;
- }
-
- private removeChildLinks(documents: Document[]): Document[] {
- for (let document of documents) {
- if (document?.childrenLinks) delete document.childrenLinks;
- }
- return documents;
- }
-
- async setCachedDocuments(documents: Document[], childrenLinks?: string[]) {
- for (const document of documents) {
- if (document.content.trim().length === 0) {
- continue;
- }
- const normalizedUrl = this.normalizeUrl(document.metadata.sourceURL);
- await setValue(
- "web-scraper-cache:" + normalizedUrl,
- JSON.stringify({
- ...document,
- childrenLinks: childrenLinks || [],
- }),
- 60 * 60
- ); // 10 days
- }
- }
-
- async getCachedDocuments(urls: string[]): Promise<Document[]> {
- let documents: Document[] = [];
- for (const url of urls) {
- const normalizedUrl = this.normalizeUrl(url);
- Logger.debug(
- "Getting cached document for web-scraper-cache:" + normalizedUrl
- );
- const cachedDocumentString = await getValue(
- "web-scraper-cache:" + normalizedUrl
- );
- if (cachedDocumentString) {
- const cachedDocument = JSON.parse(cachedDocumentString);
- documents.push(cachedDocument);
-
- // get children documents
- for (const childUrl of cachedDocument.childrenLinks || []) {
- const normalizedChildUrl = this.normalizeUrl(childUrl);
- const childCachedDocumentString = await getValue(
- "web-scraper-cache:" + normalizedChildUrl
- );
- if (childCachedDocumentString) {
- const childCachedDocument = JSON.parse(childCachedDocumentString);
- if (
- !documents.find(
- (doc) =>
- doc.metadata.sourceURL ===
- childCachedDocument.metadata.sourceURL
- )
- ) {
- documents.push(childCachedDocument);
- }
- }
- }
- }
- }
- return documents;
- }
-
- setOptions(options: WebScraperOptions): void {
- if (!options.urls) {
- throw new Error("Urls are required");
- }
-
- this.jobId = options.jobId;
- this.bullJobId = options.bullJobId;
- this.urls = options.urls;
- this.mode = options.mode;
- this.concurrentRequests = options.concurrentRequests ?? 20;
- this.includes = options.crawlerOptions?.includes ?? [];
- this.excludes = options.crawlerOptions?.excludes ?? [];
- this.maxCrawledLinks = options.crawlerOptions?.maxCrawledLinks ?? 1000;
- this.maxCrawledDepth = options.crawlerOptions?.maxDepth ?? 10;
- this.returnOnlyUrls = options.crawlerOptions?.returnOnlyUrls ?? false;
- this.limit = options.crawlerOptions?.limit ?? 10000;
- this.generateImgAltText =
- options.crawlerOptions?.generateImgAltText ?? false;
- this.pageOptions = {
- onlyMainContent: options.pageOptions?.onlyMainContent ?? false,
- includeHtml: options.pageOptions?.includeHtml ?? false,
- replaceAllPathsWithAbsolutePaths: options.pageOptions?.replaceAllPathsWithAbsolutePaths ?? true,
- parsePDF: options.pageOptions?.parsePDF ?? true,
- onlyIncludeTags: options.pageOptions?.onlyIncludeTags ?? [],
- removeTags: options.pageOptions?.removeTags ?? [],
- includeMarkdown: options.pageOptions?.includeMarkdown ?? true,
- includeRawHtml: options.pageOptions?.includeRawHtml ?? false,
- includeExtract: options.pageOptions?.includeExtract ?? (options.extractorOptions?.mode && options.extractorOptions?.mode !== "markdown") ?? false,
- waitFor: options.pageOptions?.waitFor ?? undefined,
- headers: options.pageOptions?.headers ?? undefined,
- includeLinks: options.pageOptions?.includeLinks ?? true,
- fullPageScreenshot: options.pageOptions?.fullPageScreenshot ?? false,
- screenshot: options.pageOptions?.screenshot ?? false,
- useFastMode: options.pageOptions?.useFastMode ?? false,
- disableJsDom: options.pageOptions?.disableJsDom ?? false,
- atsv: options.pageOptions?.atsv ?? false,
- actions: options.pageOptions?.actions ?? undefined,
- geolocation: options.pageOptions?.geolocation ?? undefined,
- skipTlsVerification: options.pageOptions?.skipTlsVerification ?? false,
- removeBase64Images: options.pageOptions?.removeBase64Images ?? true,
- mobile: options.pageOptions?.mobile ?? false,
- };
- this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
- this.replaceAllPathsWithAbsolutePaths =
- options.crawlerOptions?.replaceAllPathsWithAbsolutePaths ??
- options.pageOptions?.replaceAllPathsWithAbsolutePaths ??
- false;
-
- if (typeof options.crawlerOptions?.excludes === 'string') {
- this.excludes = options.crawlerOptions?.excludes.split(',').filter((item) => item.trim() !== "");
- }
-
- if (typeof options.crawlerOptions?.includes === 'string') {
- this.includes = options.crawlerOptions?.includes.split(',').filter((item) => item.trim() !== "");
- }
-
- this.crawlerMode = options.crawlerOptions?.mode ?? "default";
- this.ignoreSitemap = options.crawlerOptions?.ignoreSitemap ?? false;
- this.allowBackwardCrawling =
- options.crawlerOptions?.allowBackwardCrawling ?? false;
- this.allowExternalContentLinks =
- options.crawlerOptions?.allowExternalContentLinks ?? false;
- this.priority = options.priority;
- this.teamId = options.teamId ?? null;
-
-
-
- // make sure all urls start with https://
- this.urls = this.urls.map((url) => {
- if (!url.trim().startsWith("http")) {
- return `https://${url}`;
- }
- return url;
- });
- }
-
- private async getSitemapData(baseUrl: string, documents: Document[]) {
- const sitemapData = await fetchSitemapData(baseUrl);
- if (sitemapData) {
- for (let i = 0; i < documents.length; i++) {
- const docInSitemapData = sitemapData.find(
- (data) =>
- this.normalizeUrl(data.loc) ===
- this.normalizeUrl(documents[i].metadata.sourceURL)
- );
- if (docInSitemapData) {
- let sitemapDocData: Partial = {};
- if (docInSitemapData.changefreq) {
- sitemapDocData.changefreq = docInSitemapData.changefreq;
- }
- if (docInSitemapData.priority) {
- sitemapDocData.priority = Number(docInSitemapData.priority);
- }
- if (docInSitemapData.lastmod) {
- sitemapDocData.lastmod = docInSitemapData.lastmod;
- }
- if (Object.keys(sitemapDocData).length !== 0) {
- documents[i].metadata.sitemap = sitemapDocData;
- }
- }
- }
- }
- return documents;
- }
- private async getSitemapDataForSingleUrl(
- baseUrl: string,
- url: string,
- timeout?: number
- ) {
- const sitemapData = await fetchSitemapData(baseUrl, timeout);
- if (sitemapData) {
- const docInSitemapData = sitemapData.find(
- (data) => this.normalizeUrl(data.loc) === this.normalizeUrl(url)
- );
- if (docInSitemapData) {
- let sitemapDocData: Partial = {};
- if (docInSitemapData.changefreq) {
- sitemapDocData.changefreq = docInSitemapData.changefreq;
- }
- if (docInSitemapData.priority) {
- sitemapDocData.priority = Number(docInSitemapData.priority);
- }
- if (docInSitemapData.lastmod) {
- sitemapDocData.lastmod = docInSitemapData.lastmod;
- }
- if (Object.keys(sitemapDocData).length !== 0) {
- return sitemapDocData;
- }
- }
- }
- return null;
- }
-  generatesImgAltText = async (documents: Document[]): Promise<Document[]> => {
- await Promise.all(
- documents.map(async (document) => {
- const images = document.content.match(/!\[.*?\]\((.*?)\)/g) || [];
-
- await Promise.all(
- images.map(async (image: string) => {
- let imageUrl = image.match(/\(([^)]+)\)/)[1];
- let altText = image.match(/\[(.*?)\]/)[1];
-
- if (
- !altText &&
- !imageUrl.startsWith("data:image") &&
- /\.(png|jpeg|gif|webp)$/.test(imageUrl)
- ) {
- const imageIndex = document.content.indexOf(image);
- const contentLength = document.content.length;
- let backText = document.content.substring(
- imageIndex + image.length,
- Math.min(imageIndex + image.length + 1000, contentLength)
- );
- let frontTextStartIndex = Math.max(imageIndex - 1000, 0);
- let frontText = document.content.substring(
- frontTextStartIndex,
- imageIndex
- );
- altText = await getImageDescription(
- imageUrl,
- backText,
- frontText,
- this.generateImgAltTextModel
- );
- }
-
- document.content = document.content.replace(
- image,
- `![${altText}](${imageUrl})`
- );
- })
- );
- })
- );
-
- return documents;
- };
-
- filterDepth(documents: Document[]): Document[] {
- return documents.filter((document) => {
- const url = new URL(document.metadata.sourceURL);
- return getURLDepth(url.toString()) <= this.maxCrawledDepth;
- });
- }
-}
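The alt-text pass above operates purely on markdown image syntax. A minimal, standalone sketch of the same regex extraction (the URL is made up for illustration):

```ts
// Sketch of the markdown-image parsing used by generatesImgAltText above.
const content = "Intro ![](https://example.com/chart.png) outro";
const images = content.match(/!\[.*?\]\((.*?)\)/g) ?? [];
for (const image of images) {
  const imageUrl = image.match(/\(([^)]+)\)/)?.[1] ?? "";
  const altText = image.match(/\[(.*?)\]/)?.[1] ?? "";
  // Only images with no alt text, a non-data URL, and a supported extension
  // would be sent to getImageDescription for a generated caption.
  const needsGeneratedAlt =
    !altText &&
    !imageUrl.startsWith("data:image") &&
    /\.(png|jpeg|gif|webp)$/.test(imageUrl);
  console.log({ imageUrl, altText, needsGeneratedAlt });
}
```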
diff --git a/apps/api/src/scraper/WebScraper/scrapers/fetch.ts b/apps/api/src/scraper/WebScraper/scrapers/fetch.ts
deleted file mode 100644
index 0df3be72..00000000
--- a/apps/api/src/scraper/WebScraper/scrapers/fetch.ts
+++ /dev/null
@@ -1,89 +0,0 @@
-import axios from "axios";
-import { logScrape } from "../../../services/logging/scrape_log";
-import { fetchAndProcessPdf } from "../utils/pdfProcessor";
-import { universalTimeout } from "../global";
-import { Logger } from "../../../lib/logger";
-
-/**
- * Scrapes a URL with Axios
- * @param url The URL to scrape
- * @param pageOptions The options for the page
- * @returns The scraped content
- */
-export async function scrapWithFetch(
- url: string,
- pageOptions: { parsePDF?: boolean } = { parsePDF: true }
-): Promise<{ content: string; pageStatusCode?: number; pageError?: string }> {
- const logParams = {
- url,
- scraper: "fetch",
- success: false,
- response_code: null,
- time_taken_seconds: null,
- error_message: null,
- html: "",
- startTime: Date.now(),
- };
-
- try {
- const response = await axios.get(url, {
- headers: {
- "Content-Type": "application/json",
- },
- timeout: universalTimeout,
- transformResponse: [(data) => data], // Prevent axios from parsing JSON automatically
- });
-
- if (response.status !== 200) {
- Logger.debug(
- `⛏️ Axios: Failed to fetch url: ${url} with status: ${response.status}`
- );
- logParams.error_message = response.statusText;
- logParams.response_code = response.status;
- return {
- content: "",
- pageStatusCode: response.status,
- pageError: response.statusText,
- };
- }
-
- const contentType = response.headers["content-type"];
- if (contentType && contentType.includes("application/pdf")) {
- logParams.success = true;
- const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
- url,
- pageOptions?.parsePDF
- );
- logParams.response_code = pageStatusCode;
- logParams.error_message = pageError;
- return { content, pageStatusCode: response.status, pageError };
- } else {
- const text = response.data;
- logParams.success = true;
- logParams.html = text;
- logParams.response_code = response.status;
- return {
- content: text,
- pageStatusCode: response.status,
- pageError: null,
- };
- }
- } catch (error) {
- if (error.code === "ECONNABORTED") {
- logParams.error_message = "Request timed out";
- Logger.debug(`⛏️ Axios: Request timed out for ${url}`);
- } else {
- logParams.error_message = error.message || error;
- Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} | Error: ${error}`);
- }
- return {
- content: "",
- pageStatusCode: error.response?.status ?? null,
- pageError: logParams.error_message,
- };
- } finally {
- const endTime = Date.now();
- logParams.time_taken_seconds = (endTime - logParams.startTime) / 1000;
- await logScrape(logParams);
- }
-}
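For reference, a hypothetical caller of the removed fetch scraper looked roughly like this; the import path assumes a file inside apps/api/src/scraper/WebScraper/, and the URL is a placeholder:

```ts
import { scrapWithFetch } from "./scrapers/fetch";

async function fetchExample() {
  // scrapWithFetch returned the raw body (or extracted PDF text) plus status info.
  const { content, pageStatusCode, pageError } = await scrapWithFetch(
    "https://example.com",
    { parsePDF: true },
  );
  if (pageError || (pageStatusCode ?? 0) >= 400) {
    console.error(`fetch engine failed: ${pageStatusCode} ${pageError}`);
    return;
  }
  console.log(content.slice(0, 200));
}
```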
diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
deleted file mode 100644
index 7616774a..00000000
--- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
+++ /dev/null
@@ -1,230 +0,0 @@
-import axios from "axios";
-import { Action, FireEngineOptions, FireEngineResponse } from "../../../lib/entities";
-import { logScrape } from "../../../services/logging/scrape_log";
-import { generateRequestParams } from "../single_url";
-import { fetchAndProcessPdf } from "../utils/pdfProcessor";
-import { universalTimeout } from "../global";
-import { Logger } from "../../../lib/logger";
-import * as Sentry from "@sentry/node";
-import axiosRetry from 'axios-retry';
-
-axiosRetry(axios, { retries: 3 , onRetry:()=>{
- console.log("Retrying (fire-engine)...");
-}, retryDelay: axiosRetry.exponentialDelay});
-/**
- * Scrapes a URL with Fire-Engine
- * @param url The URL to scrape
- * @param waitFor The time to wait for the page to load
- * @param screenshot Whether to take a screenshot
- * @param fullPageScreenshot Whether to take a full page screenshot
- * @param pageOptions The options for the page
- * @param headers The headers to send with the request
- * @param options The options for the request
- * @returns The scraped content
- */
-export async function scrapWithFireEngine({
- url,
- actions,
- waitFor = 0,
- screenshot = false,
- fullPageScreenshot = false,
- pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false, geolocation: { country: "US" }, skipTlsVerification: false, removeBase64Images: true, mobile: false },
- fireEngineOptions = {},
- headers,
- options,
- priority,
- teamId,
-}: {
- url: string;
- actions?: Action[];
- waitFor?: number;
- screenshot?: boolean;
- fullPageScreenshot?: boolean;
- pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean, geolocation?: { country?: string }, skipTlsVerification?: boolean, removeBase64Images?: boolean, mobile?: boolean };
- fireEngineOptions?: FireEngineOptions;
-  headers?: Record<string, string>;
- options?: any;
- priority?: number;
- teamId?: string;
-}): Promise<FireEngineResponse> {
- const logParams = {
- url,
- scraper: "fire-engine",
- success: false,
- response_code: null,
- time_taken_seconds: null,
- error_message: null,
- html: "",
- startTime: Date.now(),
- };
-
- try {
- const reqParams = await generateRequestParams(url);
- let waitParam = reqParams["params"]?.wait ?? waitFor;
- let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
- let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
- let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
- let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
-
-
- let endpoint = "/scrape";
-
- if(options?.endpoint === "request") {
- endpoint = "/request";
- }
-
- let engine = engineParam; // do we want fireEngineOptions as first choice?
-
- if (pageOptions?.useFastMode) {
- fireEngineOptionsParam.engine = "tlsclient";
- engine = "tlsclient";
- }
-
- Logger.info(
- `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { actions: ${JSON.stringify((actions ?? []).map(x => x.type))}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
- );
-
- // atsv is only available for beta customers
- const betaCustomersString = process.env.BETA_CUSTOMERS;
- const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : [];
-
- if (pageOptions?.atsv && betaCustomers.includes(teamId)) {
- fireEngineOptionsParam.atsv = true;
- } else {
- pageOptions.atsv = false;
- }
-
- const axiosInstance = axios.create({
- headers: { "Content-Type": "application/json" }
- });
-
- const startTime = Date.now();
- const _response = await Sentry.startSpan({
- name: "Call to fire-engine"
- }, async span => {
-
- return await axiosInstance.post(
- process.env.FIRE_ENGINE_BETA_URL + endpoint,
- {
- url: url,
- headers: headers,
- wait: waitParam,
- screenshot: screenshotParam,
- fullPageScreenshot: fullPageScreenshotParam,
- disableJsDom: pageOptions?.disableJsDom ?? false,
- priority,
- engine,
- instantReturn: true,
- mobile: pageOptions?.mobile ?? false,
- ...fireEngineOptionsParam,
- atsv: pageOptions?.atsv ?? false,
- scrollXPaths: pageOptions?.scrollXPaths ?? [],
- geolocation: pageOptions?.geolocation,
- skipTlsVerification: pageOptions?.skipTlsVerification ?? false,
- removeBase64Images: pageOptions?.removeBase64Images ?? true,
- actions: actions,
- },
- {
- headers: {
- "Content-Type": "application/json",
- ...(Sentry.isInitialized() ? ({
- "sentry-trace": Sentry.spanToTraceHeader(span),
- "baggage": Sentry.spanToBaggageHeader(span),
- }) : {}),
- }
- }
- );
- });
-
- const waitTotal = (actions ?? []).filter(x => x.type === "wait").reduce((a, x) => (x as { type: "wait"; milliseconds: number; }).milliseconds + a, 0);
-
- let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
-
- // added 5 seconds to the timeout to account for 'smart wait'
- while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitTotal + 5000) {
- await new Promise(resolve => setTimeout(resolve, 250)); // wait 0.25 seconds
- checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
- }
-
- if (checkStatusResponse.data.processing) {
- Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`);
- axiosInstance.delete(
- process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, {
- validateStatus: (status) => true
- }
- ).catch((error) => {
- Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`);
- });
-
- Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
- logParams.error_message = "Request timed out";
- return { html: "", pageStatusCode: null, pageError: "" };
- }
-
- if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) {
- Logger.debug(
- `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}\t ${checkStatusResponse.data.error}`
- );
-
- logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error;
- logParams.response_code = checkStatusResponse.data?.pageStatusCode;
-
- if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) {
- Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.data?.pageStatusCode}`);
- }
-
- const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
-
- return {
- html: "",
- pageStatusCode,
- pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error,
- };
- }
-
- const contentType = checkStatusResponse.data.responseHeaders?.["content-type"];
-
- if (contentType && contentType.includes("application/pdf")) {
- const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
- url,
- pageOptions?.parsePDF
- );
- logParams.success = true;
- logParams.response_code = pageStatusCode;
- logParams.error_message = pageError;
- return { html: content, pageStatusCode, pageError };
- } else {
- const data = checkStatusResponse.data;
-
- logParams.success =
- (data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
- data.pageStatusCode === 404;
- logParams.html = data.content ?? "";
- logParams.response_code = data.pageStatusCode;
- logParams.error_message = data.pageError ?? data.error;
-
- return {
- html: data.content ?? "",
- screenshots: data.screenshots ?? [data.screenshot] ?? [],
- pageStatusCode: data.pageStatusCode,
- pageError: data.pageError ?? data.error,
- scrapeActionContent: data?.actionContent ?? [],
- };
- }
- } catch (error) {
- if (error.code === "ECONNABORTED") {
- Logger.debug(`⛏️ Fire-Engine (catch block): Request timed out for ${url}`);
- logParams.error_message = "Request timed out";
- } else {
- Logger.debug(`⛏️ Fire-Engine(catch block): Failed to fetch url: ${url} | Error: ${error}`);
- logParams.error_message = error.message || error;
- }
- return { html: "", pageStatusCode: null, pageError: logParams.error_message };
- } finally {
- const endTime = Date.now();
- logParams.time_taken_seconds = (endTime - logParams.startTime) / 1000;
- await logScrape(logParams, pageOptions);
- }
-}
-
-
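The core of the removed fire-engine scraper is a submit-then-poll loop: post a job with instantReturn, then poll /scrape/{jobId} until processing clears or the timeout is hit. A trimmed sketch of that pattern (payload reduced to a few fields; the timeout value is illustrative):

```ts
import axios from "axios";

async function fireEnginePollSketch(url: string, timeoutMs = 60_000) {
  const base = process.env.FIRE_ENGINE_BETA_URL;

  // Submit the job; the service returns a jobId immediately instead of blocking.
  const { data } = await axios.post(`${base}/scrape`, {
    url,
    engine: "chrome-cdp",
    instantReturn: true,
  });

  // Poll the job status every 250ms until it stops processing or we give up.
  const start = Date.now();
  let status = await axios.get(`${base}/scrape/${data.jobId}`);
  while (status.data.processing && Date.now() - start < timeoutMs) {
    await new Promise((resolve) => setTimeout(resolve, 250));
    status = await axios.get(`${base}/scrape/${data.jobId}`);
  }
  return status.data; // content, pageStatusCode, pageError, screenshots, ...
}
```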
diff --git a/apps/api/src/scraper/WebScraper/scrapers/playwright.ts b/apps/api/src/scraper/WebScraper/scrapers/playwright.ts
deleted file mode 100644
index 09c7353b..00000000
--- a/apps/api/src/scraper/WebScraper/scrapers/playwright.ts
+++ /dev/null
@@ -1,111 +0,0 @@
-import axios from "axios";
-import { logScrape } from "../../../services/logging/scrape_log";
-import { generateRequestParams } from "../single_url";
-import { fetchAndProcessPdf } from "../utils/pdfProcessor";
-import { universalTimeout } from "../global";
-import { Logger } from "../../../lib/logger";
-
-/**
- * Scrapes a URL with Playwright
- * @param url The URL to scrape
- * @param waitFor The time to wait for the page to load
- * @param headers The headers to send with the request
- * @param pageOptions The options for the page
- * @returns The scraped content
- */
-export async function scrapWithPlaywright(
- url: string,
- waitFor: number = 0,
-  headers?: Record<string, string>,
- pageOptions: { parsePDF?: boolean } = { parsePDF: true }
-): Promise<{ content: string; pageStatusCode?: number; pageError?: string }> {
- const logParams = {
- url,
- scraper: "playwright",
- success: false,
- response_code: null,
- time_taken_seconds: null,
- error_message: null,
- html: "",
- startTime: Date.now(),
- };
-
- try {
- const reqParams = await generateRequestParams(url);
- // If the user has passed a wait parameter in the request, use that
- const waitParam = reqParams["params"]?.wait ?? waitFor;
-
- const response = await axios.post(
- process.env.PLAYWRIGHT_MICROSERVICE_URL,
- {
- url: url,
- wait_after_load: waitParam,
- timeout: universalTimeout + waitParam,
- headers: headers,
- },
- {
- headers: {
- "Content-Type": "application/json",
- },
- timeout: universalTimeout + waitParam, // Add waitParam to timeout to account for the wait time
- transformResponse: [(data) => data], // Prevent axios from parsing JSON automatically
- }
- );
-
- if (response.status !== 200) {
- Logger.debug(
- `⛏️ Playwright: Failed to fetch url: ${url} | status: ${response.status}, error: ${response.data?.pageError}`
- );
- logParams.error_message = response.data?.pageError;
- logParams.response_code = response.data?.pageStatusCode;
- return {
- content: "",
- pageStatusCode: response.data?.pageStatusCode,
- pageError: response.data?.pageError,
- };
- }
-
- const contentType = response.headers["content-type"];
- if (contentType && contentType.includes("application/pdf")) {
- logParams.success = true;
- const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(url, pageOptions?.parsePDF);
- logParams.response_code = pageStatusCode;
- logParams.error_message = pageError;
- return { content, pageStatusCode, pageError };
- } else {
- const textData = response.data;
- try {
- const data = JSON.parse(textData);
- const html = data.content;
- logParams.success = true;
- logParams.html = html;
- logParams.response_code = data.pageStatusCode;
- logParams.error_message = data.pageError;
- return {
- content: html ?? "",
- pageStatusCode: data.pageStatusCode,
- pageError: data.pageError,
- };
- } catch (jsonError) {
- logParams.error_message = jsonError.message || jsonError;
- Logger.debug(
- `⛏️ Playwright: Error parsing JSON response for url: ${url} | Error: ${jsonError}`
- );
- return { content: "", pageStatusCode: null, pageError: logParams.error_message };
- }
- }
- } catch (error) {
- if (error.code === "ECONNABORTED") {
- logParams.error_message = "Request timed out";
- Logger.debug(`⛏️ Playwright: Request timed out for ${url}`);
- } else {
- logParams.error_message = error.message || error;
- Logger.debug(`⛏️ Playwright: Failed to fetch url: ${url} | Error: ${error}`);
- }
- return { content: "", pageStatusCode: null, pageError: logParams.error_message };
- } finally {
- const endTime = Date.now();
- logParams.time_taken_seconds = (endTime - logParams.startTime) / 1000;
- await logScrape(logParams);
- }
-}
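The removed Playwright scraper was a thin HTTP client around the Playwright microservice. A reduced sketch of the request it sent (15_000 stands in for the universalTimeout constant used above):

```ts
import axios from "axios";

async function playwrightSketch(url: string, waitFor = 0) {
  const response = await axios.post(
    process.env.PLAYWRIGHT_MICROSERVICE_URL!,
    { url, wait_after_load: waitFor, timeout: 15_000 + waitFor },
    {
      headers: { "Content-Type": "application/json" },
      timeout: 15_000 + waitFor,
      // Keep the body as a string; the scraper parsed it manually to catch bad JSON.
      transformResponse: [(data) => data],
    },
  );
  const { content, pageStatusCode, pageError } = JSON.parse(response.data);
  return { content: content ?? "", pageStatusCode, pageError };
}
```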
diff --git a/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts b/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts
deleted file mode 100644
index b72fa8b2..00000000
--- a/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts
+++ /dev/null
@@ -1,92 +0,0 @@
-import { logScrape } from "../../../services/logging/scrape_log";
-import { generateRequestParams } from "../single_url";
-import { fetchAndProcessPdf } from "../utils/pdfProcessor";
-import { universalTimeout } from "../global";
-import { ScrapingBeeClient } from "scrapingbee";
-import { Logger } from "../../../lib/logger";
-
-/**
- * Scrapes a URL with ScrapingBee
- * @param url The URL to scrape
- * @param wait_browser The browser event to wait for
- * @param timeout The timeout for the scrape
- * @param pageOptions The options for the page
- * @returns The scraped content
- */
-export async function scrapWithScrapingBee(
- url: string,
- wait_browser: string = "domcontentloaded",
- timeout: number = universalTimeout,
- pageOptions: { parsePDF?: boolean } = { parsePDF: true }
- ): Promise<{ content: string; pageStatusCode?: number; pageError?: string }> {
- const logParams = {
- url,
- scraper: wait_browser === "networkidle2" ? "scrapingBeeLoad" : "scrapingBee",
- success: false,
- response_code: null,
- time_taken_seconds: null,
- error_message: null,
- html: "",
- startTime: Date.now(),
- };
- try {
- const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY);
- const clientParams = await generateRequestParams(
- url,
- wait_browser,
- timeout
- );
- const response = await client.get({
- ...clientParams,
- params: {
- ...clientParams.params,
- transparent_status_code: "True",
- },
- });
- Logger.info(
- `⛏️ ScrapingBee: Scraping ${url}`
- );
- const contentType = response.headers["content-type"];
- if (contentType && contentType.includes("application/pdf")) {
- logParams.success = true;
- const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(url, pageOptions?.parsePDF);
- logParams.response_code = pageStatusCode;
- logParams.error_message = pageError;
- return { content, pageStatusCode, pageError };
- } else {
- let text = "";
- try {
- const decoder = new TextDecoder();
- text = decoder.decode(response.data);
- logParams.success = true;
- } catch (decodeError) {
- Logger.debug(
- `⛏️ ScrapingBee: Error decoding response data for url: ${url} | Error: ${decodeError}`
- );
- logParams.error_message = decodeError.message || decodeError;
- }
- logParams.response_code = response.status;
- logParams.html = text;
- logParams.success = response.status >= 200 && response.status < 300 || response.status === 404;
- logParams.error_message = response.statusText !== "OK" ? response.statusText : undefined;
- return {
- content: text,
- pageStatusCode: response.status,
- pageError: response.statusText !== "OK" ? response.statusText : undefined,
- };
- }
- } catch (error) {
- Logger.debug(`⛏️ ScrapingBee: Error fetching url: ${url} | Error: ${error}`);
- logParams.error_message = error.message || error;
- logParams.response_code = error.response?.status;
- return {
- content: "",
- pageStatusCode: error.response?.status,
- pageError: error.response?.statusText,
- };
- } finally {
- const endTime = Date.now();
- logParams.time_taken_seconds = (endTime - logParams.startTime) / 1000;
- await logScrape(logParams);
- }
- }
\ No newline at end of file
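The removed ScrapingBee scraper reduces to a single client.get call with transparent_status_code enabled so the target site's real status code is passed through. A minimal sketch, with the timeout hard-coded where the original passed it in:

```ts
import { ScrapingBeeClient } from "scrapingbee";

async function scrapingBeeSketch(url: string) {
  const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!);
  const response = await client.get({
    url,
    params: {
      timeout: 15000,
      wait_browser: "domcontentloaded",
      transparent_status_code: "True", // surface the target's own status code
    },
  });
  // The client returns bytes; decode them into HTML text.
  const content = new TextDecoder().decode(response.data);
  return { content, pageStatusCode: response.status };
}
```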
diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts
deleted file mode 100644
index 38ec74f0..00000000
--- a/apps/api/src/scraper/WebScraper/single_url.ts
+++ /dev/null
@@ -1,506 +0,0 @@
-import * as cheerio from "cheerio";
-import { extractMetadata } from "./utils/metadata";
-import dotenv from "dotenv";
-import {
- Document,
- PageOptions,
- FireEngineResponse,
- ExtractorOptions,
- Action,
-} from "../../lib/entities";
-import { parseMarkdown } from "../../lib/html-to-markdown";
-import { urlSpecificParams } from "./utils/custom/website_params";
-import { fetchAndProcessPdf } from "./utils/pdfProcessor";
-import { handleCustomScraping } from "./custom/handleCustomScraping";
-import { removeUnwantedElements } from "./utils/removeUnwantedElements";
-import { scrapWithFetch } from "./scrapers/fetch";
-import { scrapWithFireEngine } from "./scrapers/fireEngine";
-import { scrapWithPlaywright } from "./scrapers/playwright";
-import { scrapWithScrapingBee } from "./scrapers/scrapingBee";
-import { extractLinks } from "./utils/utils";
-import { Logger } from "../../lib/logger";
-import { ScrapeEvents } from "../../lib/scrape-events";
-import { clientSideError } from "../../strings";
-import { ScrapeActionContent } from "../../lib/entities";
-import { removeBase64Images } from "./utils/removeBase64Images";
-
-dotenv.config();
-
-const useScrapingBee = process.env.SCRAPING_BEE_API_KEY !== '' && process.env.SCRAPING_BEE_API_KEY !== undefined;
-const useFireEngine = process.env.FIRE_ENGINE_BETA_URL !== '' && process.env.FIRE_ENGINE_BETA_URL !== undefined;
-
-export const baseScrapers = [
- useFireEngine ? "fire-engine;chrome-cdp" : undefined,
- useFireEngine ? "fire-engine" : undefined,
- useScrapingBee ? "scrapingBee" : undefined,
- useFireEngine ? undefined : "playwright",
- useScrapingBee ? "scrapingBeeLoad" : undefined,
- "fetch",
-].filter(Boolean);
-
-export async function generateRequestParams(
- url: string,
- wait_browser: string = "domcontentloaded",
- timeout: number = 15000
-): Promise<any> {
- const defaultParams = {
- url: url,
- params: { timeout: timeout, wait_browser: wait_browser },
- headers: { "ScrapingService-Request": "TRUE" },
- };
-
- try {
- const urlKey = new URL(url).hostname.replace(/^www\./, "");
- if (urlSpecificParams.hasOwnProperty(urlKey)) {
- return { ...defaultParams, ...urlSpecificParams[urlKey] };
- } else {
- return defaultParams;
- }
- } catch (error) {
- Logger.error(`Error generating URL key: ${error}`);
- return defaultParams;
- }
-}
-
-/**
- * Get the order of scrapers to be used for scraping a URL
- * If the user doesn't have envs set for a specific scraper, it will be removed from the order.
- * @param defaultScraper The default scraper to use if the URL does not have a specific scraper order defined
- * @returns The order of scrapers to be used for scraping a URL
- */
-function getScrapingFallbackOrder(
- defaultScraper?: string,
- isWaitPresent: boolean = false,
- isScreenshotPresent: boolean = false,
- isHeadersPresent: boolean = false,
- isActionsPresent: boolean = false,
-) {
- if (isActionsPresent) {
- return useFireEngine ? ["fire-engine;chrome-cdp"] : [];
- }
-
- const availableScrapers = baseScrapers.filter((scraper) => {
- switch (scraper) {
- case "scrapingBee":
- case "scrapingBeeLoad":
- return !!process.env.SCRAPING_BEE_API_KEY;
- case "fire-engine":
- return !!process.env.FIRE_ENGINE_BETA_URL;
- case "fire-engine;chrome-cdp":
- return !!process.env.FIRE_ENGINE_BETA_URL;
- case "playwright":
- return !!process.env.PLAYWRIGHT_MICROSERVICE_URL;
- default:
- return true;
- }
- });
-
- let defaultOrder = [
- useFireEngine ? "fire-engine;chrome-cdp" : undefined,
- useFireEngine ? "fire-engine" : undefined,
- useScrapingBee ? "scrapingBee" : undefined,
- useScrapingBee ? "scrapingBeeLoad" : undefined,
- useFireEngine ? undefined : "playwright",
- "fetch",
- ].filter(Boolean);
-
- // if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
- // defaultOrder = [
- // "fire-engine",
- // useFireEngine ? undefined : "playwright",
- // ...defaultOrder.filter(
- // (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
- // ),
- // ].filter(Boolean);
- // }
-
- const filteredDefaultOrder = defaultOrder.filter(
- (scraper: (typeof baseScrapers)[number]) =>
- availableScrapers.includes(scraper)
- );
- const uniqueScrapers = new Set(
- defaultScraper
- ? [defaultScraper, ...filteredDefaultOrder, ...availableScrapers]
- : [...filteredDefaultOrder, ...availableScrapers]
- );
-
- const scrapersInOrder = Array.from(uniqueScrapers);
- return scrapersInOrder as (typeof baseScrapers)[number][];
-}
-
-
-
-export async function scrapSingleUrl(
- jobId: string,
- urlToScrap: string,
- pageOptions: PageOptions,
- extractorOptions?: ExtractorOptions,
- existingHtml?: string,
- priority?: number,
- teamId?: string
-): Promise<Document> {
- pageOptions = {
- includeMarkdown: pageOptions.includeMarkdown ?? true,
- includeExtract: pageOptions.includeExtract ?? false,
- onlyMainContent: pageOptions.onlyMainContent ?? false,
- includeHtml: pageOptions.includeHtml ?? false,
- includeRawHtml: pageOptions.includeRawHtml ?? false,
- waitFor: pageOptions.waitFor ?? undefined,
- screenshot: pageOptions.screenshot ?? false,
- fullPageScreenshot: pageOptions.fullPageScreenshot ?? false,
- headers: pageOptions.headers ?? undefined,
- includeLinks: pageOptions.includeLinks ?? true,
- replaceAllPathsWithAbsolutePaths: pageOptions.replaceAllPathsWithAbsolutePaths ?? true,
- parsePDF: pageOptions.parsePDF ?? true,
- removeTags: pageOptions.removeTags ?? [],
- onlyIncludeTags: pageOptions.onlyIncludeTags ?? [],
- useFastMode: pageOptions.useFastMode ?? false,
- disableJsDom: pageOptions.disableJsDom ?? false,
- atsv: pageOptions.atsv ?? false,
- actions: pageOptions.actions ?? undefined,
- geolocation: pageOptions.geolocation ?? undefined,
- skipTlsVerification: pageOptions.skipTlsVerification ?? false,
- removeBase64Images: pageOptions.removeBase64Images ?? true,
- mobile: pageOptions.mobile ?? false,
- }
-
- if (extractorOptions) {
- extractorOptions = {
- mode: extractorOptions?.mode ?? "llm-extraction-from-markdown",
- }
- }
-
- if (!existingHtml) {
- existingHtml = "";
- }
-
- urlToScrap = urlToScrap.trim();
-
- const attemptScraping = async (
- url: string,
- method: (typeof baseScrapers)[number]
- ) => {
- let scraperResponse: {
- text: string;
- screenshot: string;
- actions?: {
- screenshots?: string[];
- scrapes?: ScrapeActionContent[];
- };
- metadata: { pageStatusCode?: number; pageError?: string | null };
- } = { text: "", screenshot: "", metadata: {} };
- let screenshot = "";
-
- const timer = Date.now();
- const logInsertPromise = ScrapeEvents.insert(jobId, {
- type: "scrape",
- url,
- worker: process.env.FLY_MACHINE_ID,
- method,
- result: null,
- });
-
- switch (method) {
- case "fire-engine":
- case "fire-engine;chrome-cdp":
-
- let engine: "playwright" | "chrome-cdp" | "tlsclient" = "playwright";
- if (method === "fire-engine;chrome-cdp") {
- engine = "chrome-cdp";
- }
-
- if (process.env.FIRE_ENGINE_BETA_URL) {
- const processedActions: Action[] = pageOptions.actions?.flatMap((action: Action, index: number, array: Action[]) => {
- if (action.type === "click" || action.type === "write" || action.type === "press") {
- const result: Action[] = [];
- // Don't add a wait if the previous action is a wait
- // if (index === 0 || array[index - 1].type !== "wait") {
- // result.push({ type: "wait", milliseconds: 1200 } as Action);
- // }
- // Fire-engine now handles wait times automatically, leaving the code here for now
- result.push(action);
- // Don't add a wait if the next action is a wait
- // if (index === array.length - 1 || array[index + 1].type !== "wait") {
- // result.push({ type: "wait", milliseconds: 1200 } as Action);
- // }
- return result;
- }
- return [action as Action];
- }) ?? [] as Action[];
-
- const response = await scrapWithFireEngine({
- url,
- ...(engine === "chrome-cdp" ? ({
- actions: [
- ...(pageOptions.waitFor ? [{
- type: "wait" as const,
- milliseconds: pageOptions.waitFor,
- }] : []),
- ...((pageOptions.screenshot || pageOptions.fullPageScreenshot) ? [{
- type: "screenshot" as const,
- fullPage: !!pageOptions.fullPageScreenshot,
- }] : []),
- ...processedActions,
- ],
- }) : ({
- waitFor: pageOptions.waitFor,
- screenshot: pageOptions.screenshot,
- fullPageScreenshot: pageOptions.fullPageScreenshot,
- })),
- pageOptions: pageOptions,
- headers: pageOptions.headers,
- fireEngineOptions: {
- engine: engine,
- atsv: pageOptions.atsv,
- disableJsDom: pageOptions.disableJsDom,
- },
- priority,
- teamId,
- });
- scraperResponse.text = response.html;
- if (pageOptions.screenshot || pageOptions.fullPageScreenshot) {
- scraperResponse.screenshot = (response.screenshots ?? []).splice(0, 1)[0] ?? "";
- }
- if (pageOptions.actions) {
- scraperResponse.actions = {
- screenshots: response.screenshots ?? [],
- scrapes: response.scrapeActionContent ?? [],
- };
- }
- scraperResponse.metadata.pageStatusCode = response.pageStatusCode;
- scraperResponse.metadata.pageError = response.pageError;
- }
- break;
- case "scrapingBee":
- if (process.env.SCRAPING_BEE_API_KEY) {
- const response = await scrapWithScrapingBee(
- url,
- "domcontentloaded",
- pageOptions.fallback === false ? 7000 : 15000
- );
- scraperResponse.text = response.content;
- scraperResponse.metadata.pageStatusCode = response.pageStatusCode;
- scraperResponse.metadata.pageError = response.pageError;
- }
- break;
- case "playwright":
- if (process.env.PLAYWRIGHT_MICROSERVICE_URL) {
- const response = await scrapWithPlaywright(
- url,
- pageOptions.waitFor,
- pageOptions.headers
- );
- scraperResponse.text = response.content;
- scraperResponse.metadata.pageStatusCode = response.pageStatusCode;
- scraperResponse.metadata.pageError = response.pageError;
- }
- break;
- case "scrapingBeeLoad":
- if (process.env.SCRAPING_BEE_API_KEY) {
- const response = await scrapWithScrapingBee(url, "networkidle2");
- scraperResponse.text = response.content;
- scraperResponse.metadata.pageStatusCode = response.pageStatusCode;
- scraperResponse.metadata.pageError = response.pageError;
- }
- break;
- case "fetch":
- const response = await scrapWithFetch(url);
- scraperResponse.text = response.content;
- scraperResponse.metadata.pageStatusCode = response.pageStatusCode;
- scraperResponse.metadata.pageError = response.pageError;
- break;
- }
-
- let customScrapedContent: FireEngineResponse | null = null;
-
- // Check for custom scraping conditions
- const customScraperResult = await handleCustomScraping(
- scraperResponse.text,
- url
- );
-
- if (customScraperResult) {
- switch (customScraperResult.scraper) {
- case "fire-engine":
- customScrapedContent = await scrapWithFireEngine({
- url: customScraperResult.url,
- actions: customScraperResult.waitAfterLoad ? ([
- {
- type: "wait",
- milliseconds: customScraperResult.waitAfterLoad,
- }
- ]) : ([]),
- pageOptions: customScraperResult.pageOptions,
- });
- break;
- case "pdf":
- const { content, pageStatusCode, pageError } =
- await fetchAndProcessPdf(
- customScraperResult.url,
- pageOptions?.parsePDF
- );
- customScrapedContent = {
- html: content,
- pageStatusCode,
- pageError,
- };
- break;
- }
- }
-
- if (customScrapedContent) {
- scraperResponse.text = customScrapedContent.html;
- }
-    //* TODO: add an option to return markdown or structured/extracted content
- let cleanedHtml = removeUnwantedElements(scraperResponse.text, pageOptions);
- let text = await parseMarkdown(cleanedHtml);
- if (pageOptions.removeBase64Images) {
- text = await removeBase64Images(text);
- }
-
- const insertedLogId = await logInsertPromise;
- ScrapeEvents.updateScrapeResult(insertedLogId, {
- response_size: scraperResponse.text.length,
- success: !(scraperResponse.metadata.pageStatusCode && scraperResponse.metadata.pageStatusCode >= 400) && !!text && (text.trim().length >= 100),
- error: scraperResponse.metadata.pageError,
- response_code: scraperResponse.metadata.pageStatusCode,
- time_taken: Date.now() - timer,
- });
-
- return {
- text,
- html: cleanedHtml,
- rawHtml: scraperResponse.text,
- screenshot: scraperResponse.screenshot,
- actions: scraperResponse.actions,
- pageStatusCode: scraperResponse.metadata.pageStatusCode,
- pageError: scraperResponse.metadata.pageError || undefined,
- };
- };
-
- let { text, html, rawHtml, screenshot, actions, pageStatusCode, pageError } = {
- text: "",
- html: "",
- rawHtml: "",
- screenshot: "",
- actions: undefined,
- pageStatusCode: 200,
- pageError: undefined,
- };
- try {
- let urlKey = urlToScrap;
- try {
- urlKey = new URL(urlToScrap).hostname.replace(/^www\./, "");
- } catch (error) {
- Logger.error(`Invalid URL key, trying: ${urlToScrap}`);
- }
- const defaultScraper = urlSpecificParams[urlKey]?.defaultScraper ?? "";
- const scrapersInOrder = getScrapingFallbackOrder(
- defaultScraper,
- pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0,
- pageOptions && (pageOptions.screenshot || pageOptions.fullPageScreenshot) && (pageOptions.screenshot === true || pageOptions.fullPageScreenshot === true),
- pageOptions && pageOptions.headers && pageOptions.headers !== undefined,
- pageOptions && Array.isArray(pageOptions.actions) && pageOptions.actions.length > 0,
- );
-
- for (const scraper of scrapersInOrder) {
-      // If there is existing HTML from the crawler, use it
- if (existingHtml && existingHtml.trim().length >= 100 && !existingHtml.includes(clientSideError)) {
- let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
- text = await parseMarkdown(cleanedHtml);
- html = cleanedHtml;
- break;
- }
-
- const attempt = await attemptScraping(urlToScrap, scraper);
- text = attempt.text ?? "";
- html = attempt.html ?? "";
- rawHtml = attempt.rawHtml ?? "";
- screenshot = attempt.screenshot ?? "";
- actions = attempt.actions ?? undefined;
-
- if (attempt.pageStatusCode) {
- pageStatusCode = attempt.pageStatusCode;
- }
-
- if (attempt.pageError && (attempt.pageStatusCode >= 400 || scrapersInOrder.indexOf(scraper) === scrapersInOrder.length - 1)) { // force pageError if it's the last scraper and it failed too
- pageError = attempt.pageError;
-
- if (attempt.pageStatusCode < 400 || !attempt.pageStatusCode) {
- pageStatusCode = 500;
- }
- } else if (attempt && attempt.pageStatusCode && attempt.pageStatusCode < 400) {
- pageError = undefined;
- }
-
- if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
- Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
- break;
- }
- if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 400)) {
- Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code ${pageStatusCode}, breaking`);
- break;
- }
- // const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
- // if (nextScraperIndex < scrapersInOrder.length) {
- // Logger.debug(`⛏️ ${scraper} Failed to fetch URL: ${urlToScrap} with status: ${pageStatusCode}, error: ${pageError} | Falling back to ${scrapersInOrder[nextScraperIndex]}`);
- // }
- }
-
- if (!text) {
- throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
- }
-
- const soup = cheerio.load(rawHtml);
- const metadata = extractMetadata(soup, urlToScrap);
-
- let linksOnPage: string[] | undefined;
-
- if (pageOptions.includeLinks) {
- linksOnPage = extractLinks(rawHtml, urlToScrap);
- }
-
- let document: Document = {
- content: text,
- markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? text : undefined,
- html: pageOptions.includeHtml ? html : undefined,
- rawHtml:
- pageOptions.includeRawHtml ||
- (extractorOptions?.mode === "llm-extraction-from-raw-html" && pageOptions.includeExtract)
- ? rawHtml
- : undefined,
- linksOnPage: pageOptions.includeLinks ? linksOnPage : undefined,
- actions,
- metadata: {
- ...metadata,
- ...(screenshot && screenshot.length > 0 ? ({
- screenshot,
- }) : {}),
- sourceURL: urlToScrap,
- pageStatusCode: pageStatusCode,
- pageError: pageError,
- },
- };
-
- return document;
- } catch (error) {
- Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
- ScrapeEvents.insert(jobId, {
- type: "error",
- message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
- stack: error.stack,
- });
-
- return {
- content: "",
- markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
- html: "",
- linksOnPage: pageOptions.includeLinks ? [] : undefined,
- metadata: {
- sourceURL: urlToScrap,
- pageStatusCode: pageStatusCode,
- pageError: pageError,
- },
- } as Document;
- }
-}
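To make the fallback behavior of the removed getScrapingFallbackOrder concrete, these are the orders it produced under two common configurations (derived from the defaultOrder list and the actions short-circuit above; treat them as illustrative, not exhaustive):

```ts
// With FIRE_ENGINE_BETA_URL and SCRAPING_BEE_API_KEY both set and no actions requested,
// playwright is skipped and the order was roughly:
const typicalOrder = [
  "fire-engine;chrome-cdp",
  "fire-engine",
  "scrapingBee",
  "scrapingBeeLoad",
  "fetch",
];

// When the request contains actions, only the CDP engine qualifies:
const orderWithActions = ["fire-engine;chrome-cdp"];
```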
diff --git a/apps/api/src/scraper/WebScraper/sitemap.ts b/apps/api/src/scraper/WebScraper/sitemap.ts
index 756cd765..05b3d00d 100644
--- a/apps/api/src/scraper/WebScraper/sitemap.ts
+++ b/apps/api/src/scraper/WebScraper/sitemap.ts
@@ -1,9 +1,10 @@
import axios from "axios";
import { axiosTimeout } from "../../lib/timeout";
import { parseStringPromise } from "xml2js";
-import { scrapWithFireEngine } from "./scrapers/fireEngine";
import { WebCrawler } from "./crawler";
-import { Logger } from "../../lib/logger";
+import { logger } from "../../lib/logger";
+import { scrapeURL } from "../scrapeURL";
+import { scrapeOptions } from "../../controllers/v1/types";
export async function getLinksFromSitemap(
{
@@ -17,17 +18,20 @@ export async function getLinksFromSitemap(
}
): Promise<string[]> {
try {
- let content: string;
+ let content: string = "";
try {
if (mode === 'axios' || process.env.FIRE_ENGINE_BETA_URL === '') {
const response = await axios.get(sitemapUrl, { timeout: axiosTimeout });
content = response.data;
} else if (mode === 'fire-engine') {
- const response = await scrapWithFireEngine({ url: sitemapUrl, fireEngineOptions: { engine:"playwright" } });
- content = response.html;
+ const response = await scrapeURL("sitemap", sitemapUrl, scrapeOptions.parse({ formats: ["rawHtml"] }), { forceEngine: "fire-engine;playwright" });;
+ if (!response.success) {
+ throw response.error;
+ }
+ content = response.document.rawHtml!;
}
} catch (error) {
- Logger.error(`Request failed for ${sitemapUrl}: ${error.message}`);
+ logger.error(`Request failed for ${sitemapUrl}: ${error.message}`);
return allUrls;
}
@@ -47,7 +51,7 @@ export async function getLinksFromSitemap(
allUrls.push(...validUrls);
}
} catch (error) {
- Logger.debug(`Error processing sitemapUrl: ${sitemapUrl} | Error: ${error.message}`);
+ logger.debug(`Error processing sitemapUrl: ${sitemapUrl} | Error: ${error.message}`);
}
return allUrls;
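The hunk above is representative of how callers migrate from the deleted scrapers to the new scrapeURL entry point. A usage sketch built from the identifiers shown in this diff (the first argument is treated here as an internal id for logging; error handling is illustrative):

```ts
import { scrapeURL } from "../scrapeURL";
import { scrapeOptions } from "../../controllers/v1/types";

async function fetchSitemapHtml(sitemapUrl: string): Promise<string> {
  const response = await scrapeURL(
    "sitemap",
    sitemapUrl,
    scrapeOptions.parse({ formats: ["rawHtml"] }),
    { forceEngine: "fire-engine;playwright" }, // bypass engine selection
  );
  if (!response.success) {
    throw response.error; // scrapeURL reports failure via a success/error union
  }
  return response.document.rawHtml!;
}
```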
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/docxProcessor.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/docxProcessor.test.ts
deleted file mode 100644
index 53237ef8..00000000
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/docxProcessor.test.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import * as docxProcessor from "../docxProcessor";
-
-describe("DOCX Processing Module - Integration Test", () => {
- it("should correctly process a simple DOCX file without the LLAMAPARSE_API_KEY", async () => {
- delete process.env.LLAMAPARSE_API_KEY;
- const { content, pageStatusCode, pageError } = await docxProcessor.fetchAndProcessDocx(
- "https://nvca.org/wp-content/uploads/2019/06/NVCA-Model-Document-Stock-Purchase-Agreement.docx"
- );
- expect(content.trim()).toContain(
- "SERIES A PREFERRED STOCK PURCHASE AGREEMENT"
- );
- expect(pageStatusCode).toBe(200);
- expect(pageError).toBeUndefined();
- });
-});
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/parseTable.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/parseTable.test.ts
deleted file mode 100644
index 8d644c7b..00000000
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/parseTable.test.ts
+++ /dev/null
@@ -1,128 +0,0 @@
-import { parseTablesToMarkdown, convertTableElementToMarkdown, convertTableRowElementToMarkdown, createMarkdownDividerRow } from '../parseTable';
-import cheerio from 'cheerio';
-
-describe('parseTablesToMarkdown', () => {
- it('converts a simple HTML table to Markdown', async () => {
- const html = `
-
- Header 1 | Header 2 |
- Row 1 Col 1 | Row 1 Col 2 |
- Row 2 Col 1 | Row 2 Col 2 |
-
- `;
-    const expectedMarkdown = `| Header 1 | Header 2 |\n| --- | --- |\n| Row 1 Col 1 | Row 1 Col 2 |\n| Row 2 Col 1 | Row 2 Col 2 |`;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with a single row to Markdown', async () => {
- const html = `
-
- Header 1 | Header 2 |
- Row 1 Col 1 | Row 1 Col 2 |
-
- `;
-    const expectedMarkdown = `| Header 1 | Header 2 |\n| --- | --- |\n| Row 1 Col 1 | Row 1 Col 2 |`;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with a single column to Markdown', async () => {
- const html = `
-
- Header 1 |
- Row 1 Col 1 |
- Row 2 Col 1 |
-
- `;
-    const expectedMarkdown = `| Header 1 |\n| --- |\n| Row 1 Col 1 |\n| Row 2 Col 1 |`;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with a single cell to Markdown', async () => {
- const html = `
-
- Header 1 |
- Row 1 Col 1 |
-
- `;
-    const expectedMarkdown = `| Header 1 |\n| --- |\n| Row 1 Col 1 |`;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with no header to Markdown', async () => {
- const html = `
-
- Row 1 Col 1 | Row 1 Col 2 |
- Row 2 Col 1 | Row 2 Col 2 |
-
- `;
-    const expectedMarkdown = `| Row 1 Col 1 | Row 1 Col 2 |\n| Row 2 Col 1 | Row 2 Col 2 |`;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with no rows to Markdown', async () => {
- const html = `
-
- `;
- const expectedMarkdown = ``;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with no cells to Markdown', async () => {
- const html = `
-
- `;
- const expectedMarkdown = ``;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with no columns to Markdown', async () => {
- const html = `
-
- `;
- const expectedMarkdown = ``;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
- it('converts a table with no table to Markdown', async () => {
- const html = ``;
- const expectedMarkdown = ``;
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
- });
-
-it('converts a table inside of a bunch of html noise', async () => {
- const html = `
-
-
Some text before
-
- Row 1 Col 1 | Row 1 Col 2 |
- Row 2 Col 1 | Row 2 Col 2 |
-
-
Some text after
-
- `;
- const expectedMarkdown = `
-
Some text before
-
| Row 1 Col 1 | Row 1 Col 2 |
-| Row 2 Col 1 | Row 2 Col 2 |
-
Some text after
-
`;
-
- const markdown = await parseTablesToMarkdown(html);
- expect(markdown).toBe(expectedMarkdown);
-});
-
-});
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/pdfProcessor.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/pdfProcessor.test.ts
deleted file mode 100644
index 18302654..00000000
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/pdfProcessor.test.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import * as pdfProcessor from '../pdfProcessor';
-
-describe('PDF Processing Module - Integration Test', () => {
- it('should correctly process a simple PDF file without the LLAMAPARSE_API_KEY', async () => {
- delete process.env.LLAMAPARSE_API_KEY;
- const { content, pageStatusCode, pageError } = await pdfProcessor.fetchAndProcessPdf('https://s3.us-east-1.amazonaws.com/storage.mendable.ai/rafa-testing/test%20%281%29.pdf', true);
- expect(content.trim()).toEqual("Dummy PDF file");
- expect(pageStatusCode).toEqual(200);
- expect(pageError).toBeUndefined();
- });
-
- it('should return a successful response for a valid scrape with PDF file and parsePDF set to false', async () => {
- const { content, pageStatusCode, pageError } = await pdfProcessor.fetchAndProcessPdf('https://arxiv.org/pdf/astro-ph/9301001.pdf', false);
- expect(pageStatusCode).toBe(200);
- expect(pageError).toBeUndefined();
- expect(content).toContain('/Title(arXiv:astro-ph/9301001v1 7 Jan 1993)>>endobj');
- }, 60000); // 60 seconds
-
-});
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/removeUnwantedElements.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/removeUnwantedElements.test.ts
deleted file mode 100644
index b3d4a244..00000000
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/removeUnwantedElements.test.ts
+++ /dev/null
@@ -1,192 +0,0 @@
-import { removeUnwantedElements } from "../removeUnwantedElements";
-import { PageOptions } from "../../../../lib/entities";
-
-describe('removeUnwantedElements', () => {
- it('should remove script, style, iframe, noscript, meta, and head tags', () => {
- const html = `TestContent
`;
- const options: PageOptions = {};
- const result = removeUnwantedElements(html, options);
- expect(result).not.toContain('